#light // I.e. lightweight syntax
namespace Nassar
// An F# module is a "static class." All the declarations
// that follow are static members of this class.
module Huffman
type Incidence = HashMultiMap<char, int>
type CharSet = Set<char>
let addToCount (d : Incidence) (c : char) =
// For some reason, the "do" is necessary here.
do match d.TryFind c with
Some(n) -> d.Replace(c, n + 1)
_ -> d.Add(c, 1)
d // Return the dictionary.
let count (s : string) =
// I only just noticed that System.String implements
// IEnumerable. The F# compiler correctly inferred from
// the type of Incidence that s has to be a sequence of
// char.
s |> Seq.fold addToCount (Incidence.Create())
type HuffmanNode =
Leaf of HuffmanLeaf
Tree of HuffmanTree
and HuffmanLeaf = {Count: int;
Char: char;}
and HuffmanTree = {Count: int;
Chars: CharSet;
Left: HuffmanNode;
Right: HuffmanNode;}
let get_count node =
match node with
Leaf l -> l.Count
Tree t -> t.Count
let has_char node c =
match node with
Leaf l -> l.Char = c
Tree t -> t.Chars.Contains c
let get_set node =
match node with
Leaf leaf -> CharSet.Singleton leaf.Char
Tree tree -> tree.Chars
let merge_nodes left right =
Tree { Count = get_count left + get_count right;
Chars = (get_set left) + (get_set right);
Left = left;
Right = right }
let node_priority l r =
get_count l - get_count r
let rec reduce_list l =
// What I should really do is put all the new leaf nodes
// into a priority queue, and pull off two at a time as
// long as the length of the queue is > 1. Then
// make_tree wouldn't have to convert the sequence
// it creates from the dictionary into a list.
match l with
l::r::tail -> (merge_nodes l r)::tail
|> List.sort node_priority
// ...and recurse.
|> reduce_list
[tree] -> tree
[] -> failwith "Can't operate on empty list!"
let rec encode h s =
// Encode a character by branching to the leaf node
// with that character.
let rec encode' node c =
match node with
Leaf l -> []
// TODO Add an accumulator to make this tail-recursive,
// rather than prepending the new element.
Tree t -> if has_char t.Left c then 0::(encode' t.Left c) else 1::(encode' t.Right c)
s > Seq.map (fun c -> encode' h c) > Seq.concat
// Decode the int list l using the tree h.
let rec decode h l =
let rec decode' node l =
match node, l with
// If we've arrived at a leaf, this must be the character
// we're looking for. Add it to the sequence, and resume
// from the top of the tree.
// TODO Add an accumulator to make this tail-recursive.
Leaf leaf, _ -> leaf.Char :: (decode h l)
// A 0 in the stream means a left branch during
// the encoding, and v.v.
Tree tree, n::tail -> if n = 0 then decode' tree.Left tail else decode' tree.Right tail
// Something was wrong in the encoding.
_, _ -> failwith "Something has gone very wrong."
match l with
[] -> []
// Start navigating down the tree.
_ -> decode' h l
let make_tree s =
let d = count s
// IDictionary<_, _> implements
// ICollection<KeyValuePair<_,_>>, so enumerate the
// pairs and convert each into a leaf node.
let leaves = d |> Seq.map (fun pair ->
Leaf { Count = pair.Value;
Char = pair.Key }; )
|> Seq.to_list
|> List.sort node_priority
reduce_list leaves
Sunday, January 20, 2008
Huffman Coding in F#
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment