This module implements Huffman coding for text compression and decompression.
Functions
character_frequency
Analyzes a string and returns character frequency counts.
from dsa.huffman import character_frequency
freq = character_frequency(s)
Dictionary mapping each character to its frequency count
from dsa.huffman import character_frequency
text = "hello world"
freq = character_frequency(text)
print(freq) # {'h': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'w': 1, 'r': 1, 'd': 1}
build_frequency_table
Builds a priority queue of characters based on their frequencies.
from dsa.huffman import build_frequency_table
pq = build_frequency_table(s)
Priority queue containing TreeNodes of characters, prioritized by frequency
from dsa.huffman import build_frequency_table
text = "hello"
pq = build_frequency_table(text)
# Priority queue with characters sorted by frequency
build_huffman_tree
Constructs a Huffman tree from a priority queue of character frequencies.
from dsa.huffman import build_huffman_tree
tree = build_huffman_tree(pq)
Priority queue containing TreeNodes of characters based on frequencies
The constructed Huffman tree
from dsa.huffman import build_frequency_table, build_huffman_tree
text = "hello world"
pq = build_frequency_table(text)
huffman_tree = build_huffman_tree(pq)
build_huffman_dictionary
Generates a Huffman encoding dictionary from a tree node.
from dsa.huffman import build_huffman_dictionary
huff_dict = build_huffman_dictionary(node, bit_string="")
The root node of the Huffman tree
The current bit string path (used internally during recursion)
Dictionary mapping each character to its binary encoding
from dsa.huffman import build_frequency_table, build_huffman_tree, build_huffman_dictionary
text = "hello"
pq = build_frequency_table(text)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
print(huff_dict) # {'h': '00', 'e': '01', 'l': '1', 'o': '10', ...}
huffman_encode
Encodes a string using a Huffman dictionary.
from dsa.huffman import huffman_encode
encoded = huffman_encode(st, hd)
The Huffman dictionary mapping characters to bit strings
The encoded string as a binary bit string
from dsa.huffman import (
build_frequency_table,
build_huffman_tree,
build_huffman_dictionary,
huffman_encode
)
text = "hello"
pq = build_frequency_table(text)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
encoded = huffman_encode(text, huff_dict)
print(encoded) # Binary string like "0001111010"
huffman_decode
Decodes encoded data using a Huffman tree.
from dsa.huffman import huffman_decode
decoded = huffman_decode(encoded_data, tree)
The binary bit string to decode
The Huffman tree used for decoding
The decoded original string
from dsa.huffman import (
build_frequency_table,
build_huffman_tree,
build_huffman_dictionary,
huffman_encode,
huffman_decode
)
original = "hello world"
# Encode
pq = build_frequency_table(original)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
encoded = huffman_encode(original, huff_dict)
# Decode
decoded = huffman_decode(encoded, tree)
print(decoded == original) # True
bitstring_to_bytes
Converts a binary bit string to bytes.
from dsa.huffman import bitstring_to_bytes
byte_data = bitstring_to_bytes(s)
The binary bit string to convert
The bit string converted to bytes
from dsa.huffman import bitstring_to_bytes
bit_string = "0110100001100101011011000110110001101111"
byte_data = bitstring_to_bytes(bit_string)
print(byte_data) # b'hello'
bytes_to_bitstring
Converts bytes back to a binary bit string.
from dsa.huffman import bytes_to_bitstring
bit_string = bytes_to_bitstring(ba, bitlength=8)
The bit length for the last byte (handles padding)
The bytes converted to a binary bit string
from dsa.huffman import bytes_to_bitstring
byte_data = b'hello'
bit_string = bytes_to_bitstring(byte_data)
print(bit_string) # Binary representation
Complete Compression Example
from dsa.huffman import (
build_frequency_table,
build_huffman_tree,
build_huffman_dictionary,
huffman_encode,
huffman_decode,
bitstring_to_bytes,
bytes_to_bitstring
)
# Original text
original = "hello world! this is a huffman encoding example."
# Compression
pq = build_frequency_table(original)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
encoded = huffman_encode(original, huff_dict)
# Convert to bytes for storage
compressed_bytes = bitstring_to_bytes(encoded)
print(f"Original size: {len(original)} characters ({len(original) * 8} bits)")
print(f"Compressed size: {len(compressed_bytes)} bytes ({len(encoded)} bits)")
print(f"Compression ratio: {len(encoded) / (len(original) * 8) * 100:.1f}%")
# Decompression
retrieved_bitstring = bytes_to_bitstring(compressed_bytes, len(encoded) % 8 or 8)
decoded = huffman_decode(encoded, tree)
print(f"Decompression successful: {decoded == original}")