Skip to main content
This module implements Huffman coding for text compression and decompression.

Functions

character_frequency

Analyzes a string and returns character frequency counts.
from dsa.huffman import character_frequency

freq = character_frequency(s)
s
str
required
The string to analyze
freq
dict
Dictionary mapping each character to its frequency count
Example
from dsa.huffman import character_frequency

text = "hello world"
freq = character_frequency(text)
print(freq)  # {'h': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'w': 1, 'r': 1, 'd': 1}

build_frequency_table

Builds a priority queue of characters based on their frequencies.
from dsa.huffman import build_frequency_table

pq = build_frequency_table(s)
s
str
required
The string to encode
pq
PriorityQueue
Priority queue containing TreeNodes of characters, prioritized by frequency
Example
from dsa.huffman import build_frequency_table

text = "hello"
pq = build_frequency_table(text)
# Priority queue with characters sorted by frequency

build_huffman_tree

Constructs a Huffman tree from a priority queue of character frequencies.
from dsa.huffman import build_huffman_tree

tree = build_huffman_tree(pq)
pq
PriorityQueue
required
Priority queue containing TreeNodes of characters based on frequencies
tree
Tree
The constructed Huffman tree
Example
from dsa.huffman import build_frequency_table, build_huffman_tree

text = "hello world"
pq = build_frequency_table(text)
huffman_tree = build_huffman_tree(pq)

build_huffman_dictionary

Generates a Huffman encoding dictionary from a tree node.
from dsa.huffman import build_huffman_dictionary

huff_dict = build_huffman_dictionary(node, bit_string="")
node
TreeNode
required
The root node of the Huffman tree
bit_string
str
default:"\"\""
The current bit string path (used internally during recursion)
huff_dict
dict
Dictionary mapping each character to its binary encoding
Example
from dsa.huffman import build_frequency_table, build_huffman_tree, build_huffman_dictionary

text = "hello"
pq = build_frequency_table(text)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
print(huff_dict)  # {'h': '00', 'e': '01', 'l': '1', 'o': '10', ...}

huffman_encode

Encodes a string using a Huffman dictionary.
from dsa.huffman import huffman_encode

encoded = huffman_encode(st, hd)
st
str
required
The string to encode
hd
dict
required
The Huffman dictionary mapping characters to bit strings
encoded
str
The encoded string as a binary bit string
from dsa.huffman import (
    build_frequency_table,
    build_huffman_tree,
    build_huffman_dictionary,
    huffman_encode
)

text = "hello"
pq = build_frequency_table(text)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)

encoded = huffman_encode(text, huff_dict)
print(encoded)  # Binary string like "0001111010"

huffman_decode

Decodes encoded data using a Huffman tree.
from dsa.huffman import huffman_decode

decoded = huffman_decode(encoded_data, tree)
encoded_data
str
required
The binary bit string to decode
tree
Tree
required
The Huffman tree used for decoding
decoded
str
The decoded original string
Example
from dsa.huffman import (
    build_frequency_table,
    build_huffman_tree,
    build_huffman_dictionary,
    huffman_encode,
    huffman_decode
)

original = "hello world"

# Encode
pq = build_frequency_table(original)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
encoded = huffman_encode(original, huff_dict)

# Decode
decoded = huffman_decode(encoded, tree)
print(decoded == original)  # True

bitstring_to_bytes

Converts a binary bit string to bytes.
from dsa.huffman import bitstring_to_bytes

byte_data = bitstring_to_bytes(s)
s
str
required
The binary bit string to convert
byte_data
bytes
The bit string converted to bytes
Example
from dsa.huffman import bitstring_to_bytes

bit_string = "0110100001100101011011000110110001101111"
byte_data = bitstring_to_bytes(bit_string)
print(byte_data)  # b'hello'

bytes_to_bitstring

Converts bytes back to a binary bit string.
from dsa.huffman import bytes_to_bitstring

bit_string = bytes_to_bitstring(ba, bitlength=8)
ba
bytes
required
The bytes to convert
bitlength
int
default:"8"
The bit length for the last byte (handles padding)
bit_string
str
The bytes converted to a binary bit string
Example
from dsa.huffman import bytes_to_bitstring

byte_data = b'hello'
bit_string = bytes_to_bitstring(byte_data)
print(bit_string)  # Binary representation
from dsa.huffman import (
    build_frequency_table,
    build_huffman_tree,
    build_huffman_dictionary,
    huffman_encode,
    huffman_decode,
    bitstring_to_bytes,
    bytes_to_bitstring
)

# Original text
original = "hello world! this is a huffman encoding example."

# Compression
pq = build_frequency_table(original)
tree = build_huffman_tree(pq)
huff_dict = build_huffman_dictionary(tree.root)
encoded = huffman_encode(original, huff_dict)

# Convert to bytes for storage
compressed_bytes = bitstring_to_bytes(encoded)

print(f"Original size: {len(original)} characters ({len(original) * 8} bits)")
print(f"Compressed size: {len(compressed_bytes)} bytes ({len(encoded)} bits)")
print(f"Compression ratio: {len(encoded) / (len(original) * 8) * 100:.1f}%")

# Decompression
retrieved_bitstring = bytes_to_bitstring(compressed_bytes, len(encoded) % 8 or 8)
decoded = huffman_decode(encoded, tree)

print(f"Decompression successful: {decoded == original}")

Build docs developers (and LLMs) love