65 lines
1.6 KiB
Python
65 lines
1.6 KiB
Python
"""
|
|
Huffman 编码 — 贪心算法
|
|
构建最优前缀码实现数据压缩
|
|
"""
|
|
import heapq
|
|
from collections import Counter
|
|
|
|
class HuffmanNode:
|
|
def __init__(self, char, freq):
|
|
self.char = char
|
|
self.freq = freq
|
|
self.left = None
|
|
self.right = None
|
|
|
|
def __lt__(self, other):
|
|
return self.freq < other.freq
|
|
|
|
def build_huffman_tree(text):
|
|
"""构建 Huffman 树"""
|
|
freq = Counter(text)
|
|
heap = [HuffmanNode(char, f) for char, f in freq.items()]
|
|
heapq.heapify(heap)
|
|
|
|
while len(heap) > 1:
|
|
left = heapq.heappop(heap)
|
|
right = heapq.heappop(heap)
|
|
merged = HuffmanNode(None, left.freq + right.freq)
|
|
merged.left = left
|
|
merged.right = right
|
|
heapq.heappush(heap, merged)
|
|
|
|
return heap[0] if heap else None
|
|
|
|
def generate_codes(node, prefix="", code_map=None):
|
|
"""从 Huffman 树生成编码"""
|
|
if code_map is None:
|
|
code_map = {}
|
|
|
|
if node is None:
|
|
return code_map
|
|
|
|
if node.char is not None:
|
|
code_map[node.char] = prefix or "0"
|
|
else:
|
|
generate_codes(node.left, prefix + "0", code_map)
|
|
generate_codes(node.right, prefix + "1", code_map)
|
|
|
|
return code_map
|
|
|
|
if __name__ == "__main__":
|
|
text = "ABRACADABRA"
|
|
print(f"原文: {text}")
|
|
|
|
root = build_huffman_tree(text)
|
|
codes = generate_codes(root)
|
|
|
|
print("\nHuffman 编码:")
|
|
for char, code in sorted(codes.items()):
|
|
print(f" '{char}': {code}")
|
|
|
|
encoded = "".join(codes[c] for c in text)
|
|
print(f"\n编码结果: {encoded}")
|
|
print(f"原始大小: {len(text) * 8} bits")
|
|
print(f"压缩大小: {len(encoded)} bits")
|