Files

65 lines
1.6 KiB
Python
Raw Permalink Normal View History

2026-06-16 09:35:51 +08:00
"""
Huffman 编码 — 贪心算法
构建最优前缀码实现数据压缩
"""
import heapq
from collections import Counter
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
def build_huffman_tree(text):
"""构建 Huffman 树"""
freq = Counter(text)
heap = [HuffmanNode(char, f) for char, f in freq.items()]
heapq.heapify(heap)
while len(heap) > 1:
left = heapq.heappop(heap)
right = heapq.heappop(heap)
merged = HuffmanNode(None, left.freq + right.freq)
merged.left = left
merged.right = right
heapq.heappush(heap, merged)
return heap[0] if heap else None
def generate_codes(node, prefix="", code_map=None):
"""从 Huffman 树生成编码"""
if code_map is None:
code_map = {}
if node is None:
return code_map
if node.char is not None:
code_map[node.char] = prefix or "0"
else:
generate_codes(node.left, prefix + "0", code_map)
generate_codes(node.right, prefix + "1", code_map)
return code_map
if __name__ == "__main__":
text = "ABRACADABRA"
print(f"原文: {text}")
root = build_huffman_tree(text)
codes = generate_codes(root)
print("\nHuffman 编码:")
for char, code in sorted(codes.items()):
print(f" '{char}': {code}")
encoded = "".join(codes[c] for c in text)
print(f"\n编码结果: {encoded}")
print(f"原始大小: {len(text) * 8} bits")
print(f"压缩大小: {len(encoded)} bits")