// Huffman coding demo: builds a code tree for a fixed string, then encodes and decodes it.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Number of entries in the 0/1 path scratch array that encode() hands to the
// tree walkers; main() also multiplies it by the text length to size the
// encoded-text buffer.
#define MAX_TREE_HT 256
// One node of the Huffman tree. Leaves carry a symbol; internal nodes only
// link two subtrees.
typedef struct Node {
    int weight;          // symbol frequency, or the summed weight of both subtrees
    char data;           // the symbol; internal nodes are created with '\0'
    struct Node *left;   // subtree reached by a 0 bit
    struct Node *right;  // subtree reached by a 1 bit
} Node;
// 比较函数,用于 qsort 按照权重排序
|
||
|
|
int compare(const void* a, const void* b) {
|
||
|
|
return (*(Node**)a)->weight - (*(Node**)b)->weight;
|
||
|
|
}
// 创建一个新的节点
|
||
|
|
Node* newNode(char data, int weight) {
|
||
|
|
Node* temp = (Node*)malloc(sizeof(Node));
|
||
|
|
temp->left = temp->right = NULL;
|
||
|
|
temp->data = data;
|
||
|
|
temp->weight = weight;
|
||
|
|
return temp;
|
||
|
|
}
// 构建 Huffman 树
|
||
|
|
Node* buildHuffmanTree(char data[], int weight[], int size) {
|
||
|
|
// 创建一个指向节点的数组
|
||
|
|
Node** nodeArr = (Node**)malloc(size * sizeof(Node*));
|
||
|
|
// 将每个字符及其权重转化为节点并存入数组
|
||
|
|
for (int i = 0; i < size; ++i) {
|
||
|
|
nodeArr[i] = newNode(data[i], weight[i]);
|
||
|
|
}
|
||
|
|
// 使用 qsort 按照权重排序节点
|
||
|
|
qsort(nodeArr, size, sizeof(Node*), compare);
|
||
|
|
// 构建 Huffman 树
|
||
|
|
while (size > 1) {
|
||
|
|
// 取出两个最小的节点
|
||
|
|
Node* left = nodeArr[0];
|
||
|
|
Node* right = nodeArr[1];
|
||
|
|
// 创建一个新的父节点,权重为两个子节点的权重之和
|
||
|
|
Node* parent = newNode('\0', left->weight + right->weight);
|
||
|
|
parent->left = left;
|
||
|
|
parent->right = right;
|
||
|
|
// 将父节点插入到 heap 中
|
||
|
|
nodeArr[0] = parent;
|
||
|
|
nodeArr[1] = nodeArr[size - 1]; // 将最后一个节点放到第二个位置
|
||
|
|
size--;
|
||
|
|
// 重新排序数组
|
||
|
|
qsort(nodeArr, size, sizeof(Node*), compare);
|
||
|
|
}
|
||
|
|
Node* root = nodeArr[0];
|
||
|
|
free(nodeArr);
|
||
|
|
return root;
|
||
|
|
}
// 打印 Huffman 编码
|
||
|
|
void printHuffmanCodes(Node* root, int arr[], int top, char* codes[]) {
|
||
|
|
if (root->left) {
|
||
|
|
arr[top] = 0;
|
||
|
|
printHuffmanCodes(root->left, arr, top + 1, codes);
|
||
|
|
}
|
||
|
|
|
||
|
|
if (root->right) {
|
||
|
|
arr[top] = 1;
|
||
|
|
printHuffmanCodes(root->right, arr, top + 1, codes);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 如果是叶节点,打印字符和对应的编码
|
||
|
|
if (!root->left && !root->right) {
|
||
|
|
codes[root->data] = (char*)malloc((top + 1) * sizeof(char));
|
||
|
|
if (!codes[root->data]) {
|
||
|
|
fprintf(stderr, "Memory allocation failed\n");
|
||
|
|
exit(1);
|
||
|
|
}
|
||
|
|
for (int i = 0; i < top; ++i)
|
||
|
|
codes[root->data][i] = '0' + arr[i];
|
||
|
|
codes[root->data][top] = '\0';
|
||
|
|
printf("%c: %s\n", root->data, codes[root->data]);
|
||
|
|
}
|
||
|
|
}
// 获取字符的 Huffman 编码
|
||
|
|
const char* getHuffmanCode(Node* root, char ch, int arr[], int top, char* codes[]) {
|
||
|
|
if (codes[ch])
|
||
|
|
return codes[ch];
|
||
|
|
|
||
|
|
if (root->left) {
|
||
|
|
arr[top] = 0;
|
||
|
|
const char* code = getHuffmanCode(root->left, ch, arr, top + 1, codes);
|
||
|
|
if (code)
|
||
|
|
return code;
|
||
|
|
}
|
||
|
|
|
||
|
|
if (root->right) {
|
||
|
|
arr[top] = 1;
|
||
|
|
const char* code = getHuffmanCode(root->right, ch, arr, top + 1, codes);
|
||
|
|
if (code)
|
||
|
|
return code;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 如果是叶节点,检查是否是我们要找的字符
|
||
|
|
if (!root->left && !root->right && root->data == ch) {
|
||
|
|
codes[root->data] = (char*)malloc((top + 1) * sizeof(char));
|
||
|
|
if (!codes[root->data]) {
|
||
|
|
fprintf(stderr, "Memory allocation failed\n");
|
||
|
|
exit(1);
|
||
|
|
}
|
||
|
|
for (int i = 0; i < top; ++i)
|
||
|
|
codes[root->data][i] = '0' + arr[i];
|
||
|
|
codes[root->data][top] = '\0';
|
||
|
|
return codes[root->data];
|
||
|
|
}
|
||
|
|
|
||
|
|
return NULL;
|
||
|
|
}
// 编码函数:使用 Huffman 树编码文本
|
||
|
|
void encode(Node* root, const char* str, char* encodedStr, char* codes[]) {
|
||
|
|
int arr[MAX_TREE_HT], top = 0;
|
||
|
|
printf("Huffman Codes:\n");
|
||
|
|
printHuffmanCodes(root, arr, top, codes);
|
||
|
|
|
||
|
|
printf("\nEncoded Text: ");
|
||
|
|
for (int i = 0; str[i] != '\0'; i++) {
|
||
|
|
const char* code = getHuffmanCode(root, str[i], arr, 0, codes);
|
||
|
|
if (code) {
|
||
|
|
printf("%s", code);
|
||
|
|
strcat(encodedStr, code); // 将每个字符的编码拼接到最终的编码字符串中
|
||
|
|
} else {
|
||
|
|
fprintf(stderr, "Character '%c' not found in Huffman tree\n", str[i]);
|
||
|
|
exit(1);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
printf("\n");
|
||
|
|
}
// 解码函数:从 Huffman 树解码编码文本
|
||
|
|
void decode(Node* root, const char* encodedStr) {
|
||
|
|
Node* current = root; // 从根节点开始
|
||
|
|
printf("\nDecoded Text: ");
|
||
|
|
|
||
|
|
for (int i = 0; encodedStr[i] != '\0'; i++) {
|
||
|
|
// 根据编码字符串的每个字符决定树的遍历方向
|
||
|
|
if (encodedStr[i] == '0') {
|
||
|
|
current = current->left; // 向左子节点移动
|
||
|
|
} else if (encodedStr[i] == '1') {
|
||
|
|
current = current->right; // 向右子节点移动
|
||
|
|
}
|
||
|
|
|
||
|
|
// 如果到达叶节点,输出字符并返回根节点
|
||
|
|
if (!current->left && !current->right) {
|
||
|
|
printf("%c", current->data);
|
||
|
|
current = root; // 重置为根节点,准备解码下一个字符
|
||
|
|
}
|
||
|
|
}
|
||
|
|
printf("\n");
|
||
|
|
}
// 释放 Huffman 树的内存
|
||
|
|
void freeHuffmanTree(Node* node) {
|
||
|
|
if (node == NULL)
|
||
|
|
return;
|
||
|
|
|
||
|
|
freeHuffmanTree(node->left);
|
||
|
|
freeHuffmanTree(node->right);
|
||
|
|
free(node);
|
||
|
|
}
// Frees every code string held in the 256-slot table `codes` and leaves all
// slots set to NULL. Slots that are already NULL are fine, since free(NULL)
// is a no-op.
void freeHuffmanCodes(char* codes[]) {
    char** const end = codes + 256;
    for (char** slot = codes; slot != end; ++slot) {
        free(*slot);
        *slot = NULL;
    }
}
int main() {
|
||
|
|
const char* text = "hello huffman coding";
|
||
|
|
|
||
|
|
// 计算每个字符的频率
|
||
|
|
int freq[256] = {0};
|
||
|
|
for (int i = 0; text[i] != '\0'; i++) {
|
||
|
|
freq[(unsigned char)text[i]]++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// 构建字符和频率的数组
|
||
|
|
char data[256];
|
||
|
|
int frequencies[256];
|
||
|
|
int size = 0;
|
||
|
|
for (int i = 0; i < 256; i++) {
|
||
|
|
if (freq[i] > 0) {
|
||
|
|
data[size] = (char)i;
|
||
|
|
frequencies[size] = freq[i];
|
||
|
|
size++;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 构建 Huffman 树
|
||
|
|
Node* root = buildHuffmanTree(data, frequencies, size);
|
||
|
|
|
||
|
|
// 初始化 Huffman 编码数组
|
||
|
|
char* codes[256] = {NULL};
|
||
|
|
|
||
|
|
// 编码
|
||
|
|
char encodedText[MAX_TREE_HT * strlen(text)];
|
||
|
|
memset(encodedText, 0, sizeof(encodedText));
|
||
|
|
encode(root, text, encodedText, codes);
|
||
|
|
|
||
|
|
// 解码
|
||
|
|
decode(root, encodedText);
|
||
|
|
|
||
|
|
// 释放 Huffman 树的内存
|
||
|
|
freeHuffmanTree(root);
|
||
|
|
|
||
|
|
// 释放 Huffman 编码的内存
|
||
|
|
freeHuffmanCodes(codes);
|
||
|
|
|
||
|
|
return 0;
|
||
|
|
}