#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
// 字符频度结点
typedef struct {
unsigned char uch;
unsigned long weight;
} TmpNode;
// 哈夫曼树结点
typedef struct {
unsigned char uch;
unsigned long weight;
char *code;
int parent, lchild, rchild;
} HufNode, *HufTree;
// 选择最小和次小的两个结点的函数
void SelectMinTwo(HufNode *huf_tree, unsigned int n, int *s1, int *s2){
// 找最小
unsigned int i;
unsigned long min = ULONG_MAX;
for(i = 0; i < n; ++i)
if(huf_tree[i].parent == 0 && huf_tree[i].weight < min){
min = huf_tree[i].weight;
*s1 = i;
}
huf_tree[*s1].parent=1; // 标记此结点已被选中
// 找次小
min=ULONG_MAX;
for(i = 0; i < n; ++i)
if(huf_tree[i].parent == 0 && huf_tree[i].weight < min){
min = huf_tree[i].weight;
*s2 = i;
}
}
// 建立哈夫曼树
void CreateTree(HufNode *huf_tree, unsigned int char_kinds, unsigned int node_num){
unsigned int i;
int s1, s2;
for(i = char_kinds; i < node_num; ++i) {
SelectMinTwo(huf_tree, i, &s1, &s2); // 选择最小的两个结点
huf_tree[s1].parent = huf_tree[s2].parent = i;
huf_tree[i].lchild = s1;
huf_tree[i].rchild = s2;
huf_tree[i].weight = huf_tree[s1].weight + huf_tree[s2].weight;
}
}
// 生成哈夫曼编码
void HufCode(HufNode *huf_tree, unsigned char_kinds){
unsigned int i;
int cur, next, index;
char *code_tmp = (char *)malloc(256*sizeof(char));
code_tmp[256-1] = '\0';
for(i = 0; i < char_kinds; ++i) {
index = 256-1;
for(cur = i, next = huf_tree[i].parent; next != 0;
cur = next, next = huf_tree[next].parent)
if(huf_tree[next].lchild == cur)
code_tmp[--index] = '0'; // 左‘0’
else
code_tmp[--index] = '1'; // 右‘1’
huf_tree[i].code = (char *)malloc((256-index)*sizeof(char));
strcpy(huf_tree[i].code, &code_tmp[index]);
}
free(code_tmp);
}
// 压缩函数
int compress(char *ifname, char *ofname){
unsigned int i, j;
unsigned int char_kinds;
unsigned char char_temp; // 暂存8bits字符
unsigned long file_len = 0;
FILE *infile, *outfile;
TmpNode node_temp;
unsigned int node_num;
HufTree huf_tree;
char code_buf[256] = "\0";
unsigned int code_len;
TmpNode *tmp_nodes =(TmpNode *)malloc(256*sizeof(TmpNode));
// 初始化暂存结点
for(i = 0; i < 256; ++i){
tmp_nodes[i].weight = 0;
tmp_nodes[i].uch = (unsigned char)i; // 数组的256个下标与256种字符对应
}
infile = fopen(ifname, "rb");
if (infile == NULL)
return -1;
fread((char *)&char_temp, sizeof(unsigned char), 1, infile); // 读入一个字符
while(!feof(infile)){
++tmp_nodes[char_temp].weight; // 统计下标对应字符的权重,利用数组的随机访问快速统计字符频度
++file_len;
fread((char *)&char_temp, sizeof(unsigned char), 1, infile); // 读入一个字符
}
fclose(infile);
// 排序,将频度为零的放最后
for(i = 0; i < 256-1; ++i)
for(j = i+1; j < 256; ++j)
if(tmp_nodes[i].weight < tmp_nodes[j].weight){
node_temp = tmp_nodes[i];
tmp_nodes[i] = tmp_nodes[j];
tmp_nodes[j] = node_temp;
}
// 统计实际的字符种类(出现次数不为0)
for(i = 0; i < 256; ++i)
if(tmp_nodes[i].weight == 0)
break;
char_kinds = i;
if (char_kinds == 1){
outfile = fopen(ofname, "wb");
fwrite((char *)&char_kinds, sizeof(unsigned int), 1, outfile); // 写入字符种类
fwrite((char *)&tmp_nodes[0].uch, sizeof(unsigned char), 1, outfile); // 写入唯一的字符
fwrite((char *)&tmp_nodes[0].weight, sizeof(unsigned long), 1, outfile); // 写入字符频度
free(tmp_nodes);
fclose(outfile);
}
else{
node_num = 2 * char_kinds - 1;
huf_tree = (HufNode *)malloc(node_num*sizeof(HufNode));
for(i = 0; i < char_kinds; ++i) {
huf_tree[i].uch = tmp_nodes[i].uch;
huf_tree[i].weight = tmp_nodes[i].weight;
huf_tree[i].parent = 0;
}
free(tmp_nodes);
for(; i < node_num; ++i)
huf_tree[i].parent = 0;
CreateTree(huf_tree, char_kinds, node_num);
HufCode(huf_tree, char_kinds);
outfile = fopen(ofname, "wb");
fwrite((char *)&char_kinds, sizeof(unsigned int), 1, outfile); // 写入字符种类
for(i = 0; i < char_kinds; ++i){
fwrite((char *)&huf_tree[i].uch, sizeof(unsigned char), 1, outfile); // 写入字符
fwrite((char *)&huf_tree[i].weight, sizeof(unsigned long), 1, outfile); // 写入字符对应权重
}
fwrite((char *)&file_len, sizeof(unsigned long), 1, outfile); // 写入文件长度
infile = fopen(ifname, "rb");
fread((char *)&char_temp, sizeof(unsigned char), 1, infile);
while(!feof(infile)){
for(i = 0; i < char_kinds; ++i)
if(char_temp == huf_tree[i].uch)
strcat(code_buf, huf_tree[i].code);
while(strlen(code_buf) >= 8){
char_temp = '\0'; // 清空字符暂存空间,改为暂存字符对应编码
for(i = 0; i < 8; ++i){
char_temp <<= 1;
if(code_buf[i] == '1')
char_temp |= 1;
}
fwrite((char *)&char_temp, sizeof(unsigned char), 1, outfile); // 将字节对应编码存入文件
strcpy(code_buf, code_buf+8); // 编码缓存去除已处理的前八位
}
fread((char *)&char_temp, sizeof(unsigned char), 1, infile);
}
// 处理最后不足8bits编码
code_len = strlen(code_buf);
if(code_len > 0){
char_temp = '\0';
for(i = 0; i < code_len; ++i){
char_temp <<= 1;
if(code_buf[i] == '1')
char_temp |= 1;
}
char_temp <<= 8-code_len; // 将编码字段从尾部移到字节的高位
fwrite((char *)&char_temp, sizeof(unsigned char), 1, outfile); // 存入最后一个字节
}
fclose(infile);
fclose(outfile);
for(i = 0; i < char_kinds; ++i)
free(huf_tree[i].code);
free(huf_tree);
}
}
// 解压函数
int extract(char *ifname, char *ofname){
unsigned int i;
unsigned long file_len;
unsigned long writen_len = 0;
FILE *infile, *outfile;
unsigned int char_kinds;
unsigned int node_num;
HufTree huf_tree;
unsigned char code_temp;
unsigned int root;
infile = fopen(ifname, "rb");
if (infile == NULL)
return -1;
// 读取压缩文件前端的字符及对应编码,用于重建哈夫曼树
fread((char *)&char_kinds, sizeof(unsigned int), 1, infile); // 读取字符种类数
if (char_kinds == 1){
fread((char *)&code_temp, sizeof(unsigned char), 1, infile); // 读取唯一的字符
fread((char *)&file_len, sizeof(unsigned long), 1, infile); // 读取文件长度
outfile = fopen(ofname, "wb"); // 打开压缩后将生成的文件
while (file_len--)
fwrite((char *)&code_temp, sizeof(unsigned char), 1, outfile);
fclose(infile);
fclose(outfile);
}
else{
node_num = 2 * char_kinds - 1; // 根据字符种类数,计算建立哈夫曼树所需结点数
huf_tree = (HufNode *)malloc(node_num*sizeof(HufNode));
// �