%%% Huffman coding demo in MATLAB. The encoder (norm2huff) and decoder
%%% (huff2norm) are provided as functions further down in this listing.
%%% The script reads an image, compresses it, decompresses it again and
%%% verifies that the round trip is lossless.
clear;  % start from an empty workspace ("clear all" would also flush cached functions)

fprintf('Reading data ....')
% The semicolon matters here: without it the whole pixel matrix is echoed
% to the command window.
data = imread('E:\temp\originalimage(holl).bmp');
data = uint8(data);  % the codec only accepts uint8 input
fprintf('Done!\n')

fprintf('Compressing data...')
[zipped, info] = norm2huff(data);  % Huffman-encode the pixels
fprintf('Done!\n')

fprintf('Decompressing data...')
unzipped = huff2norm(zipped, info);  % decode back to the original symbols
fprintf('Done!\n')

% Intentionally left without a semicolon: prints 1 when the decoded data
% matches the original exactly.
isOK = isequal(data(:), unzipped(:))

% Show the sizes of the original, compressed and decompressed data.
whos data zipped unzipped
%%%%%%%%%% norm2huff %%%%%%%%%%
function [zipped, info] = norm2huff(vector)
% NORM2HUFF Huffman encoder for uint8 data.
%   ZIPPED = NORM2HUFF(X) returns the Huffman-coded bit stream of X, packed
%   8 bits per element (least significant bit first) into a uint8 row
%   vector. A matrix X is processed as X(:).
%
%   [ZIPPED, INFO] = NORM2HUFF(X) also returns the structure needed by the
%   decoder:
%     INFO.pad        - number of zero bits appended to fill the last byte
%     INFO.huffcodes  - sparse column vector mapping each codeword (stored
%                       as an integer with a marker bit above its last
%                       code bit) to the symbol value + 1
%     INFO.ratio      - compressed length / original length
%     INFO.length     - number of elements in the original data
%     INFO.maxcodelen - length in bits of the longest codeword
%
%   X must be of class uint8, otherwise an error is raised.
%   Relies on the helper functions FREQUENCY and ADDNODE.

if ~isa(vector, 'uint8')
    error('input argument must be a uint8 vector');
end

vector = vector(:)';  % always work on a row vector

% Nothing to encode: return an empty stream with a consistent INFO so the
% frequency computation below never divides by zero.
if isempty(vector)
    zipped = zeros(1, 0, 'uint8');
    info.pad = 0;
    info.huffcodes = sparse(1, 1);
    info.ratio = NaN;
    info.length = 0;
    info.maxcodelen = 0;
    return
end

% Relative frequency of each of the 256 possible values. Entry k belongs
% to symbol value k-1, so "simbols" holds value+1, never 0.
f = frequency(vector);
simbols = find(f ~= 0);
f = f(simbols);

% Sort the used symbols by ascending frequency.
[f, sortindex] = sort(f);
simbols = simbols(sortindex);

% Build the Huffman tree bottom-up. Every entry of simbols_index is the
% group of leaves below one tree node; merging the two least frequent
% nodes prepends a 0 to all codewords of the first group and a 1 to all
% codewords of the second group.
len = length(simbols);
simbols_index = num2cell(1:len);
codeword_tmp = cell(len, 1);
if len == 1
    % A single distinct symbol would otherwise get a 0-bit codeword, which
    % the decoder cannot detect; give it the one-bit code "0".
    codeword_tmp{1} = uint8(0);
end
while length(f) > 1
    index1 = simbols_index{1};
    index2 = simbols_index{2};
    codeword_tmp(index1) = addnode(codeword_tmp(index1), uint8(0));
    codeword_tmp(index2) = addnode(codeword_tmp(index2), uint8(1));
    f = [sum(f(1:2)) f(3:end)];
    simbols_index = [{[index1 index2]} simbols_index(3:end)];
    % Re-sort so the two least frequent nodes come first again.
    [f, sortindex] = sort(f);
    simbols_index = simbols_index(sortindex);
end

% Lookup table: codeword{v+1} is the bit sequence for symbol value v.
codeword = cell(256, 1);
codeword(simbols) = codeword_tmp;

% Total number of code bits, so the bit buffer can be preallocated.
codelens = cellfun(@numel, codeword);
nbits = sum(codelens(double(vector) + 1));

% Concatenate the codewords of all input symbols into one 0/1 sequence.
bits = repmat(uint8(0), 1, nbits);
pointer = 1;
for index = 1:length(vector)
    code = codeword{double(vector(index)) + 1};
    len = length(code);
    bits(pointer + (0:len-1)) = code;
    pointer = pointer + len;
end

% Pad with zeros up to a whole number of bytes (0..7 bits; no padding is
% added when the stream is already byte aligned).
pad = mod(-length(bits), 8);
bits = [bits zeros(1, pad, 'uint8')];

% Keep only the codewords actually in use and convert each bit sequence
% into one integer: code bit k has weight 2^(k-1), and a marker bit at
% position len+1 records the codeword length.
codeword = codeword(simbols);
weights = 2.^(0:51);
maxcodelen = 0;
for index = 1:length(codeword)
    len = length(codeword{index});
    if len > 51
        % The marker bit must still fit into the integer range of a double.
        error('Huffman codeword longer than 51 bits cannot be stored');
    end
    maxcodelen = max(maxcodelen, len);
    code = sum(weights(codeword{index} == 1));
    codeword{index} = bitset(code, len + 1);
end
codeword = [codeword{:}];

% Pack the bit sequence into bytes, least significant bit first.
cols = length(bits) / 8;
bits = reshape(bits, 8, cols);
weights = 2.^(0:7);
zipped = uint8(weights * double(bits));

% Decoding table: huffcodes(codeword) = symbol value + 1, all other
% entries are 0 (meaning "not a complete codeword").
huffcodes = sparse(codeword(:), ones(numel(codeword), 1), simbols(:));

% Information required by the decoder.
info.pad = pad;
info.huffcodes = huffcodes;
info.ratio = cols ./ length(vector);
info.length = length(vector);
info.maxcodelen = maxcodelen;
end
%%%%%%%%%% addnode %%%%%%%%%%%%
function codeword_new = addnode(codeword_old, item)
% ADDNODE Prepend one bit to every codeword of a group.
%   CODEWORD_NEW = ADDNODE(CODEWORD_OLD, ITEM) returns a cell array of the
%   same size as CODEWORD_OLD in which each cell holds ITEM followed by
%   the previous contents of that cell.
codeword_new = cell(size(codeword_old));
for index = 1:numel(codeword_old)
    codeword_new{index} = [item codeword_old{index}];
end
end
%%%%%%%% huff2norm %%%%%%%%%%%%
function vector = huff2norm(zipped, info)
% HUFF2NORM Huffman decoder.
%   VECTOR = HUFF2NORM(ZIPPED, INFO) decodes the uint8 byte stream ZIPPED
%   and returns the decoded symbols as a uint8 row vector.
%   INFO supplies the fields read here:
%     INFO.pad       - number of padding bits to drop from the end
%     INFO.huffcodes - codeword lookup table (used through DECODE)
%     INFO.length    - number of symbols used to preallocate the output
%
%   ZIPPED must be of class uint8, otherwise an error is raised.
if ~isa(zipped, 'uint8')
    error('input argument must be a uint8 vector')
end

% Expand every byte into its 8 bits, least significant bit first. Each
% column of "bits" holds one byte, so bits(:) is the serial bit stream.
zipped = zipped(:)';
len = numel(zipped);
bits = false(8, len);
for bit = 1:8
    bits(bit, :) = bitget(zipped, bit) ~= 0;
end
bits = bits(:)';

% Drop the padding bits at the end of the stream.
len = numel(bits);
bits((len - info.pad + 1):end) = [];
len = numel(bits);

% Decode bit by bit. "code" accumulates the bits read so far (bit k of
% the codeword at binary position k). Before each lookup a marker bit is
% set just above the last bit read, so codewords of different lengths
% map to different integers.
vector = repmat(uint8(0), 1, info.length);
vectorindex = 1;
codeindex = 1;
code = 0;
for index = 1:len
    code = bitset(code, codeindex, double(bits(index)));
    codeindex = codeindex + 1;
    byte = decode(bitset(code, codeindex), info);
    if byte > 0
        % Complete codeword found; the table stores symbol value + 1.
        vector(vectorindex) = byte - 1;
        codeindex = 1;
        code = 0;
        vectorindex = vectorindex + 1;
    end
end
end
%%%%%%%%%%%%% decode %%%%%%%%%%
function byte = decode(code, info)
% DECODE Look up one codeword in the Huffman table.
%   BYTE = DECODE(CODE, INFO) returns the entry of INFO.huffcodes at index
%   CODE. The caller treats a result of 0 as "no codeword matched" and any
%   positive result as symbol value + 1.
byte = full(info.huffcodes(code));  % full(): return a plain scalar even for a sparse table
end
%%%%%%%%%% frequency %%%%%%%%%%
function f = frequency(vector)
% FREQUENCY Relative frequency of every possible uint8 value.
%   F = FREQUENCY(VECTOR) returns a 1-by-256 row vector in which F(k+1) is
%   the fraction of elements of VECTOR equal to k, for k = 0..255.
%   For an empty VECTOR all frequencies are 0.
%
%   VECTOR must be of class uint8, otherwise an error is raised.
if ~isa(vector, 'uint8')
    error('input argument must be a uint8 vector ')
end

f = zeros(1, 256);
len = numel(vector);
if len == 0
    return  % avoid 0/0 = NaN for empty input
end

% Count occurrences of every value in one pass. Value 0 is stored at
% index 1, so shift by one; then normalise the counts to frequencies.
counts = accumarray(double(vector(:)) + 1, 1, [256 1]);
f = counts.' ./ len;
end
% Running the program above produces the following output:
%   Name        Size        Bytes    Class
%   data        256x256     65536    uint8 array
%   unzipped    1x65536     65536    uint8 array
%   zipped      1x57712     57712    uint8 array
% Grand total is 188784 elements using 188784 bytes
% The compression information structure info is:
%   pad: 7
%   huffcodes: [108471x1 double]
%   ratio: 0.8806
%   length: 65536
%   maxcodelen: 16