/**************************************************************
LZARI.C -- A Data Compression Program
(tab = 4 spaces)
***************************************************************
4/7/1989 Haruhiko Okumura
Use, distribute, and modify this program freely.
Please send me your improved versions.
PC-VAN SCIENCE
NIFTY-Serve PAF01022
CompuServe 74050,1022
**************************************************************/
/********************************************************************
lzari.cpp -- A Data Compression Class
created: 2004/10/04
created: 4:10:2004 16:44
file base: lzari
file ext: cpp
author: 阙荣文 (querw@sina.com)
purpose: As noted above, lzari.c provides an implementation of the LZARI
compression algorithm; this file wraps it in a C++ class for easier use.
*********************************************************************/
#include "StdAfx.h"
//#include <stdio.h>
//#include <stdlib.h>
//#include <string.h>
//#include <ctype.h>
#include "Lzari.h"
/* Default constructor: no streams or input buffer attached, all counters
   zeroed, and the arithmetic-coder registers and bit buffers placed in
   their start state. */
LZARI::LZARI()
{
    /* stream handles used when m_bMem is FALSE */
    infile  = NULL;
    outfile = NULL;

    /* memory mode switched off, no input buffer attached */
    m_bMem       = FALSE;
    m_pInBuffer  = NULL;
    m_nInLength  = 0;
    m_nInCur     = 0;
    m_nOutLength = 0;

    /* size / progress counters */
    textsize   = 0;
    codesize   = 0;
    printcount = 0;

    /* arithmetic-coder registers */
    low    = 0;
    high   = Q4;
    value  = 0;
    shifts = 0;     /* counts for magnifying low and high around Q2 */

    /* bit writer: empty byte, next bit goes to the top (128) position */
    buffer_putbit = 0;
    mask_putbit   = 128;

    /* bit reader: mask 0 makes the first GetBit() fetch a fresh byte */
    buffer_getbit = 0;
    mask_getbit   = 0;
}
/* Destructor: all cleanup is delegated to Release(). */
LZARI::~LZARI()
{
    this->Release();
}
/* Report a fatal condition.
   When _OUTPUT_STATUS is defined the message is printed first; in every
   build the function then throws the int value 1 and never returns
   normally.  (A call to exit(EXIT_FAILURE) used to sit here and is
   disabled in favour of the exception.) */
void LZARI::Error(char *message)
{
#ifdef _OUTPUT_STATUS
    printf("\n%s\n", message);
#endif
    throw 1;
}
void LZARI::PutBit(int bit) /* Output one bit (bit = 0,1) */
{
if (bit) buffer_putbit |= mask_putbit;
if ((mask_putbit >>= 1) == 0)
{
if (!m_bMem)
{
if (putc(buffer_putbit, outfile) == EOF) Error("Write Error");
}
else
{
//if (m_nOutCur == m_nOutLength) Error("Write Error");
//m_pOutBuffer[m_nOutCur++] = buffer;
m_OutBuffer.push_back(buffer_putbit);
//m_nOutCur++;
}
buffer_putbit = 0;
mask_putbit = 128;
codesize++;
}
}
/* Send remaining bits: pushes seven 0 bits through PutBit(), which is
   enough to force out any partially filled byte. */
void LZARI::FlushBitBuffer(void)
{
    int remaining = 7;
    while (remaining-- > 0)
        PutBit(0);
}
int LZARI::GetBit(void) /* Get one bit (0 or 1) */
{
if ((mask_getbit >>= 1) == 0)
{
if (!m_bMem)
buffer_getbit = getc(infile);
else
buffer_getbit = m_pInBuffer[m_nInCur++];
mask_getbit = 128;
}
return ((buffer_getbit & mask_getbit) != 0);
}
/********** LZSS with multiple binary trees **********/
/* Initialize trees.
   For i = 0 to N - 1, rson[i] and lson[i] are the right and left children
   of node i; they need no initialization.  dad[i] is the parent of node i
   and is set to NIL, meaning 'not used'.
   For i = 0 to 255, rson[N + i + 1] is the root of the tree for strings
   that begin with character i; each of these 256 roots is set to NIL. */
void LZARI::InitTree(void)
{
    int node;
    int root;

    /* every node starts detached */
    node = 0;
    while (node < N)
    {
        dad[node] = NIL;
        node++;
    }

    /* every one of the 256 trees starts empty */
    root = N + 1;
    while (root <= N + 256)
    {
        rson[root] = NIL;
        root++;
    }
}
void LZARI::InsertNode(int r)
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
trees (text_buf[r]'th tree) and reports the longest match found via
match_position and match_length (globals in the original C program;
here they are declared outside this function, presumably as class members).
match_position is stored as the distance (r - p) & (N - 1) back from r,
not as an absolute buffer index.
If match_length = F, then removes the old node in favor of the new
one, because the old one will be deleted sooner.
Note r plays double role, as tree node and position in buffer. */
{
int i, p, cmp, temp;
unsigned char *key;
/* cmp = 1 makes the first step go through rson[] of the root slot
N + 1 + key[0], which is where the root of that character's tree is kept. */
cmp = 1; key = &text_buf[r]; p = N + 1 + key[0];
rson[r] = lson[r] = NIL; match_length = 0;
for ( ; ; )
{
/* Descend one level according to the previous comparison; if the
child slot is empty, hang r there and finish. */
if (cmp >= 0)
{
if (rson[p] != NIL) p = rson[p];
else { rson[p] = r; dad[r] = p; return; }
} else
{
if (lson[p] != NIL) p = lson[p];
else { lson[p] = r; dad[r] = p; return; }
}
/* Compare bytes 1..F-1 with the string at node p (byte 0 is not
compared here; the tree was selected by key[0]).  i ends up as the
number of matching leading bytes, cmp as the sign of the first difference. */
for (i = 1; i < F; i++)
if ((cmp = key[i] - text_buf[p + i]) != 0) break;
/* Only matches longer than THRESHOLD are recorded. */
if (i > THRESHOLD)
{
if (i > match_length)
{
/* new longest match; & (N - 1) wraps the distance
(assumes N is a power of two -- TODO confirm in the header) */
match_position = (r - p) & (N - 1);
if ((match_length = i) >= F) break;
} else if (i == match_length)
{
/* equal length: keep the smaller distance */
if ((temp = (r - p) & (N - 1)) < match_position)
match_position = temp;
}
}
}
/* Reached only via the break above (full-length match): r takes over
p's place in the tree.  If a child of p is NIL the dad[] store below
lands in dad[NIL]; NOTE(review): this relies on dad[] having an entry
at index NIL -- confirm against the array size in the header. */
dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p];
dad[lson[p]] = r; dad[rson[p]] = r;
if (rson[dad[p]] == p) rson[dad[p]] = r;
else lson[dad[p]] = r;
dad[p] = NIL; /* remove p */
}
/* Delete node p from its tree.
   Does nothing when p is not linked (dad[p] == NIL).  A node with at most
   one child is replaced by that child (possibly NIL).  A node with two
   children is replaced by the rightmost node of its left subtree. */
void LZARI::DeleteNode(int p)
{
    int repl;       /* node that takes p's place (may be NIL) */

    if (dad[p] == NIL)
        return;     /* not in tree */

    if (rson[p] == NIL)
    {
        repl = lson[p];
    }
    else if (lson[p] == NIL)
    {
        repl = rson[p];
    }
    else
    {
        repl = lson[p];
        if (rson[repl] != NIL)
        {
            /* walk to the rightmost node of the left subtree */
            while (rson[repl] != NIL)
                repl = rson[repl];

            /* unhook it from its parent, promoting its left child */
            rson[dad[repl]] = lson[repl];
            dad[lson[repl]] = dad[repl];

            /* give it p's left subtree */
            lson[repl] = lson[p];
            dad[lson[p]] = repl;
        }
        /* give it p's right subtree */
        rson[repl] = rson[p];
        dad[rson[p]] = repl;
    }

    /* link the replacement under p's parent, on the side p occupied */
    dad[repl] = dad[p];
    if (rson[dad[p]] == p)
        rson[dad[p]] = repl;
    else
        lson[dad[p]] = repl;

    dad[p] = NIL;   /* p is now unused */
}
/********** Arithmetic Compression **********/
/* If you are not familiar with arithmetic compression, you should read
I. H. Witten, R. M. Neal, and J. G. Cleary, "Arithmetic Coding for Data
Compression," Communications of the ACM, Vol. 30, pp. 520-540 (1987),
from which much has been borrowed. */
/* character code = 0, 1, ..., N_CHAR - 1 */
/* Initialize model.
   Character ch is mapped to symbol ch + 1 and every symbol gets frequency 1;
   sym_cum[] holds the running totals accumulated from the top symbol down.
   position_cum[] is filled from a fixed formula, 10000 / (i + 200) per
   position i. */
void LZARI::StartModel(void)
{
    int ch, sym, pos;

    sym_cum[N_CHAR] = 0;
    for (ch = N_CHAR - 1; ch >= 0; ch--)
    {
        sym = ch + 1;
        char_to_sym[ch]  = sym;
        sym_to_char[sym] = ch;
        sym_freq[sym]    = 1;
        sym_cum[ch]      = sym_cum[sym] + sym_freq[sym];
    }
    sym_freq[0] = 0;    /* sentinel (!= sym_freq[1]) */

    /* empirical distribution function (quite tentative) */
    /* Please devise a better mechanism! */
    position_cum[N] = 0;
    for (pos = N - 1; pos >= 0; pos--)
        position_cum[pos] = position_cum[pos + 1] + 10000 / (pos + 201);
}
/* Record one more occurrence of symbol sym.
   1. If the total count sym_cum[0] has reached MAX_CUM, every frequency is
      halved (rounding up) and the cumulative table is rebuilt.
   2. sym is moved to the lowest index among the symbols that currently
      share its frequency, swapping the two character mappings.
   3. That slot's frequency and every cumulative count below it are
      incremented. */
void LZARI::UpdateModel(int sym)
{
    int k;
    int slot;
    int total;
    int chAtSlot, chAtSym;

    if (sym_cum[0] >= MAX_CUM)
    {
        total = 0;
        for (k = N_CHAR; k > 0; k--)
        {
            sym_cum[k] = total;
            total += (sym_freq[k] = (sym_freq[k] + 1) >> 1);
        }
        sym_cum[0] = total;
    }

    /* scan towards index 0 while the neighbour has the same frequency;
       the loop ends at the first index whose lower neighbour differs */
    slot = sym;
    while (sym_freq[slot] == sym_freq[slot - 1])
        slot--;

    if (slot < sym)
    {
        chAtSlot = sym_to_char[slot];
        chAtSym  = sym_to_char[sym];
        sym_to_char[slot]     = chAtSym;
        sym_to_char[sym]      = chAtSlot;
        char_to_sym[chAtSlot] = sym;
        char_to_sym[chAtSym]  = slot;
    }

    sym_freq[slot]++;
    for (k = slot - 1; k >= 0; k--)
        sym_cum[k]++;
}
/* Output 1 bit, followed by its complements: after `bit` itself, one
   inverted bit is written for each pending count in shifts, and shifts
   ends at 0. */
void LZARI::Output(int bit)
{
    PutBit(bit);
    while (shifts > 0)
    {
        PutBit(! bit);
        shifts--;
    }
}
/* Arithmetic-encode character ch.
   The current [low, high] pair is narrowed to the slice given by the
   symbol's cumulative counts, then rescaled: settled leading bits are
   emitted through Output(), and a straddle of Q2 inside [Q1, Q3] is
   counted in shifts.  Finally the model is updated with the symbol. */
void LZARI::EncodeChar(int ch)
{
    int sym;
    unsigned long int range;

    sym   = char_to_sym[ch];
    range = high - low;
    /* high must be derived from the old low before low itself moves */
    high = low + (range * sym_cum[sym - 1]) / sym_cum[0];
    low += (range * sym_cum[sym    ]) / sym_cum[0];

    for ( ; ; )
    {
        if (high > Q2)
        {
            if (low >= Q2)
            {
                Output(1);
                low  -= Q2;
                high -= Q2;
            }
            else if (low >= Q1 && high <= Q3)
            {
                shifts++;
                low  -= Q1;
                high -= Q1;
            }
            else
            {
                break;      /* nothing more can be settled yet */
            }
        }
        else
        {
            Output(0);
        }
        /* double both bounds */
        low  += low;
        high += high;
    }

    UpdateModel(sym);
}
/* Arithmetic-encode a match position using the fixed position_cum[] table.
   Narrows and rescales low/high in the same way as the character encoder;
   no model update follows. */
void LZARI::EncodePosition(int position)
{
    unsigned long int range;

    range = high - low;
    /* high must be derived from the old low before low itself moves */
    high = low + (range * position_cum[position    ]) / position_cum[0];
    low += (range * position_cum[position + 1]) / position_cum[0];

    for ( ; ; )
    {
        if (high > Q2)
        {
            if (low >= Q2)
            {
                Output(1);
                low  -= Q2;
                high -= Q2;
            }
            else if (low >= Q1 && high <= Q3)
            {
                shifts++;
                low  -= Q1;
                high -= Q1;
            }
            else
            {
                break;      /* nothing more can be settled yet */
            }
        }
        else
        {
            Output(0);
        }
        /* double both bounds */
        low  += low;
        high += high;
    }
}
/* Finish the code stream: add one pending shift, emit a final bit chosen
   by whether low is below Q1 (0) or not (1), then flush the bit buffer. */
void LZARI::EncodeEnd(void)
{
    shifts++;
    Output(low < Q1 ? 0 : 1);
    FlushBitBuffer();   /* flush bits remaining in buffer */
}
int LZARI::BinarySearchSym(unsigned int x)
/* 1 if x >= sym_cum[