/*******************************************************************************
* *
* HUF.C by Shaun Case April 1991 *
* *
* Written in Borland C++ 2.0 under MS-DOS 3.3 *
* *
* Uses Huffman encoding to compress a single file. *
* This program is in the public domain. *
* *
* atman%ecst.csuchico.edu@RELAY.CS.NET *
* *
* *
*******************************************************************************/
#include <stdio.h>
#include <math.h>
#include <dir.h> /* for storing filename in output file */
#define FALSE 0 /* i'm still deciding on a style... */
#define TRUE !FALSE
/*
* for lots of interesting (?) diagnostics, uncomment the following:
#define VERBOSE
*
*/
typedef struct chardata { /* template for leaves and nodes in tree */
short charnum; /* which character to be encoded */
/* don't want it lost in the sort */
unsigned long total; /* total occurances in the file */
short seq_length; /* length of this char's huffman code (bits)*/
short bit_sequence; /* the huffman code sequence for this char */
double frequency; /* frequency of occurance, < 1.0 */
struct chardata *up, *left, *right; /* pointers to rest of tree */
};
typedef struct decode_table_element { /* template for decode table element (wow) */
unsigned char letter; /* which character to decode to */
char spare; /* force 16-bit word alignment */
short left; /* index of lower left element from tree */
short right; /* index of lower right element from tree */
};
int compare(const void*, const void*); /* prototype for compare() for qsort() */
struct chardata *huftable[256]; /* array that points to leaves in tree */
struct chardata *root=NULL; /* future root of tree */
struct decode_table_element *decode_table; /* pointer to decode table */
short array_max_index=0; /* max number of elements in array (to be */
/* determined in create_decode_table() */
long real_bit_total=0L;
double avg_bits_per_symbol=0; /* averages num of bits needed to represent */
unsigned long total; /* total number of unencoded bytes */
long real_bits_total = 0L;
FILE *infile; /* file ptr to original file (uncompressed) */
FILE *outfile; /* file ptr to output fiel (compressed) */
char *infile_name; /* ptr to name of input file */
char *outfile_name; /* ptr to name of output file */
int main(int argc, char **argv)
{
#ifdef VERBOSE
void show_encode_info(void); /* prototype */
void show_encoded_file_size(void); /* prototype */
void show_decode_table(void); /* prototype */
#endif
void dumptable(void); /* prototype */
short create_tree(void); /* prototype */
void gen_bit_sequences(struct chardata *node); /* prototype */
short create_decode_table(void); /* prototype */
short compress(void); /* prototype */
register short c; /* a character */
if (argc != 3) { /* check command line arguments */
puts("'huf file1 file2' encodes file1 into file2.");
return 1;
}
puts("Huf by Shaun Case, 1991, Public Domain");
infile_name = argv[1];
outfile_name = argv[2];
puts("Analyzing data.");
for (c=0; c < 256; c++) /* initialize decode table */
{
if ((huftable[c] = (struct chardata *)malloc(sizeof (struct chardata)))== NULL)
{
printf("Unable to allocate space for %dth huftable node.",c);
return 1;
}
huftable[c]->charnum = c; /* need to know who we are after qsort() gets done with us */
huftable[c]->total = 0L;
huftable[c]->frequency = 0;
huftable[c]->up = NULL;
huftable[c]->left = NULL;
huftable[c]->right = NULL;
}
if ((infile=fopen(infile_name, "rb")) == NULL) /* open the input file */
{
printf("Unable to open %s.\n", infile_name);
return 1;
}
while (!feof(infile)) /* get character distribution data */
{
c = fgetc(infile);
if (feof(infile))
continue;
++total;
++huftable[c]->total; /* increment total for char c */
}
if (total == 0L) /* handle empty file */
{
puts("Input file is empty, aborting.");
return 0;
}
fclose(infile);
/* sort according to frequency of occurance */
qsort((void *)huftable, (size_t)256, (size_t)sizeof(struct chardata *),
compare);
dumptable(); /* fill huftable with distribution freqs */
if (create_tree() != TRUE) /* make the huffman tree (bit sequences) */
return 1;
#ifdef VERBOSE
printf("\nFreq of root is %f.\n",root->frequency); /* this should be 1.0 if all went well */
puts("\nBit sequences:\n\n");
#endif
avg_bits_per_symbol = 0;
gen_bit_sequences(root); /* build bit sequences, stuff seqs & */
/* lengths into associated leaves */
#ifdef VERBOSE
printf("\n\nActual bits per symbol = %f", avg_bits_per_symbol);
printf("\nActual final data size w/o table should be %f\n",
avg_bits_per_symbol * (double)total / ((double) 8));
show_encoded_file_size();
#endif
puts("Building decode table...");
if (create_decode_table() != TRUE) /* create decode array from tree */
{
puts("Unable to allocate space for decode table, exiting.");
return 1;
}
printf("Decode table built, contains %d elements.\n",array_max_index + 1);
#ifdef VERBOSE
show_decode_table();
show_encode_info();
#endif
if (compress() != 0) /* now that we have all necessary info, */
return 1; /* perform the compression. */
puts("Done.");
return 0;
}
/*****************************************************************************
* this code is going to be a little strange -- we have an array of items *
* that are going to be the leaves of the tree, and we have to go upward *
* linking them together t