/* trees.c -- output deflated data using Huffman coding
* Copyright (C) 1992-1993 Jean-loup Gailly
* This is free software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License, see the file COPYING.
*/
/*
* PURPOSE
*
* Encode various sets of source values using variable-length
* binary code trees.
*
* DISCUSSION
*
* The PKZIP "deflation" process uses several Huffman trees. The more
* common source values are represented by shorter bit sequences.
*
* Each code tree is stored in the ZIP file in a compressed form
* which is itself a Huffman encoding of the lengths of
* all the code strings (in ascending order by source values).
* The actual code strings are reconstructed from the lengths in
* the UNZIP process, as described in the "application note"
* (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program.
*
* REFERENCES
*
* Lynch, Thomas J.
* Data Compression: Techniques and Applications, pp. 53-55.
* Lifetime Learning Publications, 1985. ISBN 0-534-03418-7.
*
* Storer, James A.
* Data Compression: Methods and Theory, pp. 49-50.
* Computer Science Press, 1988. ISBN 0-7167-8156-5.
*
* Sedgewick, R.
* Algorithms, p290.
* Addison-Wesley, 1983. ISBN 0-201-06672-6.
*
* INTERFACE
*
* void ct_init (ush *attr, int *methodp)
* Allocate the match buffer, initialize the various tables and save
* the location of the internal file attribute (ascii/binary) and
* method (DEFLATE/STORE)
*
* void ct_tally (int dist, int lc);
* Save the match info and tally the frequency counts.
*
* long flush_block (char *buf, ulg stored_len, int eof)
* Determine the best encoding for the current block: dynamic trees,
* static trees or store, and output the encoded block to the zip
* file. Returns the total compressed length for the file so far.
*
*/
#include "gzip.h"
/* ===========================================================================
* Constants
*/
/* All codes must not exceed MAX_BITS bits */
#define MAX_BITS 15
/* Bit length codes must not exceed MAX_BL_BITS bits */
#define MAX_BL_BITS 7
/* number of length codes, not counting the special END_BLOCK code */
#define LENGTH_CODES 29
/* number of literal bytes 0..255 */
#define LITERALS 256
/* end of block literal code */
#define END_BLOCK 256
/* number of Literal or Length codes, including the END_BLOCK code */
#define L_CODES (LITERALS+1+LENGTH_CODES)
/* number of distance codes */
#define D_CODES 30
/* number of codes used to transfer the bit lengths */
#define BL_CODES 19
local int near extra_lbits[LENGTH_CODES]
/* extra bits for each length code */
= {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0};
local int near extra_dbits[D_CODES]
/* extra bits for each distance code */
= {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
local int near extra_blbits[BL_CODES]
/* extra bits for each bit length code */
= {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7};
#define STORED_BLOCK 0
/* The three kinds of block type */
#define STATIC_TREES 1
/* The three kinds of block type */
#define DYN_TREES 2
#define LIT_BUFSIZE 0x2000
#ifndef DIST_BUFSIZE
#define DIST_BUFSIZE LIT_BUFSIZE
#endif
/* Sizes of match buffers for literals/lengths and distances. There are
* 4 reasons for limiting LIT_BUFSIZE to 64K:
* - frequencies can be kept in 16 bit counters
* - if compression is not successful for the first block, all input data is
* still in the window so we can still emit a stored block even when input
* comes from standard input. (This can also be done for all blocks if
* LIT_BUFSIZE is not greater than 32K.)
* - if compression is not successful for a file smaller than 64K, we can
* even emit a stored file instead of a stored block (saving 5 bytes).
* - creating new Huffman trees less frequently may not provide fast
* adaptation to changes in the input data statistics. (Take for
* example a binary file with poorly compressible code followed by
* a highly compressible string table.) Smaller buffer sizes give
* fast adaptation but have of course the overhead of transmitting trees
* more frequently.
* - I can't count above 4
* The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save
* memory at the expense of compression). Some optimizations would be possible
* if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
*/
#if LIT_BUFSIZE > INBUFSIZ
error cannot overlay l_buf and inbuf
#endif
/* repeat previous bit length 3-6 times (2 bits of repeat count) */
#define REP_3_6 16
/* repeat a zero length 3-10 times (3 bits of repeat count) */
#define REPZ_3_10 17
/* repeat a zero length 11-138 times (7 bits of repeat count) */
#define REPZ_11_138 18
/* ===========================================================================
* Local data
*/
/* Data structure describing a single value and its code string. */
typedef struct ct_data {
union {
ush freq; /* frequency count */
ush code; /* bit string */
} fc;
union {
ush dad; /* father node in Huffman tree */
ush len; /* length of bit string */
} dl;
} ct_data;
#define Freq fc.freq
#define Code fc.code
#define Dad dl.dad
#define Len dl.len
/* maximum heap size */
#define HEAP_SIZE (2*L_CODES+1)
local ct_data near dyn_ltree[HEAP_SIZE]; /* literal and length tree */
local ct_data near dyn_dtree[2 * D_CODES + 1]; /* distance tree */
/* The static literal tree. Since the bit lengths are imposed, there is no
* need for the L_CODES extra codes used during heap construction. However
* The codes 286 and 287 are needed to build a canonical tree (see ct_init
* below).
*/
local ct_data near static_ltree[L_CODES + 2];
local ct_data near static_dtree[D_CODES];
/* The static distance tree. (Actually a trivial tree since all codes use
* 5 bits.)
*/
/* Huffman tree for the bit lengths */
local ct_data near bl_tree[2 * BL_CODES + 1];
typedef struct tree_desc {
ct_data near * dyn_tree; /* the dynamic tree */
ct_data near * static_tree; /* corresponding static tree or NULL */
int near * extra_bits; /* extra bits for each code or NULL */
int extra_base; /* base index for extra_bits */
int elems; /* max number of elements in the tree */
int max_length; /* max bit length for the codes */
int max_code; /* largest code with non zero frequency */
} tree_desc;
local tree_desc near l_desc = {
dyn_ltree, static_ltree, extra_lbits, LITERALS + 1, L_CODES, MAX_BITS, 0
};
local tree_desc near d_desc = {
dyn_dtree, static_dtree, extra_dbits, 0, D_CODES, MAX_BITS, 0
};
local tree_desc near bl_desc = {
bl_tree, ( ct_data near *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS, 0
};
/* number of codes at each bit length for an optimal tree */
local ush near bl_count[MAX_BITS + 1];
/* The lengths of the bit length codes are sent in order of decreasing
* probability, to avoid transmitting the lengths for unused bit length codes.
*/
local uch near bl_order[BL_CODES] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
local int near heap[2 * L_CODES + 1]; /* heap used to build the Huffman trees */
local int heap_len = 0; /* number of elements in the heap */
local int heap_max = 0; /* element of largest frequency */
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
* The same heap array is used to build all trees.
*/
/* Depth of each subtree used as tie breaker for trees of equal frequency */
local uch near depth[2 * L_CODES + 1];
/* length code for each normalized match length (0 == MIN_MATCH) */
local uch length_code[MAX_MATCH - MIN_MATCH + 1];
/* distance codes. The first 256 values correspond to the distances
* 3 .. 258, the last 256 values correspond to the top 8 bits of
* the 15 bit distances.
*/
local uch dist_code[512];
/* First normalized length for each code (0 = MIN_MATCH) */
local int near base_length[LENGTH_CODES];
/* First normalized distance for each code (0 = distance of 1) */
local int near base_dist[D_CODES];
/* DECLARE(uch, l_buf, LIT_BUFSIZE); buffer for literals or lengths */
#de