/* trees.c -- output deflated data using Huffman coding1* Copyright (C) 1995-2024 Jean-loup Gailly2* detect_data_type() function provided freely by Cosmin Truta, 20063* For conditions of distribution and use, see copyright notice in zlib.h4*/56/*7* ALGORITHM8*9* The "deflation" process uses several Huffman trees. The more10* common source values are represented by shorter bit sequences.11*12* Each code tree is stored in a compressed form which is itself13* a Huffman encoding of the lengths of all the code strings (in14* ascending order by source values). The actual code strings are15* reconstructed from the lengths in the inflate process, as described16* in the deflate specification.17*18* REFERENCES19*20* Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".21* Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc22*23* Storer, James A.24* Data Compression: Methods and Theory, pp. 49-50.25* Computer Science Press, 1988. ISBN 0-7167-8156-5.26*27* Sedgewick, R.28* Algorithms, p290.29* Addison-Wesley, 1983. ISBN 0-201-06672-6.30*/3132/* @(#) $Id$ */3334/* #define GEN_TREES_H */3536#include "deflate.h"3738#ifdef ZLIB_DEBUG39# include <ctype.h>40#endif4142/* ===========================================================================43* Constants44*/4546#define MAX_BL_BITS 747/* Bit length codes must not exceed MAX_BL_BITS bits */4849#define END_BLOCK 25650/* end of block literal code */5152#define REP_3_6 1653/* repeat previous bit length 3-6 times (2 bits of repeat count) */5455#define REPZ_3_10 1756/* repeat a zero length 3-10 times (3 bits of repeat count) */5758#define REPZ_11_138 1859/* repeat a zero length 11-138 times (7 bits of repeat count) */6061local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */62= {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};6364local const int extra_dbits[D_CODES] /* extra bits for each distance code */65= {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};6667local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */68= {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};6970local const uch bl_order[BL_CODES]71= {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};72/* The lengths of the bit length codes are sent in order of decreasing73* probability, to avoid transmitting the lengths for unused bit length codes.74*/7576/* ===========================================================================77* Local data. These are initialized only once.78*/7980#define DIST_CODE_LEN 512 /* see definition of array dist_code below */8182#if defined(GEN_TREES_H) || !defined(STDC)83/* non ANSI compilers may not accept trees.h */8485local ct_data static_ltree[L_CODES+2];86/* The static literal tree. Since the bit lengths are imposed, there is no87* need for the L_CODES extra codes used during heap construction. However88* The codes 286 and 287 are needed to build a canonical tree (see _tr_init89* below).90*/9192local ct_data static_dtree[D_CODES];93/* The static distance tree. (Actually a trivial tree since all codes use94* 5 bits.)95*/9697uch _dist_code[DIST_CODE_LEN];98/* Distance codes. The first 256 values correspond to the distances99* 3 .. 258, the last 256 values correspond to the top 8 bits of100* the 15 bit distances.101*/102103uch _length_code[MAX_MATCH-MIN_MATCH+1];104/* length code for each normalized match length (0 == MIN_MATCH) */105106local int base_length[LENGTH_CODES];107/* First normalized length for each code (0 = MIN_MATCH) */108109local int base_dist[D_CODES];110/* First normalized distance for each code (0 = distance of 1) */111112#else113# include "trees.h"114#endif /* GEN_TREES_H */115116struct static_tree_desc_s {117const ct_data *static_tree; /* static tree or NULL */118const intf *extra_bits; /* extra bits for each code or NULL */119int extra_base; /* base index for extra_bits */120int elems; /* max number of elements in the tree */121int max_length; /* max bit length for the codes */122};123124#ifdef NO_INIT_GLOBAL_POINTERS125# define TCONST126#else127# define TCONST const128#endif129130local TCONST static_tree_desc static_l_desc =131{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};132133local TCONST static_tree_desc static_d_desc =134{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};135136local TCONST static_tree_desc static_bl_desc =137{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};138139/* ===========================================================================140* Output a short LSB first on the stream.141* IN assertion: there is enough room in pendingBuf.142*/143#define put_short(s, w) { \144put_byte(s, (uch)((w) & 0xff)); \145put_byte(s, (uch)((ush)(w) >> 8)); \146}147148/* ===========================================================================149* Reverse the first len bits of a code, using straightforward code (a faster150* method would use a table)151* IN assertion: 1 <= len <= 15152*/153local unsigned bi_reverse(unsigned code, int len) {154register unsigned res = 0;155do {156res |= code & 1;157code >>= 1, res <<= 1;158} while (--len > 0);159return res >> 1;160}161162/* ===========================================================================163* Flush the bit buffer, keeping at most 7 bits in it.164*/165local void bi_flush(deflate_state *s) {166if (s->bi_valid == 16) {167put_short(s, s->bi_buf);168s->bi_buf = 0;169s->bi_valid = 0;170} else if (s->bi_valid >= 8) {171put_byte(s, (Byte)s->bi_buf);172s->bi_buf >>= 8;173s->bi_valid -= 8;174}175}176177/* ===========================================================================178* Flush the bit buffer and align the output on a byte boundary179*/180local void bi_windup(deflate_state *s) {181if (s->bi_valid > 8) {182put_short(s, s->bi_buf);183} else if (s->bi_valid > 0) {184put_byte(s, (Byte)s->bi_buf);185}186s->bi_buf = 0;187s->bi_valid = 0;188#ifdef ZLIB_DEBUG189s->bits_sent = (s->bits_sent + 7) & ~7;190#endif191}192193/* ===========================================================================194* Generate the codes for a given tree and bit counts (which need not be195* optimal).196* IN assertion: the array bl_count contains the bit length statistics for197* the given tree and the field len is set for all tree elements.198* OUT assertion: the field code is set for all tree elements of non199* zero code length.200*/201local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) {202ush next_code[MAX_BITS+1]; /* next code value for each bit length */203unsigned code = 0; /* running code value */204int bits; /* bit index */205int n; /* code index */206207/* The distribution counts are first used to generate the code values208* without bit reversal.209*/210for (bits = 1; bits <= MAX_BITS; bits++) {211code = (code + bl_count[bits - 1]) << 1;212next_code[bits] = (ush)code;213}214/* Check that the bit counts in bl_count are consistent. The last code215* must be all ones.216*/217Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,218"inconsistent bit counts");219Tracev((stderr,"\ngen_codes: max_code %d ", max_code));220221for (n = 0; n <= max_code; n++) {222int len = tree[n].Len;223if (len == 0) continue;224/* Now reverse the bits */225tree[n].Code = (ush)bi_reverse(next_code[len]++, len);226227Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",228n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));229}230}231232#ifdef GEN_TREES_H233local void gen_trees_header(void);234#endif235236#ifndef ZLIB_DEBUG237# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)238/* Send a code of the given tree. c and tree must not have side effects */239240#else /* !ZLIB_DEBUG */241# define send_code(s, c, tree) \242{ if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \243send_bits(s, tree[c].Code, tree[c].Len); }244#endif245246/* ===========================================================================247* Send a value on a given number of bits.248* IN assertion: length <= 16 and value fits in length bits.249*/250#ifdef ZLIB_DEBUG251local void send_bits(deflate_state *s, int value, int length) {252Tracevv((stderr," l %2d v %4x ", length, value));253Assert(length > 0 && length <= 15, "invalid length");254s->bits_sent += (ulg)length;255256/* If not enough room in bi_buf, use (valid) bits from bi_buf and257* (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid))258* unused bits in value.259*/260if (s->bi_valid > (int)Buf_size - length) {261s->bi_buf |= (ush)value << s->bi_valid;262put_short(s, s->bi_buf);263s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);264s->bi_valid += length - Buf_size;265} else {266s->bi_buf |= (ush)value << s->bi_valid;267s->bi_valid += length;268}269}270#else /* !ZLIB_DEBUG */271272#define send_bits(s, value, length) \273{ int len = length;\274if (s->bi_valid > (int)Buf_size - len) {\275int val = (int)value;\276s->bi_buf |= (ush)val << s->bi_valid;\277put_short(s, s->bi_buf);\278s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\279s->bi_valid += len - Buf_size;\280} else {\281s->bi_buf |= (ush)(value) << s->bi_valid;\282s->bi_valid += len;\283}\284}285#endif /* ZLIB_DEBUG */286287288/* the arguments must not have side effects */289290/* ===========================================================================291* Initialize the various 'constant' tables.292*/293local void tr_static_init(void) {294#if defined(GEN_TREES_H) || !defined(STDC)295static int static_init_done = 0;296int n; /* iterates over tree elements */297int bits; /* bit counter */298int length; /* length value */299int code; /* code value */300int dist; /* distance index */301ush bl_count[MAX_BITS+1];302/* number of codes at each bit length for an optimal tree */303304if (static_init_done) return;305306/* For some embedded targets, global variables are not initialized: */307#ifdef NO_INIT_GLOBAL_POINTERS308static_l_desc.static_tree = static_ltree;309static_l_desc.extra_bits = extra_lbits;310static_d_desc.static_tree = static_dtree;311static_d_desc.extra_bits = extra_dbits;312static_bl_desc.extra_bits = extra_blbits;313#endif314315/* Initialize the mapping length (0..255) -> length code (0..28) */316length = 0;317for (code = 0; code < LENGTH_CODES-1; code++) {318base_length[code] = length;319for (n = 0; n < (1 << extra_lbits[code]); n++) {320_length_code[length++] = (uch)code;321}322}323Assert (length == 256, "tr_static_init: length != 256");324/* Note that the length 255 (match length 258) can be represented325* in two different ways: code 284 + 5 bits or code 285, so we326* overwrite length_code[255] to use the best encoding:327*/328_length_code[length - 1] = (uch)code;329330/* Initialize the mapping dist (0..32K) -> dist code (0..29) */331dist = 0;332for (code = 0 ; code < 16; code++) {333base_dist[code] = dist;334for (n = 0; n < (1 << extra_dbits[code]); n++) {335_dist_code[dist++] = (uch)code;336}337}338Assert (dist == 256, "tr_static_init: dist != 256");339dist >>= 7; /* from now on, all distances are divided by 128 */340for ( ; code < D_CODES; code++) {341base_dist[code] = dist << 7;342for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) {343_dist_code[256 + dist++] = (uch)code;344}345}346Assert (dist == 256, "tr_static_init: 256 + dist != 512");347348/* Construct the codes of the static literal tree */349for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;350n = 0;351while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;352while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;353while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;354while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;355/* Codes 286 and 287 do not exist, but we must include them in the356* tree construction to get a canonical Huffman tree (longest code357* all ones)358*/359gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);360361/* The static distance tree is trivial: */362for (n = 0; n < D_CODES; n++) {363static_dtree[n].Len = 5;364static_dtree[n].Code = bi_reverse((unsigned)n, 5);365}366static_init_done = 1;367368# ifdef GEN_TREES_H369gen_trees_header();370# endif371#endif /* defined(GEN_TREES_H) || !defined(STDC) */372}373374/* ===========================================================================375* Generate the file trees.h describing the static trees.376*/377#ifdef GEN_TREES_H378# ifndef ZLIB_DEBUG379# include <stdio.h>380# endif381382# define SEPARATOR(i, last, width) \383((i) == (last)? "\n};\n\n" : \384((i) % (width) == (width) - 1 ? ",\n" : ", "))385386void gen_trees_header(void) {387FILE *header = fopen("trees.h", "w");388int i;389390Assert (header != NULL, "Can't open trees.h");391fprintf(header,392"/* header created automatically with -DGEN_TREES_H */\n\n");393394fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");395for (i = 0; i < L_CODES+2; i++) {396fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,397static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));398}399400fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");401for (i = 0; i < D_CODES; i++) {402fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,403static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));404}405406fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");407for (i = 0; i < DIST_CODE_LEN; i++) {408fprintf(header, "%2u%s", _dist_code[i],409SEPARATOR(i, DIST_CODE_LEN-1, 20));410}411412fprintf(header,413"const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");414for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {415fprintf(header, "%2u%s", _length_code[i],416SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));417}418419fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");420for (i = 0; i < LENGTH_CODES; i++) {421fprintf(header, "%1u%s", base_length[i],422SEPARATOR(i, LENGTH_CODES-1, 20));423}424425fprintf(header, "local const int base_dist[D_CODES] = {\n");426for (i = 0; i < D_CODES; i++) {427fprintf(header, "%5u%s", base_dist[i],428SEPARATOR(i, D_CODES-1, 10));429}430431fclose(header);432}433#endif /* GEN_TREES_H */434435/* ===========================================================================436* Initialize a new block.437*/438local void init_block(deflate_state *s) {439int n; /* iterates over tree elements */440441/* Initialize the trees. */442for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;443for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;444for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;445446s->dyn_ltree[END_BLOCK].Freq = 1;447s->opt_len = s->static_len = 0L;448s->sym_next = s->matches = 0;449}450451/* ===========================================================================452* Initialize the tree data structures for a new zlib stream.453*/454void ZLIB_INTERNAL _tr_init(deflate_state *s) {455tr_static_init();456457s->l_desc.dyn_tree = s->dyn_ltree;458s->l_desc.stat_desc = &static_l_desc;459460s->d_desc.dyn_tree = s->dyn_dtree;461s->d_desc.stat_desc = &static_d_desc;462463s->bl_desc.dyn_tree = s->bl_tree;464s->bl_desc.stat_desc = &static_bl_desc;465466s->bi_buf = 0;467s->bi_valid = 0;468#ifdef ZLIB_DEBUG469s->compressed_len = 0L;470s->bits_sent = 0L;471#endif472473/* Initialize the first block of the first file: */474init_block(s);475}476477#define SMALLEST 1478/* Index within the heap array of least frequent node in the Huffman tree */479480481/* ===========================================================================482* Remove the smallest element from the heap and recreate the heap with483* one less element. Updates heap and heap_len.484*/485#define pqremove(s, tree, top) \486{\487top = s->heap[SMALLEST]; \488s->heap[SMALLEST] = s->heap[s->heap_len--]; \489pqdownheap(s, tree, SMALLEST); \490}491492/* ===========================================================================493* Compares to subtrees, using the tree depth as tie breaker when494* the subtrees have equal frequency. This minimizes the worst case length.495*/496#define smaller(tree, n, m, depth) \497(tree[n].Freq < tree[m].Freq || \498(tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))499500/* ===========================================================================501* Restore the heap property by moving down the tree starting at node k,502* exchanging a node with the smallest of its two sons if necessary, stopping503* when the heap property is re-established (each father smaller than its504* two sons).505*/506local void pqdownheap(deflate_state *s, ct_data *tree, int k) {507int v = s->heap[k];508int j = k << 1; /* left son of k */509while (j <= s->heap_len) {510/* Set j to the smallest of the two sons: */511if (j < s->heap_len &&512smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) {513j++;514}515/* Exit if v is smaller than both sons */516if (smaller(tree, v, s->heap[j], s->depth)) break;517518/* Exchange v with the smallest son */519s->heap[k] = s->heap[j]; k = j;520521/* And continue down the tree, setting j to the left son of k */522j <<= 1;523}524s->heap[k] = v;525}526527/* ===========================================================================528* Compute the optimal bit lengths for a tree and update the total bit length529* for the current block.530* IN assertion: the fields freq and dad are set, heap[heap_max] and531* above are the tree nodes sorted by increasing frequency.532* OUT assertions: the field len is set to the optimal bit length, the533* array bl_count contains the frequencies for each bit length.534* The length opt_len is updated; static_len is also updated if stree is535* not null.536*/537local void gen_bitlen(deflate_state *s, tree_desc *desc) {538ct_data *tree = desc->dyn_tree;539int max_code = desc->max_code;540const ct_data *stree = desc->stat_desc->static_tree;541const intf *extra = desc->stat_desc->extra_bits;542int base = desc->stat_desc->extra_base;543int max_length = desc->stat_desc->max_length;544int h; /* heap index */545int n, m; /* iterate over the tree elements */546int bits; /* bit length */547int xbits; /* extra bits */548ush f; /* frequency */549int overflow = 0; /* number of elements with bit length too large */550551for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;552553/* In a first pass, compute the optimal bit lengths (which may554* overflow in the case of the bit length tree).555*/556tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */557558for (h = s->heap_max + 1; h < HEAP_SIZE; h++) {559n = s->heap[h];560bits = tree[tree[n].Dad].Len + 1;561if (bits > max_length) bits = max_length, overflow++;562tree[n].Len = (ush)bits;563/* We overwrite tree[n].Dad which is no longer needed */564565if (n > max_code) continue; /* not a leaf node */566567s->bl_count[bits]++;568xbits = 0;569if (n >= base) xbits = extra[n - base];570f = tree[n].Freq;571s->opt_len += (ulg)f * (unsigned)(bits + xbits);572if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);573}574if (overflow == 0) return;575576Tracev((stderr,"\nbit length overflow\n"));577/* This happens for example on obj2 and pic of the Calgary corpus */578579/* Find the first bit length which could increase: */580do {581bits = max_length - 1;582while (s->bl_count[bits] == 0) bits--;583s->bl_count[bits]--; /* move one leaf down the tree */584s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */585s->bl_count[max_length]--;586/* The brother of the overflow item also moves one step up,587* but this does not affect bl_count[max_length]588*/589overflow -= 2;590} while (overflow > 0);591592/* Now recompute all bit lengths, scanning in increasing frequency.593* h is still equal to HEAP_SIZE. (It is simpler to reconstruct all594* lengths instead of fixing only the wrong ones. This idea is taken595* from 'ar' written by Haruhiko Okumura.)596*/597for (bits = max_length; bits != 0; bits--) {598n = s->bl_count[bits];599while (n != 0) {600m = s->heap[--h];601if (m > max_code) continue;602if ((unsigned) tree[m].Len != (unsigned) bits) {603Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));604s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;605tree[m].Len = (ush)bits;606}607n--;608}609}610}611612#ifdef DUMP_BL_TREE613# include <stdio.h>614#endif615616/* ===========================================================================617* Construct one Huffman tree and assigns the code bit strings and lengths.618* Update the total bit length for the current block.619* IN assertion: the field freq is set for all tree elements.620* OUT assertions: the fields len and code are set to the optimal bit length621* and corresponding code. The length opt_len is updated; static_len is622* also updated if stree is not null. The field max_code is set.623*/624local void build_tree(deflate_state *s, tree_desc *desc) {625ct_data *tree = desc->dyn_tree;626const ct_data *stree = desc->stat_desc->static_tree;627int elems = desc->stat_desc->elems;628int n, m; /* iterate over heap elements */629int max_code = -1; /* largest code with non zero frequency */630int node; /* new node being created */631632/* Construct the initial heap, with least frequent element in633* heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1].634* heap[0] is not used.635*/636s->heap_len = 0, s->heap_max = HEAP_SIZE;637638for (n = 0; n < elems; n++) {639if (tree[n].Freq != 0) {640s->heap[++(s->heap_len)] = max_code = n;641s->depth[n] = 0;642} else {643tree[n].Len = 0;644}645}646647/* The pkzip format requires that at least one distance code exists,648* and that at least one bit should be sent even if there is only one649* possible code. So to avoid special checks later on we force at least650* two codes of non zero frequency.651*/652while (s->heap_len < 2) {653node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);654tree[node].Freq = 1;655s->depth[node] = 0;656s->opt_len--; if (stree) s->static_len -= stree[node].Len;657/* node is 0 or 1 so it does not have extra bits */658}659desc->max_code = max_code;660661/* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree,662* establish sub-heaps of increasing lengths:663*/664for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);665666/* Construct the Huffman tree by repeatedly combining the least two667* frequent nodes.668*/669node = elems; /* next internal node of the tree */670do {671pqremove(s, tree, n); /* n = node of least frequency */672m = s->heap[SMALLEST]; /* m = node of next least frequency */673674s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */675s->heap[--(s->heap_max)] = m;676677/* Create a new node father of n and m */678tree[node].Freq = tree[n].Freq + tree[m].Freq;679s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?680s->depth[n] : s->depth[m]) + 1);681tree[n].Dad = tree[m].Dad = (ush)node;682#ifdef DUMP_BL_TREE683if (tree == s->bl_tree) {684fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",685node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);686}687#endif688/* and insert the new node in the heap */689s->heap[SMALLEST] = node++;690pqdownheap(s, tree, SMALLEST);691692} while (s->heap_len >= 2);693694s->heap[--(s->heap_max)] = s->heap[SMALLEST];695696/* At this point, the fields freq and dad are set. We can now697* generate the bit lengths.698*/699gen_bitlen(s, (tree_desc *)desc);700701/* The field len is now set, we can generate the bit codes */702gen_codes ((ct_data *)tree, max_code, s->bl_count);703}704705/* ===========================================================================706* Scan a literal or distance tree to determine the frequencies of the codes707* in the bit length tree.708*/709local void scan_tree(deflate_state *s, ct_data *tree, int max_code) {710int n; /* iterates over all tree elements */711int prevlen = -1; /* last emitted length */712int curlen; /* length of current code */713int nextlen = tree[0].Len; /* length of next code */714int count = 0; /* repeat count of the current code */715int max_count = 7; /* max repeat count */716int min_count = 4; /* min repeat count */717718if (nextlen == 0) max_count = 138, min_count = 3;719tree[max_code + 1].Len = (ush)0xffff; /* guard */720721for (n = 0; n <= max_code; n++) {722curlen = nextlen; nextlen = tree[n + 1].Len;723if (++count < max_count && curlen == nextlen) {724continue;725} else if (count < min_count) {726s->bl_tree[curlen].Freq += count;727} else if (curlen != 0) {728if (curlen != prevlen) s->bl_tree[curlen].Freq++;729s->bl_tree[REP_3_6].Freq++;730} else if (count <= 10) {731s->bl_tree[REPZ_3_10].Freq++;732} else {733s->bl_tree[REPZ_11_138].Freq++;734}735count = 0; prevlen = curlen;736if (nextlen == 0) {737max_count = 138, min_count = 3;738} else if (curlen == nextlen) {739max_count = 6, min_count = 3;740} else {741max_count = 7, min_count = 4;742}743}744}745746/* ===========================================================================747* Send a literal or distance tree in compressed form, using the codes in748* bl_tree.749*/750local void send_tree(deflate_state *s, ct_data *tree, int max_code) {751int n; /* iterates over all tree elements */752int prevlen = -1; /* last emitted length */753int curlen; /* length of current code */754int nextlen = tree[0].Len; /* length of next code */755int count = 0; /* repeat count of the current code */756int max_count = 7; /* max repeat count */757int min_count = 4; /* min repeat count */758759/* tree[max_code + 1].Len = -1; */ /* guard already set */760if (nextlen == 0) max_count = 138, min_count = 3;761762for (n = 0; n <= max_code; n++) {763curlen = nextlen; nextlen = tree[n + 1].Len;764if (++count < max_count && curlen == nextlen) {765continue;766} else if (count < min_count) {767do { send_code(s, curlen, s->bl_tree); } while (--count != 0);768769} else if (curlen != 0) {770if (curlen != prevlen) {771send_code(s, curlen, s->bl_tree); count--;772}773Assert(count >= 3 && count <= 6, " 3_6?");774send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2);775776} else if (count <= 10) {777send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3);778779} else {780send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7);781}782count = 0; prevlen = curlen;783if (nextlen == 0) {784max_count = 138, min_count = 3;785} else if (curlen == nextlen) {786max_count = 6, min_count = 3;787} else {788max_count = 7, min_count = 4;789}790}791}792793/* ===========================================================================794* Construct the Huffman tree for the bit lengths and return the index in795* bl_order of the last bit length code to send.796*/797local int build_bl_tree(deflate_state *s) {798int max_blindex; /* index of last bit length code of non zero freq */799800/* Determine the bit length frequencies for literal and distance trees */801scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);802scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);803804/* Build the bit length tree: */805build_tree(s, (tree_desc *)(&(s->bl_desc)));806/* opt_len now includes the length of the tree representations, except the807* lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts.808*/809810/* Determine the number of bit length codes to send. The pkzip format811* requires that at least 4 bit length codes be sent. (appnote.txt says812* 3 but the actual value used is 4.)813*/814for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {815if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;816}817/* Update opt_len to include the bit length tree and counts */818s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4;819Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",820s->opt_len, s->static_len));821822return max_blindex;823}824825/* ===========================================================================826* Send the header for a block using dynamic Huffman trees: the counts, the827* lengths of the bit length codes, the literal tree and the distance tree.828* IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.829*/830local void send_all_trees(deflate_state *s, int lcodes, int dcodes,831int blcodes) {832int rank; /* index in bl_order */833834Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");835Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,836"too many codes");837Tracev((stderr, "\nbl counts: "));838send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */839send_bits(s, dcodes - 1, 5);840send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */841for (rank = 0; rank < blcodes; rank++) {842Tracev((stderr, "\nbl code %2d ", bl_order[rank]));843send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);844}845Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));846847send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */848Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));849850send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */851Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));852}853854/* ===========================================================================855* Send a stored block856*/857void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,858ulg stored_len, int last) {859send_bits(s, (STORED_BLOCK<<1) + last, 3); /* send block type */860bi_windup(s); /* align on byte boundary */861put_short(s, (ush)stored_len);862put_short(s, (ush)~stored_len);863if (stored_len)864zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);865s->pending += stored_len;866#ifdef ZLIB_DEBUG867s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;868s->compressed_len += (stored_len + 4) << 3;869s->bits_sent += 2*16;870s->bits_sent += stored_len << 3;871#endif872}873874/* ===========================================================================875* Flush the bits in the bit buffer to pending output (leaves at most 7 bits)876*/877void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) {878bi_flush(s);879}880881/* ===========================================================================882* Send one empty static block to give enough lookahead for inflate.883* This takes 10 bits, of which 7 may remain in the bit buffer.884*/885void ZLIB_INTERNAL _tr_align(deflate_state *s) {886send_bits(s, STATIC_TREES<<1, 3);887send_code(s, END_BLOCK, static_ltree);888#ifdef ZLIB_DEBUG889s->compressed_len += 10L; /* 3 for block type, 7 for EOB */890#endif891bi_flush(s);892}893894/* ===========================================================================895* Send the block data compressed using the given Huffman trees896*/897local void compress_block(deflate_state *s, const ct_data *ltree,898const ct_data *dtree) {899unsigned dist; /* distance of matched string */900int lc; /* match length or unmatched char (if dist == 0) */901unsigned sx = 0; /* running index in symbol buffers */902unsigned code; /* the code to send */903int extra; /* number of extra bits to send */904905if (s->sym_next != 0) do {906#ifdef LIT_MEM907dist = s->d_buf[sx];908lc = s->l_buf[sx++];909#else910dist = s->sym_buf[sx++] & 0xff;911dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;912lc = s->sym_buf[sx++];913#endif914if (dist == 0) {915send_code(s, lc, ltree); /* send a literal byte */916Tracecv(isgraph(lc), (stderr," '%c' ", lc));917} else {918/* Here, lc is the match length - MIN_MATCH */919code = _length_code[lc];920send_code(s, code + LITERALS + 1, ltree); /* send length code */921extra = extra_lbits[code];922if (extra != 0) {923lc -= base_length[code];924send_bits(s, lc, extra); /* send the extra length bits */925}926dist--; /* dist is now the match distance - 1 */927code = d_code(dist);928Assert (code < D_CODES, "bad d_code");929930send_code(s, code, dtree); /* send the distance code */931extra = extra_dbits[code];932if (extra != 0) {933dist -= (unsigned)base_dist[code];934send_bits(s, dist, extra); /* send the extra distance bits */935}936} /* literal or match pair ? */937938/* Check for no overlay of pending_buf on needed symbols */939#ifdef LIT_MEM940Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow");941#else942Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");943#endif944945} while (sx < s->sym_next);946947send_code(s, END_BLOCK, ltree);948}949950/* ===========================================================================951* Check if the data type is TEXT or BINARY, using the following algorithm:952* - TEXT if the two conditions below are satisfied:953* a) There are no non-portable control characters belonging to the954* "block list" (0..6, 14..25, 28..31).955* b) There is at least one printable character belonging to the956* "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).957* - BINARY otherwise.958* - The following partially-portable control characters form a959* "gray list" that is ignored in this detection algorithm:960* (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).961* IN assertion: the fields Freq of dyn_ltree are set.962*/963local int detect_data_type(deflate_state *s) {964/* block_mask is the bit mask of block-listed bytes965* set bits 0..6, 14..25, and 28..31966* 0xf3ffc07f = binary 11110011111111111100000001111111967*/968unsigned long block_mask = 0xf3ffc07fUL;969int n;970971/* Check for non-textual ("block-listed") bytes. */972for (n = 0; n <= 31; n++, block_mask >>= 1)973if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))974return Z_BINARY;975976/* Check for textual ("allow-listed") bytes. */977if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0978|| s->dyn_ltree[13].Freq != 0)979return Z_TEXT;980for (n = 32; n < LITERALS; n++)981if (s->dyn_ltree[n].Freq != 0)982return Z_TEXT;983984/* There are no "block-listed" or "allow-listed" bytes:985* this stream either is empty or has tolerated ("gray-listed") bytes only.986*/987return Z_BINARY;988}989990/* ===========================================================================991* Determine the best encoding for the current block: dynamic trees, static992* trees or store, and write out the encoded block.993*/994void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,995ulg stored_len, int last) {996ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */997int max_blindex = 0; /* index of last bit length code of non zero freq */998999/* Build the Huffman trees unless a stored block is forced */1000if (s->level > 0) {10011002/* Check if the file is binary or text */1003if (s->strm->data_type == Z_UNKNOWN)1004s->strm->data_type = detect_data_type(s);10051006/* Construct the literal and distance trees */1007build_tree(s, (tree_desc *)(&(s->l_desc)));1008Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,1009s->static_len));10101011build_tree(s, (tree_desc *)(&(s->d_desc)));1012Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,1013s->static_len));1014/* At this point, opt_len and static_len are the total bit lengths of1015* the compressed block data, excluding the tree representations.1016*/10171018/* Build the bit length tree for the above two trees, and get the index1019* in bl_order of the last bit length code to send.1020*/1021max_blindex = build_bl_tree(s);10221023/* Determine the best encoding. Compute the block lengths in bytes. */1024opt_lenb = (s->opt_len + 3 + 7) >> 3;1025static_lenb = (s->static_len + 3 + 7) >> 3;10261027Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",1028opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,1029s->sym_next / 3));10301031#ifndef FORCE_STATIC1032if (static_lenb <= opt_lenb || s->strategy == Z_FIXED)1033#endif1034opt_lenb = static_lenb;10351036} else {1037Assert(buf != (char*)0, "lost buf");1038opt_lenb = static_lenb = stored_len + 5; /* force a stored block */1039}10401041#ifdef FORCE_STORED1042if (buf != (char*)0) { /* force stored block */1043#else1044if (stored_len + 4 <= opt_lenb && buf != (char*)0) {1045/* 4: two words for the lengths */1046#endif1047/* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.1048* Otherwise we can't have processed more than WSIZE input bytes since1049* the last block flush, because compression would have been1050* successful. If LIT_BUFSIZE <= WSIZE, it is never too late to1051* transform a block into a stored block.1052*/1053_tr_stored_block(s, buf, stored_len, last);10541055} else if (static_lenb == opt_lenb) {1056send_bits(s, (STATIC_TREES<<1) + last, 3);1057compress_block(s, (const ct_data *)static_ltree,1058(const ct_data *)static_dtree);1059#ifdef ZLIB_DEBUG1060s->compressed_len += 3 + s->static_len;1061#endif1062} else {1063send_bits(s, (DYN_TREES<<1) + last, 3);1064send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1,1065max_blindex + 1);1066compress_block(s, (const ct_data *)s->dyn_ltree,1067(const ct_data *)s->dyn_dtree);1068#ifdef ZLIB_DEBUG1069s->compressed_len += 3 + s->opt_len;1070#endif1071}1072Assert (s->compressed_len == s->bits_sent, "bad compressed size");1073/* The above check is made mod 2^32, for files larger than 512 MB1074* and uLong implemented on 32 bits.1075*/1076init_block(s);10771078if (last) {1079bi_windup(s);1080#ifdef ZLIB_DEBUG1081s->compressed_len += 7; /* align on byte boundary */1082#endif1083}1084Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3,1085s->compressed_len - 7*last));1086}10871088/* ===========================================================================1089* Save the match info and tally the frequency counts. Return true if1090* the current block must be flushed.1091*/1092int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {1093#ifdef LIT_MEM1094s->d_buf[s->sym_next] = (ush)dist;1095s->l_buf[s->sym_next++] = (uch)lc;1096#else1097s->sym_buf[s->sym_next++] = (uch)dist;1098s->sym_buf[s->sym_next++] = (uch)(dist >> 8);1099s->sym_buf[s->sym_next++] = (uch)lc;1100#endif1101if (dist == 0) {1102/* lc is the unmatched char */1103s->dyn_ltree[lc].Freq++;1104} else {1105s->matches++;1106/* Here, lc is the match length - MIN_MATCH */1107dist--; /* dist = match distance - 1 */1108Assert((ush)dist < (ush)MAX_DIST(s) &&1109(ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&1110(ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");11111112s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++;1113s->dyn_dtree[d_code(dist)].Freq++;1114}1115return (s->sym_next == s->sym_end);1116}111711181119