X-Git-Url: https://git.lizzy.rs/?a=blobdiff_plain;f=trees.c;h=f73fd99c37bdb2aa4a3aee91237f2e7de6a040ce;hb=HEAD;hp=eb69d49a5648ab54591029e64332d91134ea80b1;hpb=64b2e892035cf6ea98800c54dce0d63730d50272;p=zlib.git diff --git a/trees.c b/trees.c index eb69d49..f73fd99 100644 --- a/trees.c +++ b/trees.c @@ -1,6 +1,7 @@ /* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h + * Copyright (C) 1995-2021 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h */ /* @@ -29,11 +30,13 @@ * Addison-Wesley, 1983. ISBN 0-201-06672-6. */ -/* $Id: trees.c,v 1.4 1995/05/01 16:53:44 jloup Exp $ */ +/* @(#) $Id$ */ + +/* #define GEN_TREES_H */ #include "deflate.h" -#ifdef DEBUG +#ifdef ZLIB_DEBUG # include #endif @@ -56,35 +59,34 @@ #define REPZ_11_138 18 /* repeat a zero length 11-138 times (7 bits of repeat count) */ -local int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ +local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; -local int extra_dbits[D_CODES] /* extra bits for each distance code */ +local const int extra_dbits[D_CODES] /* extra bits for each distance code */ = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; -local int extra_blbits[BL_CODES]/* extra bits for each bit length code */ +local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; -local uch bl_order[BL_CODES] +local const uch bl_order[BL_CODES] = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; /* The lengths of the bit length codes are sent in order of decreasing * probability, to avoid transmitting the lengths for unused bit length codes. */ -#define Buf_size (8 * 2*sizeof(char)) -/* Number of bits used within bi_buf. (bi_buf might be implemented on - * more than 16 bits on some systems.) - */ - /* =========================================================================== * Local data. These are initialized only once. - * To do: initialize at compile time to be completely reentrant. ??? */ +#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ + +#if defined(GEN_TREES_H) || !defined(STDC) +/* non ANSI compilers may not accept trees.h */ + local ct_data static_ltree[L_CODES+2]; /* The static literal tree. Since the bit lengths are imposed, there is no * need for the L_CODES extra codes used during heap construction. However - * The codes 286 and 287 are needed to build a canonical tree (see ct_init + * The codes 286 and 287 are needed to build a canonical tree (see _tr_init * below). */ @@ -93,13 +95,13 @@ local ct_data static_dtree[D_CODES]; * 5 bits.) */ -local uch dist_code[512]; -/* distance codes. The first 256 values correspond to the distances +uch _dist_code[DIST_CODE_LEN]; +/* Distance codes. The first 256 values correspond to the distances * 3 .. 258, the last 256 values correspond to the top 8 bits of * the 15 bit distances. */ -local uch length_code[MAX_MATCH-MIN_MATCH+1]; +uch _length_code[MAX_MATCH-MIN_MATCH+1]; /* length code for each normalized match length (0 == MIN_MATCH) */ local int base_length[LENGTH_CODES]; @@ -108,73 +110,129 @@ local int base_length[LENGTH_CODES]; local int base_dist[D_CODES]; /* First normalized distance for each code (0 = distance of 1) */ +#else +# include "trees.h" +#endif /* GEN_TREES_H */ + struct static_tree_desc_s { - ct_data *static_tree; /* static tree or NULL */ - int *extra_bits; /* extra bits for each code or NULL */ + const ct_data *static_tree; /* static tree or NULL */ + const intf *extra_bits; /* extra bits for each code or NULL */ int extra_base; /* base index for extra_bits */ int elems; /* max number of elements in the tree */ int max_length; /* max bit length for the codes */ }; -local static_tree_desc static_l_desc = +local const static_tree_desc static_l_desc = {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; -local static_tree_desc static_d_desc = +local const static_tree_desc static_d_desc = {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; -local static_tree_desc static_bl_desc = -{(ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; +local const static_tree_desc static_bl_desc = +{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; /* =========================================================================== * Local (static) routines in this file. */ -local void ct_static_init __P((void)); -local void init_block __P((deflate_state *s)); -local void pqdownheap __P((deflate_state *s, ct_data *tree, int k)); -local void gen_bitlen __P((deflate_state *s, tree_desc *desc)); -local void gen_codes __P((ct_data *tree, int max_code, ush bl_count[])); -local void build_tree __P((deflate_state *s, tree_desc *desc)); -local void scan_tree __P((deflate_state *s, ct_data *tree, int max_code)); -local void send_tree __P((deflate_state *s, ct_data *tree, int max_code)); -local int build_bl_tree __P((deflate_state *s)); -local void send_all_trees __P((deflate_state *s, int lcodes, int dcodes, - int blcodes)); -local void compress_block __P((deflate_state *s, ct_data *ltree, - ct_data *dtree)); -local void set_data_type __P((deflate_state *s)); -local void send_bits __P((deflate_state *s, int value, int length)); -local unsigned bi_reverse __P((unsigned value, int length)); -local void bi_windup __P((deflate_state *s)); -local void copy_block __P((deflate_state *s, char *buf, unsigned len, - int header)); - -#ifndef DEBUG +local void tr_static_init OF((void)); +local void init_block OF((deflate_state *s)); +local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); +local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); +local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); +local void build_tree OF((deflate_state *s, tree_desc *desc)); +local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); +local int build_bl_tree OF((deflate_state *s)); +local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, + int blcodes)); +local void compress_block OF((deflate_state *s, const ct_data *ltree, + const ct_data *dtree)); +local int detect_data_type OF((deflate_state *s)); +local unsigned bi_reverse OF((unsigned code, int len)); +local void bi_windup OF((deflate_state *s)); +local void bi_flush OF((deflate_state *s)); + +#ifdef GEN_TREES_H +local void gen_trees_header OF((void)); +#endif + +#ifndef ZLIB_DEBUG # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) /* Send a code of the given tree. c and tree must not have side effects */ -#else /* DEBUG */ +#else /* !ZLIB_DEBUG */ # define send_code(s, c, tree) \ - { if (verbose>1) fprintf(stderr,"\ncd %3d ",(c)); \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ send_bits(s, tree[c].Code, tree[c].Len); } #endif -#define d_code(dist) \ - ((dist) < 256 ? dist_code[dist] : dist_code[256+((dist)>>7)]) -/* Mapping from a distance to a distance code. dist is the distance - 1 and - * must not have side effects. dist_code[256] and dist_code[257] are never - * used. +/* =========================================================================== + * Output a short LSB first on the stream. + * IN assertion: there is enough room in pendingBuf. + */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. */ +#ifdef ZLIB_DEBUG +local void send_bits OF((deflate_state *s, int value, int length)); + +local void send_bits(s, value, length) + deflate_state *s; + int value; /* value to send */ + int length; /* number of bits */ +{ + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + -#define MAX(a,b) (a >= b ? a : b) /* the arguments must not have side effects */ /* =========================================================================== * Initialize the various 'constant' tables. - * To do: do this at compile time. */ -local void ct_static_init() +local void tr_static_init() { +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; int n; /* iterates over tree elements */ int bits; /* bit counter */ int length; /* length value */ @@ -183,38 +241,49 @@ local void ct_static_init() ush bl_count[MAX_BITS+1]; /* number of codes at each bit length for an optimal tree */ + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + /* Initialize the mapping length (0..255) -> length code (0..28) */ length = 0; for (code = 0; code < LENGTH_CODES-1; code++) { base_length[code] = length; for (n = 0; n < (1< dist code (0..29) */ dist = 0; for (code = 0 ; code < 16; code++) { base_dist[code] = dist; for (n = 0; n < (1<>= 7; /* from now on, all distances are divided by 128 */ for ( ; code < D_CODES; code++) { base_dist[code] = dist << 7; for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { - dist_code[256 + dist++] = (uch)code; + _dist_code[256 + dist++] = (uch)code; } } - Assert (dist == 256, "ct_static_init: 256+dist != 512"); + Assert (dist == 256, "tr_static_init: 256+dist != 512"); /* Construct the codes of the static literal tree */ for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; @@ -232,21 +301,85 @@ local void ct_static_init() /* The static distance tree is trivial: */ for (n = 0; n < D_CODES; n++) { static_dtree[n].Len = 5; - static_dtree[n].Code = bi_reverse(n, 5); + static_dtree[n].Code = bi_reverse((unsigned)n, 5); } + static_init_done = 1; + +# ifdef GEN_TREES_H + gen_trees_header(); +# endif +#endif /* defined(GEN_TREES_H) || !defined(STDC) */ } /* =========================================================================== - * Initialize the tree data structures for a new zlib stream. + * Genererate the file trees.h describing the static trees. */ -void ct_init(s) - deflate_state *s; +#ifdef GEN_TREES_H +# ifndef ZLIB_DEBUG +# include +# endif + +# define SEPARATOR(i, last, width) \ + ((i) == (last)? "\n};\n\n" : \ + ((i) % (width) == (width)-1 ? ",\n" : ", ")) + +void gen_trees_header() { - if (static_dtree[0].Len == 0) { - ct_static_init(); /* To do: at compile time */ + FILE *header = fopen("trees.h", "w"); + int i; + + Assert (header != NULL, "Can't open trees.h"); + fprintf(header, + "/* header created automatically with -DGEN_TREES_H */\n\n"); + + fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); + for (i = 0; i < L_CODES+2; i++) { + fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, + static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); } - s->compressed_len = 0L; + fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, + static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); + } + + fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); + for (i = 0; i < DIST_CODE_LEN; i++) { + fprintf(header, "%2u%s", _dist_code[i], + SEPARATOR(i, DIST_CODE_LEN-1, 20)); + } + + fprintf(header, + "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); + for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { + fprintf(header, "%2u%s", _length_code[i], + SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); + } + + fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); + for (i = 0; i < LENGTH_CODES; i++) { + fprintf(header, "%1u%s", base_length[i], + SEPARATOR(i, LENGTH_CODES-1, 20)); + } + + fprintf(header, "local const int base_dist[D_CODES] = {\n"); + for (i = 0; i < D_CODES; i++) { + fprintf(header, "%5u%s", base_dist[i], + SEPARATOR(i, D_CODES-1, 10)); + } + + fclose(header); +} +#endif /* GEN_TREES_H */ + +/* =========================================================================== + * Initialize the tree data structures for a new zlib stream. + */ +void ZLIB_INTERNAL _tr_init(s) + deflate_state *s; +{ + tr_static_init(); s->l_desc.dyn_tree = s->dyn_ltree; s->l_desc.stat_desc = &static_l_desc; @@ -259,7 +392,8 @@ void ct_init(s) s->bi_buf = 0; s->bi_valid = 0; -#ifdef DEBUG +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; s->bits_sent = 0L; #endif @@ -282,7 +416,7 @@ local void init_block(s) s->dyn_ltree[END_BLOCK].Freq = 1; s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; + s->sym_next = s->matches = 0; } #define SMALLEST 1 @@ -324,9 +458,9 @@ local void pqdownheap(s, tree, k) while (j <= s->heap_len) { /* Set j to the smallest of the two sons: */ if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { - j++; - } + smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { + j++; + } /* Exit if v is smaller than both sons */ if (smaller(tree, v, s->heap[j], s->depth)) break; @@ -353,12 +487,12 @@ local void gen_bitlen(s, desc) deflate_state *s; tree_desc *desc; /* the tree descriptor */ { - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - ct_data *stree = desc->stat_desc->static_tree; - int *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; int h; /* heap index */ int n, m; /* iterate over the tree elements */ int bits; /* bit length */ @@ -386,12 +520,12 @@ local void gen_bitlen(s, desc) xbits = 0; if (n >= base) xbits = extra[n-base]; f = tree[n].Freq; - s->opt_len += (ulg)f * (bits + xbits); - if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits); + s->opt_len += (ulg)f * (unsigned)(bits + xbits); + if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); } if (overflow == 0) return; - Trace((stderr,"\nbit length overflow\n")); + Tracev((stderr,"\nbit length overflow\n")); /* This happens for example on obj2 and pic of the Calgary corpus */ /* Find the first bit length which could increase: */ @@ -417,10 +551,9 @@ local void gen_bitlen(s, desc) while (n != 0) { m = s->heap[--h]; if (m > max_code) continue; - if (tree[m].Len != (unsigned) bits) { - Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((long)bits - (long)tree[m].Len) - *(long)tree[m].Freq; + if ((unsigned) tree[m].Len != (unsigned) bits) { + Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); + s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; tree[m].Len = (ush)bits; } n--; @@ -439,10 +572,10 @@ local void gen_bitlen(s, desc) local void gen_codes (tree, max_code, bl_count) ct_data *tree; /* the tree to decorate */ int max_code; /* largest code with non zero frequency */ - ush bl_count[]; /* number of codes at each bit length */ + ushf *bl_count; /* number of codes at each bit length */ { ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - ush code = 0; /* running code value */ + unsigned code = 0; /* running code value */ int bits; /* bit index */ int n; /* code index */ @@ -450,7 +583,8 @@ local void gen_codes (tree, max_code, bl_count) * without bit reversal. */ for (bits = 1; bits <= MAX_BITS; bits++) { - next_code[bits] = code = (code + bl_count[bits-1]) << 1; + code = (code + bl_count[bits-1]) << 1; + next_code[bits] = (ush)code; } /* Check that the bit counts in bl_count are consistent. The last code * must be all ones. @@ -463,9 +597,9 @@ local void gen_codes (tree, max_code, bl_count) int len = tree[n].Len; if (len == 0) continue; /* Now reverse the bits */ - tree[n].Code = bi_reverse(next_code[len]++, len); + tree[n].Code = (ush)bi_reverse(next_code[len]++, len); - Tracec(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); } } @@ -482,13 +616,12 @@ local void build_tree(s, desc) deflate_state *s; tree_desc *desc; /* the tree descriptor */ { - ct_data *tree = desc->dyn_tree; - ct_data *stree = desc->stat_desc->static_tree; - int elems = desc->stat_desc->elems; + ct_data *tree = desc->dyn_tree; + const ct_data *stree = desc->stat_desc->static_tree; + int elems = desc->stat_desc->elems; int n, m; /* iterate over heap elements */ int max_code = -1; /* largest code with non zero frequency */ - int node = elems; /* next internal node of the tree */ - int new; /* new node being created */ + int node; /* new node being created */ /* Construct the initial heap, with least frequent element in * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. @@ -511,11 +644,11 @@ local void build_tree(s, desc) * two codes of non zero frequency. */ while (s->heap_len < 2) { - new = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); - tree[new].Freq = 1; - s->depth[new] = 0; - s->opt_len--; if (stree) s->static_len -= stree[new].Len; - /* new is 0 or 1 so it does not have extra bits */ + node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ } desc->max_code = max_code; @@ -527,6 +660,7 @@ local void build_tree(s, desc) /* Construct the Huffman tree by repeatedly combining the least two * frequent nodes. */ + node = elems; /* next internal node of the tree */ do { pqremove(s, tree, n); /* n = node of least frequency */ m = s->heap[SMALLEST]; /* m = node of next least frequency */ @@ -536,7 +670,8 @@ local void build_tree(s, desc) /* Create a new node father of n and m */ tree[node].Freq = tree[n].Freq + tree[m].Freq; - s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1); + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); tree[n].Dad = tree[m].Dad = (ush)node; #ifdef DUMP_BL_TREE if (tree == s->bl_tree) { @@ -684,9 +819,9 @@ local int build_bl_tree(s) if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; } /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*(max_blindex+1) + 5+5+4; + s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", - s->opt_len, s->static_len)); + s->opt_len, s->static_len)); return max_blindex; } @@ -725,81 +860,102 @@ local void send_all_trees(s, lcodes, dcodes, blcodes) /* =========================================================================== * Send a stored block */ -void ct_stored_block(s, buf, stored_len, eof) +void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) deflate_state *s; - char *buf; /* input block */ + charf *buf; /* input block */ ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ + int last; /* one if this is the last block for a file */ { - send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ - s->compressed_len = (s->compressed_len + 3 + 7) & ~7L; + send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + if (stored_len) + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len<<3; +#endif +} - copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(s) + deflate_state *s; +{ + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. + */ +void ZLIB_INTERNAL _tr_align(s) + deflate_state *s; +{ + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); } /* =========================================================================== * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and output the encoded block to the zip file. This function - * returns the total compressed length for the file so far. + * trees or store, and write out the encoded block. */ -ulg ct_flush_block(s, buf, stored_len, eof) +void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) deflate_state *s; - char *buf; /* input block, or NULL if too old */ + charf *buf; /* input block, or NULL if too old */ ulg stored_len; /* length of input block */ - int eof; /* true if this is the last block for a file */ + int last; /* one if this is the last block for a file */ { ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex; /* index of last bit length code of non zero freq */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ - /* Check if the file is ascii or binary */ - if (s->data_type == UNKNOWN) set_data_type(s); + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ - /* Determine the best encoding. Compute first the block length in bytes */ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len+3+7)>>3; + static_lenb = (s->static_len+3+7)>>3; - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->sym_next / 3)); - /* If compression failed and this is the first and last block, - * and if the .zip file can be seeked (to rewrite the local header), - * the whole file is transformed into a stored file: - */ -#ifdef STORED_FILE_OK -# ifdef FORCE_STORED_FILE - if (eof && compressed_len == 0L) { /* force stored file */ -# else - if (stored_len <= opt_lenb && eof && s->compressed_len==0L && seekable()) { -# endif - /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ - if (buf == (char*)0) error ("block vanished"); + if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - copy_block(buf, (unsigned)stored_len, 0); /* without header */ - s->compressed_len = stored_len << 3; - s->method = STORED; - } else -#endif /* STORED_FILE_OK */ + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } #ifdef FORCE_STORED if (buf != (char*)0) { /* force stored block */ @@ -813,83 +969,72 @@ ulg ct_flush_block(s, buf, stored_len, eof) * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to * transform a block into a stored block. */ - ct_stored_block(s, buf, stored_len, eof); + _tr_stored_block(s, buf, stored_len, last); #ifdef FORCE_STATIC } else if (static_lenb >= 0) { /* force static trees */ #else - } else if (static_lenb == opt_lenb) { + } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { #endif - send_bits(s, (STATIC_TREES<<1)+eof, 3); - compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); + send_bits(s, (STATIC_TREES<<1)+last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG s->compressed_len += 3 + s->static_len; +#endif } else { - send_bits(s, (DYN_TREES<<1)+eof, 3); + send_bits(s, (DYN_TREES<<1)+last, 3); send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); + max_blindex+1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG s->compressed_len += 3 + s->opt_len; +#endif } Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. + */ init_block(s); - if (eof) { + if (last) { bi_windup(s); +#ifdef ZLIB_DEBUG s->compressed_len += 7; /* align on byte boundary */ +#endif } Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*eof)); - - return s->compressed_len >> 3; + s->compressed_len-7*last)); } /* =========================================================================== * Save the match info and tally the frequency counts. Return true if * the current block must be flushed. */ -int ct_tally (s, dist, lc) +int ZLIB_INTERNAL _tr_tally (s, dist, lc) deflate_state *s; - int dist; /* distance of matched string */ - int lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ + unsigned dist; /* distance of matched string */ + unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ { - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; + s->sym_buf[s->sym_next++] = dist; + s->sym_buf[s->sym_next++] = dist >> 8; + s->sym_buf[s->sym_next++] = lc; if (dist == 0) { /* lc is the unmatched char */ s->dyn_ltree[lc].Freq++; } else { - s->matches++; + s->matches++; /* Here, lc is the match length - MIN_MATCH */ dist--; /* dist = match distance - 1 */ Assert((ush)dist < (ush)MAX_DIST(s) && (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && - (ush)d_code(dist) < (ush)D_CODES, "ct_tally: bad match"); + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - s->dyn_ltree[length_code[lc]+LITERALS+1].Freq++; + s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; s->dyn_dtree[d_code(dist)].Freq++; } - - /* Try to guess if it is profitable to stop the current block here */ - if (s->level > 2 && (s->last_lit & 0xfff) == 0) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)s->strstart - s->block_start; - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. - */ + return (s->sym_next == s->sym_end); } /* =========================================================================== @@ -897,104 +1042,91 @@ int ct_tally (s, dist, lc) */ local void compress_block(s, ltree, dtree) deflate_state *s; - ct_data *ltree; /* literal tree */ - ct_data *dtree; /* distance tree */ + const ct_data *ltree; /* literal tree */ + const ct_data *dtree; /* distance tree */ { unsigned dist; /* distance of matched string */ int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ + unsigned sx = 0; /* running index in sym_buf */ unsigned code; /* the code to send */ int extra; /* number of extra bits to send */ - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; + if (s->sym_next != 0) do { + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; if (dist == 0) { send_code(s, lc, ltree); /* send a literal byte */ Tracecv(isgraph(lc), (stderr," '%c' ", lc)); } else { /* Here, lc is the match length - MIN_MATCH */ - code = length_code[lc]; + code = _length_code[lc]; send_code(s, code+LITERALS+1, ltree); /* send the length code */ extra = extra_lbits[code]; if (extra != 0) { lc -= base_length[code]; send_bits(s, lc, extra); /* send the extra length bits */ } - dist--; /* dist is now the match distance - 1 */ + dist--; /* dist is now the match distance - 1 */ code = d_code(dist); Assert (code < D_CODES, "bad d_code"); send_code(s, code, dtree); /* send the distance code */ extra = extra_dbits[code]; if (extra != 0) { - dist -= base_dist[code]; + dist -= (unsigned)base_dist[code]; send_bits(s, dist, extra); /* send the extra distance bits */ } } /* literal or match pair ? */ - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow"); + /* Check that the overlay between pending_buf and sym_buf is ok: */ + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); - } while (lx < s->last_lit); + } while (sx < s->sym_next); send_code(s, END_BLOCK, ltree); } /* =========================================================================== - * Set the data type to ASCII or BINARY, using a crude approximation: - * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise. - * IN assertion: the fields freq of dyn_ltree are set and the total of all - * frequencies does not exceed 64K (to fit in an int on 16 bit machines). + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "block list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. */ -local void set_data_type(s) +local int detect_data_type(s) deflate_state *s; { - int n = 0; - unsigned ascii_freq = 0; - unsigned bin_freq = 0; - while (n < 7) bin_freq += s->dyn_ltree[n++].Freq; - while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq; - while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq; - s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? BINARY : ASCII); -} - -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. - */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - -/* =========================================================================== - * Send a value on a given number of bits. - * IN assertion: length <= 16 and value fits in length bits. - */ -local void send_bits(s, value, length) - deflate_state *s; - int value; /* value to send */ - int length; /* number of bits */ -{ -#ifdef DEBUG - Tracev((stderr," l %2d v %4x ", length, value)); - Assert(length > 0 && length <= 15, "invalid length"); - s->bits_sent += (ulg)length; -#endif - /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) - * unused bits in value. + /* block_mask is the bit mask of block-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 */ - if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (value << s->bi_valid); - put_short(s, s->bi_buf); - s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); - s->bi_valid += length - Buf_size; - } else { - s->bi_buf |= value << s->bi_valid; - s->bi_valid += length; - } + unsigned long block_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("block-listed") bytes. */ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("allow-listed") bytes. */ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "block-listed" or "allow-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; } /* =========================================================================== @@ -1015,46 +1147,36 @@ local unsigned bi_reverse(code, len) } /* =========================================================================== - * Write out any remaining bits in an incomplete byte. + * Flush the bit buffer, keeping at most 7 bits in it. */ -local void bi_windup(s) +local void bi_flush(s) deflate_state *s; { - if (s->bi_valid > 8) { + if (s->bi_valid == 16) { put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef DEBUG - s->bits_sent = (s->bits_sent+7) & ~7; -#endif } /* =========================================================================== - * Copy a stored block, storing first the length and its - * one's complement if requested. + * Flush the bit buffer and align the output on a byte boundary */ -local void copy_block(s, buf, len, header) +local void bi_windup(s) deflate_state *s; - char *buf; /* the input data */ - unsigned len; /* its length */ - int header; /* true if block header must be written */ { - bi_windup(s); /* align on byte boundary */ - - if (header) { - put_short(s, (ush)len); - put_short(s, (ush)~len); -#ifdef DEBUG - s->bits_sent += 2*16; -#endif + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); } -#ifdef DEBUG - s->bits_sent += (ulg)len<<3; + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent+7) & ~7; #endif - while (len--) { - put_byte(s, *buf++); - } }