]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libflate/deflate.c
pc: add vmx device
[plan9front.git] / sys / src / libflate / deflate.c
1 #include <u.h>
2 #include <libc.h>
3 #include <flate.h>
4
5 typedef struct Chain    Chain;
6 typedef struct Chains   Chains;
7 typedef struct Dyncode  Dyncode;
8 typedef struct Huff     Huff;
9 typedef struct LZblock  LZblock;
10 typedef struct LZstate  LZstate;
11
12 enum
13 {
14         /*
15          * deflate format paramaters
16          */
17         DeflateUnc      = 0,                    /* uncompressed block */
18         DeflateFix      = 1,                    /* fixed huffman codes */
19         DeflateDyn      = 2,                    /* dynamic huffman codes */
20
21         DeflateEob      = 256,                  /* end of block code in lit/len book */
22         DeflateMaxBlock = 64*1024-1,            /* maximum size of uncompressed block */
23
24         DeflateMaxExp   = 10,                   /* maximum expansion for a block */
25
26         LenStart        = 257,                  /* start of length codes in litlen */
27         Nlitlen         = 288,                  /* number of litlen codes */
28         Noff            = 30,                   /* number of offset codes */
29         Nclen           = 19,                   /* number of codelen codes */
30
31         MaxOff          = 32*1024,
32         MinMatch        = 3,                    /* shortest match possible */
33         MaxMatch        = 258,                  /* longest match possible */
34
35         /*
36          * huffman code paramaters
37          */
38         MaxLeaf         = Nlitlen,
39         MaxHuffBits     = 16,                   /* max bits in a huffman code */
40         ChainMem        = 2 * (MaxHuffBits - 1) * MaxHuffBits,
41
42         /*
43          * coding of the lz parse
44          */
45         LenFlag         = 1 << 3,
46         LenShift        = 4,                    /* leaves enough space for MinMatchMaxOff */
47         MaxLitRun       = LenFlag - 1,
48
49         /*
50          * internal lz paramaters
51          */
52         DeflateOut      = 4096,                 /* output buffer size */
53         BlockSize       = 8192,                 /* attempted input read quanta */
54         DeflateBlock    = DeflateMaxBlock & ~(BlockSize - 1),
55         MinMatchMaxOff  = 4096,                 /* max profitable offset for small match;
56                                                  * assumes 8 bits for len, 5+10 for offset
57                                                  * DONT CHANGE WITHOUT CHANGING LZPARSE CONSTANTS
58                                                  */
59         HistSlop        = 512,                  /* must be at lead MaxMatch */
60         HistBlock       = 64*1024,
61         HistSize        = HistBlock + HistSlop,
62
63         HashLog         = 13,
64         HashSize        = 1<<HashLog,
65
66         MaxOffCode      = 256,                  /* biggest offset looked up in direct table */
67
68         EstLitBits      = 8,
69         EstLenBits      = 4,
70         EstOffBits      = 5,
71 };
72
73 /*
74  * knuth vol. 3 multiplicative hashing
75  * each byte x chosen according to rules
76  * 1/4 < x < 3/10, 1/3 x < < 3/7, 4/7 < x < 2/3, 7/10 < x < 3/4
77  * with reasonable spread between the bytes & their complements
78  *
79  * the 3 byte value appears to be as almost good as the 4 byte value,
80  * and might be faster on some machines
81  */
82 /*
83 #define hashit(c)       (((ulong)(c) * 0x6b43a9) >> (24 - HashLog))
84 */
85 #define hashit(c)       ((((ulong)(c) & 0xffffff) * 0x6b43a9b5) >> (32 - HashLog))
86
87 /*
88  * lempel-ziv style compression state
89  */
90 struct LZstate
91 {
92         uchar   hist[HistSize];
93         ulong   pos;                            /* current location in history buffer */
94         ulong   avail;                          /* data available after pos */
95         int     eof;
96         ushort  hash[HashSize];                 /* hash chains */
97         ushort  nexts[MaxOff];
98         int     now;                            /* pos in hash chains */
99         int     dot;                            /* dawn of time in history */
100         int     prevlen;                        /* lazy matching state */
101         int     prevoff;
102         int     maxcheck;                       /* compressor tuning */
103
104         uchar   obuf[DeflateOut];
105         uchar   *out;                           /* current position in the output buffer */
106         uchar   *eout;
107         ulong   bits;                           /* bit shift register */
108         int     nbits;
109         int     rbad;                           /* got an error reading the buffer */
110         int     wbad;                           /* got an error writing the buffer */
111         int     (*w)(void*, void*, int);
112         void    *wr;
113
114         ulong   totr;                           /* total input size */
115         ulong   totw;                           /* total output size */
116         int     debug;
117 };
118
119 struct LZblock
120 {
121         ushort  parse[DeflateMaxBlock / 2 + 1];
122         int     lastv;                          /* value being constucted for parse */
123         ulong   litlencount[Nlitlen];
124         ulong   offcount[Noff];
125         ushort  *eparse;                        /* limit for parse table */
126         int     bytes;                          /* consumed from the input */
127         int     excost;                         /* cost of encoding extra len & off bits */
128 };
129
130 /*
131  * huffman code table
132  */
133 struct Huff
134 {
135         short   bits;                           /* length of the code */
136         ushort  encode;                         /* the code */
137 };
138
139 /*
140  * encoding of dynamic huffman trees
141  */
142 struct Dyncode
143 {
144         int     nlit;
145         int     noff;
146         int     nclen;
147         int     ncode;
148         Huff    codetab[Nclen];
149         uchar   codes[Nlitlen+Noff];
150         uchar   codeaux[Nlitlen+Noff];
151 };
152
153 static  int     deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int));
154 static  int     lzcomp(LZstate*, LZblock*, uchar*, ushort*, int finish);
155 static  void    wrblock(LZstate*, int, ushort*, ushort*, Huff*, Huff*);
156 static  int     bitcost(Huff*, ulong*, int);
157 static  int     huffcodes(Dyncode*, Huff*, Huff*);
158 static  void    wrdyncode(LZstate*, Dyncode*);
159 static  void    lzput(LZstate*, ulong bits, int nbits);
160 static  void    lzflushbits(LZstate*);
161 static  void    lzflush(LZstate *lz);
162 static  void    lzwrite(LZstate *lz, void *buf, int n);
163
164 static  int     hufftabinit(Huff*, int, ulong*, int);
165 static  int     mkgzprecode(Huff*, ulong *, int, int);
166
167 static  int     mkprecode(Huff*, ulong *, int, int, ulong*);
168 static  void    nextchain(Chains*, int);
169 static  void    leafsort(ulong*, ushort*, int, int);
170
171 /* conversion from len to code word */
172 static int lencode[MaxMatch];
173
174 /*
175  * conversion from off to code word
176  * off <= MaxOffCode ? offcode[off] : bigoffcode[off >> 7]
177 */
178 static int offcode[MaxOffCode];
179 static int bigoffcode[256];
180
181 /* litlen code words LenStart-285 extra bits */
182 static int litlenbase[Nlitlen-LenStart];
183 static int litlenextra[Nlitlen-LenStart] =
184 {
185 /* 257 */       0, 0, 0,
186 /* 260 */       0, 0, 0, 0, 0, 1, 1, 1, 1, 2,
187 /* 270 */       2, 2, 2, 3, 3, 3, 3, 4, 4, 4,
188 /* 280 */       4, 5, 5, 5, 5, 0, 0, 0
189 };
190
191 /* offset code word extra bits */
192 static int offbase[Noff];
193 static int offextra[] =
194 {
195         0,  0,  0,  0,  1,  1,  2,  2,  3,  3,
196         4,  4,  5,  5,  6,  6,  7,  7,  8,  8,
197         9,  9,  10, 10, 11, 11, 12, 12, 13, 13,
198         0,  0,
199 };
200
201 /* order code lengths */
202 static int clenorder[Nclen] =
203 {
204         16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
205 };
206
207 /* static huffman tables */
208 static  Huff    litlentab[Nlitlen];
209 static  Huff    offtab[Noff];
210 static  Huff    hofftab[Noff];
211
212 /* bit reversal for brain dead endian swap in huffman codes */
213 static  uchar   revtab[256];
214 static  ulong   nlits;
215 static  ulong   nmatches;
216
217 int
218 deflateinit(void)
219 {
220         ulong bitcount[MaxHuffBits];
221         int i, j, ci, n;
222
223         /* byte reverse table */
224         for(i=0; i<256; i++)
225                 for(j=0; j<8; j++)
226                         if(i & (1<<j))
227                                 revtab[i] |= 0x80 >> j;
228
229         /* static Litlen bit lengths */
230         for(i=0; i<144; i++)
231                 litlentab[i].bits = 8;
232         for(i=144; i<256; i++)
233                 litlentab[i].bits = 9;
234         for(i=256; i<280; i++)
235                 litlentab[i].bits = 7;
236         for(i=280; i<Nlitlen; i++)
237                 litlentab[i].bits = 8;
238
239         memset(bitcount, 0, sizeof(bitcount));
240         bitcount[8] += 144 - 0;
241         bitcount[9] += 256 - 144;
242         bitcount[7] += 280 - 256;
243         bitcount[8] += Nlitlen - 280;
244
245         if(!hufftabinit(litlentab, Nlitlen, bitcount, 9))
246                 return FlateInternal;
247
248         /* static offset bit lengths */
249         for(i = 0; i < Noff; i++)
250                 offtab[i].bits = 5;
251
252         memset(bitcount, 0, sizeof(bitcount));
253         bitcount[5] = Noff;
254
255         if(!hufftabinit(offtab, Noff, bitcount, 5))
256                 return FlateInternal;
257
258         bitcount[0] = 0;
259         bitcount[1] = 0;
260         if(!mkgzprecode(hofftab, bitcount, 2, MaxHuffBits))
261                 return FlateInternal;
262
263         /* conversion tables for lens & offs to codes */
264         ci = 0;
265         for(i = LenStart; i < 286; i++){
266                 n = ci + (1 << litlenextra[i - LenStart]);
267                 litlenbase[i - LenStart] = ci;
268                 for(; ci < n; ci++)
269                         lencode[ci] = i;
270         }
271         /* patch up special case for len MaxMatch */
272         lencode[MaxMatch-MinMatch] = 285;
273         litlenbase[285-LenStart] = MaxMatch-MinMatch;
274
275         ci = 0;
276         for(i = 0; i < 16; i++){
277                 n = ci + (1 << offextra[i]);
278                 offbase[i] = ci;
279                 for(; ci < n; ci++)
280                         offcode[ci] = i;
281         }
282
283         ci = ci >> 7;
284         for(; i < 30; i++){
285                 n = ci + (1 << (offextra[i] - 7));
286                 offbase[i] = ci << 7;
287                 for(; ci < n; ci++)
288                         bigoffcode[ci] = i;
289         }
290         return FlateOk;
291 }
292
293 static void
294 deflatereset(LZstate *lz, int level, int debug)
295 {
296         memset(lz->nexts, 0, sizeof lz->nexts);
297         memset(lz->hash, 0, sizeof lz->hash);
298         lz->totr = 0;
299         lz->totw = 0;
300         lz->pos = 0;
301         lz->avail = 0;
302         lz->out = lz->obuf;
303         lz->eout = &lz->obuf[DeflateOut];
304         lz->prevlen = MinMatch - 1;
305         lz->prevoff = 0;
306         lz->now = MaxOff + 1;
307         lz->dot = lz->now;
308         lz->bits = 0;
309         lz->nbits = 0;
310         lz->maxcheck = (1 << level);
311         lz->maxcheck -= lz->maxcheck >> 2;
312         if(lz->maxcheck < 2)
313                 lz->maxcheck = 2;
314         else if(lz->maxcheck > 1024)
315                 lz->maxcheck = 1024;
316
317         lz->debug = debug;
318 }
319
320 int
321 deflate(void *wr, int (*w)(void*, void*, int), void *rr, int (*r)(void*, void*, int), int level, int debug)
322 {
323         LZstate *lz;
324         LZblock *lzb;
325         int ok;
326
327         lz = malloc(sizeof *lz + sizeof *lzb);
328         if(lz == nil)
329                 return FlateNoMem;
330         lzb = (LZblock*)&lz[1];
331
332         deflatereset(lz, level, debug);
333         lz->w = w;
334         lz->wr = wr;
335         lz->wbad = 0;
336         lz->rbad = 0;
337         lz->eof = 0;
338         ok = FlateOk;
339         while(!lz->eof || lz->avail){
340                 ok = deflateb(lz, lzb, rr, r);
341                 if(ok != FlateOk)
342                         break;
343         }
344         if(ok == FlateOk && lz->rbad)
345                 ok = FlateInputFail;
346         if(ok == FlateOk && lz->wbad)
347                 ok = FlateOutputFail;
348         free(lz);
349         return ok;
350 }
351
352 static int
353 deflateb(LZstate *lz, LZblock *lzb, void *rr, int (*r)(void*, void*, int))
354 {
355         Dyncode dyncode, hdyncode;
356         Huff dlitlentab[Nlitlen], dofftab[Noff], hlitlentab[Nlitlen];
357         ulong litcount[Nlitlen];
358         long nunc, ndyn, nfix, nhuff;
359         uchar *slop, *hslop;
360         ulong ep;
361         int i, n, m, mm, nslop;
362
363         memset(lzb->litlencount, 0, sizeof lzb->litlencount);
364         memset(lzb->offcount, 0, sizeof lzb->offcount);
365         lzb->litlencount[DeflateEob]++;
366
367         lzb->bytes = 0;
368         lzb->eparse = lzb->parse;
369         lzb->lastv = 0;
370         lzb->excost = 0;
371
372         slop = &lz->hist[lz->pos];
373         n = lz->avail;
374         while(n < DeflateBlock && (!lz->eof || lz->avail)){
375                 /*
376                  * fill the buffer as much as possible,
377                  * while leaving room for MaxOff history behind lz->pos,
378                  * and not reading more than we can handle.
379                  *
380                  * make sure we read at least HistSlop bytes.
381                  */
382                 if(!lz->eof){
383                         ep = lz->pos + lz->avail;
384                         if(ep >= HistBlock)
385                                 ep -= HistBlock;
386                         m = HistBlock - MaxOff - lz->avail;
387                         if(m > HistBlock - n)
388                                 m = HistBlock - n;
389                         if(m > (HistBlock + HistSlop) - ep)
390                                 m = (HistBlock + HistSlop) - ep;
391                         if(m & ~(BlockSize - 1))
392                                 m &= ~(BlockSize - 1);
393
394                         /*
395                          * be nice to the caller: stop reads that are too small.
396                          * can only get here when we've already filled the buffer some
397                          */
398                         if(m < HistSlop){
399                                 if(!m || !lzb->bytes)
400                                         return FlateInternal;
401                                 break;
402                         }
403
404                         mm = (*r)(rr, &lz->hist[ep], m);
405                         if(mm > 0){
406                                 /*
407                                  * wrap data to end if we're read it from the beginning
408                                  * this way, we don't have to wrap searches.
409                                  *
410                                  * wrap reads past the end to the beginning.
411                                  * this way, we can guarantee minimum size reads.
412                                  */
413                                 if(ep < HistSlop)
414                                         memmove(&lz->hist[ep + HistBlock], &lz->hist[ep], HistSlop - ep);
415                                 else if(ep + mm > HistBlock)
416                                         memmove(&lz->hist[0], &lz->hist[HistBlock], ep + mm - HistBlock);
417
418                                 lz->totr += mm;
419                                 n += mm;
420                                 lz->avail += mm;
421                         }else{
422                                 if(mm < 0)
423                                         lz->rbad = 1;
424                                 lz->eof = 1;
425                         }
426                 }
427                 ep = lz->pos + lz->avail;
428                 if(ep > HistSize)
429                         ep = HistSize;
430                 if(lzb->bytes + ep - lz->pos > DeflateMaxBlock)
431                         ep = lz->pos + DeflateMaxBlock - lzb->bytes;
432                 m = lzcomp(lz, lzb, &lz->hist[ep], lzb->eparse, lz->eof);
433                 lzb->bytes += m;
434                 lz->pos = (lz->pos + m) & (HistBlock - 1);
435                 lz->avail -= m;
436         }
437         if(lzb->lastv)
438                 *lzb->eparse++ = lzb->lastv;
439         if(lzb->eparse > lzb->parse + nelem(lzb->parse))
440                 return FlateInternal;
441         nunc = lzb->bytes;
442
443         if(!mkgzprecode(dlitlentab, lzb->litlencount, Nlitlen, MaxHuffBits)
444         || !mkgzprecode(dofftab, lzb->offcount, Noff, MaxHuffBits))
445                 return FlateInternal;
446
447         ndyn = huffcodes(&dyncode, dlitlentab, dofftab);
448         if(ndyn < 0)
449                 return FlateInternal;
450         ndyn += bitcost(dlitlentab, lzb->litlencount, Nlitlen)
451                 + bitcost(dofftab, lzb->offcount, Noff)
452                 + lzb->excost;
453
454         memset(litcount, 0, sizeof litcount);
455
456         nslop = nunc;
457         if(nslop > &lz->hist[HistSize] - slop)
458                 nslop = &lz->hist[HistSize] - slop;
459
460         for(i = 0; i < nslop; i++)
461                 litcount[slop[i]]++;
462         hslop = &lz->hist[HistSlop - nslop];
463         for(; i < nunc; i++)
464                 litcount[hslop[i]]++;
465         litcount[DeflateEob]++;
466
467         if(!mkgzprecode(hlitlentab, litcount, Nlitlen, MaxHuffBits))
468                 return FlateInternal;
469         nhuff = huffcodes(&hdyncode, hlitlentab, hofftab);
470         if(nhuff < 0)
471                 return FlateInternal;
472         nhuff += bitcost(hlitlentab, litcount, Nlitlen);
473
474         nfix = bitcost(litlentab, lzb->litlencount, Nlitlen)
475                 + bitcost(offtab, lzb->offcount, Noff)
476                 + lzb->excost;
477
478         lzput(lz, lz->eof && !lz->avail, 1);
479
480         if(lz->debug){
481                 fprint(2, "block: bytes=%lud entries=%zd extra bits=%d\n\tuncompressed=%lud fixed=%lud dynamic=%lud huffman=%lud\n",
482                         nunc, lzb->eparse - lzb->parse, lzb->excost, (nunc + 4) * 8, nfix, ndyn, nhuff);
483                 fprint(2, "\tnlit=%lud matches=%lud eof=%d\n", nlits, nmatches, lz->eof && !lz->avail);
484         }
485
486         if((nunc + 4) * 8 < ndyn && (nunc + 4) * 8 < nfix && (nunc + 4) * 8 < nhuff){
487                 lzput(lz, DeflateUnc, 2);
488                 lzflushbits(lz);
489
490                 lzput(lz, nunc & 0xff, 8);
491                 lzput(lz, (nunc >> 8) & 0xff, 8);
492                 lzput(lz, ~nunc & 0xff, 8);
493                 lzput(lz, (~nunc >> 8) & 0xff, 8);
494                 lzflush(lz);
495
496                 lzwrite(lz, slop, nslop);
497                 lzwrite(lz, &lz->hist[HistSlop], nunc - nslop);
498         }else if(ndyn < nfix && ndyn < nhuff){
499                 lzput(lz, DeflateDyn, 2);
500
501                 wrdyncode(lz, &dyncode);
502                 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, dlitlentab, dofftab);
503                 lzput(lz, dlitlentab[DeflateEob].encode, dlitlentab[DeflateEob].bits);
504         }else if(nhuff < nfix){
505                 lzput(lz, DeflateDyn, 2);
506
507                 wrdyncode(lz, &hdyncode);
508
509                 m = 0;
510                 for(i = nunc; i > MaxLitRun; i -= MaxLitRun)
511                         lzb->parse[m++] = MaxLitRun;
512                 lzb->parse[m++] = i;
513
514                 wrblock(lz, slop - lz->hist, lzb->parse, lzb->parse + m, hlitlentab, hofftab);
515                 lzput(lz, hlitlentab[DeflateEob].encode, hlitlentab[DeflateEob].bits);
516         }else{
517                 lzput(lz, DeflateFix, 2);
518
519                 wrblock(lz, slop - lz->hist, lzb->parse, lzb->eparse, litlentab, offtab);
520                 lzput(lz, litlentab[DeflateEob].encode, litlentab[DeflateEob].bits);
521         }
522
523         if(lz->eof && !lz->avail){
524                 lzflushbits(lz);
525                 lzflush(lz);
526         }
527         return FlateOk;
528 }
529
530 static void
531 lzwrite(LZstate *lz, void *buf, int n)
532 {
533         int nw;
534
535         if(n && lz->w){
536                 nw = (*lz->w)(lz->wr, buf, n);
537                 if(nw != n){
538                         lz->w = nil;
539                         lz->wbad = 1;
540                 }else
541                         lz->totw += n;
542         }
543 }
544
545 static void
546 lzflush(LZstate *lz)
547 {
548         lzwrite(lz, lz->obuf, lz->out - lz->obuf);
549         lz->out = lz->obuf;
550 }
551
552 static void
553 lzput(LZstate *lz, ulong bits, int nbits)
554 {
555         bits = (bits << lz->nbits) | lz->bits;
556         for(nbits += lz->nbits; nbits >= 8; nbits -= 8){
557                 *lz->out++ = bits;
558                 if(lz->out == lz->eout)
559                         lzflush(lz);
560                 bits >>= 8;
561         }
562         lz->bits = bits;
563         lz->nbits = nbits;
564 }
565
566 static void
567 lzflushbits(LZstate *lz)
568 {
569         if(lz->nbits)
570                 lzput(lz, 0, 8 - (lz->nbits & 7));
571 }
572
573 /*
574  * write out a block of n samples,
575  * given lz encoding and counts for huffman tables
576  */
577 static void
578 wrblock(LZstate *out, int litoff, ushort *soff, ushort *eoff, Huff *litlentab, Huff *offtab)
579 {
580         ushort *off;
581         int i, run, offset, lit, len, c;
582
583         if(out->debug > 2){
584                 for(off = soff; off < eoff; ){
585                         offset = *off++;
586                         run = offset & MaxLitRun;
587                         if(run){
588                                 for(i = 0; i < run; i++){
589                                         lit = out->hist[litoff & (HistBlock - 1)];
590                                         litoff++;
591                                         fprint(2, "\tlit %.2ux %c\n", lit, lit);
592                                 }
593                                 if(!(offset & LenFlag))
594                                         continue;
595                                 len = offset >> LenShift;
596                                 offset = *off++;
597                         }else if(offset & LenFlag){
598                                 len = offset >> LenShift;
599                                 offset = *off++;
600                         }else{
601                                 len = 0;
602                                 offset >>= LenShift;
603                         }
604                         litoff += len + MinMatch;
605                         fprint(2, "\t<%d, %d>\n", offset + 1, len + MinMatch);
606                 }
607         }
608
609         for(off = soff; off < eoff; ){
610                 offset = *off++;
611                 run = offset & MaxLitRun;
612                 if(run){
613                         for(i = 0; i < run; i++){
614                                 lit = out->hist[litoff & (HistBlock - 1)];
615                                 litoff++;
616                                 lzput(out, litlentab[lit].encode, litlentab[lit].bits);
617                         }
618                         if(!(offset & LenFlag))
619                                 continue;
620                         len = offset >> LenShift;
621                         offset = *off++;
622                 }else if(offset & LenFlag){
623                         len = offset >> LenShift;
624                         offset = *off++;
625                 }else{
626                         len = 0;
627                         offset >>= LenShift;
628                 }
629                 litoff += len + MinMatch;
630                 c = lencode[len];
631                 lzput(out, litlentab[c].encode, litlentab[c].bits);
632                 c -= LenStart;
633                 if(litlenextra[c])
634                         lzput(out, len - litlenbase[c], litlenextra[c]);
635
636                 if(offset < MaxOffCode)
637                         c = offcode[offset];
638                 else
639                         c = bigoffcode[offset >> 7];
640                 lzput(out, offtab[c].encode, offtab[c].bits);
641                 if(offextra[c])
642                         lzput(out, offset - offbase[c], offextra[c]);
643         }
644 }
645
646 /*
647  * look for the longest, closest string which matches
648  * the next prefix.  the clever part here is looking for
649  * a string 1 longer than the previous best match.
650  *
651  * follows the recommendation of limiting number of chains
652  * which are checked.  this appears to be the best heuristic.
653  */
654 static int
655 lzmatch(int now, int then, uchar *p, uchar *es, ushort *nexts, uchar *hist, int runlen, int check, int *m)
656 {
657         uchar *s, *t;
658         int ml, off, last;
659
660         ml = check;
661         if(runlen >= 8)
662                 check >>= 2;
663         *m = 0;
664         if(p + runlen >= es)
665                 return runlen;
666         last = 0;
667         for(; check-- > 0; then = nexts[then & (MaxOff-1)]){
668                 off = (ushort)(now - then);
669                 if(off <= last || off > MaxOff)
670                         break;
671                 s = p + runlen;
672                 t = hist + (((p - hist) - off) & (HistBlock-1));
673                 t += runlen;
674                 for(; s >= p; s--){
675                         if(*s != *t)
676                                 goto matchloop;
677                         t--;
678                 }
679
680                 /*
681                  * we have a new best match.
682                  * extend it to it's maximum length
683                  */
684                 t += runlen + 2;
685                 s += runlen + 2;
686                 for(; s < es; s++){
687                         if(*s != *t)
688                                 break;
689                         t++;
690                 }
691                 runlen = s - p;
692                 *m = off - 1;
693                 if(s == es || runlen > ml)
694                         break;
695 matchloop:;
696                 last = off;
697         }
698         return runlen;
699 }
700
701 static int
702 lzcomp(LZstate *lz, LZblock *lzb, uchar *ep, ushort *parse, int finish)
703 {
704         ulong cont, excost, *litlencount, *offcount;
705         uchar *p, *q, *s, *es;
706         ushort *nexts, *hash;
707         int v, i, h, runlen, n, now, then, m, prevlen, prevoff, maxdefer;
708
709         litlencount = lzb->litlencount;
710         offcount = lzb->offcount;
711         nexts = lz->nexts;
712         hash = lz->hash;
713         now = lz->now;
714
715         p = &lz->hist[lz->pos];
716         if(lz->prevlen != MinMatch - 1)
717                 p++;
718
719         /*
720          * hash in the links for any hanging link positions,
721          * and calculate the hash for the current position.
722          */
723         n = MinMatch;
724         if(n > ep - p)
725                 n = ep - p;
726         cont = 0;
727         for(i = 0; i < n - 1; i++){
728                 m = now - ((MinMatch-1) - i);
729                 if(m < lz->dot)
730                         continue;
731                 s = lz->hist + (((p - lz->hist) - (now - m)) & (HistBlock-1));
732
733                 cont = (s[0] << 16) | (s[1] << 8) | s[2];
734                 h = hashit(cont);
735                 prevoff = 0;
736                 for(then = hash[h]; ; then = nexts[then & (MaxOff-1)]){
737                         v = (ushort)(now - then);
738                         if(v <= prevoff || v >= (MinMatch-1) - i)
739                                 break;
740                         prevoff = v;
741                 }
742                 if(then == (ushort)m)
743                         continue;
744                 nexts[m & (MaxOff-1)] = hash[h];
745                 hash[h] = m;
746         }
747         for(i = 0; i < n; i++)
748                 cont = (cont << 8) | p[i];
749
750         /*
751          * now must point to the index in the nexts array
752          * corresponding to p's position in the history
753          */
754         prevlen = lz->prevlen;
755         prevoff = lz->prevoff;
756         maxdefer = lz->maxcheck >> 2;
757         excost = 0;
758         v = lzb->lastv;
759         for(;;){
760                 es = p + MaxMatch;
761                 if(es > ep){
762                         if(!finish || p >= ep)
763                                 break;
764                         es = ep;
765                 }
766
767                 h = hashit(cont);
768                 runlen = lzmatch(now, hash[h], p, es, nexts, lz->hist, prevlen, lz->maxcheck, &m);
769
770                 /*
771                  * back out of small matches too far in the past
772                  */
773                 if(runlen == MinMatch && m >= MinMatchMaxOff){
774                         runlen = MinMatch - 1;
775                         m = 0;
776                 }
777
778                 /*
779                  * record the encoding and increment counts for huffman trees
780                  * if we get a match, defer selecting it until we check for
781                  * a longer match at the next position.
782                  */
783                 if(prevlen >= runlen && prevlen != MinMatch - 1){
784                         /*
785                          * old match at least as good; use that one
786                          */
787                         n = prevlen - MinMatch;
788                         if(v || n){
789                                 *parse++ = v | LenFlag | (n << LenShift);
790                                 *parse++ = prevoff;
791                         }else
792                                 *parse++ = prevoff << LenShift;
793                         v = 0;
794
795                         n = lencode[n];
796                         litlencount[n]++;
797                         excost += litlenextra[n - LenStart];
798
799                         if(prevoff < MaxOffCode)
800                                 n = offcode[prevoff];
801                         else
802                                 n = bigoffcode[prevoff >> 7];
803                         offcount[n]++;
804                         excost += offextra[n];
805
806                         runlen = prevlen - 1;
807                         prevlen = MinMatch - 1;
808                         nmatches++;
809                 }else if(runlen == MinMatch - 1){
810                         /*
811                          * no match; just put out the literal
812                          */
813                         if(++v == MaxLitRun){
814                                 *parse++ = v;
815                                 v = 0;
816                         }
817                         litlencount[*p]++;
818                         nlits++;
819                         runlen = 1;
820                 }else{
821                         if(prevlen != MinMatch - 1){
822                                 /*
823                                  * longer match now. output previous literal,
824                                  * update current match, and try again
825                                  */
826                                 if(++v == MaxLitRun){
827                                         *parse++ = v;
828                                         v = 0;
829                                 }
830                                 litlencount[p[-1]]++;
831                                 nlits++;
832                         }
833
834                         prevoff = m;
835
836                         if(runlen < maxdefer){
837                                 prevlen = runlen;
838                                 runlen = 1;
839                         }else{
840                                 n = runlen - MinMatch;
841                                 if(v || n){
842                                         *parse++ = v | LenFlag | (n << LenShift);
843                                         *parse++ = prevoff;
844                                 }else
845                                         *parse++ = prevoff << LenShift;
846                                 v = 0;
847
848                                 n = lencode[n];
849                                 litlencount[n]++;
850                                 excost += litlenextra[n - LenStart];
851
852                                 if(prevoff < MaxOffCode)
853                                         n = offcode[prevoff];
854                                 else
855                                         n = bigoffcode[prevoff >> 7];
856                                 offcount[n]++;
857                                 excost += offextra[n];
858
859                                 prevlen = MinMatch - 1;
860                                 nmatches++;
861                         }
862                 }
863
864                 /*
865                  * update the hash for the newly matched data
866                  * this is constructed so the link for the old
867                  * match in this position must be at the end of a chain,
868                  * and will expire when this match is added, ie it will
869                  * never be examined by the match loop.
870                  * add to the hash chain only if we have the real hash data.
871                  */
872                 for(q = p + runlen; p != q; p++){
873                         if(p + MinMatch <= ep){
874                                 h = hashit(cont);
875                                 nexts[now & (MaxOff-1)] = hash[h];
876                                 hash[h] = now;
877                                 if(p + MinMatch < ep)
878                                         cont = (cont << 8) | p[MinMatch];
879                         }
880                         now++;
881                 }
882         }
883
884         /*
885          * we can just store away the lazy state and
886          * pick it up next time.  the last block will have finish set
887          * so we won't have any pending matches
888          * however, we need to correct for how much we've encoded
889          */
890         if(prevlen != MinMatch - 1)
891                 p--;
892
893         lzb->excost += excost;
894         lzb->eparse = parse;
895         lzb->lastv = v;
896
897         lz->now = now;
898         lz->prevlen = prevlen;
899         lz->prevoff = prevoff;
900
901         return p - &lz->hist[lz->pos];
902 }
903
904 /*
905  * make up the dynamic code tables, and return the number of bits
906  * needed to transmit them.
907  */
908 static int
909 huffcodes(Dyncode *dc, Huff *littab, Huff *offtab)
910 {
911         Huff *codetab;
912         uchar *codes, *codeaux;
913         ulong codecount[Nclen], excost;
914         int i, n, m, v, c, nlit, noff, ncode, nclen;
915
916         codetab = dc->codetab;
917         codes = dc->codes;
918         codeaux = dc->codeaux;
919
920         /*
921          * trim the sizes of the tables
922          */
923         for(nlit = Nlitlen; nlit > 257 && littab[nlit-1].bits == 0; nlit--)
924                 ;
925         for(noff = Noff; noff > 1 && offtab[noff-1].bits == 0; noff--)
926                 ;
927
928         /*
929          * make the code-length code
930          */
931         for(i = 0; i < nlit; i++)
932                 codes[i] = littab[i].bits;
933         for(i = 0; i < noff; i++)
934                 codes[i + nlit] = offtab[i].bits;
935
936         /*
937          * run-length compress the code-length code
938          */
939         excost = 0;
940         c = 0;
941         ncode = nlit+noff;
942         for(i = 0; i < ncode; ){
943                 n = i + 1;
944                 v = codes[i];
945                 while(n < ncode && v == codes[n])
946                         n++;
947                 n -= i;
948                 i += n;
949                 if(v == 0){
950                         while(n >= 11){
951                                 m = n;
952                                 if(m > 138)
953                                         m = 138;
954                                 codes[c] = 18;
955                                 codeaux[c++] = m - 11;
956                                 n -= m;
957                                 excost += 7;
958                         }
959                         if(n >= 3){
960                                 codes[c] = 17;
961                                 codeaux[c++] = n - 3;
962                                 n = 0;
963                                 excost += 3;
964                         }
965                 }
966                 while(n--){
967                         codes[c++] = v;
968                         while(n >= 3){
969                                 m = n;
970                                 if(m > 6)
971                                         m = 6;
972                                 codes[c] = 16;
973                                 codeaux[c++] = m - 3;
974                                 n -= m;
975                                 excost += 3;
976                         }
977                 }
978         }
979
980         memset(codecount, 0, sizeof codecount);
981         for(i = 0; i < c; i++)
982                 codecount[codes[i]]++;
983         if(!mkgzprecode(codetab, codecount, Nclen, 8))
984                 return -1;
985
986         for(nclen = Nclen; nclen > 4 && codetab[clenorder[nclen-1]].bits == 0; nclen--)
987                 ;
988
989         dc->nlit = nlit;
990         dc->noff = noff;
991         dc->nclen = nclen;
992         dc->ncode = c;
993
994         return 5 + 5 + 4 + nclen * 3 + bitcost(codetab, codecount, Nclen) + excost;
995 }
996
997 static void
998 wrdyncode(LZstate *out, Dyncode *dc)
999 {
1000         Huff *codetab;
1001         uchar *codes, *codeaux;
1002         int i, v, c;
1003
1004         /*
1005          * write out header, then code length code lengths,
1006          * and code lengths
1007          */
1008         lzput(out, dc->nlit-257, 5);
1009         lzput(out, dc->noff-1, 5);
1010         lzput(out, dc->nclen-4, 4);
1011
1012         codetab = dc->codetab;
1013         for(i = 0; i < dc->nclen; i++)
1014                 lzput(out, codetab[clenorder[i]].bits, 3);
1015
1016         codes = dc->codes;
1017         codeaux = dc->codeaux;
1018         c = dc->ncode;
1019         for(i = 0; i < c; i++){
1020                 v = codes[i];
1021                 lzput(out, codetab[v].encode, codetab[v].bits);
1022                 if(v >= 16){
1023                         if(v == 16)
1024                                 lzput(out, codeaux[i], 2);
1025                         else if(v == 17)
1026                                 lzput(out, codeaux[i], 3);
1027                         else /* v == 18 */
1028                                 lzput(out, codeaux[i], 7);
1029                 }
1030         }
1031 }
1032
1033 static int
1034 bitcost(Huff *tab, ulong *count, int n)
1035 {
1036         ulong tot;
1037         int i;
1038
1039         tot = 0;
1040         for(i = 0; i < n; i++)
1041                 tot += count[i] * tab[i].bits;
1042         return tot;
1043 }
1044
1045 static int
1046 mkgzprecode(Huff *tab, ulong *count, int n, int maxbits)
1047 {
1048         ulong bitcount[MaxHuffBits];
1049         int i, nbits;
1050
1051         nbits = mkprecode(tab, count, n, maxbits, bitcount);
1052         for(i = 0; i < n; i++){
1053                 if(tab[i].bits == -1)
1054                         tab[i].bits = 0;
1055                 else if(tab[i].bits == 0){
1056                         if(nbits != 0 || bitcount[0] != 1)
1057                                 return 0;
1058                         bitcount[1] = 1;
1059                         bitcount[0] = 0;
1060                         nbits = 1;
1061                         tab[i].bits = 1;
1062                 }
1063         }
1064         if(bitcount[0] != 0)
1065                 return 0;
1066         return hufftabinit(tab, n, bitcount, nbits);
1067 }
1068
1069 static int
1070 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
1071 {
1072         ulong code, nc[MaxHuffBits];
1073         int i, bits;
1074
1075         code = 0;
1076         for(bits = 1; bits <= nbits; bits++){
1077                 code = (code + bitcount[bits-1]) << 1;
1078                 nc[bits] = code;
1079         }
1080
1081         for(i = 0; i < n; i++){
1082                 bits = tab[i].bits;
1083                 if(bits){
1084                         code = nc[bits]++ << (16 - bits);
1085                         if(code & ~0xffff)
1086                                 return 0;
1087                         tab[i].encode = revtab[code >> 8] | (revtab[code & 0xff] << 8);
1088                 }
1089         }
1090         return 1;
1091 }
1092
1093
1094 /*
1095  * this should be in a library
1096  */
1097 struct Chain
1098 {
1099         ulong   count;                          /* occurances of everything in the chain */
1100         ushort  leaf;                           /* leaves to the left of chain, or leaf value */
1101         char    col;                            /* ref count for collecting unused chains */
1102         char    gen;                            /* need to generate chains for next lower level */
1103         Chain   *up;                            /* Chain up in the lists */
1104 };
1105
1106 struct Chains
1107 {
1108         Chain   *lists[(MaxHuffBits - 1) * 2];
1109         ulong   leafcount[MaxLeaf];             /* sorted list of leaf counts */
1110         ushort  leafmap[MaxLeaf];               /* map to actual leaf number */
1111         int     nleaf;                          /* number of leaves */
1112         Chain   chains[ChainMem];
1113         Chain   *echains;
1114         Chain   *free;
1115         char    col;
1116         int     nlists;
1117 };
1118
1119 /*
1120  * fast, low space overhead algorithm for max depth huffman type codes
1121  *
1122  * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
1123  * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
1124  * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
1125  * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
1126  * pp 12-21, Springer Verlag, New York, 1995.
1127  */
1128 static int
1129 mkprecode(Huff *tab, ulong *count, int n, int maxbits, ulong *bitcount)
1130 {
1131         Chains cs;
1132         Chain *c;
1133         int i, m, em, bits;
1134
1135         /*
1136          * set up the sorted list of leaves
1137          */
1138         m = 0;
1139         for(i = 0; i < n; i++){
1140                 tab[i].bits = -1;
1141                 tab[i].encode = 0;
1142                 if(count[i] != 0){
1143                         cs.leafcount[m] = count[i];
1144                         cs.leafmap[m] = i;
1145                         m++;
1146                 }
1147         }
1148         if(m < 2){
1149                 if(m != 0){
1150                         tab[cs.leafmap[0]].bits = 0;
1151                         bitcount[0] = 1;
1152                 }else
1153                         bitcount[0] = 0;
1154                 return 0;
1155         }
1156         cs.nleaf = m;
1157         leafsort(cs.leafcount, cs.leafmap, 0, m);
1158
1159         for(i = 0; i < m; i++)
1160                 cs.leafcount[i] = count[cs.leafmap[i]];
1161
1162         /*
1163          * set up free list
1164          */
1165         cs.free = &cs.chains[2];
1166         cs.echains = &cs.chains[ChainMem];
1167         cs.col = 1;
1168
1169         /*
1170          * initialize chains for each list
1171          */
1172         c = &cs.chains[0];
1173         c->count = cs.leafcount[0];
1174         c->leaf = 1;
1175         c->col = cs.col;
1176         c->up = nil;
1177         c->gen = 0;
1178         cs.chains[1] = cs.chains[0];
1179         cs.chains[1].leaf = 2;
1180         cs.chains[1].count = cs.leafcount[1];
1181         for(i = 0; i < maxbits-1; i++){
1182                 cs.lists[i * 2] = &cs.chains[0];
1183                 cs.lists[i * 2 + 1] = &cs.chains[1];
1184         }
1185
1186         cs.nlists = 2 * (maxbits - 1);
1187         m = 2 * m - 2;
1188         for(i = 2; i < m; i++)
1189                 nextchain(&cs, cs.nlists - 2);
1190
1191         bits = 0;
1192         bitcount[0] = cs.nleaf;
1193         for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
1194                 m = c->leaf;
1195                 bitcount[bits++] -= m;
1196                 bitcount[bits] = m;
1197         }
1198         m = 0;
1199         for(i = bits; i >= 0; i--)
1200                 for(em = m + bitcount[i]; m < em; m++)
1201                         tab[cs.leafmap[m]].bits = i;
1202
1203         return bits;
1204 }
1205
1206 /*
1207  * calculate the next chain on the list
1208  * we can always toss out the old chain
1209  */
1210 static void
1211 nextchain(Chains *cs, int list)
1212 {
1213         Chain *c, *oc;
1214         int i, nleaf, sumc;
1215
1216         oc = cs->lists[list + 1];
1217         cs->lists[list] = oc;
1218         if(oc == nil)
1219                 return;
1220
1221         /*
1222          * make sure we have all chains needed to make sumc
1223          * note it is possible to generate only one of these,
1224          * use twice that value for sumc, and then generate
1225          * the second if that preliminary sumc would be chosen.
1226          * however, this appears to be slower on current tests
1227          */
1228         if(oc->gen){
1229                 nextchain(cs, list - 2);
1230                 nextchain(cs, list - 2);
1231                 oc->gen = 0;
1232         }
1233
1234         /*
1235          * pick up the chain we're going to add;
1236          * collect unused chains no free ones are left
1237          */
1238         for(c = cs->free; ; c++){
1239                 if(c >= cs->echains){
1240                         cs->col++;
1241                         for(i = 0; i < cs->nlists; i++)
1242                                 for(c = cs->lists[i]; c != nil; c = c->up)
1243                                         c->col = cs->col;
1244                         c = cs->chains;
1245                 }
1246                 if(c->col != cs->col)
1247                         break;
1248         }
1249
1250         /*
1251          * pick the cheapest of
1252          * 1) the next package from the previous list
1253          * 2) the next leaf
1254          */
1255         nleaf = oc->leaf;
1256         sumc = 0;
1257         if(list > 0 && cs->lists[list-1] != nil)
1258                 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
1259         if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
1260                 c->count = sumc;
1261                 c->leaf = oc->leaf;
1262                 c->up = cs->lists[list-1];
1263                 c->gen = 1;
1264         }else if(nleaf >= cs->nleaf){
1265                 cs->lists[list + 1] = nil;
1266                 return;
1267         }else{
1268                 c->leaf = nleaf + 1;
1269                 c->count = cs->leafcount[nleaf];
1270                 c->up = oc->up;
1271                 c->gen = 0;
1272         }
1273         cs->free = c + 1;
1274
1275         cs->lists[list + 1] = c;
1276         c->col = cs->col;
1277 }
1278
1279 static int
1280 pivot(ulong *c, int a, int n)
1281 {
1282         int j, pi, pj, pk;
1283
1284         j = n/6;
1285         pi = a + j;     /* 1/6 */
1286         j += j;
1287         pj = pi + j;    /* 1/2 */
1288         pk = pj + j;    /* 5/6 */
1289         if(c[pi] < c[pj]){
1290                 if(c[pi] < c[pk]){
1291                         if(c[pj] < c[pk])
1292                                 return pj;
1293                         return pk;
1294                 }
1295                 return pi;
1296         }
1297         if(c[pj] < c[pk]){
1298                 if(c[pi] < c[pk])
1299                         return pi;
1300                 return pk;
1301         }
1302         return pj;
1303 }
1304
1305 static  void
1306 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
1307 {
1308         ulong t;
1309         int j, pi, pj, pn;
1310
1311         while(n > 1){
1312                 if(n > 10){
1313                         pi = pivot(leafcount, a, n);
1314                 }else
1315                         pi = a + (n>>1);
1316
1317                 t = leafcount[pi];
1318                 leafcount[pi] = leafcount[a];
1319                 leafcount[a] = t;
1320                 t = leafmap[pi];
1321                 leafmap[pi] = leafmap[a];
1322                 leafmap[a] = t;
1323                 pi = a;
1324                 pn = a + n;
1325                 pj = pn;
1326                 for(;;){
1327                         do
1328                                 pi++;
1329                         while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
1330                         do
1331                                 pj--;
1332                         while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
1333                         if(pj < pi)
1334                                 break;
1335                         t = leafcount[pi];
1336                         leafcount[pi] = leafcount[pj];
1337                         leafcount[pj] = t;
1338                         t = leafmap[pi];
1339                         leafmap[pi] = leafmap[pj];
1340                         leafmap[pj] = t;
1341                 }
1342                 t = leafcount[a];
1343                 leafcount[a] = leafcount[pj];
1344                 leafcount[pj] = t;
1345                 t = leafmap[a];
1346                 leafmap[a] = leafmap[pj];
1347                 leafmap[pj] = t;
1348                 j = pj - a;
1349
1350                 n = n-j-1;
1351                 if(j >= n){
1352                         leafsort(leafcount, leafmap, a, j);
1353                         a += j+1;
1354                 }else{
1355                         leafsort(leafcount, leafmap, a + (j+1), n);
1356                         n = j;
1357                 }
1358         }
1359 }