7 typedef struct Chain Chain;
8 typedef struct Chains Chains;
9 typedef struct Huff Huff;
13 ZBase = 2, /* base of code to encode 0 runs */
14 LitBase = ZBase-1, /* base of literal values */
17 MaxLeaf = MaxLit+LitBase,
19 MaxHuffBits = 16, /* limit of bits in a huffman code */
20 ChainMem = 2 * (MaxHuffBits - 1) * MaxHuffBits,
28 short bits; /* length of the code */
29 ulong encode; /* the code */
34 ulong count; /* occurances of everything in the chain */
35 ushort leaf; /* leaves to the left of chain, or leaf value */
36 char col; /* ref count for collecting unused chains */
37 char gen; /* need to generate chains for next lower level */
38 Chain *up; /* Chain up in the lists */
43 Chain *lists[(MaxHuffBits - 1) * 2];
44 ulong leafcount[MaxLeaf]; /* sorted list of leaf counts */
45 ushort leafmap[MaxLeaf]; /* map to actual leaf number */
46 int nleaf; /* number of leaves */
47 Chain chains[ChainMem];
54 static jmp_buf errjmp;
60 static ulong leafcount[MaxLeaf];
65 static ulong maxblocksym;
67 static void henc(int);
68 static void hbflush(void);
70 static void mkprecode(Huff*, ulong *, int, int);
71 static ulong sendtab(Huff *tab, int maxleaf, ushort *map);
72 static int elimBits(int b, ulong *bused, char *tmtf, int maxbits);
73 static void elimBit(int b, char *tmtf, int maxbits);
74 static void nextchain(Chains*, int);
75 static void hufftabinit(Huff*, int, ulong*, int);
76 static void leafsort(ulong*, ushort*, int, int);
78 static void bitput(int, int);
80 static int mtf(uchar*, int);
81 static void fatal(char*, ...);
83 static ulong headerbits;
84 static ulong charmapbits;
85 static ulong hufftabbits;
86 static ulong databits;
91 sac(uchar *dst, uchar *src, int n)
94 ulong *perm, cmap[256];
104 for(i = 0; i < n/2; i++){
114 * set up the output buffer
120 perm = malloc((n+1)*sizeof *perm);
129 for(i = 0; i < MaxLeaf; i++)
133 for(i = 0; i < n/2; i++){
140 i = ssortbyte(src, (int*)perm, nil, n);
146 * send the map of chars which occur in this block
148 for(i = 0; i < 256; i++)
150 for(i = 0; i < n; i++)
153 bitput(cmap[0] != 0, 1);
154 for(i = 0; i < 256; i = c){
155 for(c = i + 1; c < 256 && (cmap[c] != 0) == (cmap[i] != 0); c++)
158 bitput(c - i - 1, 8);
162 * initialize mtf state
165 for(i = 0; i < 256; i++){
172 maxblocksym = c + LitBase;
174 for(i = 0; i <= n; i++){
178 henc(mtf(front, src[c]));
183 bitput(0, 8 - nbits);
194 for(nbits += n; nbits >= 8; nbits -= 8){
195 c = bits << (32 - nbits);
199 bout[bpos++] = c >> 24;
204 mtf(uchar *front, int c)
208 for(i = 0; front[i] != c; i++)
210 for(v = i; i > 0; i--)
211 front[i] = front[i - 1];
217 * encode a run of zeros
218 * convert to funny run length coding:
219 * add one, omit implicit top 1 bit.
266 mkprecode(tab, leafcount, maxblocksym, MaxHuffBits);
268 hufftabbits += sendtab(tab, maxblocksym, nil);
273 for(i = 0; i < hbpos; i++){
277 fatal("bad tables %d", i);
279 bitput(tab[s].encode, b);
284 sendtab(Huff *tab, int maxleaf, ushort *map)
286 ulong ccount[MaxHuffBits+1], bused[MaxHuffBits+1];
287 Huff codetab[MaxHuffBits+1];
288 char tmtf[MaxHuffBits+1];
290 int i, b, max, ttb, s, elim;
293 * make up the table table
294 * over cleverness: the data is known to be a huffman table
295 * check for fullness at each bit level, and wipe it out from
296 * the possibilities; when nothing exists except 0, stop.
298 for(i = 0; i <= MaxHuffBits; i++){
304 tmtf[MaxHuffBits] = 0;
307 for(i = 0; i < maxleaf && elim != MaxHuffBits; i++){
312 for(s = 0; tmtf[s] != b; s++)
314 fatal("bitlength not found");
320 elim += elimBits(b, bused, tmtf, MaxHuffBits);
322 if(elim != MaxHuffBits && elim != 0)
323 fatal("incomplete huffman table");
326 * make up and send the table table
329 mkprecode(codetab, ccount, MaxHuffBits+1, max);
330 for(i = 0; i <= max; i++){
338 for(i = 0; i <= MaxHuffBits && elim != max; i++){
340 for(s = 0; tmtf[s] != b; s++)
342 fatal("bitlength not found");
344 while(max - elim < (1 << (ttb-1)))
349 elim += elimBits(b, bused, tmtf, max);
352 fatal("incomplete huffman table table");
357 for(i = 0; i <= MaxHuffBits; i++){
362 tmtf[MaxHuffBits] = 0;
364 for(i = 0; i < maxleaf && elim != MaxHuffBits; i++){
369 for(s = 0; tmtf[s] != b; s++)
371 fatal("bitlength not found");
372 ttb = codetab[s].bits;
374 fatal("bad huffman table table");
376 bitput(codetab[s].encode, ttb);
381 elim += elimBits(b, bused, tmtf, MaxHuffBits);
383 if(elim != MaxHuffBits && elim != 0)
384 fatal("incomplete huffman table");
390 elimBits(int b, ulong *bused, char *tmtf, int maxbits)
400 * increase bits counts for all descendants
402 for(bb = b + 1; bb < maxbits; bb++){
403 bused[bb] += 1 << (bb - b);
404 if(bused[bb] == (1 << bb)){
406 elimBit(bb, tmtf, maxbits);
411 * steal bits from parent & check for fullness
415 if(bused[b] == (1 << b)){
417 elimBit(b, tmtf, maxbits);
419 if((bused[b] & 1) == 0)
426 elimBit(int b, char *tmtf, int maxbits)
430 for(bb = 0; bb < maxbits; bb++)
434 fatal("elim bit not found");
435 while(++bb <= maxbits)
436 tmtf[bb - 1] = tmtf[bb];
440 * fast?, in-place huffman codes
442 * A. Moffat, J. Katajainen, "In-Place Calculation of Minimum-Redundancy Codes",
444 * counts must be sorted, and may be aliased to bitlens
447 fmkprecode(ulong *bitcount, ulong *bitlen, ulong *counts, int n, int maxbits)
449 int leaf, tree, treet, depth, nodes, internal;
452 * form the internal tree structure:
453 * [0, tree) are parent pointers,
454 * [tree, treet) are weights of internal nodes,
455 * [leaf, n) are weights of remaining leaves.
457 * note that at the end, there are n-1 internal nodes
461 for(treet = 0; treet != n - 1; treet++){
462 if(leaf >= n || tree < treet && bitlen[tree] < counts[leaf]){
463 bitlen[treet] = bitlen[tree];
464 bitlen[tree++] = treet;
466 bitlen[treet] = counts[leaf++];
467 if(leaf >= n || tree < treet && bitlen[tree] < counts[leaf]){
468 bitlen[treet] += bitlen[tree];
469 bitlen[tree++] = treet;
471 bitlen[treet] += counts[leaf++];
473 if(tree != treet - 1)
474 fatal("bad fast mkprecode");
477 * calculate depth of internal nodes
480 for(tree = n - 3; tree >= 0; tree--)
481 bitlen[tree] = bitlen[bitlen[tree]] + 1;
484 * calculate depth of leaves:
485 * at each depth, leaves(depth) = nodes(depth) - internal(depth)
486 * and nodes(depth+1) = 2 * internal(depth)
491 for(nodes = 1; nodes > 0; nodes = 2 * internal){
493 while(tree >= 0 && bitlen[tree] == depth){
499 bitcount[depth] = nodes;
501 bitlen[--leaf] = depth;
504 if(leaf != 0 || tree != -1)
505 fatal("bad leaves in fast mkprecode");
511 * fast, low space overhead algorithm for max depth huffman type codes
513 * J. Katajainen, A. Moffat and A. Turpin, "A fast and space-economical
514 * algorithm for length-limited coding," Proc. Intl. Symp. on Algorithms
515 * and Computation, Cairns, Australia, Dec. 1995, Lecture Notes in Computer
516 * Science, Vol 1004, J. Staples, P. Eades, N. Katoh, and A. Moffat, eds.,
517 * pp 12-21, Springer Verlag, New York, 1995.
520 mkprecode(Huff *tab, ulong *count, int n, int maxbits)
524 ulong bitcount[MaxHuffBits];
528 * set up the sorted list of leaves
531 for(i = 0; i < n; i++){
535 cs.leafcount[m] = count[i];
549 leafsort(cs.leafcount, cs.leafmap, 0, m);
554 bits = fmkprecode(bitcount, cs.leafcount, cs.leafcount, m, maxbits);
556 for(i = 0; i < m; i++)
557 tab[cs.leafmap[i]].bits = cs.leafcount[i];
560 for(i = 0; i < m; i++)
561 cs.leafcount[i] = count[cs.leafmap[i]];
566 cs.free = &cs.chains[2];
567 cs.echains = &cs.chains[ChainMem];
571 * initialize chains for each list
574 c->count = cs.leafcount[0];
579 cs.chains[1] = cs.chains[0];
580 cs.chains[1].leaf = 2;
581 cs.chains[1].count = cs.leafcount[1];
582 for(i = 0; i < maxbits-1; i++){
583 cs.lists[i * 2] = &cs.chains[0];
584 cs.lists[i * 2 + 1] = &cs.chains[1];
587 cs.nlists = 2 * (maxbits - 1);
589 for(i = 2; i < m; i++)
590 nextchain(&cs, cs.nlists - 2);
593 bitcount[0] = cs.nleaf;
594 for(c = cs.lists[cs.nlists - 1]; c != nil; c = c->up){
596 bitcount[bits++] -= m;
600 for(i = bits; i >= 0; i--)
601 for(em = m + bitcount[i]; m < em; m++)
602 tab[cs.leafmap[m]].bits = i;
604 hufftabinit(tab, n, bitcount, bits);
608 * calculate the next chain on the list
609 * we can always toss out the old chain
612 nextchain(Chains *cs, int list)
617 oc = cs->lists[list + 1];
618 cs->lists[list] = oc;
623 * make sure we have all chains needed to make sumc
624 * note it is possible to generate only one of these,
625 * use twice that value for sumc, and then generate
626 * the second if that preliminary sumc would be chosen.
627 * however, this appears to be slower on current tests
630 nextchain(cs, list - 2);
631 nextchain(cs, list - 2);
636 * pick up the chain we're going to add;
637 * collect unused chains no free ones are left
639 for(c = cs->free; ; c++){
640 if(c >= cs->echains){
642 for(i = 0; i < cs->nlists; i++)
643 for(c = cs->lists[i]; c != nil; c = c->up)
647 if(c->col != cs->col)
652 * pick the cheapest of
653 * 1) the next package from the previous list
658 if(list > 0 && cs->lists[list-1] != nil)
659 sumc = cs->lists[list-2]->count + cs->lists[list-1]->count;
660 if(sumc != 0 && (nleaf >= cs->nleaf || cs->leafcount[nleaf] > sumc)){
663 c->up = cs->lists[list-1];
665 }else if(nleaf >= cs->nleaf){
666 cs->lists[list + 1] = nil;
670 c->count = cs->leafcount[nleaf];
676 cs->lists[list + 1] = c;
681 hufftabinit(Huff *tab, int n, ulong *bitcount, int nbits)
683 ulong code, nc[MaxHuffBits];
687 for(bits = 1; bits <= nbits; bits++){
688 code = (code + bitcount[bits-1]) << 1;
692 for(i = 0; i < n; i++){
695 tab[i].encode = nc[bits]++;
700 pivot(ulong *c, int a, int n)
705 pi = a + j; /* 1/6 */
707 pj = pi + j; /* 1/2 */
708 pk = pj + j; /* 5/6 */
726 leafsort(ulong *leafcount, ushort *leafmap, int a, int n)
733 pi = pivot(leafcount, a, n);
738 leafcount[pi] = leafcount[a];
741 leafmap[pi] = leafmap[a];
749 while(pi < pn && (leafcount[pi] < leafcount[a] || leafcount[pi] == leafcount[a] && leafmap[pi] > leafmap[a]));
752 while(pj > a && (leafcount[pj] > leafcount[a] || leafcount[pj] == leafcount[a] && leafmap[pj] < leafmap[a]));
756 leafcount[pi] = leafcount[pj];
759 leafmap[pi] = leafmap[pj];
763 leafcount[a] = leafcount[pj];
766 leafmap[a] = leafmap[pj];
772 leafsort(leafcount, leafmap, a, j);
775 leafsort(leafcount, leafmap, a + (j+1), n);
782 fatal(char *fmt, ...)
788 doprint(buf, buf+sizeof(buf), fmt, arg);