Support for Ada
See http://zlib-ada.sourceforge.net/
-amd64/ by Mikhail Teterin <mi@ALDAN.algebra.com>
- asm code for AMD64
- See patch at http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/96393
-
-asm686/ by Brian Raiter <breadbox@muppetlabs.com>
- asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax
- See http://www.muppetlabs.com/~breadbox/software/assembly.html
-
blast/ by Mark Adler <madler@alumni.caltech.edu>
Decompressor for output of PKWare Data Compression Library (DCL)
infback9/ by Mark Adler <madler@alumni.caltech.edu>
Unsupported diffs to infback to decode the deflate64 format
-inflate86/ by Chris Anderson <christop@charm.net>
- Tuned x86 gcc asm code to replace inflate_fast()
-
iostream/ by Kevin Ruland <kevin@rodin.wustl.edu>
A C++ I/O streams interface to the zlib gz* functions
and Kevin Ruland <kevin@rodin.wustl.edu>
Yet another C++ I/O streams interface
-masmx64/ by Gilles Vollant <info@winimage.com>
- x86 64-bit (AMD64 and Intel EM64t) code for x64 assembler to
- replace longest_match() and inflate_fast(), also masm x86
- 64-bits translation of Chris Anderson inflate_fast()
-
-masmx86/ by Gilles Vollant <info@winimage.com>
- x86 asm code to replace longest_match() and inflate_fast(),
- for Visual C++ and MASM (32 bits).
- Based on Brian Raiter (asm686) and Chris Anderson (inflate86)
-
minizip/ by Gilles Vollant <info@winimage.com>
Mini zip and unzip based on zlib
Includes Zip64 support by Mathias Svensson <mathias@result42.com>
+++ /dev/null
-/*
- * match.S -- optimized version of longest_match()
- * based on the similar work by Gilles Vollant, and Brian Raiter, written 1998
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the BSD License. Use by owners of Che Guevarra
- * parafernalia is prohibited, where possible, and highly discouraged
- * elsewhere.
- */
-
-#ifndef NO_UNDERLINE
-# define match_init _match_init
-# define longest_match _longest_match
-#endif
-
-#define scanend ebx
-#define scanendw bx
-#define chainlenwmask edx /* high word: current chain len low word: s->wmask */
-#define curmatch rsi
-#define curmatchd esi
-#define windowbestlen r8
-#define scanalign r9
-#define scanalignd r9d
-#define window r10
-#define bestlen r11
-#define bestlend r11d
-#define scanstart r12d
-#define scanstartw r12w
-#define scan r13
-#define nicematch r14d
-#define limit r15
-#define limitd r15d
-#define prev rcx
-
-/*
- * The 258 is a "magic number", not a parameter -- changing it
- * makes all hell break loose
- */
-#define MAX_MATCH (258)
-#define MIN_MATCH (3)
-#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
-#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7)
-
-/* stack frame offsets */
-#define LocalVarsSize (112)
-#define _chainlenwmask ( 8-LocalVarsSize)(%rsp)
-#define _windowbestlen (16-LocalVarsSize)(%rsp)
-#define save_r14 (24-LocalVarsSize)(%rsp)
-#define save_rsi (32-LocalVarsSize)(%rsp)
-#define save_rbx (40-LocalVarsSize)(%rsp)
-#define save_r12 (56-LocalVarsSize)(%rsp)
-#define save_r13 (64-LocalVarsSize)(%rsp)
-#define save_r15 (80-LocalVarsSize)(%rsp)
-
-
-.globl match_init, longest_match
-
-/*
- * On AMD64 the first argument of a function (in our case -- the pointer to
- * deflate_state structure) is passed in %rdi, hence our offsets below are
- * all off of that.
- */
-
-/* you can check the structure offset by running
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "deflate.h"
-
-void print_depl()
-{
-deflate_state ds;
-deflate_state *s=&ds;
-printf("size pointer=%u\n",(int)sizeof(void*));
-
-printf("#define dsWSize (%3u)(%%rdi)\n",(int)(((char*)&(s->w_size))-((char*)s)));
-printf("#define dsWMask (%3u)(%%rdi)\n",(int)(((char*)&(s->w_mask))-((char*)s)));
-printf("#define dsWindow (%3u)(%%rdi)\n",(int)(((char*)&(s->window))-((char*)s)));
-printf("#define dsPrev (%3u)(%%rdi)\n",(int)(((char*)&(s->prev))-((char*)s)));
-printf("#define dsMatchLen (%3u)(%%rdi)\n",(int)(((char*)&(s->match_length))-((char*)s)));
-printf("#define dsPrevMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->prev_match))-((char*)s)));
-printf("#define dsStrStart (%3u)(%%rdi)\n",(int)(((char*)&(s->strstart))-((char*)s)));
-printf("#define dsMatchStart (%3u)(%%rdi)\n",(int)(((char*)&(s->match_start))-((char*)s)));
-printf("#define dsLookahead (%3u)(%%rdi)\n",(int)(((char*)&(s->lookahead))-((char*)s)));
-printf("#define dsPrevLen (%3u)(%%rdi)\n",(int)(((char*)&(s->prev_length))-((char*)s)));
-printf("#define dsMaxChainLen (%3u)(%%rdi)\n",(int)(((char*)&(s->max_chain_length))-((char*)s)));
-printf("#define dsGoodMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->good_match))-((char*)s)));
-printf("#define dsNiceMatch (%3u)(%%rdi)\n",(int)(((char*)&(s->nice_match))-((char*)s)));
-}
-
-*/
-
-
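-/*
- to compile for XCode 3.2 on MacOSX x86_64
- - run "gcc -g -c -DXCODE_MAC_X64_STRUCTURE amd64-match.S"
- NOTE(review): the #ifndef below tests CURRENT_LINX_XCODE_MAC_X64_STRUCTURE,
- not XCODE_MAC_X64_STRUCTURE, so the flag shown above does not appear to
- select a different offset table -- confirm which macro name is intended.
- */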
-
-
-#ifndef CURRENT_LINX_XCODE_MAC_X64_STRUCTURE
-#define dsWSize ( 68)(%rdi)
-#define dsWMask ( 76)(%rdi)
-#define dsWindow ( 80)(%rdi)
-#define dsPrev ( 96)(%rdi)
-#define dsMatchLen (144)(%rdi)
-#define dsPrevMatch (148)(%rdi)
-#define dsStrStart (156)(%rdi)
-#define dsMatchStart (160)(%rdi)
-#define dsLookahead (164)(%rdi)
-#define dsPrevLen (168)(%rdi)
-#define dsMaxChainLen (172)(%rdi)
-#define dsGoodMatch (188)(%rdi)
-#define dsNiceMatch (192)(%rdi)
-
-#else
-
-#ifndef STRUCT_OFFSET
-# define STRUCT_OFFSET (0)
-#endif
-
-
-#define dsWSize ( 56 + STRUCT_OFFSET)(%rdi)
-#define dsWMask ( 64 + STRUCT_OFFSET)(%rdi)
-#define dsWindow ( 72 + STRUCT_OFFSET)(%rdi)
-#define dsPrev ( 88 + STRUCT_OFFSET)(%rdi)
-#define dsMatchLen (136 + STRUCT_OFFSET)(%rdi)
-#define dsPrevMatch (140 + STRUCT_OFFSET)(%rdi)
-#define dsStrStart (148 + STRUCT_OFFSET)(%rdi)
-#define dsMatchStart (152 + STRUCT_OFFSET)(%rdi)
-#define dsLookahead (156 + STRUCT_OFFSET)(%rdi)
-#define dsPrevLen (160 + STRUCT_OFFSET)(%rdi)
-#define dsMaxChainLen (164 + STRUCT_OFFSET)(%rdi)
-#define dsGoodMatch (180 + STRUCT_OFFSET)(%rdi)
-#define dsNiceMatch (184 + STRUCT_OFFSET)(%rdi)
-
-#endif
-
-
-
-
-.text
-
-/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
-
-longest_match:
-/*
- * Retrieve the function arguments. %curmatch will hold cur_match
- * throughout the entire function (passed via rsi on amd64).
- * rdi will hold the pointer to the deflate_state (first arg on amd64)
- */
- mov %rsi, save_rsi
- mov %rbx, save_rbx
- mov %r12, save_r12
- mov %r13, save_r13
- mov %r14, save_r14
- mov %r15, save_r15
-
-/* uInt wmask = s->w_mask; */
-/* unsigned chain_length = s->max_chain_length; */
-/* if (s->prev_length >= s->good_match) { */
-/* chain_length >>= 2; */
-/* } */
-
- movl dsPrevLen, %eax
- movl dsGoodMatch, %ebx
- cmpl %ebx, %eax
- movl dsWMask, %eax
- movl dsMaxChainLen, %chainlenwmask
- jl LastMatchGood
- shrl $2, %chainlenwmask
-LastMatchGood:
-
-/* chainlen is decremented once beforehand so that the function can */
-/* use the sign flag instead of the zero flag for the exit test. */
-/* It is then shifted into the high word, to make room for the wmask */
-/* value, which it will always accompany. */
-
- decl %chainlenwmask
- shll $16, %chainlenwmask
- orl %eax, %chainlenwmask
-
-/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */
-
- movl dsNiceMatch, %eax
- movl dsLookahead, %ebx
- cmpl %eax, %ebx
- jl LookaheadLess
- movl %eax, %ebx
-LookaheadLess: movl %ebx, %nicematch
-
-/* register Bytef *scan = s->window + s->strstart; */
-
- mov dsWindow, %window
- movl dsStrStart, %limitd
- lea (%limit, %window), %scan
-
-/* Determine how many bytes the scan ptr is off from being */
-/* dword-aligned. */
-
- mov %scan, %scanalign
- negl %scanalignd
- andl $3, %scanalignd
-
-/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
-/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
-
- movl dsWSize, %eax
- subl $MIN_LOOKAHEAD, %eax
- xorl %ecx, %ecx
- subl %eax, %limitd
- cmovng %ecx, %limitd
-
-/* int best_len = s->prev_length; */
-
- movl dsPrevLen, %bestlend
-
-/* Store the sum of s->window + best_len in %windowbestlen locally, and in memory. */
-
- lea (%window, %bestlen), %windowbestlen
- mov %windowbestlen, _windowbestlen
-
-/* register ush scan_start = *(ushf*)scan; */
-/* register ush scan_end = *(ushf*)(scan+best_len-1); */
-/* Posf *prev = s->prev; */
-
- movzwl (%scan), %scanstart
- movzwl -1(%scan, %bestlen), %scanend
- mov dsPrev, %prev
-
-/* Jump into the main loop. */
-
- movl %chainlenwmask, _chainlenwmask
- jmp LoopEntry
-
-.balign 16
-
-/* do {
- * match = s->window + cur_match;
- * if (*(ushf*)(match+best_len-1) != scan_end ||
- * *(ushf*)match != scan_start) continue;
- * [...]
- * } while ((cur_match = prev[cur_match & wmask]) > limit
- * && --chain_length != 0);
- *
- * Here is the inner loop of the function. The function will spend the
- * majority of its time in this loop, and majority of that time will
- * be spent in the first ten instructions.
- */
-LookupLoop:
- andl %chainlenwmask, %curmatchd
- movzwl (%prev, %curmatch, 2), %curmatchd
- cmpl %limitd, %curmatchd
- jbe LeaveNow
- subl $0x00010000, %chainlenwmask
- js LeaveNow
-LoopEntry: cmpw -1(%windowbestlen, %curmatch), %scanendw
- jne LookupLoop
- cmpw %scanstartw, (%window, %curmatch)
- jne LookupLoop
-
-/* Store the current value of chainlen. */
- movl %chainlenwmask, _chainlenwmask
-
-/* %prev is pointed at the string under scrutiny (scan), and */
-/* %windowbestlen at the string we are hoping to match it up with. */
-/* In actuality, both are pointed (MAX_MATCH_8 + scanalign) bytes */
-/* ahead, and %rdx is initialized to -MAX_MATCH_8. */
-
- mov $(-MAX_MATCH_8), %rdx
- lea (%curmatch, %window), %windowbestlen
- lea MAX_MATCH_8(%windowbestlen, %scanalign), %windowbestlen
- lea MAX_MATCH_8(%scan, %scanalign), %prev
-
-/* the prefetching below makes very little difference... */
- prefetcht1 (%windowbestlen, %rdx)
- prefetcht1 (%prev, %rdx)
-
-/*
- * Test the strings for equality, 8 bytes at a time. At the end,
- * adjust %rdx so that it is offset to the exact byte that mismatched.
- *
- * It should be confessed that this loop usually does not represent
- * much of the total running time. Replacing it with a more
- * straightforward "rep cmpsb" would not drastically degrade
- * performance -- unrolling it, for example, makes no difference.
- */
-
-#undef USE_SSE /* works, but is 6-7% slower, than non-SSE... */
-
-LoopCmps:
-#ifdef USE_SSE
- /* Preload the SSE registers */
- movdqu (%windowbestlen, %rdx), %xmm1
- movdqu (%prev, %rdx), %xmm2
- pcmpeqb %xmm2, %xmm1
- movdqu 16(%windowbestlen, %rdx), %xmm3
- movdqu 16(%prev, %rdx), %xmm4
- pcmpeqb %xmm4, %xmm3
- movdqu 32(%windowbestlen, %rdx), %xmm5
- movdqu 32(%prev, %rdx), %xmm6
- pcmpeqb %xmm6, %xmm5
- movdqu 48(%windowbestlen, %rdx), %xmm7
- movdqu 48(%prev, %rdx), %xmm8
- pcmpeqb %xmm8, %xmm7
-
- /* Check the comparisons' results */
- pmovmskb %xmm1, %rax
- notw %ax
- bsfw %ax, %ax
- jnz LeaveLoopCmps
-
- /* This is the only point in the loop where rdx can have been
- incremented by a total of 0x108: each full loop iteration adds
- 16*4 = 0x40, and (0x40*4)+8 = 0x108. */
- add $8, %rdx
- jz LenMaximum
- add $8, %rdx
-
-
- pmovmskb %xmm3, %rax
- notw %ax
- bsfw %ax, %ax
- jnz LeaveLoopCmps
-
-
- add $16, %rdx
-
-
- pmovmskb %xmm5, %rax
- notw %ax
- bsfw %ax, %ax
- jnz LeaveLoopCmps
-
- add $16, %rdx
-
-
- pmovmskb %xmm7, %rax
- notw %ax
- bsfw %ax, %ax
- jnz LeaveLoopCmps
-
- add $16, %rdx
-
- jmp LoopCmps
-LeaveLoopCmps: add %rax, %rdx
-#else
- mov (%windowbestlen, %rdx), %rax
- xor (%prev, %rdx), %rax
- jnz LeaveLoopCmps
-
- mov 8(%windowbestlen, %rdx), %rax
- xor 8(%prev, %rdx), %rax
- jnz LeaveLoopCmps8
-
- mov 16(%windowbestlen, %rdx), %rax
- xor 16(%prev, %rdx), %rax
- jnz LeaveLoopCmps16
-
- add $24, %rdx
- jnz LoopCmps
- jmp LenMaximum
-# if 0
-/*
- * This three-liner is tantalizingly simple, but bsf is a slow instruction,
- * and the complicated alternative down below is quite a bit faster. Sad...
- */
-
-LeaveLoopCmps: bsf %rax, %rax /* find the first non-zero bit */
- shrl $3, %eax /* divide by 8 to get the byte */
- add %rax, %rdx
-# else
-LeaveLoopCmps16:
- add $8, %rdx
-LeaveLoopCmps8:
- add $8, %rdx
-LeaveLoopCmps: testl $0xFFFFFFFF, %eax /* Check the first 4 bytes */
- jnz Check16
- add $4, %rdx
- shr $32, %rax
-Check16: testw $0xFFFF, %ax
- jnz LenLower
- add $2, %rdx
- shrl $16, %eax
-LenLower: subb $1, %al
- adc $0, %rdx
-# endif
-#endif
-
-/* Calculate the length of the match. If it is longer than MAX_MATCH, */
-/* then automatically accept it as the best possible match and leave. */
-
- lea (%prev, %rdx), %rax
- sub %scan, %rax
- cmpl $MAX_MATCH, %eax
- jge LenMaximum
-
-/* If the length of the match is not longer than the best match we */
-/* have so far, then forget it and return to the lookup loop. */
-
- cmpl %bestlend, %eax
- jg LongerMatch
- mov _windowbestlen, %windowbestlen
- mov dsPrev, %prev
- movl _chainlenwmask, %edx
- jmp LookupLoop
-
-/* s->match_start = cur_match; */
-/* best_len = len; */
-/* if (len >= nice_match) break; */
-/* scan_end = *(ushf*)(scan+best_len-1); */
-
-LongerMatch:
- movl %eax, %bestlend
- movl %curmatchd, dsMatchStart
- cmpl %nicematch, %eax
- jge LeaveNow
-
- lea (%window, %bestlen), %windowbestlen
- mov %windowbestlen, _windowbestlen
-
- movzwl -1(%scan, %rax), %scanend
- mov dsPrev, %prev
- movl _chainlenwmask, %chainlenwmask
- jmp LookupLoop
-
-/* Accept the current string, with the maximum possible length. */
-
-LenMaximum:
- movl $MAX_MATCH, %bestlend
- movl %curmatchd, dsMatchStart
-
-/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
-/* return s->lookahead; */
-
-LeaveNow:
- movl dsLookahead, %eax
- cmpl %eax, %bestlend
- cmovngl %bestlend, %eax
-LookaheadRet:
-
-/* Restore the registers and return from whence we came. */
-
- mov save_rsi, %rsi
- mov save_rbx, %rbx
- mov save_r12, %r12
- mov save_r13, %r13
- mov save_r14, %r14
- mov save_r15, %r15
-
- ret
-
-match_init: ret
+++ /dev/null
-This is a patched version of zlib, modified to use
-Pentium-Pro-optimized assembly code in the deflation algorithm. The
-files changed/added by this patch are:
-
-README.686
-match.S
-
-The speedup that this patch provides varies, depending on whether the
-compiler used to build the original version of zlib falls afoul of the
-PPro's speed traps. My own tests show a speedup of around 10-20% at
-the default compression level, and 20-30% using -9, against a version
-compiled using gcc 2.7.2.3. Your mileage may vary.
-
-Note that this code has been tailored for the PPro/PII in particular,
-and will not perform particularly well on a Pentium.
-
-If you are using an assembler other than GNU as, you will have to
-translate match.S to use your assembler's syntax. (Have fun.)
-
-Brian Raiter
-breadbox@muppetlabs.com
-April, 1998
-
-
-Added for zlib 1.1.3:
-
-The patches come from
-http://www.muppetlabs.com/~breadbox/software/assembly.html
-
-To compile zlib with this asm file, copy match.S to the zlib directory
-then do:
-
-CFLAGS="-O3 -DASMV" ./configure
-make OBJA=match.o
-
-
-Update:
-
-I've been ignoring these assembly routines for years, believing that
-gcc's generated code had caught up with it sometime around gcc 2.95
-and the major rearchitecting of the Pentium 4. However, I recently
-learned that, despite what I believed, this code still has some life
-in it. On the Pentium 4 and AMD64 chips, it continues to run about 8%
-faster than the code produced by gcc 4.1.
-
-In acknowledgement of its continuing usefulness, I've altered the
-license to match that of the rest of zlib. Share and Enjoy!
-
-Brian Raiter
-breadbox@muppetlabs.com
-April, 2007
+++ /dev/null
-/* match.S -- x86 assembly version of the zlib longest_match() function.
- * Optimized for the Intel 686 chips (PPro and later).
- *
- * Copyright (C) 1998, 2007 Brian Raiter <breadbox@muppetlabs.com>
- *
- * This software is provided 'as-is', without any express or implied
- * warranty. In no event will the author be held liable for any damages
- * arising from the use of this software.
- *
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- *
- * 1. The origin of this software must not be misrepresented; you must not
- * claim that you wrote the original software. If you use this software
- * in a product, an acknowledgment in the product documentation would be
- * appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- * misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
-
-#ifndef NO_UNDERLINE
-#define match_init _match_init
-#define longest_match _longest_match
-#endif
-
-#define MAX_MATCH (258)
-#define MIN_MATCH (3)
-#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1)
-#define MAX_MATCH_8 ((MAX_MATCH + 7) & ~7)
-
-/* stack frame offsets */
-
-#define chainlenwmask 0 /* high word: current chain len */
- /* low word: s->wmask */
-#define window 4 /* local copy of s->window */
-#define windowbestlen 8 /* s->window + bestlen */
-#define scanstart 16 /* first two bytes of string */
-#define scanend 12 /* last two bytes of string */
-#define scanalign 20 /* dword-misalignment of string */
-#define nicematch 24 /* a good enough match size */
-#define bestlen 28 /* size of best match so far */
-#define scan 32 /* ptr to string wanting match */
-
-#define LocalVarsSize (36)
-/* saved ebx 36 */
-/* saved edi 40 */
-/* saved esi 44 */
-/* saved ebp 48 */
-/* return address 52 */
-#define deflatestate 56 /* the function arguments */
-#define curmatch 60
-
-/* All the +zlib1222add offsets are due to the addition of fields
- * in zlib in the deflate_state structure since the asm code was first written:
- * - if you compile with zlib 1.0.4 or older, use "#define zlib1222add (-4)";
- * - if you compile with zlib between 1.0.5 and 1.2.2.1, use "#define zlib1222add (0)";
- * - if you compile with zlib 1.2.2.2 or later, use "#define zlib1222add (8)".
- */
-
-#define zlib1222add (8)
-
-#define dsWSize (36+zlib1222add)
-#define dsWMask (44+zlib1222add)
-#define dsWindow (48+zlib1222add)
-#define dsPrev (56+zlib1222add)
-#define dsMatchLen (88+zlib1222add)
-#define dsPrevMatch (92+zlib1222add)
-#define dsStrStart (100+zlib1222add)
-#define dsMatchStart (104+zlib1222add)
-#define dsLookahead (108+zlib1222add)
-#define dsPrevLen (112+zlib1222add)
-#define dsMaxChainLen (116+zlib1222add)
-#define dsGoodMatch (132+zlib1222add)
-#define dsNiceMatch (136+zlib1222add)
-
-
-.file "match.S"
-
-.globl match_init, longest_match
-
-.text
-
-/* uInt longest_match(deflate_state *deflatestate, IPos curmatch) */
-.cfi_sections .debug_frame
-
-longest_match:
-
-.cfi_startproc
-/* Save registers that the compiler may be using, and adjust %esp to */
-/* make room for our stack frame. */
-
- pushl %ebp
- .cfi_def_cfa_offset 8
- .cfi_offset ebp, -8
- pushl %edi
- .cfi_def_cfa_offset 12
- pushl %esi
- .cfi_def_cfa_offset 16
- pushl %ebx
- .cfi_def_cfa_offset 20
- subl $LocalVarsSize, %esp
- .cfi_def_cfa_offset LocalVarsSize+20
-
-/* Retrieve the function arguments. %ecx will hold cur_match */
-/* throughout the entire function. %edx will hold the pointer to the */
-/* deflate_state structure during the function's setup (before */
-/* entering the main loop). */
-
- movl deflatestate(%esp), %edx
- movl curmatch(%esp), %ecx
-
-/* uInt wmask = s->w_mask; */
-/* unsigned chain_length = s->max_chain_length; */
-/* if (s->prev_length >= s->good_match) { */
-/* chain_length >>= 2; */
-/* } */
-
- movl dsPrevLen(%edx), %eax
- movl dsGoodMatch(%edx), %ebx
- cmpl %ebx, %eax
- movl dsWMask(%edx), %eax
- movl dsMaxChainLen(%edx), %ebx
- jl LastMatchGood
- shrl $2, %ebx
-LastMatchGood:
-
-/* chainlen is decremented once beforehand so that the function can */
-/* use the sign flag instead of the zero flag for the exit test. */
-/* It is then shifted into the high word, to make room for the wmask */
-/* value, which it will always accompany. */
-
- decl %ebx
- shll $16, %ebx
- orl %eax, %ebx
- movl %ebx, chainlenwmask(%esp)
-
-/* if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; */
-
- movl dsNiceMatch(%edx), %eax
- movl dsLookahead(%edx), %ebx
- cmpl %eax, %ebx
- jl LookaheadLess
- movl %eax, %ebx
-LookaheadLess: movl %ebx, nicematch(%esp)
-
-/* register Bytef *scan = s->window + s->strstart; */
-
- movl dsWindow(%edx), %esi
- movl %esi, window(%esp)
- movl dsStrStart(%edx), %ebp
- lea (%esi,%ebp), %edi
- movl %edi, scan(%esp)
-
-/* Determine how many bytes the scan ptr is off from being */
-/* dword-aligned. */
-
- movl %edi, %eax
- negl %eax
- andl $3, %eax
- movl %eax, scanalign(%esp)
-
-/* IPos limit = s->strstart > (IPos)MAX_DIST(s) ? */
-/* s->strstart - (IPos)MAX_DIST(s) : NIL; */
-
- movl dsWSize(%edx), %eax
- subl $MIN_LOOKAHEAD, %eax
- subl %eax, %ebp
- jg LimitPositive
- xorl %ebp, %ebp
-LimitPositive:
-
-/* int best_len = s->prev_length; */
-
- movl dsPrevLen(%edx), %eax
- movl %eax, bestlen(%esp)
-
-/* Store the sum of s->window + best_len in %esi, and locally in memory. */
-
- addl %eax, %esi
- movl %esi, windowbestlen(%esp)
-
-/* register ush scan_start = *(ushf*)scan; */
-/* register ush scan_end = *(ushf*)(scan+best_len-1); */
-/* Posf *prev = s->prev; */
-
- movzwl (%edi), %ebx
- movl %ebx, scanstart(%esp)
- movzwl -1(%edi,%eax), %ebx
- movl %ebx, scanend(%esp)
- movl dsPrev(%edx), %edi
-
-/* Jump into the main loop. */
-
- movl chainlenwmask(%esp), %edx
- jmp LoopEntry
-
-.balign 16
-
-/* do {
- * match = s->window + cur_match;
- * if (*(ushf*)(match+best_len-1) != scan_end ||
- * *(ushf*)match != scan_start) continue;
- * [...]
- * } while ((cur_match = prev[cur_match & wmask]) > limit
- * && --chain_length != 0);
- *
- * Here is the inner loop of the function. The function will spend the
- * majority of its time in this loop, and majority of that time will
- * be spent in the first ten instructions.
- *
- * Within this loop:
- * %ebx = scanend
- * %ecx = curmatch
- * %edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
- * %esi = windowbestlen - i.e., (window + bestlen)
- * %edi = prev
- * %ebp = limit
- */
-LookupLoop:
- andl %edx, %ecx
- movzwl (%edi,%ecx,2), %ecx
- cmpl %ebp, %ecx
- jbe LeaveNow
- subl $0x00010000, %edx
- js LeaveNow
-LoopEntry: movzwl -1(%esi,%ecx), %eax
- cmpl %ebx, %eax
- jnz LookupLoop
- movl window(%esp), %eax
- movzwl (%eax,%ecx), %eax
- cmpl scanstart(%esp), %eax
- jnz LookupLoop
-
-/* Store the current value of chainlen. */
-
- movl %edx, chainlenwmask(%esp)
-
-/* Point %edi to the string under scrutiny, and %esi to the string we */
-/* are hoping to match it up with. In actuality, %esi and %edi are */
-/* both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and %edx is */
-/* initialized to -MAX_MATCH_8. */
-
- movl window(%esp), %esi
- movl scan(%esp), %edi
- addl %ecx, %esi
- movl scanalign(%esp), %eax
- movl $(-MAX_MATCH_8), %edx
- lea MAX_MATCH_8(%edi,%eax), %edi
- lea MAX_MATCH_8(%esi,%eax), %esi
-
-/* Test the strings for equality, 8 bytes at a time. At the end,
- * adjust %edx so that it is offset to the exact byte that mismatched.
- *
- * We already know at this point that the first three bytes of the
- * strings match each other, and they can be safely passed over before
- * starting the compare loop. So what this code does is skip over 0-3
- * bytes, as much as necessary in order to dword-align the %edi
- * pointer. (%esi will still be misaligned three times out of four.)
- *
- * It should be confessed that this loop usually does not represent
- * much of the total running time. Replacing it with a more
- * straightforward "rep cmpsb" would not drastically degrade
- * performance.
- */
-LoopCmps:
- movl (%esi,%edx), %eax
- xorl (%edi,%edx), %eax
- jnz LeaveLoopCmps
- movl 4(%esi,%edx), %eax
- xorl 4(%edi,%edx), %eax
- jnz LeaveLoopCmps4
- addl $8, %edx
- jnz LoopCmps
- jmp LenMaximum
-LeaveLoopCmps4: addl $4, %edx
-LeaveLoopCmps: testl $0x0000FFFF, %eax
- jnz LenLower
- addl $2, %edx
- shrl $16, %eax
-LenLower: subb $1, %al
- adcl $0, %edx
-
-/* Calculate the length of the match. If it is longer than MAX_MATCH, */
-/* then automatically accept it as the best possible match and leave. */
-
- lea (%edi,%edx), %eax
- movl scan(%esp), %edi
- subl %edi, %eax
- cmpl $MAX_MATCH, %eax
- jge LenMaximum
-
-/* If the length of the match is not longer than the best match we */
-/* have so far, then forget it and return to the lookup loop. */
-
- movl deflatestate(%esp), %edx
- movl bestlen(%esp), %ebx
- cmpl %ebx, %eax
- jg LongerMatch
- movl windowbestlen(%esp), %esi
- movl dsPrev(%edx), %edi
- movl scanend(%esp), %ebx
- movl chainlenwmask(%esp), %edx
- jmp LookupLoop
-
-/* s->match_start = cur_match; */
-/* best_len = len; */
-/* if (len >= nice_match) break; */
-/* scan_end = *(ushf*)(scan+best_len-1); */
-
-LongerMatch: movl nicematch(%esp), %ebx
- movl %eax, bestlen(%esp)
- movl %ecx, dsMatchStart(%edx)
- cmpl %ebx, %eax
- jge LeaveNow
- movl window(%esp), %esi
- addl %eax, %esi
- movl %esi, windowbestlen(%esp)
- movzwl -1(%edi,%eax), %ebx
- movl dsPrev(%edx), %edi
- movl %ebx, scanend(%esp)
- movl chainlenwmask(%esp), %edx
- jmp LookupLoop
-
-/* Accept the current string, with the maximum possible length. */
-
-LenMaximum: movl deflatestate(%esp), %edx
- movl $MAX_MATCH, bestlen(%esp)
- movl %ecx, dsMatchStart(%edx)
-
-/* if ((uInt)best_len <= s->lookahead) return (uInt)best_len; */
-/* return s->lookahead; */
-
-LeaveNow:
- movl deflatestate(%esp), %edx
- movl bestlen(%esp), %ebx
- movl dsLookahead(%edx), %eax
- cmpl %eax, %ebx
- jg LookaheadRet
- movl %ebx, %eax
-LookaheadRet:
-
-/* Restore the stack and return from whence we came. */
-
- addl $LocalVarsSize, %esp
- .cfi_def_cfa_offset 20
- popl %ebx
- .cfi_def_cfa_offset 16
- popl %esi
- .cfi_def_cfa_offset 12
- popl %edi
- .cfi_def_cfa_offset 8
- popl %ebp
- .cfi_def_cfa_offset 4
-.cfi_endproc
-match_init: ret
+++ /dev/null
-/* inffas86.c is a hand tuned assembler version of
- *
- * inffast.c -- fast decoding
- * Copyright (C) 1995-2003 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Copyright (C) 2003 Chris Anderson <christop@charm.net>
- * Please use the copyright conditions above.
- *
- * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also
- * slightly quicker on x86 systems because, instead of using rep movsb to copy
- * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
- * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
- * from http://fedora.linux.duke.edu/fc1_x86_64
- * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
- * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,
- * when decompressing mozilla-source-1.3.tar.gz.
- *
- * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
- * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
- * the moment. I have successfully compiled and tested this code with gcc2.96,
- * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
- * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
- * enabled. I will attempt to merge the MMX code into this version. Newer
- * versions of this and inffast.S can be found at
- * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
- */
-
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
-#include "inffast.h"
-
-/* Mark Adler's comments from inffast.c: */
-
-/*
- Decode literal, length, and distance codes and write out the resulting
- literal and match bytes until either not enough input or output is
- available, an end-of-block is encountered, or a data error is encountered.
- When large enough input and output buffers are supplied to inflate(), for
- example, a 16K input buffer and a 64K output buffer, more than 95% of the
- inflate execution time is spent in this routine.
-
- Entry assumptions:
-
- state->mode == LEN
- strm->avail_in >= 6
- strm->avail_out >= 258
- start >= strm->avail_out
- state->bits < 8
-
- On return, state->mode is one of:
-
- LEN -- ran out of enough output space or enough available input
- TYPE -- reached end of block code, inflate() to interpret next block
- BAD -- error in block data
-
- Notes:
-
- - The maximum input bits used by a length/distance pair is 15 bits for the
- length code, 5 bits for the length extra, 15 bits for the distance code,
- and 13 bits for the distance extra. This totals 48 bits, or six bytes.
- Therefore if strm->avail_in >= 6, then there is enough input to avoid
- checking for available input while decoding.
-
- - The maximum bytes that a single length/distance pair can output is 258
- bytes, which is the maximum length that can be coded. inflate_fast()
- requires strm->avail_out >= 258 for each loop to avoid checking for
- output space.
- */
-void inflate_fast(strm, start)
-z_streamp strm;
-unsigned start; /* inflate()'s starting value for strm->avail_out */
-{
- struct inflate_state FAR *state;
- struct inffast_ar {
-/* 64 32 x86 x86_64 */
-/* ar offset register */
-/* 0 0 */ void *esp; /* esp save */
-/* 8 4 */ void *ebp; /* ebp save */
-/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */
-/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */
-/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */
-/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */
-/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */
-/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */
-/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */
-/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */
-/* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */
-/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */
-/* 92 48 */ unsigned wsize; /* window size */
-/* 96 52 */ unsigned write; /* window write index */
-/*100 56 */ unsigned lmask; /* r12 mask for lcode */
-/*104 60 */ unsigned dmask; /* r13 mask for dcode */
-/*108 64 */ unsigned len; /* r14 match length */
-/*112 68 */ unsigned dist; /* r15 match distance */
-/*116 72 */ unsigned status; /* set when state chng*/
- } ar;
-
-#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
-#define PAD_AVAIL_IN 6
-#define PAD_AVAIL_OUT 258
-#else
-#define PAD_AVAIL_IN 5
-#define PAD_AVAIL_OUT 257
-#endif
-
- /* copy state to local variables */
- state = (struct inflate_state FAR *)strm->state;
- ar.in = strm->next_in;
- ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
- ar.out = strm->next_out;
- ar.beg = ar.out - (start - strm->avail_out);
- ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
- ar.wsize = state->wsize;
- ar.write = state->wnext;
- ar.window = state->window;
- ar.hold = state->hold;
- ar.bits = state->bits;
- ar.lcode = state->lencode;
- ar.dcode = state->distcode;
- ar.lmask = (1U << state->lenbits) - 1;
- ar.dmask = (1U << state->distbits) - 1;
-
- /* decode literals and length/distances until end-of-block or not enough
- input data or output space */
-
- /* align in on 1/2 hold size boundary */
- while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
- ar.hold += (unsigned long)*ar.in++ << ar.bits;
- ar.bits += 8;
- }
-
-#if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )
- __asm__ __volatile__ (
-" leaq %0, %%rax\n"
-" movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */
-" movq %%rsp, (%%rax)\n"
-" movq %%rax, %%rsp\n" /* make rsp point to &ar */
-" movq 16(%%rsp), %%rsi\n" /* rsi = in */
-" movq 32(%%rsp), %%rdi\n" /* rdi = out */
-" movq 24(%%rsp), %%r9\n" /* r9 = last */
-" movq 48(%%rsp), %%r10\n" /* r10 = end */
-" movq 64(%%rsp), %%rbp\n" /* rbp = lcode */
-" movq 72(%%rsp), %%r11\n" /* r11 = dcode */
-" movq 80(%%rsp), %%rdx\n" /* rdx = hold */
-" movl 88(%%rsp), %%ebx\n" /* ebx = bits */
-" movl 100(%%rsp), %%r12d\n" /* r12d = lmask */
-" movl 104(%%rsp), %%r13d\n" /* r13d = dmask */
- /* r14d = len */
- /* r15d = dist */
-" cld\n"
-" cmpq %%rdi, %%r10\n"
-" je .L_one_time\n" /* if only one decode left */
-" cmpq %%rsi, %%r9\n"
-" je .L_one_time\n"
-" jmp .L_do_loop\n"
-
-".L_one_time:\n"
-" movq %%r12, %%r8\n" /* r8 = lmask */
-" cmpb $32, %%bl\n"
-" ja .L_get_length_code_one_time\n"
-
-" lodsl\n" /* eax = *(uint *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $32, %%bl\n" /* bits += 32 */
-" shlq %%cl, %%rax\n"
-" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
-" jmp .L_get_length_code_one_time\n"
-
-".align 32,0x90\n"
-".L_while_test:\n"
-" cmpq %%rdi, %%r10\n"
-" jbe .L_break_loop\n"
-" cmpq %%rsi, %%r9\n"
-" jbe .L_break_loop\n"
-
-".L_do_loop:\n"
-" movq %%r12, %%r8\n" /* r8 = lmask */
-" cmpb $32, %%bl\n"
-" ja .L_get_length_code\n" /* if (32 < bits) */
-
-" lodsl\n" /* eax = *(uint *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $32, %%bl\n" /* bits += 32 */
-" shlq %%cl, %%rax\n"
-" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
-
-".L_get_length_code:\n"
-" andq %%rdx, %%r8\n" /* r8 &= hold */
-" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
-
-" movb %%ah, %%cl\n" /* cl = this.bits */
-" subb %%ah, %%bl\n" /* bits -= this.bits */
-" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
-
-" testb %%al, %%al\n"
-" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
-
-" movq %%r12, %%r8\n" /* r8 = lmask */
-" shrl $16, %%eax\n" /* output this.val char */
-" stosb\n"
-
-".L_get_length_code_one_time:\n"
-" andq %%rdx, %%r8\n" /* r8 &= hold */
-" movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */
-
-".L_dolen:\n"
-" movb %%ah, %%cl\n" /* cl = this.bits */
-" subb %%ah, %%bl\n" /* bits -= this.bits */
-" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
-
-" testb %%al, %%al\n"
-" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
-
-" shrl $16, %%eax\n" /* output this.val char */
-" stosb\n"
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_test_for_length_base:\n"
-" movl %%eax, %%r14d\n" /* len = this */
-" shrl $16, %%r14d\n" /* len = this.val */
-" movb %%al, %%cl\n"
-
-" testb $16, %%al\n"
-" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
-" andb $15, %%cl\n" /* op &= 15 */
-" jz .L_decode_distance\n" /* if (!op) */
-
-".L_add_bits_to_len:\n"
-" subb %%cl, %%bl\n"
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" shrq %%cl, %%rdx\n"
-" addl %%eax, %%r14d\n" /* len += hold & mask[op] */
-
-".L_decode_distance:\n"
-" movq %%r13, %%r8\n" /* r8 = dmask */
-" cmpb $32, %%bl\n"
-" ja .L_get_distance_code\n" /* if (32 < bits) */
-
-" lodsl\n" /* eax = *(uint *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $32, %%bl\n" /* bits += 32 */
-" shlq %%cl, %%rax\n"
-" orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */
-
-".L_get_distance_code:\n"
-" andq %%rdx, %%r8\n" /* r8 &= hold */
-" movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */
-
-".L_dodist:\n"
-" movl %%eax, %%r15d\n" /* dist = this */
-" shrl $16, %%r15d\n" /* dist = this.val */
-" movb %%ah, %%cl\n"
-" subb %%ah, %%bl\n" /* bits -= this.bits */
-" shrq %%cl, %%rdx\n" /* hold >>= this.bits */
-" movb %%al, %%cl\n" /* cl = this.op */
-
-" testb $16, %%al\n" /* if ((op & 16) == 0) */
-" jz .L_test_for_second_level_dist\n"
-" andb $15, %%cl\n" /* op &= 15 */
-" jz .L_check_dist_one\n"
-
-".L_add_bits_to_dist:\n"
-" subb %%cl, %%bl\n"
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n" /* (1 << op) - 1 */
-" andl %%edx, %%eax\n" /* eax &= hold */
-" shrq %%cl, %%rdx\n"
-" addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */
-
-".L_check_window:\n"
-" movq %%rsi, %%r8\n" /* save in so from can use it's reg */
-" movq %%rdi, %%rax\n"
-" subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */
-
-" cmpl %%r15d, %%eax\n"
-" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
-
-" movl %%r14d, %%ecx\n" /* ecx = len */
-" movq %%rdi, %%rsi\n"
-" subq %%r15, %%rsi\n" /* from = out - dist */
-
-" sarl %%ecx\n"
-" jnc .L_copy_two\n" /* if len % 2 == 0 */
-
-" rep movsw\n"
-" movb (%%rsi), %%al\n"
-" movb %%al, (%%rdi)\n"
-" incq %%rdi\n"
-
-" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
-" jmp .L_while_test\n"
-
-".L_copy_two:\n"
-" rep movsw\n"
-" movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_check_dist_one:\n"
-" cmpl $1, %%r15d\n" /* if dist 1, is a memset */
-" jne .L_check_window\n"
-" cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */
-" je .L_check_window\n"
-
-" movl %%r14d, %%ecx\n" /* ecx = len */
-" movb -1(%%rdi), %%al\n"
-" movb %%al, %%ah\n"
-
-" sarl %%ecx\n"
-" jnc .L_set_two\n"
-" movb %%al, (%%rdi)\n"
-" incq %%rdi\n"
-
-".L_set_two:\n"
-" rep stosw\n"
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_test_for_second_level_length:\n"
-" testb $64, %%al\n"
-" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
-
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" addl %%r14d, %%eax\n" /* eax += len */
-" movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
-" jmp .L_dolen\n"
-
-".align 32,0x90\n"
-".L_test_for_second_level_dist:\n"
-" testb $64, %%al\n"
-" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
-
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" addl %%r15d, %%eax\n" /* eax += dist */
-" movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
-" jmp .L_dodist\n"
-
-".align 32,0x90\n"
-".L_clip_window:\n"
-" movl %%eax, %%ecx\n" /* ecx = nbytes */
-" movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */
-" negl %%ecx\n" /* nbytes = -nbytes */
-
-" cmpl %%r15d, %%eax\n"
-" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
-
-" addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */
-" cmpl $0, 96(%%rsp)\n"
-" jne .L_wrap_around_window\n" /* if (write != 0) */
-
-" movq 56(%%rsp), %%rsi\n" /* from = window */
-" subl %%ecx, %%eax\n" /* eax -= nbytes */
-" addq %%rax, %%rsi\n" /* from += wsize - nbytes */
-
-" movl %%r14d, %%eax\n" /* eax = len */
-" cmpl %%ecx, %%r14d\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* eax -= nbytes */
-" rep movsb\n"
-" movq %%rdi, %%rsi\n"
-" subq %%r15, %%rsi\n" /* from = &out[ -dist ] */
-" jmp .L_do_copy\n"
-
-".align 32,0x90\n"
-".L_wrap_around_window:\n"
-" movl 96(%%rsp), %%eax\n" /* eax = write */
-" cmpl %%eax, %%ecx\n"
-" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
-
-" movl 92(%%rsp), %%esi\n" /* from = wsize */
-" addq 56(%%rsp), %%rsi\n" /* from += window */
-" addq %%rax, %%rsi\n" /* from += write */
-" subq %%rcx, %%rsi\n" /* from -= nbytes */
-" subl %%eax, %%ecx\n" /* nbytes -= write */
-
-" movl %%r14d, %%eax\n" /* eax = len */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movq 56(%%rsp), %%rsi\n" /* from = window */
-" movl 96(%%rsp), %%ecx\n" /* nbytes = write */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movq %%rdi, %%rsi\n"
-" subq %%r15, %%rsi\n" /* from = out - dist */
-" jmp .L_do_copy\n"
-
-".align 32,0x90\n"
-".L_contiguous_in_window:\n"
-" movq 56(%%rsp), %%rsi\n" /* rsi = window */
-" addq %%rax, %%rsi\n"
-" subq %%rcx, %%rsi\n" /* from += write - nbytes */
-
-" movl %%r14d, %%eax\n" /* eax = len */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movq %%rdi, %%rsi\n"
-" subq %%r15, %%rsi\n" /* from = out - dist */
-" jmp .L_do_copy\n" /* if (nbytes >= len) */
-
-".align 32,0x90\n"
-".L_do_copy:\n"
-" movl %%eax, %%ecx\n" /* ecx = len */
-" rep movsb\n"
-
-" movq %%r8, %%rsi\n" /* move in back to %esi, toss from */
-" jmp .L_while_test\n"
-
-".L_test_for_end_of_block:\n"
-" testb $32, %%al\n"
-" jz .L_invalid_literal_length_code\n"
-" movl $1, 116(%%rsp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_literal_length_code:\n"
-" movl $2, 116(%%rsp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_distance_code:\n"
-" movl $3, 116(%%rsp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_distance_too_far:\n"
-" movl $4, 116(%%rsp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_break_loop:\n"
-" movl $0, 116(%%rsp)\n"
-
-".L_break_loop_with_status:\n"
-/* put in, out, bits, and hold back into ar and pop esp */
-" movq %%rsi, 16(%%rsp)\n" /* in */
-" movq %%rdi, 32(%%rsp)\n" /* out */
-" movl %%ebx, 88(%%rsp)\n" /* bits */
-" movq %%rdx, 80(%%rsp)\n" /* hold */
-" movq (%%rsp), %%rax\n" /* restore rbp and rsp */
-" movq 8(%%rsp), %%rbp\n"
-" movq %%rax, %%rsp\n"
- :
- : "m" (ar)
- : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",
- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
- );
-#elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 )
- __asm__ __volatile__ (
-" leal %0, %%eax\n"
-" movl %%esp, (%%eax)\n" /* save esp, ebp */
-" movl %%ebp, 4(%%eax)\n"
-" movl %%eax, %%esp\n"
-" movl 8(%%esp), %%esi\n" /* esi = in */
-" movl 16(%%esp), %%edi\n" /* edi = out */
-" movl 40(%%esp), %%edx\n" /* edx = hold */
-" movl 44(%%esp), %%ebx\n" /* ebx = bits */
-" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
-
-" cld\n"
-" jmp .L_do_loop\n"
-
-".align 32,0x90\n"
-".L_while_test:\n"
-" cmpl %%edi, 24(%%esp)\n" /* out < end */
-" jbe .L_break_loop\n"
-" cmpl %%esi, 12(%%esp)\n" /* in < last */
-" jbe .L_break_loop\n"
-
-".L_do_loop:\n"
-" cmpb $15, %%bl\n"
-" ja .L_get_length_code\n" /* if (15 < bits) */
-
-" xorl %%eax, %%eax\n"
-" lodsw\n" /* al = *(ushort *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $16, %%bl\n" /* bits += 16 */
-" shll %%cl, %%eax\n"
-" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
-
-".L_get_length_code:\n"
-" movl 56(%%esp), %%eax\n" /* eax = lmask */
-" andl %%edx, %%eax\n" /* eax &= hold */
-" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */
-
-".L_dolen:\n"
-" movb %%ah, %%cl\n" /* cl = this.bits */
-" subb %%ah, %%bl\n" /* bits -= this.bits */
-" shrl %%cl, %%edx\n" /* hold >>= this.bits */
-
-" testb %%al, %%al\n"
-" jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */
-
-" shrl $16, %%eax\n" /* output this.val char */
-" stosb\n"
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_test_for_length_base:\n"
-" movl %%eax, %%ecx\n" /* len = this */
-" shrl $16, %%ecx\n" /* len = this.val */
-" movl %%ecx, 64(%%esp)\n" /* save len */
-" movb %%al, %%cl\n"
-
-" testb $16, %%al\n"
-" jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */
-" andb $15, %%cl\n" /* op &= 15 */
-" jz .L_decode_distance\n" /* if (!op) */
-" cmpb %%cl, %%bl\n"
-" jae .L_add_bits_to_len\n" /* if (op <= bits) */
-
-" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
-" xorl %%eax, %%eax\n"
-" lodsw\n" /* al = *(ushort *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $16, %%bl\n" /* bits += 16 */
-" shll %%cl, %%eax\n"
-" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
-" movb %%ch, %%cl\n" /* move op back to ecx */
-
-".L_add_bits_to_len:\n"
-" subb %%cl, %%bl\n"
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" shrl %%cl, %%edx\n"
-" addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */
-
-".L_decode_distance:\n"
-" cmpb $15, %%bl\n"
-" ja .L_get_distance_code\n" /* if (15 < bits) */
-
-" xorl %%eax, %%eax\n"
-" lodsw\n" /* al = *(ushort *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $16, %%bl\n" /* bits += 16 */
-" shll %%cl, %%eax\n"
-" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
-
-".L_get_distance_code:\n"
-" movl 60(%%esp), %%eax\n" /* eax = dmask */
-" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
-" andl %%edx, %%eax\n" /* eax &= hold */
-" movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */
-
-".L_dodist:\n"
-" movl %%eax, %%ebp\n" /* dist = this */
-" shrl $16, %%ebp\n" /* dist = this.val */
-" movb %%ah, %%cl\n"
-" subb %%ah, %%bl\n" /* bits -= this.bits */
-" shrl %%cl, %%edx\n" /* hold >>= this.bits */
-" movb %%al, %%cl\n" /* cl = this.op */
-
-" testb $16, %%al\n" /* if ((op & 16) == 0) */
-" jz .L_test_for_second_level_dist\n"
-" andb $15, %%cl\n" /* op &= 15 */
-" jz .L_check_dist_one\n"
-" cmpb %%cl, %%bl\n"
-" jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */
-
-" movb %%cl, %%ch\n" /* stash op in ch, freeing cl */
-" xorl %%eax, %%eax\n"
-" lodsw\n" /* al = *(ushort *)in++ */
-" movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */
-" addb $16, %%bl\n" /* bits += 16 */
-" shll %%cl, %%eax\n"
-" orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */
-" movb %%ch, %%cl\n" /* move op back to ecx */
-
-".L_add_bits_to_dist:\n"
-" subb %%cl, %%bl\n"
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n" /* (1 << op) - 1 */
-" andl %%edx, %%eax\n" /* eax &= hold */
-" shrl %%cl, %%edx\n"
-" addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */
-
-".L_check_window:\n"
-" movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */
-" movl %%edi, %%eax\n"
-" subl 20(%%esp), %%eax\n" /* nbytes = out - beg */
-
-" cmpl %%ebp, %%eax\n"
-" jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */
-
-" movl 64(%%esp), %%ecx\n" /* ecx = len */
-" movl %%edi, %%esi\n"
-" subl %%ebp, %%esi\n" /* from = out - dist */
-
-" sarl %%ecx\n"
-" jnc .L_copy_two\n" /* if len % 2 == 0 */
-
-" rep movsw\n"
-" movb (%%esi), %%al\n"
-" movb %%al, (%%edi)\n"
-" incl %%edi\n"
-
-" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
-" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
-" jmp .L_while_test\n"
-
-".L_copy_two:\n"
-" rep movsw\n"
-" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
-" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_check_dist_one:\n"
-" cmpl $1, %%ebp\n" /* if dist 1, is a memset */
-" jne .L_check_window\n"
-" cmpl %%edi, 20(%%esp)\n"
-" je .L_check_window\n" /* out == beg, if outside window */
-
-" movl 64(%%esp), %%ecx\n" /* ecx = len */
-" movb -1(%%edi), %%al\n"
-" movb %%al, %%ah\n"
-
-" sarl %%ecx\n"
-" jnc .L_set_two\n"
-" movb %%al, (%%edi)\n"
-" incl %%edi\n"
-
-".L_set_two:\n"
-" rep stosw\n"
-" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
-" jmp .L_while_test\n"
-
-".align 32,0x90\n"
-".L_test_for_second_level_length:\n"
-" testb $64, %%al\n"
-" jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */
-
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" addl 64(%%esp), %%eax\n" /* eax += len */
-" movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/
-" jmp .L_dolen\n"
-
-".align 32,0x90\n"
-".L_test_for_second_level_dist:\n"
-" testb $64, %%al\n"
-" jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */
-
-" xorl %%eax, %%eax\n"
-" incl %%eax\n"
-" shll %%cl, %%eax\n"
-" decl %%eax\n"
-" andl %%edx, %%eax\n" /* eax &= hold */
-" addl %%ebp, %%eax\n" /* eax += dist */
-" movl 36(%%esp), %%ecx\n" /* ecx = dcode */
-" movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/
-" jmp .L_dodist\n"
-
-".align 32,0x90\n"
-".L_clip_window:\n"
-" movl %%eax, %%ecx\n"
-" movl 48(%%esp), %%eax\n" /* eax = wsize */
-" negl %%ecx\n" /* nbytes = -nbytes */
-" movl 28(%%esp), %%esi\n" /* from = window */
-
-" cmpl %%ebp, %%eax\n"
-" jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */
-
-" addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */
-" cmpl $0, 52(%%esp)\n"
-" jne .L_wrap_around_window\n" /* if (write != 0) */
-
-" subl %%ecx, %%eax\n"
-" addl %%eax, %%esi\n" /* from += wsize - nbytes */
-
-" movl 64(%%esp), %%eax\n" /* eax = len */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movl %%edi, %%esi\n"
-" subl %%ebp, %%esi\n" /* from = out - dist */
-" jmp .L_do_copy\n"
-
-".align 32,0x90\n"
-".L_wrap_around_window:\n"
-" movl 52(%%esp), %%eax\n" /* eax = write */
-" cmpl %%eax, %%ecx\n"
-" jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */
-
-" addl 48(%%esp), %%esi\n" /* from += wsize */
-" addl %%eax, %%esi\n" /* from += write */
-" subl %%ecx, %%esi\n" /* from -= nbytes */
-" subl %%eax, %%ecx\n" /* nbytes -= write */
-
-" movl 64(%%esp), %%eax\n" /* eax = len */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movl 28(%%esp), %%esi\n" /* from = window */
-" movl 52(%%esp), %%ecx\n" /* nbytes = write */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movl %%edi, %%esi\n"
-" subl %%ebp, %%esi\n" /* from = out - dist */
-" jmp .L_do_copy\n"
-
-".align 32,0x90\n"
-".L_contiguous_in_window:\n"
-" addl %%eax, %%esi\n"
-" subl %%ecx, %%esi\n" /* from += write - nbytes */
-
-" movl 64(%%esp), %%eax\n" /* eax = len */
-" cmpl %%ecx, %%eax\n"
-" jbe .L_do_copy\n" /* if (nbytes >= len) */
-
-" subl %%ecx, %%eax\n" /* len -= nbytes */
-" rep movsb\n"
-" movl %%edi, %%esi\n"
-" subl %%ebp, %%esi\n" /* from = out - dist */
-" jmp .L_do_copy\n" /* if (nbytes >= len) */
-
-".align 32,0x90\n"
-".L_do_copy:\n"
-" movl %%eax, %%ecx\n"
-" rep movsb\n"
-
-" movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */
-" movl 32(%%esp), %%ebp\n" /* ebp = lcode */
-" jmp .L_while_test\n"
-
-".L_test_for_end_of_block:\n"
-" testb $32, %%al\n"
-" jz .L_invalid_literal_length_code\n"
-" movl $1, 72(%%esp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_literal_length_code:\n"
-" movl $2, 72(%%esp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_distance_code:\n"
-" movl $3, 72(%%esp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_invalid_distance_too_far:\n"
-" movl 8(%%esp), %%esi\n"
-" movl $4, 72(%%esp)\n"
-" jmp .L_break_loop_with_status\n"
-
-".L_break_loop:\n"
-" movl $0, 72(%%esp)\n"
-
-".L_break_loop_with_status:\n"
-/* put in, out, bits, and hold back into ar and pop esp */
-" movl %%esi, 8(%%esp)\n" /* save in */
-" movl %%edi, 16(%%esp)\n" /* save out */
-" movl %%ebx, 44(%%esp)\n" /* save bits */
-" movl %%edx, 40(%%esp)\n" /* save hold */
-" movl 4(%%esp), %%ebp\n" /* restore esp, ebp */
-" movl (%%esp), %%esp\n"
- :
- : "m" (ar)
- : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
- );
-#elif defined( _MSC_VER ) && ! defined( _M_AMD64 )
- __asm {
- lea eax, ar
- mov [eax], esp /* save esp, ebp */
- mov [eax+4], ebp
- mov esp, eax
- mov esi, [esp+8] /* esi = in */
- mov edi, [esp+16] /* edi = out */
- mov edx, [esp+40] /* edx = hold */
- mov ebx, [esp+44] /* ebx = bits */
- mov ebp, [esp+32] /* ebp = lcode */
-
- cld
- jmp L_do_loop
-
-ALIGN 4
-L_while_test:
- cmp [esp+24], edi
- jbe L_break_loop
- cmp [esp+12], esi
- jbe L_break_loop
-
-L_do_loop:
- cmp bl, 15
- ja L_get_length_code /* if (15 < bits) */
-
- xor eax, eax
- lodsw /* al = *(ushort *)in++ */
- mov cl, bl /* cl = bits, needs it for shifting */
- add bl, 16 /* bits += 16 */
- shl eax, cl
- or edx, eax /* hold |= *((ushort *)in)++ << bits */
-
-L_get_length_code:
- mov eax, [esp+56] /* eax = lmask */
- and eax, edx /* eax &= hold */
- mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */
-
-L_dolen:
- mov cl, ah /* cl = this.bits */
- sub bl, ah /* bits -= this.bits */
- shr edx, cl /* hold >>= this.bits */
-
- test al, al
- jnz L_test_for_length_base /* if (op != 0) 45.7% */
-
- shr eax, 16 /* output this.val char */
- stosb
- jmp L_while_test
-
-ALIGN 4
-L_test_for_length_base:
- mov ecx, eax /* len = this */
- shr ecx, 16 /* len = this.val */
- mov [esp+64], ecx /* save len */
- mov cl, al
-
- test al, 16
- jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
- and cl, 15 /* op &= 15 */
- jz L_decode_distance /* if (!op) */
- cmp bl, cl
- jae L_add_bits_to_len /* if (op <= bits) */
-
- mov ch, cl /* stash op in ch, freeing cl */
- xor eax, eax
- lodsw /* al = *(ushort *)in++ */
- mov cl, bl /* cl = bits, needs it for shifting */
- add bl, 16 /* bits += 16 */
- shl eax, cl
- or edx, eax /* hold |= *((ushort *)in)++ << bits */
- mov cl, ch /* move op back to ecx */
-
-L_add_bits_to_len:
- sub bl, cl
- xor eax, eax
- inc eax
- shl eax, cl
- dec eax
- and eax, edx /* eax &= hold */
- shr edx, cl
- add [esp+64], eax /* len += hold & mask[op] */
-
-L_decode_distance:
- cmp bl, 15
- ja L_get_distance_code /* if (15 < bits) */
-
- xor eax, eax
- lodsw /* al = *(ushort *)in++ */
- mov cl, bl /* cl = bits, needs it for shifting */
- add bl, 16 /* bits += 16 */
- shl eax, cl
- or edx, eax /* hold |= *((ushort *)in)++ << bits */
-
-L_get_distance_code:
- mov eax, [esp+60] /* eax = dmask */
- mov ecx, [esp+36] /* ecx = dcode */
- and eax, edx /* eax &= hold */
- mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */
-
-L_dodist:
- mov ebp, eax /* dist = this */
- shr ebp, 16 /* dist = this.val */
- mov cl, ah
- sub bl, ah /* bits -= this.bits */
- shr edx, cl /* hold >>= this.bits */
- mov cl, al /* cl = this.op */
-
- test al, 16 /* if ((op & 16) == 0) */
- jz L_test_for_second_level_dist
- and cl, 15 /* op &= 15 */
- jz L_check_dist_one
- cmp bl, cl
- jae L_add_bits_to_dist /* if (op <= bits) 97.6% */
-
- mov ch, cl /* stash op in ch, freeing cl */
- xor eax, eax
- lodsw /* al = *(ushort *)in++ */
- mov cl, bl /* cl = bits, needs it for shifting */
- add bl, 16 /* bits += 16 */
- shl eax, cl
- or edx, eax /* hold |= *((ushort *)in)++ << bits */
- mov cl, ch /* move op back to ecx */
-
-L_add_bits_to_dist:
- sub bl, cl
- xor eax, eax
- inc eax
- shl eax, cl
- dec eax /* (1 << op) - 1 */
- and eax, edx /* eax &= hold */
- shr edx, cl
- add ebp, eax /* dist += hold & ((1 << op) - 1) */
-
-L_check_window:
- mov [esp+8], esi /* save in so from can use it's reg */
- mov eax, edi
- sub eax, [esp+20] /* nbytes = out - beg */
-
- cmp eax, ebp
- jb L_clip_window /* if (dist > nbytes) 4.2% */
-
- mov ecx, [esp+64] /* ecx = len */
- mov esi, edi
- sub esi, ebp /* from = out - dist */
-
- sar ecx, 1
- jnc L_copy_two
-
- rep movsw
- mov al, [esi]
- mov [edi], al
- inc edi
-
- mov esi, [esp+8] /* move in back to %esi, toss from */
- mov ebp, [esp+32] /* ebp = lcode */
- jmp L_while_test
-
-L_copy_two:
- rep movsw
- mov esi, [esp+8] /* move in back to %esi, toss from */
- mov ebp, [esp+32] /* ebp = lcode */
- jmp L_while_test
-
-ALIGN 4
-L_check_dist_one:
- cmp ebp, 1 /* if dist 1, is a memset */
- jne L_check_window
- cmp [esp+20], edi
- je L_check_window /* out == beg, if outside window */
-
- mov ecx, [esp+64] /* ecx = len */
- mov al, [edi-1]
- mov ah, al
-
- sar ecx, 1
- jnc L_set_two
- mov [edi], al /* memset out with from[-1] */
- inc edi
-
-L_set_two:
- rep stosw
- mov ebp, [esp+32] /* ebp = lcode */
- jmp L_while_test
-
-ALIGN 4
-L_test_for_second_level_length:
- test al, 64
- jnz L_test_for_end_of_block /* if ((op & 64) != 0) */
-
- xor eax, eax
- inc eax
- shl eax, cl
- dec eax
- and eax, edx /* eax &= hold */
- add eax, [esp+64] /* eax += len */
- mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/
- jmp L_dolen
-
-ALIGN 4
-L_test_for_second_level_dist:
- test al, 64
- jnz L_invalid_distance_code /* if ((op & 64) != 0) */
-
- xor eax, eax
- inc eax
- shl eax, cl
- dec eax
- and eax, edx /* eax &= hold */
- add eax, ebp /* eax += dist */
- mov ecx, [esp+36] /* ecx = dcode */
- mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/
- jmp L_dodist
-
-ALIGN 4
-L_clip_window:
- mov ecx, eax
- mov eax, [esp+48] /* eax = wsize */
- neg ecx /* nbytes = -nbytes */
- mov esi, [esp+28] /* from = window */
-
- cmp eax, ebp
- jb L_invalid_distance_too_far /* if (dist > wsize) */
-
- add ecx, ebp /* nbytes = dist - nbytes */
- cmp dword ptr [esp+52], 0
- jne L_wrap_around_window /* if (write != 0) */
-
- sub eax, ecx
- add esi, eax /* from += wsize - nbytes */
-
- mov eax, [esp+64] /* eax = len */
- cmp eax, ecx
- jbe L_do_copy /* if (nbytes >= len) */
-
- sub eax, ecx /* len -= nbytes */
- rep movsb
- mov esi, edi
- sub esi, ebp /* from = out - dist */
- jmp L_do_copy
-
-ALIGN 4
-L_wrap_around_window:
- mov eax, [esp+52] /* eax = write */
- cmp ecx, eax
- jbe L_contiguous_in_window /* if (write >= nbytes) */
-
- add esi, [esp+48] /* from += wsize */
- add esi, eax /* from += write */
- sub esi, ecx /* from -= nbytes */
- sub ecx, eax /* nbytes -= write */
-
- mov eax, [esp+64] /* eax = len */
- cmp eax, ecx
- jbe L_do_copy /* if (nbytes >= len) */
-
- sub eax, ecx /* len -= nbytes */
- rep movsb
- mov esi, [esp+28] /* from = window */
- mov ecx, [esp+52] /* nbytes = write */
- cmp eax, ecx
- jbe L_do_copy /* if (nbytes >= len) */
-
- sub eax, ecx /* len -= nbytes */
- rep movsb
- mov esi, edi
- sub esi, ebp /* from = out - dist */
- jmp L_do_copy
-
-ALIGN 4
-L_contiguous_in_window:
- add esi, eax
- sub esi, ecx /* from += write - nbytes */
-
- mov eax, [esp+64] /* eax = len */
- cmp eax, ecx
- jbe L_do_copy /* if (nbytes >= len) */
-
- sub eax, ecx /* len -= nbytes */
- rep movsb
- mov esi, edi
- sub esi, ebp /* from = out - dist */
- jmp L_do_copy
-
-ALIGN 4
-L_do_copy:
- mov ecx, eax
- rep movsb
-
- mov esi, [esp+8] /* move in back to %esi, toss from */
- mov ebp, [esp+32] /* ebp = lcode */
- jmp L_while_test
-
-L_test_for_end_of_block:
- test al, 32
- jz L_invalid_literal_length_code
- mov dword ptr [esp+72], 1
- jmp L_break_loop_with_status
-
-L_invalid_literal_length_code:
- mov dword ptr [esp+72], 2
- jmp L_break_loop_with_status
-
-L_invalid_distance_code:
- mov dword ptr [esp+72], 3
- jmp L_break_loop_with_status
-
-L_invalid_distance_too_far:
- mov esi, [esp+4]
- mov dword ptr [esp+72], 4
- jmp L_break_loop_with_status
-
-L_break_loop:
- mov dword ptr [esp+72], 0
-
-L_break_loop_with_status:
-/* put in, out, bits, and hold back into ar and pop esp */
- mov [esp+8], esi /* save in */
- mov [esp+16], edi /* save out */
- mov [esp+44], ebx /* save bits */
- mov [esp+40], edx /* save hold */
- mov ebp, [esp+4] /* restore esp, ebp */
- mov esp, [esp]
- }
-#else
-#error "x86 architecture not defined"
-#endif
-
- if (ar.status > 1) {
- if (ar.status == 2)
- strm->msg = "invalid literal/length code";
- else if (ar.status == 3)
- strm->msg = "invalid distance code";
- else
- strm->msg = "invalid distance too far back";
- state->mode = BAD;
- }
- else if ( ar.status == 1 ) {
- state->mode = TYPE;
- }
-
- /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
- ar.len = ar.bits >> 3;
- ar.in -= ar.len;
- ar.bits -= ar.len << 3;
- ar.hold &= (1U << ar.bits) - 1;
-
- /* update state and return */
- strm->next_in = ar.in;
- strm->next_out = ar.out;
- strm->avail_in = (unsigned)(ar.in < ar.last ?
- PAD_AVAIL_IN + (ar.last - ar.in) :
- PAD_AVAIL_IN - (ar.in - ar.last));
- strm->avail_out = (unsigned)(ar.out < ar.end ?
- PAD_AVAIL_OUT + (ar.end - ar.out) :
- PAD_AVAIL_OUT - (ar.out - ar.end));
- state->hold = ar.hold;
- state->bits = ar.bits;
- return;
-}
-
+++ /dev/null
-/*
- * inffast.S is a hand tuned assembler version of:
- *
- * inffast.c -- fast decoding
- * Copyright (C) 1995-2003 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Copyright (C) 2003 Chris Anderson <christop@charm.net>
- * Please use the copyright conditions above.
- *
- * This version (Jan-23-2003) of inflate_fast was coded and tested under
- * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution. On that
- * machine, I found that gzip style archives decompressed about 20% faster than
- * the gcc-3.2 -O3 -fomit-frame-pointer compiled version. Your results will
- * depend on how large of a buffer is used for z_stream.next_in & next_out
- * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
- * stream processing I/O and crc32/addler32. In my case, this routine used
- * 70% of the cpu time and crc32 used 20%.
- *
- * I am confident that this version will work in the general case, but I have
- * not tested a wide variety of datasets or a wide variety of platforms.
- *
- * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
- * It should be a runtime flag instead of compile time flag...
- *
- * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
- * With -DUSE_MMX, only MMX code is compiled. With -DNO_MMX, only non-MMX code
- * is compiled. Without either option, runtime detection is enabled. Runtime
- * detection should work on all modern cpus and the recomended algorithm (flip
- * ID bit on eflags and then use the cpuid instruction) is used in many
- * multimedia applications. Tested under win2k with gcc-2.95 and gas-2.12
- * distributed with cygwin3. Compiling with gcc-2.95 -c inffast.S -o
- * inffast.obj generates a COFF object which can then be linked with MSVC++
- * compiled code. Tested under FreeBSD 4.7 with gcc-2.95.
- *
- * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
- * slower than compiler generated code). Adjusted cpuid check to use the MMX
- * code only for Pentiums < P4 until I have more data on the P4. Speed
- * improvment is only about 15% on the Athlon when compared with code generated
- * with MSVC++. Not sure yet, but I think the P4 will also be slower using the
- * MMX mode because many of it's x86 ALU instructions execute in .5 cycles and
- * have less latency than MMX ops. Added code to buffer the last 11 bytes of
- * the input stream since the MMX code grabs bits in chunks of 32, which
- * differs from the inffast.c algorithm. I don't think there would have been
- * read overruns where a page boundary was crossed (a segfault), but there
- * could have been overruns when next_in ends on unaligned memory (unintialized
- * memory read).
- *
- * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX. I created a C
- * version of the non-MMX code so that it doesn't depend on zstrm and zstate
- * structure offsets which are hard coded in this file. This was last tested
- * with zlib-1.2.0 which is currently in beta testing, newer versions of this
- * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
- * http://www.charm.net/~christop/zlib/
- */
-
-
-/*
- * if you have underscore linking problems (_inflate_fast undefined), try
- * using -DGAS_COFF
- */
-#if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
-
-#if defined( WIN32 ) || defined( __CYGWIN__ )
-#define GAS_COFF /* windows object format */
-#else
-#define GAS_ELF
-#endif
-
-#endif /* ! GAS_COFF && ! GAS_ELF */
-
-
-#if defined( GAS_COFF )
-
-/* coff externals have underscores */
-#define inflate_fast _inflate_fast
-#define inflate_fast_use_mmx _inflate_fast_use_mmx
-
-#endif /* GAS_COFF */
-
-
-.file "inffast.S"
-
-.globl inflate_fast
-
-.text
-.align 4,0
-.L_invalid_literal_length_code_msg:
-.string "invalid literal/length code"
-
-.align 4,0
-.L_invalid_distance_code_msg:
-.string "invalid distance code"
-
-.align 4,0
-.L_invalid_distance_too_far_msg:
-.string "invalid distance too far back"
-
-#if ! defined( NO_MMX )
-.align 4,0
-.L_mask: /* mask[N] = ( 1 << N ) - 1 */
-.long 0
-.long 1
-.long 3
-.long 7
-.long 15
-.long 31
-.long 63
-.long 127
-.long 255
-.long 511
-.long 1023
-.long 2047
-.long 4095
-.long 8191
-.long 16383
-.long 32767
-.long 65535
-.long 131071
-.long 262143
-.long 524287
-.long 1048575
-.long 2097151
-.long 4194303
-.long 8388607
-.long 16777215
-.long 33554431
-.long 67108863
-.long 134217727
-.long 268435455
-.long 536870911
-.long 1073741823
-.long 2147483647
-.long 4294967295
-#endif /* NO_MMX */
-
-.text
-
-/*
- * struct z_stream offsets, in zlib.h
- */
-#define next_in_strm 0 /* strm->next_in */
-#define avail_in_strm 4 /* strm->avail_in */
-#define next_out_strm 12 /* strm->next_out */
-#define avail_out_strm 16 /* strm->avail_out */
-#define msg_strm 24 /* strm->msg */
-#define state_strm 28 /* strm->state */
-
-/*
- * struct inflate_state offsets, in inflate.h
- */
-#define mode_state 0 /* state->mode */
-#define wsize_state 32 /* state->wsize */
-#define write_state 40 /* state->write */
-#define window_state 44 /* state->window */
-#define hold_state 48 /* state->hold */
-#define bits_state 52 /* state->bits */
-#define lencode_state 68 /* state->lencode */
-#define distcode_state 72 /* state->distcode */
-#define lenbits_state 76 /* state->lenbits */
-#define distbits_state 80 /* state->distbits */
-
-/*
- * inflate_fast's activation record
- */
-#define local_var_size 64 /* how much local space for vars */
-#define strm_sp 88 /* first arg: z_stream * (local_var_size + 24) */
-#define start_sp 92 /* second arg: unsigned int (local_var_size + 28) */
-
-/*
- * offsets for local vars on stack
- */
-#define out 60 /* unsigned char* */
-#define window 56 /* unsigned char* */
-#define wsize 52 /* unsigned int */
-#define write 48 /* unsigned int */
-#define in 44 /* unsigned char* */
-#define beg 40 /* unsigned char* */
-#define buf 28 /* char[ 12 ] */
-#define len 24 /* unsigned int */
-#define last 20 /* unsigned char* */
-#define end 16 /* unsigned char* */
-#define dcode 12 /* code* */
-#define lcode 8 /* code* */
-#define dmask 4 /* unsigned int */
-#define lmask 0 /* unsigned int */
-
-/*
- * typedef enum inflate_mode consts, in inflate.h
- */
-#define INFLATE_MODE_TYPE 11 /* state->mode flags enum-ed in inflate.h */
-#define INFLATE_MODE_BAD 26
-
-
-#if ! defined( USE_MMX ) && ! defined( NO_MMX )
-
-#define RUN_TIME_MMX
-
-#define CHECK_MMX 1
-#define DO_USE_MMX 2
-#define DONT_USE_MMX 3
-
-.globl inflate_fast_use_mmx
-
-.data
-
-.align 4,0
-inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
-.long CHECK_MMX
-
-#if defined( GAS_ELF )
-/* elf info */
-.type inflate_fast_use_mmx,@object
-.size inflate_fast_use_mmx,4
-#endif
-
-#endif /* RUN_TIME_MMX */
-
-#if defined( GAS_COFF )
-/* coff info: scl 2 = extern, type 32 = function */
-.def inflate_fast; .scl 2; .type 32; .endef
-#endif
-
-.text
-
-/*
- * inflate_fast: 32-bit x86 entry point.
- *
- * Arguments are read from the stack after the frame is built:
- * strm_sp(%esp) is the first arg (z_stream *) and start_sp(%esp) the
- * second (unsigned int). %edi, %esi, %ebp, %ebx and eflags are saved
- * here and restored at .L_done.
- */
-.align 32,0x90
-inflate_fast:
- pushl %edi
- pushl %esi
- pushl %ebp
- pushl %ebx
- pushf /* save eflags (strm_sp, start_sp assume this push is 32 bits) */
- subl $local_var_size, %esp /* reserve space for the local vars */
- cld /* string instructions (movsb, stosb, lodsw) must count upward */
-
-#define strm_r %esi
-#define state_r %edi
-
- movl strm_sp(%esp), strm_r
- movl state_strm(strm_r), state_r /* state = strm->state */
-
- /* in = strm->next_in;
- * out = strm->next_out;
- * last = in + strm->avail_in - 11;
- * beg = out - (start - strm->avail_out);
- * end = out + (strm->avail_out - 257);
- */
- movl avail_in_strm(strm_r), %edx
- movl next_in_strm(strm_r), %eax
-
- addl %eax, %edx /* avail_in += next_in */
- subl $11, %edx /* avail_in -= 11 */
-
- movl %eax, in(%esp)
- movl %edx, last(%esp)
-
- movl start_sp(%esp), %ebp
- movl avail_out_strm(strm_r), %ecx
- movl next_out_strm(strm_r), %ebx
-
- subl %ecx, %ebp /* start -= avail_out */
- negl %ebp /* start = -start */
- addl %ebx, %ebp /* start += next_out, i.e. beg */
-
- subl $257, %ecx /* avail_out -= 257 */
- addl %ebx, %ecx /* avail_out += out, i.e. end */
-
- movl %ebx, out(%esp)
- movl %ebp, beg(%esp)
- movl %ecx, end(%esp)
-
- /* wsize = state->wsize;
- * write = state->write;
- * window = state->window;
- * hold = state->hold;
- * bits = state->bits;
- * lcode = state->lencode;
- * dcode = state->distcode;
- * lmask = ( 1 << state->lenbits ) - 1;
- * dmask = ( 1 << state->distbits ) - 1;
- */
-
- movl lencode_state(state_r), %eax
- movl distcode_state(state_r), %ecx
-
- movl %eax, lcode(%esp)
- movl %ecx, dcode(%esp)
-
- movl $1, %eax
- movl lenbits_state(state_r), %ecx
- shll %cl, %eax
- decl %eax
- movl %eax, lmask(%esp)
-
- movl $1, %eax
- movl distbits_state(state_r), %ecx
- shll %cl, %eax
- decl %eax
- movl %eax, dmask(%esp)
-
- movl wsize_state(state_r), %eax
- movl write_state(state_r), %ecx
- movl window_state(state_r), %edx
-
- movl %eax, wsize(%esp)
- movl %ecx, write(%esp)
- movl %edx, window(%esp)
-
- movl hold_state(state_r), %ebp
- movl bits_state(state_r), %ebx
-
-#undef strm_r
-#undef state_r
-
-#define in_r %esi
-#define from_r %esi
-#define out_r %edi
-
- movl in(%esp), in_r
- movl last(%esp), %ecx
- cmpl in_r, %ecx
- ja .L_align_long /* if in < last */
-
- addl $11, %ecx /* ecx = &in[ avail_in ] */
- subl in_r, %ecx /* ecx = avail_in */
- movl $12, %eax
- subl %ecx, %eax /* eax = 12 - avail_in */
- leal buf(%esp), %edi
- rep movsb /* memcpy( buf, in, avail_in ) */
- movl %eax, %ecx
- xorl %eax, %eax
- rep stosb /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
- leal buf(%esp), in_r /* in = buf */
- movl in_r, last(%esp) /* last = in, do just one iteration */
- jmp .L_is_aligned
-
- /* align in_r on long boundary */
-.L_align_long:
- testl $3, in_r
- jz .L_is_aligned
- xorl %eax, %eax
- movb (in_r), %al
- incl in_r
- movl %ebx, %ecx
- addl $8, %ebx
- shll %cl, %eax
- orl %eax, %ebp
- jmp .L_align_long
-
-.L_is_aligned:
- movl out(%esp), out_r
-
-#if defined( NO_MMX )
- jmp .L_do_loop
-#endif
-
-#if defined( USE_MMX )
- jmp .L_init_mmx
-#endif
-
-/*** Runtime MMX check ***/
-
-#if defined( RUN_TIME_MMX )
-/*
- * Dispatch on inflate_fast_use_mmx: DO_USE_MMX goes to the MMX loop and
- * DONT_USE_MMX to the plain loop. While the flag is still CHECK_MMX the
- * cpu is probed, the flag is set to one of the other two values, and the
- * dispatch is retried. The MMX path is only chosen for a GenuineIntel
- * family 6 cpu that reports the MMX feature bit.
- */
-.L_check_mmx:
- cmpl $DO_USE_MMX, inflate_fast_use_mmx
- je .L_init_mmx
- ja .L_do_loop /* > 2 */
-
- /* cpuid overwrites eax, ebx, ecx and edx, so keep copies */
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushf
- movl (%esp), %eax /* copy eflags to eax */
- xorl $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
- * to see if cpu supports cpuid...
- * ID bit method not supported by NexGen but
- * bios may load a cpuid instruction and
- * cpuid may be disabled on Cyrix 5-6x86 */
- popf
- pushf
- popl %edx /* copy new eflags to edx */
- xorl %eax, %edx /* test if ID bit is flipped */
- jz .L_dont_use_mmx /* not flipped if zero */
- xorl %eax, %eax /* cpuid leaf 0: vendor string */
- cpuid
- cmpl $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
- jne .L_dont_use_mmx
- cmpl $0x6c65746e, %ecx
- jne .L_dont_use_mmx
- cmpl $0x49656e69, %edx
- jne .L_dont_use_mmx
- movl $1, %eax
- cpuid /* get cpu features */
- shrl $8, %eax
- andl $15, %eax /* eax = family field (bits 8..11) */
- cmpl $6, %eax /* only family 6 qualifies; P4 reports 0xf */
- jne .L_dont_use_mmx
- testl $0x800000, %edx /* test if MMX feature is set (bit 23) */
- jnz .L_use_mmx
- jmp .L_dont_use_mmx
-.L_use_mmx:
- movl $DO_USE_MMX, inflate_fast_use_mmx
- jmp .L_check_mmx_pop
-.L_dont_use_mmx:
- movl $DONT_USE_MMX, inflate_fast_use_mmx
-.L_check_mmx_pop:
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
- jmp .L_check_mmx /* flag is now decided, dispatch again */
-#endif
-
-
-/*** Non-MMX code ***/
-
-#if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
-
-#define hold_r %ebp
-#define bits_r %bl
-#define bitslong_r %ebx
-
-.align 32,0x90
-.L_while_test:
- /* while (in < last && out < end)
- */
- cmpl out_r, end(%esp)
- jbe .L_break_loop /* if (out >= end) */
-
- cmpl in_r, last(%esp)
- jbe .L_break_loop /* if (in >= last) */
-
-.L_do_loop:
- /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
- *
- * do {
- * if (bits <= 15) {
- * hold |= *((unsigned short *)in)++ << bits;
- * bits += 16
- * }
- * this = lcode[hold & lmask]
- */
- cmpb $15, bits_r
- ja .L_get_length_code /* if (15 < bits) */
-
- xorl %eax, %eax
- lodsw /* ax = *(ushort *)in++ */
- movb bits_r, %cl /* cl = bits, needs it for shifting */
- addb $16, bits_r /* bits += 16 */
- shll %cl, %eax
- orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
-
-.L_get_length_code:
- movl lmask(%esp), %edx /* edx = lmask */
- movl lcode(%esp), %ecx /* ecx = lcode */
- andl hold_r, %edx /* edx &= hold */
- movl (%ecx,%edx,4), %eax /* eax = lcode[hold & lmask] */
-
-.L_dolen:
- /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
- * %eax = this (al = op, ah = bits, upper 16 bits = val)
- *
- * dolen:
- * bits -= this.bits;
- * hold >>= this.bits
- */
- movb %ah, %cl /* cl = this.bits */
- subb %ah, bits_r /* bits -= this.bits */
- shrl %cl, hold_r /* hold >>= this.bits */
-
- /* check if op is a literal
- * if (op == 0) {
- * PUP(out) = this.val;
- * }
- */
- testb %al, %al
- jnz .L_test_for_length_base /* if (op != 0) 45.7% */
-
- shrl $16, %eax /* output this.val char */
- stosb /* *out++ = al */
- jmp .L_while_test
-
-.L_test_for_length_base:
- /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
- *
- * else if (op & 16) {
- * len = this.val
- * op &= 15
- * if (op) {
- * if (op > bits) {
- * hold |= *((unsigned short *)in)++ << bits;
- * bits += 16
- * }
- * len += hold & mask[op];
- * bits -= op;
- * hold >>= op;
- * }
- */
-#define len_r %edx
- movl %eax, len_r /* len = this */
- shrl $16, len_r /* len = this.val */
- movb %al, %cl /* cl = this.op */
-
- testb $16, %al
- jz .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
- andb $15, %cl /* op &= 15 */
- jz .L_save_len /* if (!op) */
- cmpb %cl, bits_r
- jae .L_add_bits_to_len /* if (op <= bits) */
-
- movb %cl, %ch /* stash op in ch, freeing cl */
- xorl %eax, %eax
- lodsw /* ax = *(ushort *)in++ */
- movb bits_r, %cl /* cl = bits, needs it for shifting */
- addb $16, bits_r /* bits += 16 */
- shll %cl, %eax
- orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
- movb %ch, %cl /* move op back to cl */
-
-.L_add_bits_to_len:
- movl $1, %eax
- shll %cl, %eax
- decl %eax /* eax = (1 << op) - 1 */
- subb %cl, bits_r /* bits -= op */
- andl hold_r, %eax /* eax &= hold */
- shrl %cl, hold_r /* hold >>= op */
- addl %eax, len_r /* len += hold & mask[op] */
-
-.L_save_len:
- movl len_r, len(%esp) /* save len */
-#undef len_r
-
-.L_decode_distance:
- /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- *
- * if (bits <= 15) {
- * hold |= *((unsigned short *)in)++ << bits;
- * bits += 16
- * }
- * this = dcode[hold & dmask];
- * dodist:
- * bits -= this.bits;
- * hold >>= this.bits;
- * op = this.op;
- */
-
- cmpb $15, bits_r
- ja .L_get_distance_code /* if (15 < bits) */
-
- xorl %eax, %eax
- lodsw /* ax = *(ushort *)in++ */
- movb bits_r, %cl /* cl = bits, needs it for shifting */
- addb $16, bits_r /* bits += 16 */
- shll %cl, %eax
- orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
-
-.L_get_distance_code:
- movl dmask(%esp), %edx /* edx = dmask */
- movl dcode(%esp), %ecx /* ecx = dcode */
- andl hold_r, %edx /* edx &= hold */
- movl (%ecx,%edx,4), %eax /* eax = dcode[hold & dmask] */
-
-#define dist_r %edx
-.L_dodist:
- movl %eax, dist_r /* dist = this */
- shrl $16, dist_r /* dist = this.val */
- movb %ah, %cl /* cl = this.bits */
- subb %ah, bits_r /* bits -= this.bits */
- shrl %cl, hold_r /* hold >>= this.bits */
-
- /* if (op & 16) {
- * dist = this.val
- * op &= 15
- * if (op > bits) {
- * hold |= *((unsigned short *)in)++ << bits;
- * bits += 16
- * }
- * dist += hold & mask[op];
- * bits -= op;
- * hold >>= op;
- */
- movb %al, %cl /* cl = this.op */
-
- testb $16, %al /* if ((op & 16) == 0) */
- jz .L_test_for_second_level_dist
- andb $15, %cl /* op &= 15 */
- jz .L_check_dist_one /* no extra bits: try the dist == 1 shortcut */
- cmpb %cl, bits_r
- jae .L_add_bits_to_dist /* if (op <= bits) 97.6% */
-
- movb %cl, %ch /* stash op in ch, freeing cl */
- xorl %eax, %eax
- lodsw /* ax = *(ushort *)in++ */
- movb bits_r, %cl /* cl = bits, needs it for shifting */
- addb $16, bits_r /* bits += 16 */
- shll %cl, %eax
- orl %eax, hold_r /* hold |= *((ushort *)in)++ << bits */
- movb %ch, %cl /* move op back to cl */
-
-.L_add_bits_to_dist:
- movl $1, %eax
- shll %cl, %eax
- decl %eax /* (1 << op) - 1 */
- subb %cl, bits_r /* bits -= op */
- andl hold_r, %eax /* eax &= hold */
- shrl %cl, hold_r /* hold >>= op */
- addl %eax, dist_r /* dist += hold & ((1 << op) - 1) */
- /* execution continues at .L_check_window, the next label in the file */
-
-.L_check_window:
- /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- * %ecx = nbytes
- *
- * nbytes = out - beg;
- * if (dist <= nbytes) {
- * from = out - dist;
- * do {
- * PUP(out) = PUP(from);
- * } while (--len > 0);
- * }
- *
- * The first three bytes are copied unconditionally and only len - 3
- * goes through rep movsb, so this path relies on len >= 3.
- */
-
- movl in_r, in(%esp) /* save in so from can use its reg */
- movl out_r, %eax
- subl beg(%esp), %eax /* nbytes = out - beg */
-
- cmpl dist_r, %eax
- jb .L_clip_window /* if (dist > nbytes) 4.2% */
-
- movl len(%esp), %ecx
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
-
- subl $3, %ecx /* three bytes are moved by hand below */
- movb (from_r), %al
- movb %al, (out_r)
- movb 1(from_r), %al
- movb 2(from_r), %dl /* dist (%edx) is no longer needed */
- addl $3, from_r
- movb %al, 1(out_r)
- movb %dl, 2(out_r)
- addl $3, out_r
- rep movsb /* copy the remaining len - 3 bytes */
-
- movl in(%esp), in_r /* move in back to %esi, toss from */
- jmp .L_while_test
-
-.align 16,0x90
-.L_check_dist_one:
- cmpl $1, dist_r
- jne .L_check_window
- cmpl out_r, beg(%esp)
- je .L_check_window
-
- decl out_r
- movl len(%esp), %ecx
- movb (out_r), %al
- subl $3, %ecx
-
- movb %al, 1(out_r)
- movb %al, 2(out_r)
- movb %al, 3(out_r)
- addl $4, out_r
- rep stosb
-
- jmp .L_while_test
-
-.align 16,0x90
-.L_test_for_second_level_length:
- /* else if ((op & 64) == 0) {
- * this = lcode[this.val + (hold & mask[op])];
- * }
- */
- testb $64, %al
- jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
-
- movl $1, %eax
- shll %cl, %eax
- decl %eax
- andl hold_r, %eax /* eax &= hold */
- addl %edx, %eax /* eax += this.val */
- movl lcode(%esp), %edx /* edx = lcode */
- movl (%edx,%eax,4), %eax /* eax = lcode[val + (hold&mask[op])] */
- jmp .L_dolen
-
-.align 16,0x90
-.L_test_for_second_level_dist:
- /* else if ((op & 64) == 0) {
- * this = dcode[this.val + (hold & mask[op])];
- * }
- */
- testb $64, %al
- jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
-
- movl $1, %eax
- shll %cl, %eax
- decl %eax
- andl hold_r, %eax /* eax &= hold */
- addl %edx, %eax /* eax += this.val */
- movl dcode(%esp), %edx /* edx = dcode */
- movl (%edx,%eax,4), %eax /* eax = dcode[val + (hold&mask[op])] */
- jmp .L_dodist
-
-.align 16,0x90
-.L_clip_window:
- /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- * %ecx = nbytes
- *
- * else {
- * if (dist > wsize) {
- * invalid distance
- * }
- * from = window;
- * nbytes = dist - nbytes;
- * if (write == 0) {
- * from += wsize - nbytes;
- */
-#define nbytes_r %ecx
- movl %eax, nbytes_r
- movl wsize(%esp), %eax /* prepare for dist compare */
- negl nbytes_r /* nbytes = -nbytes */
- movl window(%esp), from_r /* from = window */
-
- cmpl dist_r, %eax
- jb .L_invalid_distance_too_far /* if (dist > wsize) */
-
- addl dist_r, nbytes_r /* nbytes = dist - nbytes */
- cmpl $0, write(%esp)
- jne .L_wrap_around_window /* if (write != 0) */
-
- subl nbytes_r, %eax
- addl %eax, from_r /* from += wsize - nbytes */
-
- /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- * %ecx = nbytes, %eax = len
- *
- * if (nbytes < len) {
- * len -= nbytes;
- * do {
- * PUP(out) = PUP(from);
- * } while (--nbytes);
- * from = out - dist;
- * }
- * }
- */
-#define len_r %eax
- movl len(%esp), len_r
- cmpl nbytes_r, len_r
- jbe .L_do_copy1 /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb /* copy nbytes bytes out of the window */
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
- jmp .L_do_copy1 /* remaining len bytes come from the output */
-
-.L_wrap_around_window:
- /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- * %ecx = nbytes, %eax = write, %eax = len
- *
- * else if (write < nbytes) {
- * from += wsize + write - nbytes;
- * nbytes -= write;
- * if (nbytes < len) {
- * len -= nbytes;
- * do {
- * PUP(out) = PUP(from);
- * } while (--nbytes);
- * from = window;
- * nbytes = write;
- * if (nbytes < len) {
- * len -= nbytes;
- * do {
- * PUP(out) = PUP(from);
- * } while(--nbytes);
- * from = out - dist;
- * }
- * }
- * }
- */
-#define write_r %eax
- movl write(%esp), write_r
- cmpl write_r, nbytes_r
- jbe .L_contiguous_in_window /* if (write >= nbytes) */
-
- addl wsize(%esp), from_r
- addl write_r, from_r
- subl nbytes_r, from_r /* from += wsize + write - nbytes */
- subl write_r, nbytes_r /* nbytes -= write */
-#undef write_r
-
- movl len(%esp), len_r
- cmpl nbytes_r, len_r
- jbe .L_do_copy1 /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl window(%esp), from_r /* from = window */
- movl write(%esp), nbytes_r /* nbytes = write */
- cmpl nbytes_r, len_r
- jbe .L_do_copy1 /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
- jmp .L_do_copy1
-
-.L_contiguous_in_window:
- /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
- * %ecx = nbytes, %eax = write, %eax = len
- *
- * else {
- * from += write - nbytes;
- * if (nbytes < len) {
- * len -= nbytes;
- * do {
- * PUP(out) = PUP(from);
- * } while (--nbytes);
- * from = out - dist;
- * }
- * }
- */
-#define write_r %eax
- addl write_r, from_r
- subl nbytes_r, from_r /* from += write - nbytes */
-#undef write_r
-
- movl len(%esp), len_r
- cmpl nbytes_r, len_r
- jbe .L_do_copy1 /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
-
-.L_do_copy1:
- /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
- * %eax = len
- *
- * while (len > 0) {
- * PUP(out) = PUP(from);
- * len--;
- * }
- * }
- * } while (in < last && out < end);
- */
-#undef nbytes_r
-#define in_r %esi
- movl len_r, %ecx
- rep movsb
-
- movl in(%esp), in_r /* move in back to %esi, toss from */
- jmp .L_while_test
-
-#undef len_r
-#undef dist_r
-
-#endif /* NO_MMX || RUN_TIME_MMX */
-
-
-/*** MMX code ***/
-
-#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
-
-.align 32,0x90
-.L_init_mmx:
- emms
-
-#undef bits_r
-#undef bitslong_r
-#define bitslong_r %ebp
-#define hold_mm %mm0
- movd %ebp, hold_mm
- movl %ebx, bitslong_r
-
-#define used_mm %mm1
-#define dmask2_mm %mm2
-#define lmask2_mm %mm3
-#define lmask_mm %mm4
-#define dmask_mm %mm5
-#define tmp_mm %mm6
-
- movd lmask(%esp), lmask_mm
- movq lmask_mm, lmask2_mm
- movd dmask(%esp), dmask_mm
- movq dmask_mm, dmask2_mm
- pxor used_mm, used_mm
- movl lcode(%esp), %ebx /* ebx = lcode */
- jmp .L_do_loop_mmx
-
-.align 32,0x90
-.L_while_test_mmx:
- /* while (in < last && out < end)
- */
- cmpl out_r, end(%esp)
- jbe .L_break_loop /* if (out >= end) */
-
- cmpl in_r, last(%esp)
- jbe .L_break_loop
-
-.L_do_loop_mmx:
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
-
- cmpl $32, bitslong_r
- ja .L_get_length_code_mmx /* if (32 < bits) */
-
- movd bitslong_r, tmp_mm
- movd (in_r), %mm7
- addl $4, in_r
- psllq tmp_mm, %mm7
- addl $32, bitslong_r
- por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
-
-.L_get_length_code_mmx:
- pand hold_mm, lmask_mm
- movd lmask_mm, %eax
- movq lmask2_mm, lmask_mm
- movl (%ebx,%eax,4), %eax /* eax = lcode[hold & lmask] */
-
-.L_dolen_mmx:
- movzbl %ah, %ecx /* ecx = this.bits */
- movd %ecx, used_mm
- subl %ecx, bitslong_r /* bits -= this.bits */
-
- testb %al, %al
- jnz .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
-
- shrl $16, %eax /* output this.val char */
- stosb
- jmp .L_while_test_mmx
-
-.L_test_for_length_base_mmx:
-#define len_r %edx
- movl %eax, len_r /* len = this */
- shrl $16, len_r /* len = this.val */
-
- testb $16, %al
- jz .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
- andl $15, %eax /* op &= 15 */
- jz .L_decode_distance_mmx /* if (!op) */
-
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
- movd %eax, used_mm
- movd hold_mm, %ecx
- subl %eax, bitslong_r
- andl .L_mask(,%eax,4), %ecx
- addl %ecx, len_r /* len += hold & mask[op] */
-
-.L_decode_distance_mmx:
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
-
- cmpl $32, bitslong_r
- ja .L_get_dist_code_mmx /* if (32 < bits) */
-
- movd bitslong_r, tmp_mm
- movd (in_r), %mm7
- addl $4, in_r
- psllq tmp_mm, %mm7
- addl $32, bitslong_r
- por %mm7, hold_mm /* hold_mm |= *((uint *)in)++ << bits */
-
-.L_get_dist_code_mmx:
- movl dcode(%esp), %ebx /* ebx = dcode (replaces lcode in ebx) */
- pand hold_mm, dmask_mm /* dmask_mm = hold & dmask */
- movd dmask_mm, %eax
- movq dmask2_mm, dmask_mm /* restore dmask_mm from its copy */
- movl (%ebx,%eax,4), %eax /* eax = dcode[hold & dmask] */
-
-.L_dodist_mmx:
-#define dist_r %ebx
- movzbl %ah, %ecx /* ecx = this.bits */
- movl %eax, dist_r
- shrl $16, dist_r /* dist = this.val */
- subl %ecx, bitslong_r /* bits -= this.bits */
- movd %ecx, used_mm /* hold_mm is shifted by this amount later */
-
- testb $16, %al /* if ((op & 16) == 0) */
- jz .L_test_for_second_level_dist_mmx
- andl $15, %eax /* op &= 15 */
- jz .L_check_dist_one_mmx
-
-.L_add_bits_to_dist_mmx:
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
- movd %eax, used_mm /* save bit length of current op */
- movd hold_mm, %ecx /* get the next bits on input stream */
- subl %eax, bitslong_r /* bits -= op bits */
- andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
- addl %ecx, dist_r /* dist += hold & mask[op] */
-.L_check_window_mmx:
- /* MMX-path copy of a match that lies entirely in the output written
- * since beg. Three bytes are copied by hand and len - 3 by rep movsb,
- * so this path relies on len >= 3.
- */
- movl in_r, in(%esp) /* save in so from can use its reg */
- movl out_r, %eax
- subl beg(%esp), %eax /* nbytes = out - beg */
-
- cmpl dist_r, %eax
- jb .L_clip_window_mmx /* if (dist > nbytes) 4.2% */
-
- movl len_r, %ecx /* ecx = len; len_r (%edx) is free from here */
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
-
- subl $3, %ecx
- movb (from_r), %al
- movb %al, (out_r)
- movb 1(from_r), %al
- movb 2(from_r), %dl /* %dl is scratch; len was copied to ecx */
- addl $3, from_r
- movb %al, 1(out_r)
- movb %dl, 2(out_r)
- addl $3, out_r
- rep movsb /* copy the remaining len - 3 bytes */
-
- movl in(%esp), in_r /* move in back to %esi, toss from */
- movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
- jmp .L_while_test_mmx
-
-.align 16,0x90
-.L_check_dist_one_mmx:
- cmpl $1, dist_r
- jne .L_check_window_mmx
- cmpl out_r, beg(%esp)
- je .L_check_window_mmx
-
- decl out_r
- movl len_r, %ecx
- movb (out_r), %al
- subl $3, %ecx
-
- movb %al, 1(out_r)
- movb %al, 2(out_r)
- movb %al, 3(out_r)
- addl $4, out_r
- rep stosb
-
- movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
- jmp .L_while_test_mmx
-
-.align 16,0x90
-.L_test_for_second_level_length_mmx:
- /* else if ((op & 64) == 0) {
- * this = lcode[this.val + (hold & mask[op])];
- * }
- */
- testb $64, %al
- jnz .L_test_for_end_of_block /* if ((op & 64) != 0) */
-
- andl $15, %eax /* op &= 15 */
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
- movd hold_mm, %ecx
- andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
- addl len_r, %ecx /* ecx += this.val */
- movl (%ebx,%ecx,4), %eax /* eax = lcode[val + (hold&mask[op])] */
- jmp .L_dolen_mmx
-
-.align 16,0x90
-.L_test_for_second_level_dist_mmx:
- /* else if ((op & 64) == 0) {
- * this = dcode[this.val + (hold & mask[op])];
- * }
- */
- testb $64, %al
- jnz .L_invalid_distance_code /* if ((op & 64) != 0) */
-
- andl $15, %eax /* op &= 15 */
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
- movd hold_mm, %ecx
- andl .L_mask(,%eax,4), %ecx /* ecx = hold & mask[op] */
- movl dcode(%esp), %eax /* eax = dcode */
- addl dist_r, %ecx /* ecx += this.val */
- movl (%eax,%ecx,4), %eax /* eax = dcode[val + (hold&mask[op])] */
- jmp .L_dodist_mmx
-
-.align 16,0x90
-.L_clip_window_mmx:
- /* regs: %esi = from, %edi = out, %ebx = dist, %edx = len, %ecx = nbytes
- *
- * else {
- * if (dist > wsize) {
- * invalid distance
- * }
- * from = window;
- * nbytes = dist - nbytes;
- * if (write == 0) {
- * from += wsize - nbytes;
- * if (nbytes < len) {
- * len -= nbytes;
- * do {
- * PUP(out) = PUP(from);
- * } while (--nbytes);
- * from = out - dist;
- * }
- * }
- */
-#define nbytes_r %ecx
- movl %eax, nbytes_r
- movl wsize(%esp), %eax /* prepare for dist compare */
- negl nbytes_r /* nbytes = -nbytes */
- movl window(%esp), from_r /* from = window */
-
- cmpl dist_r, %eax
- jb .L_invalid_distance_too_far /* if (dist > wsize) */
-
- addl dist_r, nbytes_r /* nbytes = dist - nbytes */
- cmpl $0, write(%esp)
- jne .L_wrap_around_window_mmx /* if (write != 0) */
-
- subl nbytes_r, %eax
- addl %eax, from_r /* from += wsize - nbytes */
-
- cmpl nbytes_r, len_r
- jbe .L_do_copy1_mmx /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb /* copy nbytes bytes out of the window */
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
- jmp .L_do_copy1_mmx /* remaining len bytes come from the output */
-
-.L_wrap_around_window_mmx:
-#define write_r %eax
- movl write(%esp), write_r
- cmpl write_r, nbytes_r
- jbe .L_contiguous_in_window_mmx /* if (write >= nbytes) */
-
- addl wsize(%esp), from_r
- addl write_r, from_r
- subl nbytes_r, from_r /* from += wsize + write - nbytes */
- subl write_r, nbytes_r /* nbytes -= write */
-#undef write_r
-
- cmpl nbytes_r, len_r
- jbe .L_do_copy1_mmx /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl window(%esp), from_r /* from = window */
- movl write(%esp), nbytes_r /* nbytes = write */
- cmpl nbytes_r, len_r
- jbe .L_do_copy1_mmx /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
- jmp .L_do_copy1_mmx
-
-.L_contiguous_in_window_mmx:
-#define write_r %eax
- addl write_r, from_r
- subl nbytes_r, from_r /* from += write - nbytes */
-#undef write_r
-
- cmpl nbytes_r, len_r
- jbe .L_do_copy1_mmx /* if (nbytes >= len) */
-
- subl nbytes_r, len_r /* len -= nbytes */
- rep movsb
- movl out_r, from_r
- subl dist_r, from_r /* from = out - dist */
-
-.L_do_copy1_mmx:
-#undef nbytes_r
-#define in_r %esi
- movl len_r, %ecx
- rep movsb
-
- movl in(%esp), in_r /* move in back to %esi, toss from */
- movl lcode(%esp), %ebx /* move lcode back to %ebx, toss dist */
- jmp .L_while_test_mmx
-
-#undef hold_r
-#undef bitslong_r
-
-#endif /* USE_MMX || RUN_TIME_MMX */
-
-
-/*** USE_MMX, NO_MMX, and RUN_TIME_MMX from here on ***/
-
-.L_invalid_distance_code:
- /* else {
- * strm->msg = "invalid distance code";
- * state->mode = BAD;
- * }
- */
- movl $.L_invalid_distance_code_msg, %ecx
- movl $INFLATE_MODE_BAD, %edx
- jmp .L_update_stream_state
-
-.L_test_for_end_of_block:
- /* else if (op & 32) {
- * state->mode = TYPE;
- * break;
- * }
- */
- testb $32, %al
- jz .L_invalid_literal_length_code /* if ((op & 32) == 0) */
-
- movl $0, %ecx
- movl $INFLATE_MODE_TYPE, %edx
- jmp .L_update_stream_state
-
-.L_invalid_literal_length_code:
- /* else {
- * strm->msg = "invalid literal/length code";
- * state->mode = BAD;
- * }
- */
- movl $.L_invalid_literal_length_code_msg, %ecx
- movl $INFLATE_MODE_BAD, %edx
- jmp .L_update_stream_state
-
-.L_invalid_distance_too_far:
- /* strm->msg = "invalid distance too far back";
- * state->mode = BAD;
- */
- movl in(%esp), in_r /* from_r has in's reg, put in back */
- movl $.L_invalid_distance_too_far_msg, %ecx
- movl $INFLATE_MODE_BAD, %edx
- jmp .L_update_stream_state
-
-.L_update_stream_state:
- /* set strm->msg = %ecx, strm->state->mode = %edx */
- movl strm_sp(%esp), %eax
- testl %ecx, %ecx /* if (msg != NULL) */
- jz .L_skip_msg
- movl %ecx, msg_strm(%eax) /* strm->msg = msg */
-.L_skip_msg:
- movl state_strm(%eax), %eax /* state = strm->state */
- movl %edx, mode_state(%eax) /* state->mode = edx (BAD | TYPE) */
- jmp .L_break_loop
-
-.align 32,0x90
-.L_break_loop:
-
-/*
- * Regs:
- *
- * bits = %ebp when mmx, and in %ebx when non-mmx
- * hold = %hold_mm when mmx, and in %ebp when non-mmx
- * in = %esi
- * out = %edi
- */
-
-#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
-
-#if defined( RUN_TIME_MMX )
-
- cmpl $DO_USE_MMX, inflate_fast_use_mmx
- jne .L_update_next_in
-
-#endif /* RUN_TIME_MMX */
-
- movl %ebp, %ebx
-
-.L_update_next_in:
-
-#endif
-
-#define strm_r %eax
-#define state_r %edx
-
- /* len = bits >> 3;
- * in -= len;
- * bits -= len << 3;
- * hold &= (1U << bits) - 1;
- * state->hold = hold;
- * state->bits = bits;
- * strm->next_in = in;
- * strm->next_out = out;
- */
- movl strm_sp(%esp), strm_r
- movl %ebx, %ecx
- movl state_strm(strm_r), state_r
- shrl $3, %ecx
- subl %ecx, in_r
- shll $3, %ecx
- subl %ecx, %ebx
- movl out_r, next_out_strm(strm_r)
- movl %ebx, bits_state(state_r)
- movl %ebx, %ecx
-
- leal buf(%esp), %ebx
- cmpl %ebx, last(%esp)
- jne .L_buf_not_used /* if buf != last */
-
- subl %ebx, in_r /* in -= buf */
- movl next_in_strm(strm_r), %ebx
- movl %ebx, last(%esp) /* last = strm->next_in */
- addl %ebx, in_r /* in += strm->next_in */
- movl avail_in_strm(strm_r), %ebx
- subl $11, %ebx
- addl %ebx, last(%esp) /* last = &strm->next_in[ avail_in - 11 ] */
-
-.L_buf_not_used:
- movl in_r, next_in_strm(strm_r)
-
- movl $1, %ebx
- shll %cl, %ebx
- decl %ebx
-
-#if defined( USE_MMX ) || defined( RUN_TIME_MMX )
-
-#if defined( RUN_TIME_MMX )
-
- cmpl $DO_USE_MMX, inflate_fast_use_mmx
- jne .L_update_hold
-
-#endif /* RUN_TIME_MMX */
-
- psrlq used_mm, hold_mm /* hold_mm >>= last bit length */
- movd hold_mm, %ebp
-
- emms
-
-.L_update_hold:
-
-#endif /* USE_MMX || RUN_TIME_MMX */
-
- andl %ebx, %ebp
- movl %ebp, hold_state(state_r)
-
-#define last_r %ebx
-
- /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
- movl last(%esp), last_r
- cmpl in_r, last_r
- jbe .L_last_is_smaller /* if (in >= last) */
-
- subl in_r, last_r /* last -= in */
- addl $11, last_r /* last += 11 */
- movl last_r, avail_in_strm(strm_r)
- jmp .L_fixup_out
-.L_last_is_smaller:
- subl last_r, in_r /* in -= last */
- negl in_r /* in = -in */
- addl $11, in_r /* in += 11 */
- movl in_r, avail_in_strm(strm_r)
-
-#undef last_r
-#define end_r %ebx
-
-.L_fixup_out:
- /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
- movl end(%esp), end_r
- cmpl out_r, end_r
- jbe .L_end_is_smaller /* if (out >= end) */
-
- subl out_r, end_r /* end -= out */
- addl $257, end_r /* end += 257 */
- movl end_r, avail_out_strm(strm_r)
- jmp .L_done
-.L_end_is_smaller:
- subl end_r, out_r /* out -= end */
- negl out_r /* out = -out */
- addl $257, out_r /* out += 257 */
- movl out_r, avail_out_strm(strm_r)
-
-#undef end_r
-#undef strm_r
-#undef state_r
-
-.L_done:
- addl $local_var_size, %esp
- popf
- popl %ebx
- popl %ebp
- popl %esi
- popl %edi
- ret
-
-#if defined( GAS_ELF )
-/* elf info */
-.type inflate_fast,@function
-.size inflate_fast,.-inflate_fast
-#endif
+++ /dev/null
-ml64.exe /Flinffasx64 /c /Zi inffasx64.asm\r
-ml64.exe /Flgvmat64 /c /Zi gvmat64.asm\r
+++ /dev/null
-;uInt longest_match_x64(\r
-; deflate_state *s,\r
-; IPos cur_match); /* current match */\r
-\r
-; gvmat64.asm -- Asm portion of the optimized longest_match for 32 bits x86_64\r
-; (AMD64 on Athlon 64, Opteron, Phenom\r
-; and Intel EM64T on Pentium 4 with EM64T, Pentium D, Core 2 Duo, Core I5/I7)\r
-; Copyright (C) 1995-2010 Jean-loup Gailly, Brian Raiter and Gilles Vollant.\r
-;\r
-; File written by Gilles Vollant, by converting to assembly the longest_match\r
-; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.\r
-;\r
-; and by taking inspiration from asm686 with masm, optimised assembly code\r
-; from Brian Raiter, written 1998\r
-;\r
-; This software is provided 'as-is', without any express or implied\r
-; warranty. In no event will the authors be held liable for any damages\r
-; arising from the use of this software.\r
-;\r
-; Permission is granted to anyone to use this software for any purpose,\r
-; including commercial applications, and to alter it and redistribute it\r
-; freely, subject to the following restrictions:\r
-;\r
-; 1. The origin of this software must not be misrepresented; you must not\r
-; claim that you wrote the original software. If you use this software\r
-; in a product, an acknowledgment in the product documentation would be\r
-; appreciated but is not required.\r
-; 2. Altered source versions must be plainly marked as such, and must not be\r
-; misrepresented as being the original software\r
-; 3. This notice may not be removed or altered from any source distribution.\r
-;\r
-;\r
-;\r
-; http://www.zlib.net\r
-; http://www.winimage.com/zLibDll\r
-; http://www.muppetlabs.com/~breadbox/software/assembly.html\r
-;\r
-; to compile this file for infozip Zip, I use option:\r
-; ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm\r
-;\r
-; to compile this file for zLib, I use option:\r
-; ml64.exe /Flgvmat64 /c /Zi gvmat64.asm\r
-; Be careful to adapt zlib1222add below to your version of zLib\r
-; (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change\r
-; the value of zlib1222add later)\r
-;\r
-; This file compiles with Microsoft Macro Assembler (x64) for AMD64\r
-;\r
-; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK\r
-;\r
-; (you can get Windows WDK with ml64 for AMD64 from\r
-; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)\r
-;\r
-\r
-\r
-;uInt longest_match(s, cur_match)\r
-; deflate_state *s;\r
-; IPos cur_match; /* current match */\r
-.code\r
-longest_match PROC\r
-\r
-\r
-;LocalVarsSize equ 88\r
- LocalVarsSize equ 72\r
-\r
-; register used : rax,rbx,rcx,rdx,rsi,rdi,r8,r9,r10,r11,r12\r
-; free register : r14,r15\r
-; register can be saved : rsp\r
-\r
- chainlenwmask equ rsp + 8 - LocalVarsSize ; high word: current chain len\r
- ; low word: s->wmask\r
-;window equ rsp + xx - LocalVarsSize ; local copy of s->window ; stored in r10\r
-;windowbestlen equ rsp + xx - LocalVarsSize ; s->window + bestlen , use r10+r11\r
-;scanstart equ rsp + xx - LocalVarsSize ; first two bytes of string ; stored in r12w\r
-;scanend equ rsp + xx - LocalVarsSize ; last two bytes of string use ebx\r
-;scanalign equ rsp + xx - LocalVarsSize ; dword-misalignment of string r13\r
-;bestlen equ rsp + xx - LocalVarsSize ; size of best match so far -> r11d\r
-;scan equ rsp + xx - LocalVarsSize ; ptr to string wanting match -> r9\r
-IFDEF INFOZIP\r
-ELSE\r
- nicematch equ (rsp + 16 - LocalVarsSize) ; a good enough match size\r
-ENDIF\r
-\r
-save_rdi equ rsp + 24 - LocalVarsSize\r
-save_rsi equ rsp + 32 - LocalVarsSize\r
-save_rbx equ rsp + 40 - LocalVarsSize\r
-save_rbp equ rsp + 48 - LocalVarsSize\r
-save_r12 equ rsp + 56 - LocalVarsSize\r
-save_r13 equ rsp + 64 - LocalVarsSize\r
-;save_r14 equ rsp + 72 - LocalVarsSize\r
-;save_r15 equ rsp + 80 - LocalVarsSize\r
-\r
-\r
-; summary of register usage\r
-; scanend ebx\r
-; scanendw bx\r
-; chainlenwmask edx\r
-; curmatch rsi\r
-; curmatchd esi\r
-; windowbestlen r8\r
-; scanalign r9\r
-; scanalignd r9d\r
-; window r10\r
-; bestlen r11\r
-; bestlend r11d\r
-; scanstart r12d\r
-; scanstartw r12w\r
-; scan r13\r
-; nicematch r14d\r
-; limit r15\r
-; limitd r15d\r
-; prev rcx\r
-\r
-; all the +4 offsets are due to the addition of pending_buf_size (in zlib\r
-; in the deflate_state structure since the asm code was first written\r
-; (if you compile with zlib 1.0.4 or older, remove the +4).\r
-; Note : these value are good with a 8 bytes boundary pack structure\r
-\r
-\r
- MAX_MATCH equ 258\r
- MIN_MATCH equ 3\r
- MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)\r
-\r
-\r
-;;; Offsets for fields in the deflate_state structure. These numbers\r
-;;; are calculated from the definition of deflate_state, with the\r
-;;; assumption that the compiler will dword-align the fields. (Thus,\r
-;;; changing the definition of deflate_state could easily cause this\r
-;;; program to crash horribly, without so much as a warning at\r
-;;; compile time. Sigh.)\r
-\r
-; all the +zlib1222add offsets are due to the addition of fields\r
-; in zlib in the deflate_state structure since the asm code was first written\r
-; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").\r
-; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").\r
-; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").\r
-\r
-\r
-IFDEF INFOZIP\r
-\r
-_DATA SEGMENT\r
-COMM window_size:DWORD\r
-; WMask ; 7fff\r
-COMM window:BYTE:010040H\r
-COMM prev:WORD:08000H\r
-; MatchLen : unused\r
-; PrevMatch : unused\r
-COMM strstart:DWORD\r
-COMM match_start:DWORD\r
-; Lookahead : ignore\r
-COMM prev_length:DWORD ; PrevLen\r
-COMM max_chain_length:DWORD\r
-COMM good_match:DWORD\r
-COMM nice_match:DWORD\r
-prev_ad equ OFFSET prev\r
-window_ad equ OFFSET window\r
-nicematch equ nice_match\r
-_DATA ENDS\r
-WMask equ 07fffh\r
-\r
-ELSE\r
-\r
- IFNDEF zlib1222add\r
- zlib1222add equ 8\r
- ENDIF\r
-dsWSize equ 56+zlib1222add+(zlib1222add/2)\r
-dsWMask equ 64+zlib1222add+(zlib1222add/2)\r
-dsWindow equ 72+zlib1222add\r
-dsPrev equ 88+zlib1222add\r
-dsMatchLen equ 128+zlib1222add\r
-dsPrevMatch equ 132+zlib1222add\r
-dsStrStart equ 140+zlib1222add\r
-dsMatchStart equ 144+zlib1222add\r
-dsLookahead equ 148+zlib1222add\r
-dsPrevLen equ 152+zlib1222add\r
-dsMaxChainLen equ 156+zlib1222add\r
-dsGoodMatch equ 172+zlib1222add\r
-dsNiceMatch equ 176+zlib1222add\r
-\r
-window_size equ [ rcx + dsWSize]\r
-WMask equ [ rcx + dsWMask]\r
-window_ad equ [ rcx + dsWindow]\r
-prev_ad equ [ rcx + dsPrev]\r
-strstart equ [ rcx + dsStrStart]\r
-match_start equ [ rcx + dsMatchStart]\r
-Lookahead equ [ rcx + dsLookahead] ; 0ffffffffh on infozip\r
-prev_length equ [ rcx + dsPrevLen]\r
-max_chain_length equ [ rcx + dsMaxChainLen]\r
-good_match equ [ rcx + dsGoodMatch]\r
-nice_match equ [ rcx + dsNiceMatch]\r
-ENDIF\r
-\r
-; parameter 1 in r8(deflate state s), param 2 in rdx (cur match)\r
-\r
-; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and\r
-; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp\r
-;\r
-; All registers must be preserved across the call, except for\r
-; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.\r
-\r
-\r
-\r
-;;; Save registers that the compiler may be using, and adjust esp to\r
-;;; make room for our stack frame.\r
-\r
-\r
-;;; Retrieve the function arguments. r8d will hold cur_match\r
-;;; throughout the entire function. edx will hold the pointer to the\r
-;;; deflate_state structure during the function's setup (before\r
-;;; entering the main loop.\r
-\r
-; parameter 1 in rcx (deflate_state* s), param 2 in edx -> r8 (cur match)\r
-\r
-; this clear high 32 bits of r8, which can be garbage in both r8 and rdx\r
-\r
- mov [save_rdi],rdi\r
- mov [save_rsi],rsi\r
- mov [save_rbx],rbx\r
- mov [save_rbp],rbp\r
-IFDEF INFOZIP\r
- mov r8d,ecx\r
-ELSE\r
- mov r8d,edx\r
-ENDIF\r
- mov [save_r12],r12\r
- mov [save_r13],r13\r
-; mov [save_r14],r14\r
-; mov [save_r15],r15\r
-\r
-\r
-;;; uInt wmask = s->w_mask;\r
-;;; unsigned chain_length = s->max_chain_length;\r
-;;; if (s->prev_length >= s->good_match) {\r
-;;; chain_length >>= 2;\r
-;;; }\r
-\r
- mov edi, prev_length\r
- mov esi, good_match\r
- mov eax, WMask\r
- mov ebx, max_chain_length\r
- cmp edi, esi\r
- jl LastMatchGood\r
- shr ebx, 2\r
-LastMatchGood:\r
-\r
-;;; chainlen is decremented once beforehand so that the function can\r
-;;; use the sign flag instead of the zero flag for the exit test.\r
-;;; It is then shifted into the high word, to make room for the wmask\r
-;;; value, which it will always accompany.\r
-\r
- dec ebx\r
- shl ebx, 16\r
- or ebx, eax\r
-\r
-;;; on zlib only\r
-;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;\r
-\r
-IFDEF INFOZIP\r
- mov [chainlenwmask], ebx\r
-; on infozip nice_match = [nice_match]\r
-ELSE\r
- mov eax, nice_match\r
- mov [chainlenwmask], ebx\r
- mov r10d, Lookahead\r
- cmp r10d, eax\r
- cmovnl r10d, eax\r
- mov [nicematch],r10d\r
-ENDIF\r
-\r
-;;; register Bytef *scan = s->window + s->strstart;\r
- mov r10, window_ad\r
- mov ebp, strstart\r
- lea r13, [r10 + rbp]\r
-\r
-;;; Determine how many bytes the scan ptr is off from being\r
-;;; dword-aligned.\r
-\r
- mov r9,r13\r
- neg r13\r
- and r13,3\r
-\r
-;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?\r
-;;; s->strstart - (IPos)MAX_DIST(s) : NIL;\r
-IFDEF INFOZIP\r
- mov eax,07efah ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(3+8+1))\r
-ELSE\r
- mov eax, window_size\r
- sub eax, MIN_LOOKAHEAD\r
-ENDIF\r
- xor edi,edi\r
- sub ebp, eax\r
-\r
- mov r11d, prev_length\r
-\r
- cmovng ebp,edi\r
-\r
-;;; int best_len = s->prev_length;\r
-\r
-\r
-;;; Store the sum of s->window + best_len in esi locally, and in esi.\r
-\r
- lea rsi,[r10+r11]\r
-\r
-;;; register ush scan_start = *(ushf*)scan;\r
-;;; register ush scan_end = *(ushf*)(scan+best_len-1);\r
-;;; Posf *prev = s->prev;\r
-\r
- movzx r12d,word ptr [r9]\r
- movzx ebx, word ptr [r9 + r11 - 1]\r
-\r
- mov rdi, prev_ad\r
-\r
-;;; Jump into the main loop.\r
-\r
- mov edx, [chainlenwmask]\r
-\r
- cmp bx,word ptr [rsi + r8 - 1]\r
- jz LookupLoopIsZero\r
-\r
-LookupLoop1:\r
- and r8d, edx\r
-\r
- movzx r8d, word ptr [rdi + r8*2]\r
- cmp r8d, ebp\r
- jbe LeaveNow\r
- sub edx, 00010000h\r
- js LeaveNow\r
-\r
-LoopEntry1:\r
- cmp bx,word ptr [rsi + r8 - 1]\r
- jz LookupLoopIsZero\r
-\r
-LookupLoop2:\r
- and r8d, edx\r
-\r
- movzx r8d, word ptr [rdi + r8*2]\r
- cmp r8d, ebp\r
- jbe LeaveNow\r
- sub edx, 00010000h\r
- js LeaveNow\r
-\r
-LoopEntry2:\r
- cmp bx,word ptr [rsi + r8 - 1]\r
- jz LookupLoopIsZero\r
-\r
-LookupLoop4:\r
- and r8d, edx\r
-\r
- movzx r8d, word ptr [rdi + r8*2]\r
- cmp r8d, ebp\r
- jbe LeaveNow\r
- sub edx, 00010000h\r
- js LeaveNow\r
-\r
-LoopEntry4:\r
-\r
- cmp bx,word ptr [rsi + r8 - 1]\r
- jnz LookupLoop1\r
- jmp LookupLoopIsZero\r
-\r
-\r
-;;; do {\r
-;;; match = s->window + cur_match;\r
-;;; if (*(ushf*)(match+best_len-1) != scan_end ||\r
-;;; *(ushf*)match != scan_start) continue;\r
-;;; [...]\r
-;;; } while ((cur_match = prev[cur_match & wmask]) > limit\r
-;;; && --chain_length != 0);\r
-;;;\r
-;;; Here is the inner loop of the function. The function will spend the\r
-;;; majority of its time in this loop, and majority of that time will\r
-;;; be spent in the first ten instructions.\r
-;;;\r
-;;; Within this loop:\r
-;;; ebx = scanend\r
-;;; r8d = curmatch\r
-;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)\r
-;;; esi = windowbestlen - i.e., (window + bestlen)\r
-;;; edi = prev\r
-;;; ebp = limit\r
-\r
-LookupLoop:\r
- and r8d, edx\r
-\r
- movzx r8d, word ptr [rdi + r8*2]\r
- cmp r8d, ebp\r
- jbe LeaveNow\r
- sub edx, 00010000h\r
- js LeaveNow\r
-\r
-LoopEntry:\r
-\r
- cmp bx,word ptr [rsi + r8 - 1]\r
- jnz LookupLoop1\r
-LookupLoopIsZero:\r
- cmp r12w, word ptr [r10 + r8]\r
- jnz LookupLoop1\r
-\r
-\r
-;;; Store the current value of chainlen.\r
- mov [chainlenwmask], edx\r
-\r
-;;; Point edi to the string under scrutiny, and esi to the string we\r
-;;; are hoping to match it up with. In actuality, esi and edi are\r
-;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is\r
-;;; initialized to -(MAX_MATCH_8 - scanalign).\r
-\r
- lea rsi,[r8+r10]\r
- mov rdx, 0fffffffffffffef8h; -(MAX_MATCH_8)\r
- lea rsi, [rsi + r13 + 0108h] ;MAX_MATCH_8]\r
- lea rdi, [r9 + r13 + 0108h] ;MAX_MATCH_8]\r
-\r
- prefetcht1 [rsi+rdx]\r
- prefetcht1 [rdi+rdx]\r
-\r
-\r
-;;; Test the strings for equality, 8 bytes at a time. At the end,\r
-;;; adjust rdx so that it is offset to the exact byte that mismatched.\r
-;;;\r
-;;; We already know at this point that the first three bytes of the\r
-;;; strings match each other, and they can be safely passed over before\r
-;;; starting the compare loop. So what this code does is skip over 0-3\r
-;;; bytes, as much as necessary in order to dword-align the edi\r
-;;; pointer. (rsi will still be misaligned three times out of four.)\r
-;;;\r
-;;; It should be confessed that this loop usually does not represent\r
-;;; much of the total running time. Replacing it with a more\r
-;;; straightforward "rep cmpsb" would not drastically degrade\r
-;;; performance.\r
-\r
-\r
-LoopCmps:\r
- mov rax, [rsi + rdx]\r
- xor rax, [rdi + rdx]\r
- jnz LeaveLoopCmps\r
-\r
- mov rax, [rsi + rdx + 8]\r
- xor rax, [rdi + rdx + 8]\r
- jnz LeaveLoopCmps8\r
-\r
-\r
- mov rax, [rsi + rdx + 8+8]\r
- xor rax, [rdi + rdx + 8+8]\r
- jnz LeaveLoopCmps16\r
-\r
- add rdx,8+8+8\r
-\r
- jnz short LoopCmps\r
- jmp short LenMaximum\r
-LeaveLoopCmps16: add rdx,8\r
-LeaveLoopCmps8: add rdx,8\r
-LeaveLoopCmps:\r
-\r
- test eax, 0000FFFFh\r
- jnz LenLower\r
-\r
- test eax,0ffffffffh\r
-\r
- jnz LenLower32\r
-\r
- add rdx,4\r
- shr rax,32\r
- or ax,ax\r
- jnz LenLower\r
-\r
-LenLower32:\r
- shr eax,16\r
- add rdx,2\r
-LenLower: sub al, 1\r
- adc rdx, 0\r
-;;; Calculate the length of the match. If it is longer than MAX_MATCH,\r
-;;; then automatically accept it as the best possible match and leave.\r
-\r
- lea rax, [rdi + rdx]\r
- sub rax, r9\r
- cmp eax, MAX_MATCH\r
- jge LenMaximum\r
-\r
-;;; If the length of the match is not longer than the best match we\r
-;;; have so far, then forget it and return to the lookup loop.\r
-;///////////////////////////////////\r
-\r
- cmp eax, r11d\r
- jg LongerMatch\r
-\r
- lea rsi,[r10+r11]\r
-\r
- mov rdi, prev_ad\r
- mov edx, [chainlenwmask]\r
- jmp LookupLoop\r
-\r
-;;; s->match_start = cur_match;\r
-;;; best_len = len;\r
-;;; if (len >= nice_match) break;\r
-;;; scan_end = *(ushf*)(scan+best_len-1);\r
-\r
-LongerMatch:\r
- mov r11d, eax\r
- mov match_start, r8d\r
- cmp eax, [nicematch]\r
- jge LeaveNow\r
-\r
- lea rsi,[r10+rax]\r
-\r
- movzx ebx, word ptr [r9 + rax - 1]\r
- mov rdi, prev_ad\r
- mov edx, [chainlenwmask]\r
- jmp LookupLoop\r
-\r
-;;; Accept the current string, with the maximum possible length.\r
-\r
-LenMaximum:\r
- mov r11d,MAX_MATCH\r
- mov match_start, r8d\r
-\r
-;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;\r
-;;; return s->lookahead;\r
-\r
-LeaveNow:\r
-IFDEF INFOZIP\r
- mov eax,r11d\r
-ELSE\r
- mov eax, Lookahead\r
- cmp r11d, eax\r
- cmovng eax, r11d\r
-ENDIF\r
-\r
-;;; Restore the stack and return from whence we came.\r
-\r
-\r
- mov rsi,[save_rsi]\r
- mov rdi,[save_rdi]\r
- mov rbx,[save_rbx]\r
- mov rbp,[save_rbp]\r
- mov r12,[save_r12]\r
- mov r13,[save_r13]\r
-; mov r14,[save_r14]\r
-; mov r15,[save_r15]\r
-\r
-\r
- ret 0\r
-; please don't remove this string !\r
-; Your can freely use gvmat64 in any free or commercial app\r
-; but it is far better don't remove the string in the binary!\r
- db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0\r
-longest_match ENDP\r
-\r
-match_init PROC\r
- ret 0\r
-match_init ENDP\r
-\r
-\r
-END\r
+++ /dev/null
-/* inffas8664.c is a hand tuned assembler version of inffast.c - fast decoding\r
- * version for AMD64 on Windows using Microsoft C compiler\r
- *\r
- * Copyright (C) 1995-2003 Mark Adler\r
- * For conditions of distribution and use, see copyright notice in zlib.h\r
- *\r
- * Copyright (C) 2003 Chris Anderson <christop@charm.net>\r
- * Please use the copyright conditions above.\r
- *\r
- * 2005 - Adaptation to Microsoft C Compiler for AMD64 by Gilles Vollant\r
- *\r
- * inffas8664.c call function inffas8664fnc in inffasx64.asm\r
- * inffasx64.asm is automatically convert from AMD64 portion of inffas86.c\r
- *\r
- * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also\r
- * slightly quicker on x86 systems because, instead of using rep movsb to copy\r
- * data, it uses rep movsw, which moves data in 2-byte chunks instead of single\r
- * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates\r
- * from http://fedora.linux.duke.edu/fc1_x86_64\r
- * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with\r
- * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version,\r
- * when decompressing mozilla-source-1.3.tar.gz.\r
- *\r
- * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from\r
- * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at\r
- * the moment. I have successfully compiled and tested this code with gcc2.96,\r
- * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S\r
- * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX\r
- * enabled. I will attempt to merge the MMX code into this version. Newer\r
- * versions of this and inffast.S can be found at\r
- * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/\r
- *\r
- */\r
-\r
-#include <stdio.h>\r
-#include "zutil.h"\r
-#include "inftrees.h"\r
-#include "inflate.h"\r
-#include "inffast.h"\r
-\r
-/* Mark Adler's comments from inffast.c: */\r
-\r
-/*\r
- Decode literal, length, and distance codes and write out the resulting\r
- literal and match bytes until either not enough input or output is\r
- available, an end-of-block is encountered, or a data error is encountered.\r
- When large enough input and output buffers are supplied to inflate(), for\r
- example, a 16K input buffer and a 64K output buffer, more than 95% of the\r
- inflate execution time is spent in this routine.\r
-\r
- Entry assumptions:\r
-\r
- state->mode == LEN\r
- strm->avail_in >= 6\r
- strm->avail_out >= 258\r
- start >= strm->avail_out\r
- state->bits < 8\r
-\r
- On return, state->mode is one of:\r
-\r
- LEN -- ran out of enough output space or enough available input\r
- TYPE -- reached end of block code, inflate() to interpret next block\r
- BAD -- error in block data\r
-\r
- Notes:\r
-\r
- - The maximum input bits used by a length/distance pair is 15 bits for the\r
- length code, 5 bits for the length extra, 15 bits for the distance code,\r
- and 13 bits for the distance extra. This totals 48 bits, or six bytes.\r
- Therefore if strm->avail_in >= 6, then there is enough input to avoid\r
- checking for available input while decoding.\r
-\r
- - The maximum bytes that a single length/distance pair can output is 258\r
- bytes, which is the maximum length that can be coded. inflate_fast()\r
- requires strm->avail_out >= 258 for each loop to avoid checking for\r
- output space.\r
- */\r
-\r
-\r
-\r
- typedef struct inffast_ar {\r
-/* 64 32 x86 x86_64 */\r
-/* ar offset register */\r
-/* 0 0 */ void *esp; /* esp save */\r
-/* 8 4 */ void *ebp; /* ebp save */\r
-/* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */\r
-/* 24 12 */ unsigned char FAR *last; /* r9 while in < last */\r
-/* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */\r
-/* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */\r
-/* 48 24 */ unsigned char FAR *end; /* r10 while out < end */\r
-/* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */\r
-/* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */\r
-/* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */\r
-/* 80 40 */ size_t /*unsigned long */hold; /* edx rdx local strm->hold */\r
-/* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */\r
-/* 92 48 */ unsigned wsize; /* window size */\r
-/* 96 52 */ unsigned write; /* window write index */\r
-/*100 56 */ unsigned lmask; /* r12 mask for lcode */\r
-/*104 60 */ unsigned dmask; /* r13 mask for dcode */\r
-/*108 64 */ unsigned len; /* r14 match length */\r
-/*112 68 */ unsigned dist; /* r15 match distance */\r
-/*116 72 */ unsigned status; /* set when state chng*/\r
- } type_ar;\r
-#ifdef ASMINF\r
-\r
-void inflate_fast(strm, start)\r
-z_streamp strm;\r
-unsigned start; /* inflate()'s starting value for strm->avail_out */\r
-{\r
- struct inflate_state FAR *state;\r
- type_ar ar;\r
- void inffas8664fnc(struct inffast_ar * par);\r
-\r
-\r
-\r
-#if (defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )) || (defined(_MSC_VER) && defined(_M_AMD64))\r
-#define PAD_AVAIL_IN 6\r
-#define PAD_AVAIL_OUT 258\r
-#else\r
-#define PAD_AVAIL_IN 5\r
-#define PAD_AVAIL_OUT 257\r
-#endif\r
-\r
- /* copy state to local variables */\r
- state = (struct inflate_state FAR *)strm->state;\r
-\r
- ar.in = strm->next_in;\r
- ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);\r
- ar.out = strm->next_out;\r
- ar.beg = ar.out - (start - strm->avail_out);\r
- ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);\r
- ar.wsize = state->wsize;\r
- ar.write = state->wnext;\r
- ar.window = state->window;\r
- ar.hold = state->hold;\r
- ar.bits = state->bits;\r
- ar.lcode = state->lencode;\r
- ar.dcode = state->distcode;\r
- ar.lmask = (1U << state->lenbits) - 1;\r
- ar.dmask = (1U << state->distbits) - 1;\r
-\r
- /* decode literals and length/distances until end-of-block or not enough\r
- input data or output space */\r
-\r
- /* align in on 1/2 hold size boundary */\r
- while (((size_t)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {\r
- ar.hold += (unsigned long)*ar.in++ << ar.bits;\r
- ar.bits += 8;\r
- }\r
-\r
- inffas8664fnc(&ar);\r
-\r
- if (ar.status > 1) {\r
- if (ar.status == 2)\r
- strm->msg = "invalid literal/length code";\r
- else if (ar.status == 3)\r
- strm->msg = "invalid distance code";\r
- else\r
- strm->msg = "invalid distance too far back";\r
- state->mode = BAD;\r
- }\r
- else if ( ar.status == 1 ) {\r
- state->mode = TYPE;\r
- }\r
-\r
- /* return unused bytes (on entry, bits < 8, so in won't go too far back) */\r
- ar.len = ar.bits >> 3;\r
- ar.in -= ar.len;\r
- ar.bits -= ar.len << 3;\r
- ar.hold &= (1U << ar.bits) - 1;\r
-\r
- /* update state and return */\r
- strm->next_in = ar.in;\r
- strm->next_out = ar.out;\r
- strm->avail_in = (unsigned)(ar.in < ar.last ?\r
- PAD_AVAIL_IN + (ar.last - ar.in) :\r
- PAD_AVAIL_IN - (ar.in - ar.last));\r
- strm->avail_out = (unsigned)(ar.out < ar.end ?\r
- PAD_AVAIL_OUT + (ar.end - ar.out) :\r
- PAD_AVAIL_OUT - (ar.out - ar.end));\r
- state->hold = (unsigned long)ar.hold;\r
- state->bits = ar.bits;\r
- return;\r
-}\r
-\r
-#endif\r
+++ /dev/null
-; inffasx64.asm is a hand tuned assembler version of inffast.c - fast decoding\r
-; version for AMD64 on Windows using Microsoft C compiler\r
-;\r
-; inffasx64.asm is automatically convert from AMD64 portion of inffas86.c\r
-; inffasx64.asm is called by inffas8664.c, which contain more info.\r
-\r
-\r
-; to compile this file, I use option\r
-; ml64.exe /Flinffasx64 /c /Zi inffasx64.asm\r
-; with Microsoft Macro Assembler (x64) for AMD64\r
-;\r
-\r
-; This file compile with Microsoft Macro Assembler (x64) for AMD64\r
-;\r
-; ml64.exe is given with Visual Studio 2005/2008/2010 and Windows WDK\r
-;\r
-; (you can get Windows WDK with ml64 for AMD64 from\r
-; http://www.microsoft.com/whdc/Devtools/wdk/default.mspx for low price)\r
-;\r
-\r
-\r
-.code\r
-inffas8664fnc PROC\r
-\r
-; see http://weblogs.asp.net/oldnewthing/archive/2004/01/14/58579.aspx and\r
-; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp\r
-;\r
-; All registers must be preserved across the call, except for\r
-; rax, rcx, rdx, r8, r-9, r10, and r11, which are scratch.\r
-\r
-\r
- mov [rsp-8],rsi\r
- mov [rsp-16],rdi\r
- mov [rsp-24],r12\r
- mov [rsp-32],r13\r
- mov [rsp-40],r14\r
- mov [rsp-48],r15\r
- mov [rsp-56],rbx\r
-\r
- mov rax,rcx\r
-\r
- mov [rax+8], rbp ; /* save regs rbp and rsp */\r
- mov [rax], rsp\r
-\r
- mov rsp, rax ; /* make rsp point to &ar */\r
-\r
- mov rsi, [rsp+16] ; /* rsi = in */\r
- mov rdi, [rsp+32] ; /* rdi = out */\r
- mov r9, [rsp+24] ; /* r9 = last */\r
- mov r10, [rsp+48] ; /* r10 = end */\r
- mov rbp, [rsp+64] ; /* rbp = lcode */\r
- mov r11, [rsp+72] ; /* r11 = dcode */\r
- mov rdx, [rsp+80] ; /* rdx = hold */\r
- mov ebx, [rsp+88] ; /* ebx = bits */\r
- mov r12d, [rsp+100] ; /* r12d = lmask */\r
- mov r13d, [rsp+104] ; /* r13d = dmask */\r
- ; /* r14d = len */\r
- ; /* r15d = dist */\r
-\r
-\r
- cld\r
- cmp r10, rdi\r
- je L_one_time ; /* if only one decode left */\r
- cmp r9, rsi\r
-\r
- jne L_do_loop\r
-\r
-\r
-L_one_time:\r
- mov r8, r12 ; /* r8 = lmask */\r
- cmp bl, 32\r
- ja L_get_length_code_one_time\r
-\r
- lodsd ; /* eax = *(uint *)in++ */\r
- mov cl, bl ; /* cl = bits, needs it for shifting */\r
- add bl, 32 ; /* bits += 32 */\r
- shl rax, cl\r
- or rdx, rax ; /* hold |= *((uint *)in)++ << bits */\r
- jmp L_get_length_code_one_time\r
-\r
-ALIGN 4\r
-L_while_test:\r
- cmp r10, rdi\r
- jbe L_break_loop\r
- cmp r9, rsi\r
- jbe L_break_loop\r
-\r
-L_do_loop:\r
- mov r8, r12 ; /* r8 = lmask */\r
- cmp bl, 32\r
- ja L_get_length_code ; /* if (32 < bits) */\r
-\r
- lodsd ; /* eax = *(uint *)in++ */\r
- mov cl, bl ; /* cl = bits, needs it for shifting */\r
- add bl, 32 ; /* bits += 32 */\r
- shl rax, cl\r
- or rdx, rax ; /* hold |= *((uint *)in)++ << bits */\r
-\r
-L_get_length_code:\r
- and r8, rdx ; /* r8 &= hold */\r
- mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */\r
-\r
- mov cl, ah ; /* cl = this.bits */\r
- sub bl, ah ; /* bits -= this.bits */\r
- shr rdx, cl ; /* hold >>= this.bits */\r
-\r
- test al, al\r
- jnz L_test_for_length_base ; /* if (op != 0) 45.7% */\r
-\r
- mov r8, r12 ; /* r8 = lmask */\r
- shr eax, 16 ; /* output this.val char */\r
- stosb\r
-\r
-L_get_length_code_one_time:\r
- and r8, rdx ; /* r8 &= hold */\r
- mov eax, [rbp+r8*4] ; /* eax = lcode[hold & lmask] */\r
-\r
-L_dolen:\r
- mov cl, ah ; /* cl = this.bits */\r
- sub bl, ah ; /* bits -= this.bits */\r
- shr rdx, cl ; /* hold >>= this.bits */\r
-\r
- test al, al\r
- jnz L_test_for_length_base ; /* if (op != 0) 45.7% */\r
-\r
- shr eax, 16 ; /* output this.val char */\r
- stosb\r
- jmp L_while_test\r
-\r
-ALIGN 4\r
-L_test_for_length_base:\r
- mov r14d, eax ; /* len = this */\r
- shr r14d, 16 ; /* len = this.val */\r
- mov cl, al\r
-\r
- test al, 16\r
- jz L_test_for_second_level_length ; /* if ((op & 16) == 0) 8% */\r
- and cl, 15 ; /* op &= 15 */\r
- jz L_decode_distance ; /* if (!op) */\r
-\r
-L_add_bits_to_len:\r
- sub bl, cl\r
- xor eax, eax\r
- inc eax\r
- shl eax, cl\r
- dec eax\r
- and eax, edx ; /* eax &= hold */\r
- shr rdx, cl\r
- add r14d, eax ; /* len += hold & mask[op] */\r
-\r
-L_decode_distance:\r
- mov r8, r13 ; /* r8 = dmask */\r
- cmp bl, 32\r
- ja L_get_distance_code ; /* if (32 < bits) */\r
-\r
- lodsd ; /* eax = *(uint *)in++ */\r
- mov cl, bl ; /* cl = bits, needs it for shifting */\r
- add bl, 32 ; /* bits += 32 */\r
- shl rax, cl\r
- or rdx, rax ; /* hold |= *((uint *)in)++ << bits */\r
-\r
-L_get_distance_code:\r
- and r8, rdx ; /* r8 &= hold */\r
- mov eax, [r11+r8*4] ; /* eax = dcode[hold & dmask] */\r
-\r
-L_dodist:\r
- mov r15d, eax ; /* dist = this */\r
- shr r15d, 16 ; /* dist = this.val */\r
- mov cl, ah\r
- sub bl, ah ; /* bits -= this.bits */\r
- shr rdx, cl ; /* hold >>= this.bits */\r
- mov cl, al ; /* cl = this.op */\r
-\r
- test al, 16 ; /* if ((op & 16) == 0) */\r
- jz L_test_for_second_level_dist\r
- and cl, 15 ; /* op &= 15 */\r
- jz L_check_dist_one\r
-\r
-L_add_bits_to_dist:\r
- sub bl, cl\r
- xor eax, eax\r
- inc eax\r
- shl eax, cl\r
- dec eax ; /* (1 << op) - 1 */\r
- and eax, edx ; /* eax &= hold */\r
- shr rdx, cl\r
- add r15d, eax ; /* dist += hold & ((1 << op) - 1) */\r
-\r
-L_check_window:\r
- mov r8, rsi ; /* save in so from can use it's reg */\r
- mov rax, rdi\r
- sub rax, [rsp+40] ; /* nbytes = out - beg */\r
-\r
- cmp eax, r15d\r
- jb L_clip_window ; /* if (dist > nbytes) 4.2% */\r
-\r
- mov ecx, r14d ; /* ecx = len */\r
- mov rsi, rdi\r
- sub rsi, r15 ; /* from = out - dist */\r
-\r
- sar ecx, 1\r
- jnc L_copy_two ; /* if len % 2 == 0 */\r
-\r
- rep movsw\r
- mov al, [rsi]\r
- mov [rdi], al\r
- inc rdi\r
-\r
- mov rsi, r8 ; /* move in back to %rsi, toss from */\r
- jmp L_while_test\r
-\r
-L_copy_two:\r
- rep movsw\r
- mov rsi, r8 ; /* move in back to %rsi, toss from */\r
- jmp L_while_test\r
-\r
-ALIGN 4\r
-L_check_dist_one:\r
- cmp r15d, 1 ; /* if dist 1, is a memset */\r
- jne L_check_window\r
- cmp [rsp+40], rdi ; /* if out == beg, outside window */\r
- je L_check_window\r
-\r
- mov ecx, r14d ; /* ecx = len */\r
- mov al, [rdi-1]\r
- mov ah, al\r
-\r
- sar ecx, 1\r
- jnc L_set_two\r
- mov [rdi], al\r
- inc rdi\r
-\r
-L_set_two:\r
- rep stosw\r
- jmp L_while_test\r
-\r
-ALIGN 4\r
-L_test_for_second_level_length:\r
- test al, 64\r
- jnz L_test_for_end_of_block ; /* if ((op & 64) != 0) */\r
-\r
- xor eax, eax\r
- inc eax\r
- shl eax, cl\r
- dec eax\r
- and eax, edx ; /* eax &= hold */\r
- add eax, r14d ; /* eax += len */\r
- mov eax, [rbp+rax*4] ; /* eax = lcode[val+(hold&mask[op])]*/\r
- jmp L_dolen\r
-\r
-ALIGN 4\r
-L_test_for_second_level_dist:\r
- test al, 64\r
- jnz L_invalid_distance_code ; /* if ((op & 64) != 0) */\r
-\r
- xor eax, eax\r
- inc eax\r
- shl eax, cl\r
- dec eax\r
- and eax, edx ; /* eax &= hold */\r
- add eax, r15d ; /* eax += dist */\r
- mov eax, [r11+rax*4] ; /* eax = dcode[val+(hold&mask[op])]*/\r
- jmp L_dodist\r
-\r
-ALIGN 4\r
-L_clip_window:\r
- mov ecx, eax ; /* ecx = nbytes */\r
- mov eax, [rsp+92] ; /* eax = wsize, prepare for dist cmp */\r
- neg ecx ; /* nbytes = -nbytes */\r
-\r
- cmp eax, r15d\r
- jb L_invalid_distance_too_far ; /* if (dist > wsize) */\r
-\r
- add ecx, r15d ; /* nbytes = dist - nbytes */\r
- cmp dword ptr [rsp+96], 0\r
- jne L_wrap_around_window ; /* if (write != 0) */\r
-\r
- mov rsi, [rsp+56] ; /* from = window */\r
- sub eax, ecx ; /* eax -= nbytes */\r
- add rsi, rax ; /* from += wsize - nbytes */\r
-\r
- mov eax, r14d ; /* eax = len */\r
- cmp r14d, ecx\r
- jbe L_do_copy ; /* if (nbytes >= len) */\r
-\r
- sub eax, ecx ; /* eax -= nbytes */\r
- rep movsb\r
- mov rsi, rdi\r
- sub rsi, r15 ; /* from = &out[ -dist ] */\r
- jmp L_do_copy\r
-\r
-ALIGN 4\r
-L_wrap_around_window:\r
- mov eax, [rsp+96] ; /* eax = write */\r
- cmp ecx, eax\r
- jbe L_contiguous_in_window ; /* if (write >= nbytes) */\r
-\r
- mov esi, [rsp+92] ; /* from = wsize */\r
- add rsi, [rsp+56] ; /* from += window */\r
- add rsi, rax ; /* from += write */\r
- sub rsi, rcx ; /* from -= nbytes */\r
- sub ecx, eax ; /* nbytes -= write */\r
-\r
- mov eax, r14d ; /* eax = len */\r
- cmp eax, ecx\r
- jbe L_do_copy ; /* if (nbytes >= len) */\r
-\r
- sub eax, ecx ; /* len -= nbytes */\r
- rep movsb\r
- mov rsi, [rsp+56] ; /* from = window */\r
- mov ecx, [rsp+96] ; /* nbytes = write */\r
- cmp eax, ecx\r
- jbe L_do_copy ; /* if (nbytes >= len) */\r
-\r
- sub eax, ecx ; /* len -= nbytes */\r
- rep movsb\r
- mov rsi, rdi\r
- sub rsi, r15 ; /* from = out - dist */\r
- jmp L_do_copy\r
-\r
-ALIGN 4\r
-L_contiguous_in_window:\r
- mov rsi, [rsp+56] ; /* rsi = window */\r
- add rsi, rax\r
- sub rsi, rcx ; /* from += write - nbytes */\r
-\r
- mov eax, r14d ; /* eax = len */\r
- cmp eax, ecx\r
- jbe L_do_copy ; /* if (nbytes >= len) */\r
-\r
- sub eax, ecx ; /* len -= nbytes */\r
- rep movsb\r
- mov rsi, rdi\r
- sub rsi, r15 ; /* from = out - dist */\r
- jmp L_do_copy ; /* if (nbytes >= len) */\r
-\r
-ALIGN 4\r
-L_do_copy:\r
- mov ecx, eax ; /* ecx = len */\r
- rep movsb\r
-\r
- mov rsi, r8 ; /* move in back to %esi, toss from */\r
- jmp L_while_test\r
-\r
-L_test_for_end_of_block:\r
- test al, 32\r
- jz L_invalid_literal_length_code\r
- mov dword ptr [rsp+116], 1\r
- jmp L_break_loop_with_status\r
-\r
-L_invalid_literal_length_code:\r
- mov dword ptr [rsp+116], 2\r
- jmp L_break_loop_with_status\r
-\r
-L_invalid_distance_code:\r
- mov dword ptr [rsp+116], 3\r
- jmp L_break_loop_with_status\r
-\r
-L_invalid_distance_too_far:\r
- mov dword ptr [rsp+116], 4\r
- jmp L_break_loop_with_status\r
-\r
-L_break_loop:\r
- mov dword ptr [rsp+116], 0\r
-\r
-L_break_loop_with_status:\r
-; /* put in, out, bits, and hold back into ar and pop esp */\r
- mov [rsp+16], rsi ; /* in */\r
- mov [rsp+32], rdi ; /* out */\r
- mov [rsp+88], ebx ; /* bits */\r
- mov [rsp+80], rdx ; /* hold */\r
-\r
- mov rax, [rsp] ; /* restore rbp and rsp */\r
- mov rbp, [rsp+8]\r
- mov rsp, rax\r
-\r
-\r
-\r
- mov rsi,[rsp-8]\r
- mov rdi,[rsp-16]\r
- mov r12,[rsp-24]\r
- mov r13,[rsp-32]\r
- mov r14,[rsp-40]\r
- mov r15,[rsp-48]\r
- mov rbx,[rsp-56]\r
-\r
- ret 0\r
-; :\r
-; : "m" (ar)\r
-; : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi",\r
-; "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"\r
-; );\r
-\r
-inffas8664fnc ENDP\r
-;_TEXT ENDS\r
-END\r
+++ /dev/null
-Summary\r
--------\r
-This directory contains ASM implementations of the functions\r
-longest_match() and inflate_fast(), for 64 bits x86 (both AMD64 and Intel EM64t),\r
-for use with Microsoft Macro Assembler (x64) for AMD64 and Microsoft C++ 64 bits.\r
-\r
-gvmat64.asm is written by Gilles Vollant (2005), by using Brian Raiter 686/32 bits\r
- assembly optimized version from Jean-loup Gailly original longest_match function\r
-\r
-inffasx64.asm and inffas8664.c were written by Chris Anderson, by optimizing\r
- original function from Mark Adler\r
-\r
-Use instructions\r
-----------------\r
-Assemble the .asm files using MASM and put the object files into the zlib source\r
-directory. You can also get object files here:\r
-\r
- http://www.winimage.com/zLibDll/zlib124_masm_obj.zip\r
-\r
-define ASMV and ASMINF in your project. Include inffas8664.c in your source tree,\r
-and inffasx64.obj and gvmat64.obj as object to link.\r
-\r
-\r
-Build instructions\r
-------------------\r
-run bld_64.bat with Microsoft Macro Assembler (x64) for AMD64 (ml64.exe)\r
-\r
-ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK\r
-\r
-You can get Windows 2003 server DDK with ml64 and cl for AMD64 from\r
- http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price)\r
+++ /dev/null
-ml /coff /Zi /c /Flmatch686.lst match686.asm\r
-ml /coff /Zi /c /Flinffas32.lst inffas32.asm\r
+++ /dev/null
-;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding\r
-; *\r
-; * inffas32.asm is derivated from inffas86.c, with translation of assembly code\r
-; *\r
-; * Copyright (C) 1995-2003 Mark Adler\r
-; * For conditions of distribution and use, see copyright notice in zlib.h\r
-; *\r
-; * Copyright (C) 2003 Chris Anderson <christop@charm.net>\r
-; * Please use the copyright conditions above.\r
-; *\r
-; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from\r
-; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at\r
-; * the moment. I have successfully compiled and tested this code with gcc2.96,\r
-; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S\r
-; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX\r
-; * enabled. I will attempt to merge the MMX code into this version. Newer\r
-; * versions of this and inffast.S can be found at\r
-; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/\r
-; *\r
-; * 2005 : modification by Gilles Vollant\r
-; */\r
-; For Visual C++ 4.x and higher and ML 6.x and higher\r
-; ml.exe is in directory \MASM611C of Win95 DDK\r
-; ml.exe is also distributed in http://www.masm32.com/masmdl.htm\r
-; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/\r
-;\r
-;\r
-; compile with command line option\r
-; ml /coff /Zi /c /Flinffas32.lst inffas32.asm\r
-\r
-; if you define NO_GZIP (see inflate.h), compile with\r
-; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm\r
-\r
-\r
-; zlib122sup is 0 fort zlib 1.2.2.1 and lower\r
-; zlib122sup is 8 fort zlib 1.2.2.2 and more (with addition of dmax and head\r
-; in inflate_state in inflate.h)\r
-zlib1222sup equ 8\r
-\r
-\r
-IFDEF GUNZIP\r
- INFLATE_MODE_TYPE equ 11\r
- INFLATE_MODE_BAD equ 26\r
-ELSE\r
- IFNDEF NO_GUNZIP\r
- INFLATE_MODE_TYPE equ 11\r
- INFLATE_MODE_BAD equ 26\r
- ELSE\r
- INFLATE_MODE_TYPE equ 3\r
- INFLATE_MODE_BAD equ 17\r
- ENDIF\r
-ENDIF\r
-\r
-\r
-; 75 "inffast.S"\r
-;FILE "inffast.S"\r
-\r
-;;;GLOBAL _inflate_fast\r
-\r
-;;;SECTION .text\r
-\r
-\r
-\r
- .586p\r
- .mmx\r
-\r
- name inflate_fast_x86\r
- .MODEL FLAT\r
-\r
-_DATA segment\r
-inflate_fast_use_mmx:\r
- dd 1\r
-\r
-\r
-_TEXT segment\r
-\r
-\r
-\r
-ALIGN 4\r
- db 'Fast decoding Code from Chris Anderson'\r
- db 0\r
-\r
-ALIGN 4\r
-invalid_literal_length_code_msg:\r
- db 'invalid literal/length code'\r
- db 0\r
-\r
-ALIGN 4\r
-invalid_distance_code_msg:\r
- db 'invalid distance code'\r
- db 0\r
-\r
-ALIGN 4\r
-invalid_distance_too_far_msg:\r
- db 'invalid distance too far back'\r
- db 0\r
-\r
-\r
-ALIGN 4\r
-inflate_fast_mask:\r
-dd 0\r
-dd 1\r
-dd 3\r
-dd 7\r
-dd 15\r
-dd 31\r
-dd 63\r
-dd 127\r
-dd 255\r
-dd 511\r
-dd 1023\r
-dd 2047\r
-dd 4095\r
-dd 8191\r
-dd 16383\r
-dd 32767\r
-dd 65535\r
-dd 131071\r
-dd 262143\r
-dd 524287\r
-dd 1048575\r
-dd 2097151\r
-dd 4194303\r
-dd 8388607\r
-dd 16777215\r
-dd 33554431\r
-dd 67108863\r
-dd 134217727\r
-dd 268435455\r
-dd 536870911\r
-dd 1073741823\r
-dd 2147483647\r
-dd 4294967295\r
-\r
-\r
-mode_state equ 0 ;/* state->mode */\r
-wsize_state equ (32+zlib1222sup) ;/* state->wsize */\r
-write_state equ (36+4+zlib1222sup) ;/* state->write */\r
-window_state equ (40+4+zlib1222sup) ;/* state->window */\r
-hold_state equ (44+4+zlib1222sup) ;/* state->hold */\r
-bits_state equ (48+4+zlib1222sup) ;/* state->bits */\r
-lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */\r
-distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */\r
-lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */\r
-distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */\r
-\r
-\r
-;;SECTION .text\r
-; 205 "inffast.S"\r
-;GLOBAL inflate_fast_use_mmx\r
-\r
-;SECTION .data\r
-\r
-\r
-; GLOBAL inflate_fast_use_mmx:object\r
-;.size inflate_fast_use_mmx, 4\r
-; 226 "inffast.S"\r
-;SECTION .text\r
-\r
-ALIGN 4\r
-_inflate_fast proc near\r
-.FPO (16, 4, 0, 0, 1, 0)\r
- push edi\r
- push esi\r
- push ebp\r
- push ebx\r
- pushfd\r
- sub esp,64\r
- cld\r
-\r
-\r
-\r
-\r
- mov esi, [esp+88]\r
- mov edi, [esi+28]\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
- mov edx, [esi+4]\r
- mov eax, [esi+0]\r
-\r
- add edx,eax\r
- sub edx,11\r
-\r
- mov [esp+44],eax\r
- mov [esp+20],edx\r
-\r
- mov ebp, [esp+92]\r
- mov ecx, [esi+16]\r
- mov ebx, [esi+12]\r
-\r
- sub ebp,ecx\r
- neg ebp\r
- add ebp,ebx\r
-\r
- sub ecx,257\r
- add ecx,ebx\r
-\r
- mov [esp+60],ebx\r
- mov [esp+40],ebp\r
- mov [esp+16],ecx\r
-; 285 "inffast.S"\r
- mov eax, [edi+lencode_state]\r
- mov ecx, [edi+distcode_state]\r
-\r
- mov [esp+8],eax\r
- mov [esp+12],ecx\r
-\r
- mov eax,1\r
- mov ecx, [edi+lenbits_state]\r
- shl eax,cl\r
- dec eax\r
- mov [esp+0],eax\r
-\r
- mov eax,1\r
- mov ecx, [edi+distbits_state]\r
- shl eax,cl\r
- dec eax\r
- mov [esp+4],eax\r
-\r
- mov eax, [edi+wsize_state]\r
- mov ecx, [edi+write_state]\r
- mov edx, [edi+window_state]\r
-\r
- mov [esp+52],eax\r
- mov [esp+48],ecx\r
- mov [esp+56],edx\r
-\r
- mov ebp, [edi+hold_state]\r
- mov ebx, [edi+bits_state]\r
-; 321 "inffast.S"\r
- mov esi, [esp+44]\r
- mov ecx, [esp+20]\r
- cmp ecx,esi\r
- ja L_align_long\r
-\r
- add ecx,11\r
- sub ecx,esi\r
- mov eax,12\r
- sub eax,ecx\r
- lea edi, [esp+28]\r
- rep movsb\r
- mov ecx,eax\r
- xor eax,eax\r
- rep stosb\r
- lea esi, [esp+28]\r
- mov [esp+20],esi\r
- jmp L_is_aligned\r
-\r
-\r
-L_align_long:\r
- test esi,3\r
- jz L_is_aligned\r
- xor eax,eax\r
- mov al, [esi]\r
- inc esi\r
- mov ecx,ebx\r
- add ebx,8\r
- shl eax,cl\r
- or ebp,eax\r
- jmp L_align_long\r
-\r
-L_is_aligned:\r
- mov edi, [esp+60]\r
-; 366 "inffast.S"\r
-L_check_mmx:\r
- cmp dword ptr [inflate_fast_use_mmx],2\r
- je L_init_mmx\r
- ja L_do_loop\r
-\r
- push eax\r
- push ebx\r
- push ecx\r
- push edx\r
- pushfd\r
- mov eax, [esp]\r
- xor dword ptr [esp],0200000h\r
-\r
-\r
-\r
-\r
- popfd\r
- pushfd\r
- pop edx\r
- xor edx,eax\r
- jz L_dont_use_mmx\r
- xor eax,eax\r
- cpuid\r
- cmp ebx,0756e6547h\r
- jne L_dont_use_mmx\r
- cmp ecx,06c65746eh\r
- jne L_dont_use_mmx\r
- cmp edx,049656e69h\r
- jne L_dont_use_mmx\r
- mov eax,1\r
- cpuid\r
- shr eax,8\r
- and eax,15\r
- cmp eax,6\r
- jne L_dont_use_mmx\r
- test edx,0800000h\r
- jnz L_use_mmx\r
- jmp L_dont_use_mmx\r
-L_use_mmx:\r
- mov dword ptr [inflate_fast_use_mmx],2\r
- jmp L_check_mmx_pop\r
-L_dont_use_mmx:\r
- mov dword ptr [inflate_fast_use_mmx],3\r
-L_check_mmx_pop:\r
- pop edx\r
- pop ecx\r
- pop ebx\r
- pop eax\r
- jmp L_check_mmx\r
-; 426 "inffast.S"\r
-ALIGN 4\r
-L_do_loop:\r
-; 437 "inffast.S"\r
- cmp bl,15\r
- ja L_get_length_code\r
-\r
- xor eax,eax\r
- lodsw\r
- mov cl,bl\r
- add bl,16\r
- shl eax,cl\r
- or ebp,eax\r
-\r
-L_get_length_code:\r
- mov edx, [esp+0]\r
- mov ecx, [esp+8]\r
- and edx,ebp\r
- mov eax, [ecx+edx*4]\r
-\r
-L_dolen:\r
-\r
-\r
-\r
-\r
-\r
-\r
- mov cl,ah\r
- sub bl,ah\r
- shr ebp,cl\r
-\r
-\r
-\r
-\r
-\r
-\r
- test al,al\r
- jnz L_test_for_length_base\r
-\r
- shr eax,16\r
- stosb\r
-\r
-L_while_test:\r
-\r
-\r
- cmp [esp+16],edi\r
- jbe L_break_loop\r
-\r
- cmp [esp+20],esi\r
- ja L_do_loop\r
- jmp L_break_loop\r
-\r
-L_test_for_length_base:\r
-; 502 "inffast.S"\r
- mov edx,eax\r
- shr edx,16\r
- mov cl,al\r
-\r
- test al,16\r
- jz L_test_for_second_level_length\r
- and cl,15\r
- jz L_save_len\r
- cmp bl,cl\r
- jae L_add_bits_to_len\r
-\r
- mov ch,cl\r
- xor eax,eax\r
- lodsw\r
- mov cl,bl\r
- add bl,16\r
- shl eax,cl\r
- or ebp,eax\r
- mov cl,ch\r
-\r
-L_add_bits_to_len:\r
- mov eax,1\r
- shl eax,cl\r
- dec eax\r
- sub bl,cl\r
- and eax,ebp\r
- shr ebp,cl\r
- add edx,eax\r
-\r
-L_save_len:\r
- mov [esp+24],edx\r
-\r
-\r
-L_decode_distance:\r
-; 549 "inffast.S"\r
- cmp bl,15\r
- ja L_get_distance_code\r
-\r
- xor eax,eax\r
- lodsw\r
- mov cl,bl\r
- add bl,16\r
- shl eax,cl\r
- or ebp,eax\r
-\r
-L_get_distance_code:\r
- mov edx, [esp+4]\r
- mov ecx, [esp+12]\r
- and edx,ebp\r
- mov eax, [ecx+edx*4]\r
-\r
-\r
-L_dodist:\r
- mov edx,eax\r
- shr edx,16\r
- mov cl,ah\r
- sub bl,ah\r
- shr ebp,cl\r
-; 584 "inffast.S"\r
- mov cl,al\r
-\r
- test al,16\r
- jz L_test_for_second_level_dist\r
- and cl,15\r
- jz L_check_dist_one\r
- cmp bl,cl\r
- jae L_add_bits_to_dist\r
-\r
- mov ch,cl\r
- xor eax,eax\r
- lodsw\r
- mov cl,bl\r
- add bl,16\r
- shl eax,cl\r
- or ebp,eax\r
- mov cl,ch\r
-\r
-L_add_bits_to_dist:\r
- mov eax,1\r
- shl eax,cl\r
- dec eax\r
- sub bl,cl\r
- and eax,ebp\r
- shr ebp,cl\r
- add edx,eax\r
- jmp L_check_window\r
-\r
-L_check_window:\r
-; 625 "inffast.S"\r
- mov [esp+44],esi\r
- mov eax,edi\r
- sub eax, [esp+40]\r
-\r
- cmp eax,edx\r
- jb L_clip_window\r
-\r
- mov ecx, [esp+24]\r
- mov esi,edi\r
- sub esi,edx\r
-\r
- sub ecx,3\r
- mov al, [esi]\r
- mov [edi],al\r
- mov al, [esi+1]\r
- mov dl, [esi+2]\r
- add esi,3\r
- mov [edi+1],al\r
- mov [edi+2],dl\r
- add edi,3\r
- rep movsb\r
-\r
- mov esi, [esp+44]\r
- jmp L_while_test\r
-\r
-ALIGN 4\r
-L_check_dist_one:\r
- cmp edx,1\r
- jne L_check_window\r
- cmp [esp+40],edi\r
- je L_check_window\r
-\r
- dec edi\r
- mov ecx, [esp+24]\r
- mov al, [edi]\r
- sub ecx,3\r
-\r
- mov [edi+1],al\r
- mov [edi+2],al\r
- mov [edi+3],al\r
- add edi,4\r
- rep stosb\r
-\r
- jmp L_while_test\r
-\r
-ALIGN 4\r
-L_test_for_second_level_length:\r
-\r
-\r
-\r
-\r
- test al,64\r
- jnz L_test_for_end_of_block\r
-\r
- mov eax,1\r
- shl eax,cl\r
- dec eax\r
- and eax,ebp\r
- add eax,edx\r
- mov edx, [esp+8]\r
- mov eax, [edx+eax*4]\r
- jmp L_dolen\r
-\r
-ALIGN 4\r
-L_test_for_second_level_dist:\r
-\r
-\r
-\r
-\r
- test al,64\r
- jnz L_invalid_distance_code\r
-\r
- mov eax,1\r
- shl eax,cl\r
- dec eax\r
- and eax,ebp\r
- add eax,edx\r
- mov edx, [esp+12]\r
- mov eax, [edx+eax*4]\r
- jmp L_dodist\r
-\r
-ALIGN 4\r
-L_clip_window:\r
-; 721 "inffast.S"\r
- mov ecx,eax\r
- mov eax, [esp+52]\r
- neg ecx\r
- mov esi, [esp+56]\r
-\r
- cmp eax,edx\r
- jb L_invalid_distance_too_far\r
-\r
- add ecx,edx\r
- cmp dword ptr [esp+48],0\r
- jne L_wrap_around_window\r
-\r
- sub eax,ecx\r
- add esi,eax\r
-; 749 "inffast.S"\r
- mov eax, [esp+24]\r
- cmp eax,ecx\r
- jbe L_do_copy1\r
-\r
- sub eax,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,edx\r
- jmp L_do_copy1\r
-\r
- cmp eax,ecx\r
- jbe L_do_copy1\r
-\r
- sub eax,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,edx\r
- jmp L_do_copy1\r
-\r
-L_wrap_around_window:\r
-; 793 "inffast.S"\r
- mov eax, [esp+48]\r
- cmp ecx,eax\r
- jbe L_contiguous_in_window\r
-\r
- add esi, [esp+52]\r
- add esi,eax\r
- sub esi,ecx\r
- sub ecx,eax\r
-\r
-\r
- mov eax, [esp+24]\r
- cmp eax,ecx\r
- jbe L_do_copy1\r
-\r
- sub eax,ecx\r
- rep movsb\r
- mov esi, [esp+56]\r
- mov ecx, [esp+48]\r
- cmp eax,ecx\r
- jbe L_do_copy1\r
-\r
- sub eax,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,edx\r
- jmp L_do_copy1\r
-\r
-L_contiguous_in_window:\r
-; 836 "inffast.S"\r
- add esi,eax\r
- sub esi,ecx\r
-\r
-\r
- mov eax, [esp+24]\r
- cmp eax,ecx\r
- jbe L_do_copy1\r
-\r
- sub eax,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,edx\r
-\r
-L_do_copy1:\r
-; 862 "inffast.S"\r
- mov ecx,eax\r
- rep movsb\r
-\r
- mov esi, [esp+44]\r
- jmp L_while_test\r
-; 878 "inffast.S"\r
-ALIGN 4\r
-L_init_mmx:\r
- emms\r
-\r
-\r
-\r
-\r
-\r
- movd mm0,ebp\r
- mov ebp,ebx\r
-; 896 "inffast.S"\r
- movd mm4,dword ptr [esp+0]\r
- movq mm3,mm4\r
- movd mm5,dword ptr [esp+4]\r
- movq mm2,mm5\r
- pxor mm1,mm1\r
- mov ebx, [esp+8]\r
- jmp L_do_loop_mmx\r
-\r
-ALIGN 4\r
-L_do_loop_mmx:\r
- psrlq mm0,mm1\r
-\r
- cmp ebp,32\r
- ja L_get_length_code_mmx\r
-\r
- movd mm6,ebp\r
- movd mm7,dword ptr [esi]\r
- add esi,4\r
- psllq mm7,mm6\r
- add ebp,32\r
- por mm0,mm7\r
-\r
-L_get_length_code_mmx:\r
- pand mm4,mm0\r
- movd eax,mm4\r
- movq mm4,mm3\r
- mov eax, [ebx+eax*4]\r
-\r
-L_dolen_mmx:\r
- movzx ecx,ah\r
- movd mm1,ecx\r
- sub ebp,ecx\r
-\r
- test al,al\r
- jnz L_test_for_length_base_mmx\r
-\r
- shr eax,16\r
- stosb\r
-\r
-L_while_test_mmx:\r
-\r
-\r
- cmp [esp+16],edi\r
- jbe L_break_loop\r
-\r
- cmp [esp+20],esi\r
- ja L_do_loop_mmx\r
- jmp L_break_loop\r
-\r
-L_test_for_length_base_mmx:\r
-\r
- mov edx,eax\r
- shr edx,16\r
-\r
- test al,16\r
- jz L_test_for_second_level_length_mmx\r
- and eax,15\r
- jz L_decode_distance_mmx\r
-\r
- psrlq mm0,mm1\r
- movd mm1,eax\r
- movd ecx,mm0\r
- sub ebp,eax\r
- and ecx, [inflate_fast_mask+eax*4]\r
- add edx,ecx\r
-\r
-L_decode_distance_mmx:\r
- psrlq mm0,mm1\r
-\r
- cmp ebp,32\r
- ja L_get_dist_code_mmx\r
-\r
- movd mm6,ebp\r
- movd mm7,dword ptr [esi]\r
- add esi,4\r
- psllq mm7,mm6\r
- add ebp,32\r
- por mm0,mm7\r
-\r
-L_get_dist_code_mmx:\r
- mov ebx, [esp+12]\r
- pand mm5,mm0\r
- movd eax,mm5\r
- movq mm5,mm2\r
- mov eax, [ebx+eax*4]\r
-\r
-L_dodist_mmx:\r
-\r
- movzx ecx,ah\r
- mov ebx,eax\r
- shr ebx,16\r
- sub ebp,ecx\r
- movd mm1,ecx\r
-\r
- test al,16\r
- jz L_test_for_second_level_dist_mmx\r
- and eax,15\r
- jz L_check_dist_one_mmx\r
-\r
-L_add_bits_to_dist_mmx:\r
- psrlq mm0,mm1\r
- movd mm1,eax\r
- movd ecx,mm0\r
- sub ebp,eax\r
- and ecx, [inflate_fast_mask+eax*4]\r
- add ebx,ecx\r
-\r
-L_check_window_mmx:\r
- mov [esp+44],esi\r
- mov eax,edi\r
- sub eax, [esp+40]\r
-\r
- cmp eax,ebx\r
- jb L_clip_window_mmx\r
-\r
- mov ecx,edx\r
- mov esi,edi\r
- sub esi,ebx\r
-\r
- sub ecx,3\r
- mov al, [esi]\r
- mov [edi],al\r
- mov al, [esi+1]\r
- mov dl, [esi+2]\r
- add esi,3\r
- mov [edi+1],al\r
- mov [edi+2],dl\r
- add edi,3\r
- rep movsb\r
-\r
- mov esi, [esp+44]\r
- mov ebx, [esp+8]\r
- jmp L_while_test_mmx\r
-\r
-ALIGN 4\r
-L_check_dist_one_mmx:\r
- cmp ebx,1\r
- jne L_check_window_mmx\r
- cmp [esp+40],edi\r
- je L_check_window_mmx\r
-\r
- dec edi\r
- mov ecx,edx\r
- mov al, [edi]\r
- sub ecx,3\r
-\r
- mov [edi+1],al\r
- mov [edi+2],al\r
- mov [edi+3],al\r
- add edi,4\r
- rep stosb\r
-\r
- mov ebx, [esp+8]\r
- jmp L_while_test_mmx\r
-\r
-ALIGN 4\r
-L_test_for_second_level_length_mmx:\r
- test al,64\r
- jnz L_test_for_end_of_block\r
-\r
- and eax,15\r
- psrlq mm0,mm1\r
- movd ecx,mm0\r
- and ecx, [inflate_fast_mask+eax*4]\r
- add ecx,edx\r
- mov eax, [ebx+ecx*4]\r
- jmp L_dolen_mmx\r
-\r
-ALIGN 4\r
-L_test_for_second_level_dist_mmx:\r
- test al,64\r
- jnz L_invalid_distance_code\r
-\r
- and eax,15\r
- psrlq mm0,mm1\r
- movd ecx,mm0\r
- and ecx, [inflate_fast_mask+eax*4]\r
- mov eax, [esp+12]\r
- add ecx,ebx\r
- mov eax, [eax+ecx*4]\r
- jmp L_dodist_mmx\r
-\r
-ALIGN 4\r
-L_clip_window_mmx:\r
-\r
- mov ecx,eax\r
- mov eax, [esp+52]\r
- neg ecx\r
- mov esi, [esp+56]\r
-\r
- cmp eax,ebx\r
- jb L_invalid_distance_too_far\r
-\r
- add ecx,ebx\r
- cmp dword ptr [esp+48],0\r
- jne L_wrap_around_window_mmx\r
-\r
- sub eax,ecx\r
- add esi,eax\r
-\r
- cmp edx,ecx\r
- jbe L_do_copy1_mmx\r
-\r
- sub edx,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,ebx\r
- jmp L_do_copy1_mmx\r
-\r
- cmp edx,ecx\r
- jbe L_do_copy1_mmx\r
-\r
- sub edx,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,ebx\r
- jmp L_do_copy1_mmx\r
-\r
-L_wrap_around_window_mmx:\r
-\r
- mov eax, [esp+48]\r
- cmp ecx,eax\r
- jbe L_contiguous_in_window_mmx\r
-\r
- add esi, [esp+52]\r
- add esi,eax\r
- sub esi,ecx\r
- sub ecx,eax\r
-\r
-\r
- cmp edx,ecx\r
- jbe L_do_copy1_mmx\r
-\r
- sub edx,ecx\r
- rep movsb\r
- mov esi, [esp+56]\r
- mov ecx, [esp+48]\r
- cmp edx,ecx\r
- jbe L_do_copy1_mmx\r
-\r
- sub edx,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,ebx\r
- jmp L_do_copy1_mmx\r
-\r
-L_contiguous_in_window_mmx:\r
-\r
- add esi,eax\r
- sub esi,ecx\r
-\r
-\r
- cmp edx,ecx\r
- jbe L_do_copy1_mmx\r
-\r
- sub edx,ecx\r
- rep movsb\r
- mov esi,edi\r
- sub esi,ebx\r
-\r
-L_do_copy1_mmx:\r
-\r
-\r
- mov ecx,edx\r
- rep movsb\r
-\r
- mov esi, [esp+44]\r
- mov ebx, [esp+8]\r
- jmp L_while_test_mmx\r
-; 1174 "inffast.S"\r
-L_invalid_distance_code:\r
-\r
-\r
-\r
-\r
-\r
- mov ecx, invalid_distance_code_msg\r
- mov edx,INFLATE_MODE_BAD\r
- jmp L_update_stream_state\r
-\r
-L_test_for_end_of_block:\r
-\r
-\r
-\r
-\r
-\r
- test al,32\r
- jz L_invalid_literal_length_code\r
-\r
- mov ecx,0\r
- mov edx,INFLATE_MODE_TYPE\r
- jmp L_update_stream_state\r
-\r
-L_invalid_literal_length_code:\r
-\r
-\r
-\r
-\r
-\r
- mov ecx, invalid_literal_length_code_msg\r
- mov edx,INFLATE_MODE_BAD\r
- jmp L_update_stream_state\r
-\r
-L_invalid_distance_too_far:\r
-\r
-\r
-\r
- mov esi, [esp+44]\r
- mov ecx, invalid_distance_too_far_msg\r
- mov edx,INFLATE_MODE_BAD\r
- jmp L_update_stream_state\r
-\r
-L_update_stream_state:\r
-\r
- mov eax, [esp+88]\r
- test ecx,ecx\r
- jz L_skip_msg\r
- mov [eax+24],ecx\r
-L_skip_msg:\r
- mov eax, [eax+28]\r
- mov [eax+mode_state],edx\r
- jmp L_break_loop\r
-\r
-ALIGN 4\r
-L_break_loop:\r
-; 1243 "inffast.S"\r
- cmp dword ptr [inflate_fast_use_mmx],2\r
- jne L_update_next_in\r
-\r
-\r
-\r
- mov ebx,ebp\r
-\r
-L_update_next_in:\r
-; 1266 "inffast.S"\r
- mov eax, [esp+88]\r
- mov ecx,ebx\r
- mov edx, [eax+28]\r
- shr ecx,3\r
- sub esi,ecx\r
- shl ecx,3\r
- sub ebx,ecx\r
- mov [eax+12],edi\r
- mov [edx+bits_state],ebx\r
- mov ecx,ebx\r
-\r
- lea ebx, [esp+28]\r
- cmp [esp+20],ebx\r
- jne L_buf_not_used\r
-\r
- sub esi,ebx\r
- mov ebx, [eax+0]\r
- mov [esp+20],ebx\r
- add esi,ebx\r
- mov ebx, [eax+4]\r
- sub ebx,11\r
- add [esp+20],ebx\r
-\r
-L_buf_not_used:\r
- mov [eax+0],esi\r
-\r
- mov ebx,1\r
- shl ebx,cl\r
- dec ebx\r
-\r
-\r
-\r
-\r
-\r
- cmp dword ptr [inflate_fast_use_mmx],2\r
- jne L_update_hold\r
-\r
-\r
-\r
- psrlq mm0,mm1\r
- movd ebp,mm0\r
-\r
- emms\r
-\r
-L_update_hold:\r
-\r
-\r
-\r
- and ebp,ebx\r
- mov [edx+hold_state],ebp\r
-\r
-\r
-\r
-\r
- mov ebx, [esp+20]\r
- cmp ebx,esi\r
- jbe L_last_is_smaller\r
-\r
- sub ebx,esi\r
- add ebx,11\r
- mov [eax+4],ebx\r
- jmp L_fixup_out\r
-L_last_is_smaller:\r
- sub esi,ebx\r
- neg esi\r
- add esi,11\r
- mov [eax+4],esi\r
-\r
-\r
-\r
-\r
-L_fixup_out:\r
-\r
- mov ebx, [esp+16]\r
- cmp ebx,edi\r
- jbe L_end_is_smaller\r
-\r
- sub ebx,edi\r
- add ebx,257\r
- mov [eax+16],ebx\r
- jmp L_done\r
-L_end_is_smaller:\r
- sub edi,ebx\r
- neg edi\r
- add edi,257\r
- mov [eax+16],edi\r
-\r
-\r
-\r
-\r
-\r
-L_done:\r
- add esp,64\r
- popfd\r
- pop ebx\r
- pop ebp\r
- pop esi\r
- pop edi\r
- ret\r
-_inflate_fast endp\r
-\r
-_TEXT ends\r
-end\r
+++ /dev/null
-; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86\r
-; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.\r
-; File written by Gilles Vollant, by converting match686.S from Brian Raiter\r
-; for MASM. This is as assembly version of longest_match\r
-; from Jean-loup Gailly in deflate.c\r
-;\r
-; http://www.zlib.net\r
-; http://www.winimage.com/zLibDll\r
-; http://www.muppetlabs.com/~breadbox/software/assembly.html\r
-;\r
-; For Visual C++ 4.x and higher and ML 6.x and higher\r
-; ml.exe is distributed in\r
-; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64\r
-;\r
-; this file contain two implementation of longest_match\r
-;\r
-; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro\r
-; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)\r
-;\r
-; for using an assembly version of longest_match, you need define ASMV in project\r
-;\r
-; compile the asm file running\r
-; ml /coff /Zi /c /Flmatch686.lst match686.asm\r
-; and do not include match686.obj in your project\r
-;\r
-; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for\r
-; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor\r
-; with autoselect (with cpu detection code)\r
-; if you want support the old pentium optimization, you can still use these version\r
-;\r
-; this file is not optimized for old pentium, but it compatible with all x86 32 bits\r
-; processor (starting 80386)\r
-;\r
-;\r
-; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2\r
-\r
-;uInt longest_match(s, cur_match)\r
-; deflate_state *s;\r
-; IPos cur_match; /* current match */\r
-\r
- NbStack equ 76\r
- cur_match equ dword ptr[esp+NbStack-0]\r
- str_s equ dword ptr[esp+NbStack-4]\r
-; 5 dword on top (ret,ebp,esi,edi,ebx)\r
- adrret equ dword ptr[esp+NbStack-8]\r
- pushebp equ dword ptr[esp+NbStack-12]\r
- pushedi equ dword ptr[esp+NbStack-16]\r
- pushesi equ dword ptr[esp+NbStack-20]\r
- pushebx equ dword ptr[esp+NbStack-24]\r
-\r
- chain_length equ dword ptr [esp+NbStack-28]\r
- limit equ dword ptr [esp+NbStack-32]\r
- best_len equ dword ptr [esp+NbStack-36]\r
- window equ dword ptr [esp+NbStack-40]\r
- prev equ dword ptr [esp+NbStack-44]\r
- scan_start equ word ptr [esp+NbStack-48]\r
- wmask equ dword ptr [esp+NbStack-52]\r
- match_start_ptr equ dword ptr [esp+NbStack-56]\r
- nice_match equ dword ptr [esp+NbStack-60]\r
- scan equ dword ptr [esp+NbStack-64]\r
-\r
- windowlen equ dword ptr [esp+NbStack-68]\r
- match_start equ dword ptr [esp+NbStack-72]\r
- strend equ dword ptr [esp+NbStack-76]\r
- NbStackAdd equ (NbStack-24)\r
-\r
- .386p\r
-\r
- name gvmatch\r
- .MODEL FLAT\r
-\r
-\r
-\r
-; all the +zlib1222add offsets are due to the addition of fields\r
-; in zlib in the deflate_state structure since the asm code was first written\r
-; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").\r
-; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").\r
-; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").\r
-\r
- zlib1222add equ 8\r
-\r
-; Note : these value are good with a 8 bytes boundary pack structure\r
- dep_chain_length equ 74h+zlib1222add\r
- dep_window equ 30h+zlib1222add\r
- dep_strstart equ 64h+zlib1222add\r
- dep_prev_length equ 70h+zlib1222add\r
- dep_nice_match equ 88h+zlib1222add\r
- dep_w_size equ 24h+zlib1222add\r
- dep_prev equ 38h+zlib1222add\r
- dep_w_mask equ 2ch+zlib1222add\r
- dep_good_match equ 84h+zlib1222add\r
- dep_match_start equ 68h+zlib1222add\r
- dep_lookahead equ 6ch+zlib1222add\r
-\r
-\r
-_TEXT segment\r
-\r
-IFDEF NOUNDERLINE\r
- public longest_match\r
- public match_init\r
-ELSE\r
- public _longest_match\r
- public _match_init\r
-ENDIF\r
-\r
- MAX_MATCH equ 258\r
- MIN_MATCH equ 3\r
- MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)\r
-\r
-\r
-\r
-MAX_MATCH equ 258\r
-MIN_MATCH equ 3\r
-MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)\r
-MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h)\r
-\r
-\r
-;;; stack frame offsets\r
-\r
-chainlenwmask equ esp + 0 ; high word: current chain len\r
- ; low word: s->wmask\r
-window equ esp + 4 ; local copy of s->window\r
-windowbestlen equ esp + 8 ; s->window + bestlen\r
-scanstart equ esp + 16 ; first two bytes of string\r
-scanend equ esp + 12 ; last two bytes of string\r
-scanalign equ esp + 20 ; dword-misalignment of string\r
-nicematch equ esp + 24 ; a good enough match size\r
-bestlen equ esp + 28 ; size of best match so far\r
-scan equ esp + 32 ; ptr to string wanting match\r
-\r
-LocalVarsSize equ 36\r
-; saved ebx byte esp + 36\r
-; saved edi byte esp + 40\r
-; saved esi byte esp + 44\r
-; saved ebp byte esp + 48\r
-; return address byte esp + 52\r
-deflatestate equ esp + 56 ; the function arguments\r
-curmatch equ esp + 60\r
-\r
-;;; Offsets for fields in the deflate_state structure. These numbers\r
-;;; are calculated from the definition of deflate_state, with the\r
-;;; assumption that the compiler will dword-align the fields. (Thus,\r
-;;; changing the definition of deflate_state could easily cause this\r
-;;; program to crash horribly, without so much as a warning at\r
-;;; compile time. Sigh.)\r
-\r
-dsWSize equ 36+zlib1222add\r
-dsWMask equ 44+zlib1222add\r
-dsWindow equ 48+zlib1222add\r
-dsPrev equ 56+zlib1222add\r
-dsMatchLen equ 88+zlib1222add\r
-dsPrevMatch equ 92+zlib1222add\r
-dsStrStart equ 100+zlib1222add\r
-dsMatchStart equ 104+zlib1222add\r
-dsLookahead equ 108+zlib1222add\r
-dsPrevLen equ 112+zlib1222add\r
-dsMaxChainLen equ 116+zlib1222add\r
-dsGoodMatch equ 132+zlib1222add\r
-dsNiceMatch equ 136+zlib1222add\r
-\r
-\r
-;;; match686.asm -- Pentium-Pro-optimized version of longest_match()\r
-;;; Written for zlib 1.1.2\r
-;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>\r
-;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html\r
-;;;\r
-;;\r
-;; This software is provided 'as-is', without any express or implied\r
-;; warranty. In no event will the authors be held liable for any damages\r
-;; arising from the use of this software.\r
-;;\r
-;; Permission is granted to anyone to use this software for any purpose,\r
-;; including commercial applications, and to alter it and redistribute it\r
-;; freely, subject to the following restrictions:\r
-;;\r
-;; 1. The origin of this software must not be misrepresented; you must not\r
-;; claim that you wrote the original software. If you use this software\r
-;; in a product, an acknowledgment in the product documentation would be\r
-;; appreciated but is not required.\r
-;; 2. Altered source versions must be plainly marked as such, and must not be\r
-;; misrepresented as being the original software\r
-;; 3. This notice may not be removed or altered from any source distribution.\r
-;;\r
-\r
-;GLOBAL _longest_match, _match_init\r
-\r
-\r
-;SECTION .text\r
-\r
-;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)\r
-\r
-;_longest_match:\r
- IFDEF NOUNDERLINE\r
- longest_match proc near\r
- ELSE\r
- _longest_match proc near\r
- ENDIF\r
-.FPO (9, 4, 0, 0, 1, 0)\r
-\r
-;;; Save registers that the compiler may be using, and adjust esp to\r
-;;; make room for our stack frame.\r
-\r
- push ebp\r
- push edi\r
- push esi\r
- push ebx\r
- sub esp, LocalVarsSize\r
-\r
-;;; Retrieve the function arguments. ecx will hold cur_match\r
-;;; throughout the entire function. edx will hold the pointer to the\r
-;;; deflate_state structure during the function's setup (before\r
-;;; entering the main loop.\r
-\r
- mov edx, [deflatestate]\r
- mov ecx, [curmatch]\r
-\r
-;;; uInt wmask = s->w_mask;\r
-;;; unsigned chain_length = s->max_chain_length;\r
-;;; if (s->prev_length >= s->good_match) {\r
-;;; chain_length >>= 2;\r
-;;; }\r
-\r
- mov eax, [edx + dsPrevLen]\r
- mov ebx, [edx + dsGoodMatch]\r
- cmp eax, ebx\r
- mov eax, [edx + dsWMask]\r
- mov ebx, [edx + dsMaxChainLen]\r
- jl LastMatchGood\r
- shr ebx, 2\r
-LastMatchGood:\r
-\r
-;;; chainlen is decremented once beforehand so that the function can\r
-;;; use the sign flag instead of the zero flag for the exit test.\r
-;;; It is then shifted into the high word, to make room for the wmask\r
-;;; value, which it will always accompany.\r
-\r
- dec ebx\r
- shl ebx, 16\r
- or ebx, eax\r
- mov [chainlenwmask], ebx\r
-\r
-;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;\r
-\r
- mov eax, [edx + dsNiceMatch]\r
- mov ebx, [edx + dsLookahead]\r
- cmp ebx, eax\r
- jl LookaheadLess\r
- mov ebx, eax\r
-LookaheadLess: mov [nicematch], ebx\r
-\r
-;;; register Bytef *scan = s->window + s->strstart;\r
-\r
- mov esi, [edx + dsWindow]\r
- mov [window], esi\r
- mov ebp, [edx + dsStrStart]\r
- lea edi, [esi + ebp]\r
- mov [scan], edi\r
-\r
-;;; Determine how many bytes the scan ptr is off from being\r
-;;; dword-aligned.\r
-\r
- mov eax, edi\r
- neg eax\r
- and eax, 3\r
- mov [scanalign], eax\r
-\r
-;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?\r
-;;; s->strstart - (IPos)MAX_DIST(s) : NIL;\r
-\r
- mov eax, [edx + dsWSize]\r
- sub eax, MIN_LOOKAHEAD\r
- sub ebp, eax\r
- jg LimitPositive\r
- xor ebp, ebp\r
-LimitPositive:\r
-\r
-;;; int best_len = s->prev_length;\r
-\r
- mov eax, [edx + dsPrevLen]\r
- mov [bestlen], eax\r
-\r
-;;; Store the sum of s->window + best_len in esi locally, and in esi.\r
-\r
- add esi, eax\r
- mov [windowbestlen], esi\r
-\r
-;;; register ush scan_start = *(ushf*)scan;\r
-;;; register ush scan_end = *(ushf*)(scan+best_len-1);\r
-;;; Posf *prev = s->prev;\r
-\r
- movzx ebx, word ptr [edi]\r
- mov [scanstart], ebx\r
- movzx ebx, word ptr [edi + eax - 1]\r
- mov [scanend], ebx\r
- mov edi, [edx + dsPrev]\r
-\r
-;;; Jump into the main loop.\r
-\r
- mov edx, [chainlenwmask]\r
- jmp short LoopEntry\r
-\r
-align 4\r
-\r
-;;; do {\r
-;;; match = s->window + cur_match;\r
-;;; if (*(ushf*)(match+best_len-1) != scan_end ||\r
-;;; *(ushf*)match != scan_start) continue;\r
-;;; [...]\r
-;;; } while ((cur_match = prev[cur_match & wmask]) > limit\r
-;;; && --chain_length != 0);\r
-;;;\r
-;;; Here is the inner loop of the function. The function will spend the\r
-;;; majority of its time in this loop, and majority of that time will\r
-;;; be spent in the first ten instructions.\r
-;;;\r
-;;; Within this loop:\r
-;;; ebx = scanend\r
-;;; ecx = curmatch\r
-;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)\r
-;;; esi = windowbestlen - i.e., (window + bestlen)\r
-;;; edi = prev\r
-;;; ebp = limit\r
-\r
-LookupLoop:\r
- and ecx, edx\r
- movzx ecx, word ptr [edi + ecx*2]\r
- cmp ecx, ebp\r
- jbe LeaveNow\r
- sub edx, 00010000h\r
- js LeaveNow\r
-LoopEntry: movzx eax, word ptr [esi + ecx - 1]\r
- cmp eax, ebx\r
- jnz LookupLoop\r
- mov eax, [window]\r
- movzx eax, word ptr [eax + ecx]\r
- cmp eax, [scanstart]\r
- jnz LookupLoop\r
-\r
-;;; Store the current value of chainlen.\r
-\r
- mov [chainlenwmask], edx\r
-\r
-;;; Point edi to the string under scrutiny, and esi to the string we\r
-;;; are hoping to match it up with. In actuality, esi and edi are\r
-;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is\r
-;;; initialized to -(MAX_MATCH_8 - scanalign).\r
-\r
- mov esi, [window]\r
- mov edi, [scan]\r
- add esi, ecx\r
- mov eax, [scanalign]\r
- mov edx, 0fffffef8h; -(MAX_MATCH_8)\r
- lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]\r
- lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]\r
-\r
-;;; Test the strings for equality, 8 bytes at a time. At the end,\r
-;;; adjust edx so that it is offset to the exact byte that mismatched.\r
-;;;\r
-;;; We already know at this point that the first three bytes of the\r
-;;; strings match each other, and they can be safely passed over before\r
-;;; starting the compare loop. So what this code does is skip over 0-3\r
-;;; bytes, as much as necessary in order to dword-align the edi\r
-;;; pointer. (esi will still be misaligned three times out of four.)\r
-;;;\r
-;;; It should be confessed that this loop usually does not represent\r
-;;; much of the total running time. Replacing it with a more\r
-;;; straightforward "rep cmpsb" would not drastically degrade\r
-;;; performance.\r
-\r
-LoopCmps:\r
- mov eax, [esi + edx]\r
- xor eax, [edi + edx]\r
- jnz LeaveLoopCmps\r
- mov eax, [esi + edx + 4]\r
- xor eax, [edi + edx + 4]\r
- jnz LeaveLoopCmps4\r
- add edx, 8\r
- jnz LoopCmps\r
- jmp short LenMaximum\r
-LeaveLoopCmps4: add edx, 4\r
-LeaveLoopCmps: test eax, 0000FFFFh\r
- jnz LenLower\r
- add edx, 2\r
- shr eax, 16\r
-LenLower: sub al, 1\r
- adc edx, 0\r
-\r
-;;; Calculate the length of the match. If it is longer than MAX_MATCH,\r
-;;; then automatically accept it as the best possible match and leave.\r
-\r
- lea eax, [edi + edx]\r
- mov edi, [scan]\r
- sub eax, edi\r
- cmp eax, MAX_MATCH\r
- jge LenMaximum\r
-\r
-;;; If the length of the match is not longer than the best match we\r
-;;; have so far, then forget it and return to the lookup loop.\r
-\r
- mov edx, [deflatestate]\r
- mov ebx, [bestlen]\r
- cmp eax, ebx\r
- jg LongerMatch\r
- mov esi, [windowbestlen]\r
- mov edi, [edx + dsPrev]\r
- mov ebx, [scanend]\r
- mov edx, [chainlenwmask]\r
- jmp LookupLoop\r
-\r
-;;; s->match_start = cur_match;\r
-;;; best_len = len;\r
-;;; if (len >= nice_match) break;\r
-;;; scan_end = *(ushf*)(scan+best_len-1);\r
-\r
-LongerMatch: mov ebx, [nicematch]\r
- mov [bestlen], eax\r
- mov [edx + dsMatchStart], ecx\r
- cmp eax, ebx\r
- jge LeaveNow\r
- mov esi, [window]\r
- add esi, eax\r
- mov [windowbestlen], esi\r
- movzx ebx, word ptr [edi + eax - 1]\r
- mov edi, [edx + dsPrev]\r
- mov [scanend], ebx\r
- mov edx, [chainlenwmask]\r
- jmp LookupLoop\r
-\r
-;;; Accept the current string, with the maximum possible length.\r
-\r
-LenMaximum: mov edx, [deflatestate]\r
- mov dword ptr [bestlen], MAX_MATCH\r
- mov [edx + dsMatchStart], ecx\r
-\r
-;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;\r
-;;; return s->lookahead;\r
-\r
-LeaveNow:\r
- mov edx, [deflatestate]\r
- mov ebx, [bestlen]\r
- mov eax, [edx + dsLookahead]\r
- cmp ebx, eax\r
- jg LookaheadRet\r
- mov eax, ebx\r
-LookaheadRet:\r
-\r
-;;; Restore the stack and return from whence we came.\r
-\r
- add esp, LocalVarsSize\r
- pop ebx\r
- pop esi\r
- pop edi\r
- pop ebp\r
-\r
- ret\r
-; please don't remove this string !\r
-; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary!\r
- db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah\r
-\r
-\r
- IFDEF NOUNDERLINE\r
- longest_match endp\r
- ELSE\r
- _longest_match endp\r
- ENDIF\r
-\r
- IFDEF NOUNDERLINE\r
- match_init proc near\r
- ret\r
- match_init endp\r
- ELSE\r
- _match_init proc near\r
- ret\r
- _match_init endp\r
- ENDIF\r
-\r
-\r
-_TEXT ends\r
-end\r
+++ /dev/null
-\r
-Summary\r
--------\r
-This directory contains ASM implementations of the functions\r
-longest_match() and inflate_fast().\r
-\r
-\r
-Use instructions\r
-----------------\r
-Assemble using MASM, and copy the object files into the zlib source\r
-directory, then run the appropriate makefile, as suggested below. You can\r
-donwload MASM from here:\r
-\r
- http://www.microsoft.com/downloads/details.aspx?displaylang=en&FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64\r
-\r
-You can also get objects files here:\r
-\r
- http://www.winimage.com/zLibDll/zlib124_masm_obj.zip\r
-\r
-Build instructions\r
-------------------\r
-* With Microsoft C and MASM:\r
-nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj"\r
-\r
-* With Borland C and TASM:\r
-make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686c.obj+match686.obj+inffas32.obj"\r
-\r
#
# Usage:
# make -f win32/Makefile.bor
-# make -f win32/Makefile.bor LOCAL_ZLIB=-DASMV OBJA=match.obj OBJPA=+match.obj
# ------------ Borland C++ ------------
#
# make -fwin32/Makefile.gcc; make test testdll -fwin32/Makefile.gcc
#
-# To use the asm code, type:
-# cp contrib/asm?86/match.S ./match.S
-# make LOC=-DASMV OBJA=match.o -fwin32/Makefile.gcc
-#
# To install libz.a, zconf.h and zlib.h in the system directories, type:
#
# make install -fwin32/Makefile.gcc
#
SHARED_MODE=0
-#LOC = -DASMV
#LOC = -DZLIB_DEBUG -g
PREFIX =
# Usage:
# nmake -f win32/Makefile.msc (standard build)
# nmake -f win32/Makefile.msc LOC=-DFOO (nonstandard build)
-# nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" \
-# OBJA="inffas32.obj match686.obj" (use ASM code, x86)
-# nmake -f win32/Makefile.msc AS=ml64 LOC="-DASMV -DASMINF -I." \
-# OBJA="inffasx64.obj gvmat64.obj inffas8664.obj" (use ASM code, x64)
# The toplevel directory of the source tree.
#