\r
; gvmat64.asm -- Asm portion of the optimized longest_match for AMD64 (x86-64)\r
; Copyright (C) 1995-2005 Jean-loup Gailly, Brian Raiter and Gilles Vollant.\r
-; File written by Gilles Vollant, by modifiying the longest_match\r
-; from Jean-loup Gailly in deflate.c\r
-; and modifying asm686 with masm, optimised assembly code from Brian Raiter,\r
-; written 1998\r
+;\r
+; File written by Gilles Vollant, by converting to assembly the longest_match\r
+; from Jean-loup Gailly in deflate.c of zLib and infoZip zip.\r
+;\r
+; and by taking inspiration from asm686 with masm, optimised assembly code\r
+; from Brian Raiter, written 1998\r
+;\r
; http://www.zlib.net\r
; http://www.winimage.com/zLibDll\r
; http://www.muppetlabs.com/~breadbox/software/assembly.html\r
;\r
-; to compile this file, I use option\r
+; to compile this file for infozip Zip, I use option:\r
+; ml64.exe /Flgvmat64 /c /Zi /DINFOZIP gvmat64.asm\r
+;\r
+; to compile this file for zLib, I use option:\r
; ml64.exe /Flgvmat64 /c /Zi gvmat64.asm\r
-; with Microsoft Macro Assembler (x64) for AMD64\r
+; Be careful to adapt zlib1222add below to your version of zLib\r
+; (if you use a version of zLib before 1.0.4 or after 1.2.2.2, change\r
+; value of zlib1222add later)\r
;\r
-; ml64.exe is given with Visual Studio 2005, Windows 2003 server DDK\r
+; This file compiles with Microsoft Macro Assembler (x64) for AMD64\r
+;\r
+; ml64.exe is given with Visual Studio 2005 and Windows 2003 server DDK\r
;\r
; (you can get Windows 2003 server DDK with ml64 and cl for AMD64 from\r
; http://www.microsoft.com/whdc/devtools/ddk/default.mspx for low price)\r
;\r
-; Be carrefull to adapt zlib1222add below to your version of zLib\r
\r
\r
;uInt longest_match(s, cur_match)\r
;scanalign equ rsp + xx - LocalVarsSize ; dword-misalignment of string r13\r
;bestlen equ rsp + xx - LocalVarsSize ; size of best match so far -> r11d\r
;scan equ rsp + xx - LocalVarsSize ; ptr to string wanting match -> r9\r
- nicematch equ rsp + 16 - LocalVarsSize ; a good enough match size -> r14\r
+IFDEF INFOZIP\r
+ELSE\r
+ nicematch equ (rsp + 16 - LocalVarsSize) ; a good enough match size\r
+ENDIF\r
\r
save_rdi equ rsp + 24 - LocalVarsSize\r
save_rsi equ rsp + 32 - LocalVarsSize\r
; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").\r
; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").\r
\r
-zlib1222add equ 8\r
\r
+IFDEF INFOZIP\r
+\r
+_DATA SEGMENT\r
+COMM window_size:DWORD\r
+; WMask ; 7fff\r
+COMM window:BYTE:010040H\r
+COMM prev:WORD:08000H\r
+; MatchLen : unused\r
+; PrevMatch : unused\r
+COMM strstart:DWORD\r
+COMM match_start:DWORD\r
+; Lookahead : ignore\r
+COMM prev_length:DWORD ; PrevLen\r
+COMM max_chain_length:DWORD\r
+COMM good_match:DWORD\r
+COMM nice_match:DWORD\r
+prev_ad equ OFFSET prev\r
+window_ad equ OFFSET window\r
+nicematch equ nice_match\r
+_DATA ENDS\r
+WMask equ 07fffh\r
+\r
+ELSE\r
+\r
+ IFNDEF zlib1222add\r
+ zlib1222add equ 8\r
+ ENDIF\r
dsWSize equ 56+zlib1222add+(zlib1222add/2)\r
dsWMask equ 64+zlib1222add+(zlib1222add/2)\r
dsWindow equ 72+zlib1222add\r
dsGoodMatch equ 172+zlib1222add\r
dsNiceMatch equ 176+zlib1222add\r
\r
+window_size equ [ rcx + dsWSize]\r
+WMask equ [ rcx + dsWMask]\r
+window_ad equ [ rcx + dsWindow]\r
+prev_ad equ [ rcx + dsPrev]\r
+strstart equ [ rcx + dsStrStart]\r
+match_start equ [ rcx + dsMatchStart]\r
+Lookahead equ [ rcx + dsLookahead] ; 0ffffffffh on infozip\r
+prev_length equ [ rcx + dsPrevLen]\r
+max_chain_length equ [ rcx + dsMaxChainLen]\r
+good_match equ [ rcx + dsGoodMatch]\r
+nice_match equ [ rcx + dsNiceMatch]\r
+ENDIF\r
\r
; zlib build: parameter 1 in rcx (deflate state s), param 2 in rdx (cur match)\r
; infozip build: parameter 1 in rcx (cur match)\r
\r
; http://msdn.microsoft.com/library/en-us/kmarch/hh/kmarch/64bitAMD_8e951dd2-ee77-4728-8702-55ce4b5dd24a.xml.asp\r
;\r
; All registers must be preserved across the call, except for\r
-; rax, rcx, rdx, r8, r-9, r10, and r11, which are scratch.\r
+; rax, rcx, rdx, r8, r9, r10, and r11, which are scratch.\r
\r
\r
\r
\r
; this clears the high 32 bits of r8, which can be garbage in both r8 and rdx\r
\r
-\r
mov [save_rdi],rdi\r
mov [save_rsi],rsi\r
mov [save_rbx],rbx\r
mov [save_rbp],rbp\r
+IFDEF INFOZIP\r
+ mov r8d,ecx\r
+ELSE\r
mov r8d,edx\r
+ENDIF\r
mov [save_r12],r12\r
mov [save_r13],r13\r
; mov [save_r14],r14\r
;;; chain_length >>= 2;\r
;;; }\r
\r
- mov edi, [rcx + dsPrevLen]\r
- mov esi, [rcx + dsGoodMatch]\r
- mov eax, [rcx + dsWMask]\r
- mov ebx, [rcx + dsMaxChainLen]\r
+ mov edi, prev_length\r
+ mov esi, good_match\r
+ mov eax, WMask\r
+ mov ebx, max_chain_length\r
cmp edi, esi\r
jl LastMatchGood\r
shr ebx, 2\r
dec ebx\r
shl ebx, 16\r
or ebx, eax\r
- mov [chainlenwmask], ebx\r
\r
+;;; on zlib only\r
;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;\r
\r
- mov eax, [rcx + dsNiceMatch]\r
- mov r10d, [rcx + dsLookahead]\r
+IFDEF INFOZIP\r
+ mov [chainlenwmask], ebx\r
+; on infozip nice_match = [nice_match]\r
+ELSE\r
+ mov eax, nice_match\r
+ mov [chainlenwmask], ebx\r
+ mov r10d, Lookahead\r
cmp r10d, eax\r
cmovnl r10d, eax\r
mov [nicematch],r10d\r
-LookaheadLess:\r
+ENDIF\r
\r
;;; register Bytef *scan = s->window + s->strstart;\r
-\r
- mov r10, [rcx + dsWindow]\r
- mov ebp, [rcx + dsStrStart]\r
+ mov r10, window_ad\r
+ mov ebp, strstart\r
lea r13, [r10 + rbp]\r
\r
;;; Determine how many bytes the scan ptr is off from being\r
\r
;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?\r
;;; s->strstart - (IPos)MAX_DIST(s) : NIL;\r
-\r
- mov eax, [rcx + dsWSize]\r
+IFDEF INFOZIP\r
+ mov eax,07efah ; MAX_DIST = (WSIZE-MIN_LOOKAHEAD) (0x8000-(3+8+1))\r
+ELSE\r
+ mov eax, window_size\r
sub eax, MIN_LOOKAHEAD\r
+ENDIF\r
xor edi,edi\r
sub ebp, eax\r
\r
- mov r11d, [rcx + dsPrevLen]\r
+ mov r11d, prev_length\r
\r
cmovng ebp,edi\r
\r
;;; Posf *prev = s->prev;\r
\r
movzx r12d,word ptr [r9]\r
- movzx ebx, word ptr [r9 + r11 - 1]\r
- mov rdi, [rcx + dsPrev]\r
+ movzx ebx, word ptr [r9 + r11 - 1]\r
+\r
+ mov rdi, prev_ad\r
\r
;;; Jump into the main loop.\r
\r
\r
prefetcht1 [rsi+rdx]\r
prefetcht1 [rdi+rdx]\r
+\r
+\r
;;; Test the strings for equality, 8 bytes at a time. At the end,\r
-;;; adjust edx so that it is offset to the exact byte that mismatched.\r
+;;; adjust rdx so that it is offset to the exact byte that mismatched.\r
;;;\r
;;; We already know at this point that the first three bytes of the\r
;;; strings match each other, and they can be safely passed over before\r
;;; starting the compare loop. So what this code does is skip over 0-3\r
;;; bytes, as much as necessary in order to dword-align the rdi\r
-;;; pointer. (esi will still be misaligned three times out of four.)\r
+;;; pointer. (rsi will still be misaligned three times out of four.)\r
;;;\r
;;; It should be confessed that this loop usually does not represent\r
;;; much of the total running time. Replacing it with a more\r
;;; straightforward "rep cmpsb" would not drastically degrade\r
;;; performance.\r
\r
-;LoopCmps:\r
-; mov eax, [rsi + rdx]\r
-; xor eax, [rdi + rdx]\r
-; jnz LeaveLoopCmps\r
-; mov eax, [rsi + rdx + 4]\r
-; xor eax, [rdi + rdx + 4]\r
-; jnz LeaveLoopCmps4\r
-; add rdx, 8\r
-; jnz LoopCmps\r
-; jmp LenMaximum\r
-;LeaveLoopCmps4: add rdx, 4\r
-;LeaveLoopCmps: test eax, 0000FFFFh\r
-; jnz LenLower\r
-; add rdx, 2\r
-; shr eax, 16\r
-;LenLower: sub al, 1\r
-; adc rdx, 0\r
-\r
\r
LoopCmps:\r
mov rax, [rsi + rdx]\r
\r
lea rsi,[r10+r11]\r
\r
- mov rdi, [rcx + dsPrev]\r
+ mov rdi, prev_ad\r
mov edx, [chainlenwmask]\r
jmp LookupLoop\r
\r
\r
LongerMatch:\r
mov r11d, eax\r
- mov [rcx + dsMatchStart], r8d\r
+ mov match_start, r8d\r
cmp eax, [nicematch]\r
jge LeaveNow\r
\r
lea rsi,[r10+rax]\r
\r
movzx ebx, word ptr [r9 + rax - 1]\r
- mov rdi, [rcx + dsPrev]\r
+ mov rdi, prev_ad\r
mov edx, [chainlenwmask]\r
jmp LookupLoop\r
\r
\r
LenMaximum:\r
mov r11d,MAX_MATCH\r
- mov [rcx + dsMatchStart], r8d\r
+ mov match_start, r8d\r
\r
;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;\r
;;; return s->lookahead;\r
\r
LeaveNow:\r
- mov eax, [rcx + dsLookahead]\r
+IFDEF INFOZIP\r
+ mov eax,r11d\r
+ELSE\r
+ mov eax, Lookahead\r
cmp r11d, eax\r
cmovng eax, r11d\r
-\r
+ENDIF\r
\r
;;; Restore the stack and return from whence we came.\r
\r
\r
ret 0\r
; please don't remove this string !\r
-; Your can freely use gvmat32 in any free or commercial app if you don't remove the string in the binary!\r
+; You can freely use gvmat64 in any free or commercial app,\r
+; but it is far better not to remove the string from the binary!\r
db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998, converted to amd 64 by Gilles Vollant 2005",0dh,0ah,0\r
longest_match ENDP\r
\r