1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
\r
3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
\r
5 ; * Copyright (C) 1995-2003 Mark Adler
\r
6 ; * For conditions of distribution and use, see copyright notice in zlib.h
\r
8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
\r
9 ; * Please use the copyright conditions above.
\r
11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
\r
12 ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
\r
13 ; * the moment. I have successfully compiled and tested this code with gcc2.96,
\r
14 ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
\r
15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
\r
16 ; * enabled. I will attempt to merge the MMX code into this version. Newer
\r
17 ; * versions of this and inffast.S can be found at
\r
18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
\r
20 ; * 2005 : modification by Gilles Vollant
\r
22 ; For Visual C++ 4.x and higher and ML 6.x and higher
\r
23 ; ml.exe is in directory \MASM611C of Win95 DDK
\r
24 ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
\r
25 ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
\r
28 ; compile with command line option
\r
29 ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
\r
31 ; if you define NO_GZIP (see inflate.h), compile with
\r
32 ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
\r
35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
\r
36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with addition of dmax and head
\r
37 ; in inflate_state in inflate.h)
\r
; Numeric mode codes that _inflate_fast loads into edx and stores at
; [state+mode_state] when it leaves the decode loop (TYPE on end of block,
; BAD on any of the three error paths).
; Three TYPE/BAD pairs are listed with two distinct value sets (11/26, 3/17).
; NOTE(review): presumably each pair lives in a different conditional-assembly
; branch (the file header mentions the /DNO_GUNZIP option); the IF/ELSE lines
; are not visible in this excerpt -- confirm against the full file.
42 INFLATE_MODE_TYPE equ 11
\r
43 INFLATE_MODE_BAD equ 26
\r
46 INFLATE_MODE_TYPE equ 11
\r
47 INFLATE_MODE_BAD equ 26
\r
49 INFLATE_MODE_TYPE equ 3
\r
50 INFLATE_MODE_BAD equ 17
\r
58 ;;;GLOBAL _inflate_fast
\r
; Module name directive.
67 name inflate_fast_x86
\r
; Flag accessed as a dword by _inflate_fast: it is compared against 2 and
; written with 2 or 3. The data definition that follows this label is not
; visible in this excerpt.
71 inflate_fast_use_mmx:
\r
; Embedded credit string.
80 db 'Fast decoding Code from Chris Anderson'
\r
; Error message strings. Each label is loaded into ecx by the matching
; L_invalid_* error path of _inflate_fast before it jumps to
; L_update_stream_state.
; NOTE(review): any terminating zero bytes are not visible in this excerpt.
84 invalid_literal_length_code_msg:
\r
85 db 'invalid literal/length code'
\r
89 invalid_distance_code_msg:
\r
90 db 'invalid distance code'
\r
94 invalid_distance_too_far_msg:
\r
95 db 'invalid distance too far back'
\r
; Byte offsets of the inflate state fields that _inflate_fast reads and
; writes; each is added to a register holding the state pointer.
; Every offset except mode_state includes zlib1222sup, whose definition is
; not visible in this excerpt (the file header describes it as 0 for
; zlib 1.2.2.1 and lower, 8 for later versions).
; NOTE(review): these are hard-coded against the C struct layout in
; inflate.h -- they must be re-checked whenever that struct changes.
136 mode_state equ 0 ;/* state->mode */
\r
137 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
\r
138 write_state equ (36+4+zlib1222sup) ;/* state->write */
\r
139 window_state equ (40+4+zlib1222sup) ;/* state->window */
\r
140 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
\r
141 bits_state equ (48+4+zlib1222sup) ;/* state->bits */
\r
142 lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
\r
143 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
\r
144 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
\r
145 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
\r
150 ;GLOBAL inflate_fast_use_mmx
\r
155 ; GLOBAL inflate_fast_use_mmx:object
\r
156 ;.size inflate_fast_use_mmx, 4
\r
; _inflate_fast -- 32-bit x86 MASM implementation of the fast decoding
; routine (the file header describes it as a hand-tuned version of inffast.c).
; NOTE(review): this view of the procedure is an excerpt; most instructions
; between the lines shown here are not visible, so the comments below only
; describe what the visible lines themselves do.
161 _inflate_fast proc near
\r
; Frame-pointer-omission debug record. Per the MASM .FPO operand order
; (cdwLocals, cdwParams, cbProlog, cbRegs, fUseBP, cbFrame): 16 dwords of
; locals, 4 dwords of parameters, fUseBP = 1.
162 .FPO (16, 4, 0, 0, 1, 0)
\r
; --- Load decoder state; edi addresses the state structure here. ---
; ecx is reused for three consecutive loads; the instructions that consume
; each value in between are not visible in this excerpt.
207 mov eax, [edi+lencode_state]
\r
208 mov ecx, [edi+distcode_state]
\r
214 mov ecx, [edi+lenbits_state]
\r
220 mov ecx, [edi+distbits_state]
\r
; Window fields: eax = state->wsize, ecx = state->write, edx = state->window.
225 mov eax, [edi+wsize_state]
\r
226 mov ecx, [edi+write_state]
\r
227 mov edx, [edi+window_state]
\r
; ebp = state->hold, ebx = state->bits.
233 mov ebp, [edi+hold_state]
\r
234 mov ebx, [edi+bits_state]
\r
; --- Choice of decode path, driven by inflate_fast_use_mmx. ---
; The conditional branch that follows this compare is not visible.
271 cmp dword ptr [inflate_fast_use_mmx],2
\r
; Toggle bit 21 (0200000h) of the dword on top of the stack.
; NOTE(review): presumably a pushed EFLAGS image whose ID bit is flipped to
; test for CPUID support; the surrounding pushfd/popfd are not visible.
281 xor dword ptr [esp],0200000h
\r
; Record the detection result in the flag: 2 on this path, 3 on the other.
; NOTE(review): 2 appears to select the MMX path (see the later compares
; against 2) -- confirm against the full file.
309 mov dword ptr [inflate_fast_use_mmx],2
\r
310 jmp L_check_mmx_pop
\r
312 mov dword ptr [inflate_fast_use_mmx],3
\r
; --- Non-MMX decode path. ---
324 ja L_get_length_code
\r
; Fetch a 4-byte table entry: base in ecx, index in edx.
337 mov eax, [ecx+edx*4]
\r
356 jnz L_test_for_length_base
\r
371 L_test_for_length_base:
\r
378 jz L_test_for_second_level_length
\r
382 jae L_add_bits_to_len
\r
409 ja L_get_distance_code
\r
418 L_get_distance_code:
\r
; Fetch a 4-byte table entry: base in ecx, index in edx.
422 mov eax, [ecx+edx*4]
\r
435 jz L_test_for_second_level_dist
\r
437 jz L_check_dist_one
\r
439 jae L_add_bits_to_dist
\r
450 L_add_bits_to_dist:
\r
508 L_test_for_second_level_length:
\r
514 jnz L_test_for_end_of_block
\r
; Second lookup: 4-byte entry at base edx, index eax.
522 mov eax, [edx+eax*4]
\r
526 L_test_for_second_level_dist:
\r
532 jnz L_invalid_distance_code
\r
; Second lookup: 4-byte entry at base edx, index eax.
540 mov eax, [edx+eax*4]
\r
; Unsigned "below" result of a preceding compare (not visible) takes the
; distance-too-far error path.
552 jb L_invalid_distance_too_far
\r
; A nonzero stack local at [esp+48] takes the wrap-around path.
; NOTE(review): which local lives at esp+48 is not shown in this excerpt.
555 cmp dword ptr [esp+48],0
\r
556 jne L_wrap_around_window
\r
580 L_wrap_around_window:
\r
584 jbe L_contiguous_in_window
\r
609 L_contiguous_in_window:
\r
; --- MMX decode path. ---
; Load the dword stack locals at [esp+0] and [esp+4] into mm4 and mm5.
643 movd mm4,dword ptr [esp+0]
\r
645 movd mm5,dword ptr [esp+4]
\r
656 ja L_get_length_code_mmx
\r
; Load 4 bytes from [esi] into mm7.
; NOTE(review): esi is presumably the input pointer -- confirm.
659 movd mm7,dword ptr [esi]
\r
665 L_get_length_code_mmx:
\r
; Fetch a 4-byte table entry: base in ebx, index in eax.
669 mov eax, [ebx+eax*4]
\r
677 jnz L_test_for_length_base_mmx
\r
692 L_test_for_length_base_mmx:
\r
698 jz L_test_for_second_level_length_mmx
\r
700 jz L_decode_distance_mmx
\r
; Mask ecx with the dword at inflate_fast_mask[eax] (table defined outside
; this excerpt).
706 and ecx, [inflate_fast_mask+eax*4]
\r
709 L_decode_distance_mmx:
\r
713 ja L_get_dist_code_mmx
\r
; Load 4 bytes from [esi] into mm7.
716 movd mm7,dword ptr [esi]
\r
722 L_get_dist_code_mmx:
\r
; Fetch a 4-byte table entry: base in ebx, index in eax.
727 mov eax, [ebx+eax*4]
\r
738 jz L_test_for_second_level_dist_mmx
\r
740 jz L_check_dist_one_mmx
\r
742 L_add_bits_to_dist_mmx:
\r
; Mask ecx with the dword at inflate_fast_mask[eax].
747 and ecx, [inflate_fast_mask+eax*4]
\r
750 L_check_window_mmx:
\r
756 jb L_clip_window_mmx
\r
775 jmp L_while_test_mmx
\r
778 L_check_dist_one_mmx:
\r
; Both branches below target L_check_window_mmx; the instructions between
; them are not visible in this excerpt.
780 jne L_check_window_mmx
\r
782 je L_check_window_mmx
\r
796 jmp L_while_test_mmx
\r
799 L_test_for_second_level_length_mmx:
\r
801 jnz L_test_for_end_of_block
\r
; Mask ecx via inflate_fast_mask[eax], then use it as the index of a second
; 4-byte lookup at base ebx.
806 and ecx, [inflate_fast_mask+eax*4]
\r
808 mov eax, [ebx+ecx*4]
\r
812 L_test_for_second_level_dist_mmx:
\r
814 jnz L_invalid_distance_code
\r
; Same masking step; the second lookup here uses eax as the table base (the
; load that sets eax to that base is not visible).
819 and ecx, [inflate_fast_mask+eax*4]
\r
822 mov eax, [eax+ecx*4]
\r
834 jb L_invalid_distance_too_far
\r
; Same test of the stack local at [esp+48] as in the non-MMX path.
837 cmp dword ptr [esp+48],0
\r
838 jne L_wrap_around_window_mmx
\r
861 L_wrap_around_window_mmx:
\r
865 jbe L_contiguous_in_window_mmx
\r
889 L_contiguous_in_window_mmx:
\r
911 jmp L_while_test_mmx
\r
; --- Exit paths, reached from both the MMX and non-MMX code. ---
; Each loads a mode code into edx (the error paths also load a message label
; into ecx) and jumps to L_update_stream_state.
913 L_invalid_distance_code:
\r
919 mov ecx, invalid_distance_code_msg
\r
920 mov edx,INFLATE_MODE_BAD
\r
921 jmp L_update_stream_state
\r
923 L_test_for_end_of_block:
\r
; Zero result of a preceding test (not visible) means an invalid
; literal/length code; otherwise edx = INFLATE_MODE_TYPE.
930 jz L_invalid_literal_length_code
\r
933 mov edx,INFLATE_MODE_TYPE
\r
934 jmp L_update_stream_state
\r
936 L_invalid_literal_length_code:
\r
942 mov ecx, invalid_literal_length_code_msg
\r
943 mov edx,INFLATE_MODE_BAD
\r
944 jmp L_update_stream_state
\r
946 L_invalid_distance_too_far:
\r
951 mov ecx, invalid_distance_too_far_msg
\r
952 mov edx,INFLATE_MODE_BAD
\r
953 jmp L_update_stream_state
\r
955 L_update_stream_state:
\r
; Store the mode code chosen above into state->mode.
; NOTE(review): eax is presumably reloaded with the state pointer by lines
; not visible here.
963 mov [eax+mode_state],edx
\r
; Skip straight to L_update_next_in unless inflate_fast_use_mmx == 2.
969 cmp dword ptr [inflate_fast_use_mmx],2
\r
970 jne L_update_next_in
\r
; Write ebx back to state->bits.
986 mov [edx+bits_state],ebx
\r
; Second check of the flag against 2 (its branch is not visible).
1012 cmp dword ptr [inflate_fast_use_mmx],2
\r
; Write ebp back to state->hold.
1027 mov [edx+hold_state],ebp
\r
1034 jbe L_last_is_smaller
\r
1040 L_last_is_smaller:
\r
1053 jbe L_end_is_smaller
\r
1077 _inflate_fast endp
\r