wok view syslinux/stuff/iso2exe/unlzma.S @ rev 17491

syslinux/iso2exe: speedup crc
author Pascal Bellard <pascal.bellard@slitaz.org>
date Fri Jan 02 21:46:16 2015 +0100 (2015-01-02)
parents
children
line source
1 // #define RC_NORMALIZE if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
2 //
3 // #define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
4 // #define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
5 // #define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
6 //
7 //#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
8 // { UpdateBit0(p); mi <<= 1; A0; } else \
9 // { UpdateBit1(p); mi = (mi + mi) + 1; A1; }
10 //
11 // #define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)
12 //
13 // #define RangeDecoderBitTreeDecode(probs, numLevels, res) \
14 // { int i = numLevels; res = 1; \
15 // do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
16 // res -= (1 << numLevels); }
17 /*
18 * Compression with : lzma e src dst -eos -pb2 -lp0 -lc3
19 */
21 #define PROP_PB 2
22 #define PROP_LP 0
23 #define PROP_LC 3
24 #define PROPS (PROP_LC+(PROP_LP*9)+(PROP_PB*45))
26 // static const Byte *Buffer;
27 // static UInt32 bound, Code, Range;
29 /*
30 * Buffer register DS:SI (input pointer, advanced by lodsb in RC_LOAD_BYTE)
31 * all variables are based on ws=ss:bp: locals live at the negative offsets below, probability words at (bp + 2*index)
32 */
34 rep0 = -4 // long at bp-4, initialised to 1
35 rep1 = rep0-4 // long at bp-8, initialised to 1
36 rep2 = rep0-8 // long at bp-12, initialised to 1
37 rep3 = rep0-12 // long at bp-16, initialised to 1
38 state = -17 // byte, 0..11; top byte of the dword at bp-20, so it starts at 0
39 posState = state-1 // byte at bp-18, 0..15; starts at 0
40 posState2 = posState-1 // byte at bp-19, 0..15; starts at 0, only used when PROP_LP != 0
41 scratched = rep0-16 // byte at bp-20 = 1 (low byte of the dword that also holds state/posState/posState2)
42 Code = -24 // long at bp-24: range decoder code value
43 outStream = -28 // long at bp-28: output address set up at entry
44 nowPos = outStream // long, same slot as outStream: it is incremented after each output byte
45 Range = Code-8 // long at bp-32: range decoder range value
46 #define LOCALS 32
48 // int LzmaDecode(CLzmaDecoderState *vs,
49 // const unsigned char *inStream,
50 // unsigned char *outStream)
51 // {
52 // CProb *p = vs->Probs;
53 // SizeT nowPos = 0;
54 // #define posStateMask = (1 << (vs->Properties.pb)) - 1;
55 // #define literalPosMask = (1 << (vs->Properties.lp)) - 1;
56 // int lc = vs->Properties.lc, state = 0, len = 0;
57 // UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
58 //
59 // {
60 // UInt32 i, numProbs = Literal /*1846*/
61 // + ((UInt32)LZMA_LIT_SIZE /*768*/ << (lc + vs->Properties.lp));
62 // for (i = 0; i < numProbs; i++) p[i] = kBitModelTotal /*2048*/ >> 1;
64 #define WS (1846+(768<<(PROP_LC+PROP_LP)))
65 #if (WS+WS+LOCALS) >= 65000
66 /* MAX WS = (1846+(768<<(8+4))) > 3MB! The table of WS 16-bit entries plus the locals must fit the stack segment. */
67 #error invalid (lc,lp,pb) : out of memory
68 #endif
70 ws1 = WS // number of 16-bit probability entries pushed at entry
71 ws2 = ws1*2 // size of the probability table in bytes; lzdone uses it to release the stack
72 ws = ws2+LOCALS+15 // NOTE(review): looks like the total work-space size with 15 bytes of slack; not referenced in this chunk - confirm
74 #ifndef FLAT16
75 #define INC incl
76 #else
77 #define INC incw
78 #endif
79 #ifndef FLAT32
80 #define AX %ax
81 #define BX %bx
82 #define CX %cx
83 #define DX %dx
84 #define SI %si
85 #define DI %di
86 #define BP %bp
87 #define SP %sp
88 #define CWD cwd
89 #else
90 #define AX %eax
91 #define BX %ebx
92 #define CX %ecx
93 #define DX %edx
94 #define SI %esi
95 #define DI %edi
96 #define BP %ebp
97 #define SP %esp
98 #define CWD cdq
99 #endif
100 /*
101 * LzmaDecode: set up the work space on the stack, prime the range decoder, then fall into lzdmainlp
102 #ifndef FLAT32
103 * input ds:si=inStream, es:di=outStream
104 * output outStream[], ds:si, es:di
105 .code 16
106 #else
107 * input esi=inStream, edi=outStream
108 * output outStream[], esi, edi
109 .code 32
110 #endif
111 */
113 mov $ws1, CX // cx = number of probability entries
114 lzd1:
115 pushw $2048/2 // p[i] = kBitModelTotal >> 1
116 loop lzd1
117 mov SP, BP // bp = base of the probability table; the locals are pushed below it
118 movb $((LOCALS+3)/4)*2, %cl // cx = 16 dwords (cx is 0 after the loop above); NOTE(review): that is 64 bytes, twice LOCALS - confirm intended
119 initlocals:
120 pushl $1 // every local dword starts as 1: rep0..rep3 = 1, state = posState = posState2 = 0, scratched = 1
121 loop initlocals
123 #if !defined(FLAT32) && !defined(FLAT16)
124 movb $4, %cl // cx = 4 (cx is 0 after initlocals)
125 movw %es, %bx
126 shrw %cl, %bx // bh = es >> 12 = bits 16..19 of es*16
127 movw %es, %dx
128 shlw %cl, %dx // dx = low word of es*16
129 addw %dx, %di // di = low word of es*16 + di, CF = carry into bit 16
130 movw %di, outStream(%bp) // mov leaves CF untouched for the adcb below
131 adcb %bh, outStream+2(%bp) // outStream = linear address of es:di (byte 2 was 0)
132 incw %cx // cx = 5 bytes to load into Code
133 #else
134 movb $5, %cl // cx = 5 bytes to load into Code
135 mov DI, outStream(BP) // flat model: the offset is the address
136 #endif
138 // Byte previousByte = 0;
139 xor BX, BX // bl = previousByte = 0
141 // #define RC_INIT(buffer)
142 // Buffer = buffer; Code = 0; Range = 0xFFFFFFFF;
143 // { int i; for(i=0; i<5; i++) { Code = (Code<<8) | RC_READ_BYTE; }}
144 // }
145 // RC_INIT(inStream);
147 add $13, SI // skip header
148 setrep:
149 call RC_LOAD_BYTE // Range <<= 8; Code = (Code << 8) | next input byte
150 decb Range(BP) // low byte 0x00 -> 0xFF, so Range ends up as 0xFFFFFFFF after the 5 passes
151 loop setrep
153 lzdmainlp:
154 // while(1) {
155 // CProb *prob;
156 // int posState = (int)((nowPos) & posStateMask);
157 //
158 // prob = p + IsMatch /*0*/ + (state << kNumPosBitsMax /*4*/) + posState;
159 // if (Bit0(prob)) { /* char */
161 xor DX, DX // dx = IsMatch base (0); also keeps dh = 0 for the code below
162 call Bit1state // Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
163 mov $state, DI // di = offset of state; mov leaves CF from Bit1 untouched
164 jc lzdstring // bit 1: match, bit 0: literal (ax = 0 here)
166 // prob = p + Literal /*1846*/ + (LZMA_LIT_SIZE /*768*/ *
167 // ((((nowPos) & literalPosMask) << lc) + (previousByte >> (8 - lc))));
169 #if PROP_LC != 0
170 shrb $8-PROP_LC, %bl // bl = previousByte >> (8 - lc)
171 #endif
173 #if PROP_LP != 0
174 movb posState2(BP), %dl // dx = nowPos & literalPosMask (dh = 0)
175 shl $PROP_LC, DX // dx = (nowPos & literalPosMask) << lc
176 movb $0, %bh // bx = previousByte >> (8 - lc)
177 add DX, BX // bx = literal context index; it must land in bx because mul below multiplies by bx and overwrites dx
178 #endif
180 movb $3, %ah // ax = 0x0300 = LZMA_LIT_SIZE (Bit1 returned ax = 0)
181 mul BX // ax = 768 * bx; a stray bh only reaches dx (dx = 3*bh), which is discarded
182 add $1846, AX // ax = Literal + 768 * context
184 // int symbol = 1;
186 CWD // dx = 0 (ax stays below 0x8000 given the WS size check)
187 inc DX // symbol = 1
188 xchg AX, CX // save prob
190 // if (state >= kNumLitStates /*7*/) { /* previous was string */
191 // if (state < 4) state = 0;
193 lzd6z:
194 subb $3, (BP, DI) // state -= 3 (run twice for states 10 and 11)
196 // if (state < 4) state = 0;
198 jnc lzd6
199 movb %dh, (BP, DI) // %dh = 0
201 lzd6:
202 // else if (state < 10) state -= 3;
204 cmpb $10-3, (BP, DI)
206 // else state -= 6;
208 jnb lzd6z // still >= 7: the old state was 10 or 11, subtract 3 again
209 cmpb $7-3-1, (BP, DI)
210 jbe lzd3 // new state <= 3 means the old state was < 7: plain literal
212 // int matchByte = outStream[nowPos - rep0];
214 call DicoRep02ESDI // %bl = outStream[nowPos - rep0];
216 // do {
217 // int bit;
218 // CProb *probLit;
219 // matchByte <<= 1; bit = (matchByte & 0x100);
221 movb $1, %bh // bh = 1 selects the matched tables (+0x100 entries); bh = 0 the plain one
222 lzd4:
223 shlb $1, %bl // matchByte <<= 1
224 sbb DI, DI // save bit=C
226 // probLit = prob + 0x100 + bit + symbol;
228 mov CX, AX // restore prob
229 adcb %bh, %ah // + bit + 0x100
231 // RC_GET_BIT2(probLit, symbol, if (bit) break, if (!bit) break)
233 call Bit1axdx // C,%ax = Bit1(prob+%ax)
234 rclb $1, %dl // symbol <<= 1; symbol |= C
235 jc lzd5 // if symbol >= 0x100
236 cmp DI, AX // both are 0 or -1: compare the match bit with the decoded bit
237 jz lzd4 // if bit == Bit1(prob+%ax)
239 // } while (symbol < 0x100);
240 // }
241 lzd3:
242 // while (symbol < 0x100) {
243 // CProb *probLit = prob + symbol;
244 // RC_GET_BIT(probLit, symbol)
245 // }
247 xor BX, BX // bl = bh = 0: lzd4 now yields bit = 0 and offset 0, i.e. prob + symbol
248 jmp lzd4
249 lzd5:
251 // outStream[nowPos++] = previousByte = (Byte)symbol;
253 xchg AX, DX // al = decoded byte, ah = 0
254 call outchar // %bl = outStream[nowPos++] = %al;
255 jmp lzdmainlp
257 // }
259 lzdstring:
260 mov $1, CX // cl = index of the rep slot to fetch (rep1 unless changed below)
262 // else { /* string */
263 // prob = p + IsRep /*192*/ + state;
265 movb $192, %dl // dh is still 0 from lzdmainlp
266 addb (BP, DI), %dl // di = offset of state on entry
267 mov $rep0, DI // di = offset of rep0 for the rest of the match path
269 // if (Bit0(prob)) {
271 call Bit1dx // Bit1(prob)
272 jc lzd8 // bit 1: repeated match
274 // rep3 = rep2; rep2 = rep1; rep1 = rep0;
275 // state = (state < kNumLitStates /*7*/) ? 0 : 3;
277 stc // with cl = 1 the adcb at lzd11a gives cl = 3: shift rep0..rep2 into rep1..rep3
279 // prob = p + LenCoder /*818*/;
281 mov $818, DX // dh != 0 also tells the code after rotreplp that this is a new match
283 // }
285 jmp lzd11a
287 // else {
288 lzd8:
289 // prob += kNumStates /*12*/;
290 // if (Bit0(prob)) {
291 call Bit1dx12 // prob += 12; Bit1(prob)
292 jc lzd11
293 // prob = p + IsRep0Long /*240*/ + (state << kNumPosBitsMax /*4*/)
294 // + posState;
295 movb $240, %dl // dh=0
297 // if (Bit0(prob)) {
299 call Bit1state // Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
300 jc lzd12 // bit 1: rep0 match with a decoded length (cx is still 1)
302 // // if (nowPos == 0) return LZMA_RESULT_DATA_ERROR;
303 // state = (state < kNumLitStates /*7*/) ? 9 : 11;
305 movb $9, %dl // state base for a one-byte rep0 copy
307 // len++; goto string;
308 jmp lzd13string // ax = 0
309 // }
310 // }
311 // else {
312 lzd11:
313 // UInt32 distance = rep1;
314 // prob += kNumStates /*12*/;
315 // if (!Bit0(prob)) {
317 call Bit1dx12 // prob += 12; Bit1(prob)
318 jnc lzd11z // bit 0: cl stays 1, use rep1
320 // prob += kNumStates /*12*/;
321 // if (Bit0(prob)) distance = rep2;
323 call Bit1dx12 // prob += 12; Bit1(prob)
324 lzd11a:
325 adcb %cl, %cl // cl = 2 + C: rep2 (C = 0) or rep3 (C = 1)
327 // else { distance = rep3; rep3 = rep2; }
328 // rep2 = rep1;
329 // }
330 // rep1 = rep0; rep0 = distance;
332 lzd11z:
333 shl $2, CX // 8->32 bits
334 sub CX, DI // &rep[cx]
335 movl (BP, DI), %eax // eax = rep[cl], the selected distance
336 rotreplp:
337 movb 4(BP, DI), %bl // byte-wise move of rep[0..cl-1] into rep[1..cl]; clobbers bl
338 movb %bl, (BP, DI)
339 inc DI
340 loop rotreplp // leaves cx = 0 and di back at rep0
341 testb %dh, %dh // dh != 0 only when dx = LenCoder (new match)
342 jnz lzd10 // new match: rep0 is filled in later from the position slot
343 movl %eax, (BP, DI) // rep0 = distance
345 // }
346 lzd12:
347 // state = (state < kNumLitStates /*7*/) ? 8 : 11;
349 movb $0x08, %cl // cx = 8: state base for a rep match, pushed at lzd10
351 // prob = p + RepLenCoder /*1332*/;
353 mov $1332, DX
355 // }
356 lzd10:
357 push CX // cx = 0 (new match) or 8 (rep match): state base, popped into dx once the length is known
359 // { /* get len */
360 // int numBits, offset;
361 // CProb *probLen = prob + LenChoice /*0*/;
362 // numBits = kLenNumLowBits /*3*/;
364 movb $8, %cl // numBits : 3,3,8
366 // if (Bit0(probLen)) {
368 call Bit1dx // Bit1(prob)
369 xchg AX, BX // bx = -choice (0 or -1)
370 inc DX // dx = prob + LenChoice2; inc leaves CF alone for the jnc
371 jnc lzd15 // bx=0
373 // probLen = prob + LenLow/*2*/ + (posState << kLenNumLowBits/*3*/);
374 // offset = 0;
375 // }
376 // else {
377 // probLen = prob + LenChoice2 /*1*/;
379 call Bit1dx // Bit1(prob)
380 add AX, BX // bx = -1 (mid) or -2 (high); CF = second choice bit
382 #if PROP_PB != 0
383 inc AX // ah=0
384 #endif
385 jc lzd16 // %ax=0, %bx=-2
386 lzd15:
387 #if PROP_PB != 0
388 movb $8, %al
389 mulb posState(BP) // ax = posState << 3
390 #endif
392 // if (Bit0(probLen)) {
393 // probLen = prob + LenMid/*130*/ + (posState << kLenNumMidBits/*3*/);
395 movb $3, %cl // numBits : 3,3,8
396 lzd16:
397 #if PROP_PB != 0
398 add $2-128-1, AX // probLen : 2,130,258
399 #else
400 mov $2-128-1, AX // probLen : 2,130,258
401 #endif
402 add DX, AX // dx = prob + 1 here, which the -1 above cancels
403 mov $-8+1, DX // offset : 0,8,16
404 lzdargslp:
405 add $8, DX // one pass per table: dx = offset + 1
406 add $128, AX // ax = prob + 2, + 130 or + 258 (plus posState << 3 for low/mid)
407 inc BX
408 jle lzdargslp // leave with bx=1
410 // offset = kLenNumLowSymbols /*8*/;
411 // //numBits = kLenNumMidBits /*3*/;
412 // }
413 // else {
414 // probLen = prob + LenHigh /*258*/;
415 // offset = kLenNumLowSymbols /*8*/ + kLenNumMidSymbols /*8*/;
416 // numBits = kLenNumHighBits /*8*/;
417 // }
418 // }
419 // RangeDecoderBitTreeDecode(probLen, numBits, len); len += offset;
421 push DX // keep the offset across the call (RangeDecoder returns bits in dh)
422 call RangeDecoder // %ax=probs, %cx=numLevels, %ax=res
423 pop DX
424 add DX, AX // offset
425 pop DX // dx = state base pushed at lzd10: 0 (new match) or 8 (rep match)
426 lzd13string:
427 push AX // save len; popped into cx at lzd19
429 // state = (state < kNumLitStates /*7*/) ? dl : dl|3;
431 movb $7, %cl // cx = kNumLitStates (ch is 0 on every path reaching here)
432 cmpb %cl, state(BP)
433 jb new_state
434 orb $3, %dl // previous was a match: 0 -> 3, 8 -> 11, 9 -> 11
435 new_state:
436 movb %dl, state(BP)
438 // } /* get len */
439 // if (state < 4) {
441 cmpb $4-1, %dl
442 ja lzd19 // rep matches (dl >= 8) skip the distance decoding; ZF is clear for the jz at lzd19
444 // int posSlot;
445 // state += kNumLitStates /*7*/;
447 addb %cl, state(BP)
449 // prob = p + PosSlot /*432*/ + (((len < kNumLenToPosStates /*4*/) ?
450 // len : kNumLenToPosStates - 1) << kNumPosSlotBits /*6*/);
452 cmp $4+1, AX // ax is len + 1 here (the offset was biased by 1), hence the +1 in both constants
453 jb lzd21
454 mov $3+1, AX
456 lzd21:
458 dec CX // cx = 6
459 shl %cl, AX
460 add $432-64, AX // -64 removes the (1 << 6) added by the len + 1 bias
462 // RangeDecoderBitTreeDecode(prob, kNumPosSlotBits /*6*/, posSlot);
464 call RangeDecoder // %ax=probs, %cx=numLevels, %ax=res
466 // if (posSlot >= kStartPosModelIndex /*4*/) {
467 // int numDirectBits = ((posSlot >> 1) - 1);
469 #ifndef FLAT32
470 movw %cx, 2(%bp, %di) // %cx = 0
471 #endif
472 mov AX, (BP, DI) // rep0 = posSlot (di = offset of rep0)
473 mov AX, CX
474 shrw $1, CX
475 dec CX // cx = numDirectBits = (posSlot >> 1) - 1
476 cmpb $4, %al
477 jb lzd22 // posSlot < 4: rep0 = posSlot
479 // rep0 = (2 | ((UInt32)posSlot & 1));
481 andb %bl, (BP, DI) // %bx=1
482 orb $2, (BP, DI)
484 // if (posSlot < kEndPosModelIndex /*14*/) {
486 cmpb $14, %al
487 jnb lzd23
489 // rep0 <<= numDirectBits;
491 neg AX // ax = -posSlot
492 shll %cl, (BP, DI)
493 add (BP, DI), AX // ax = low word of rep0 - posSlot
495 // prob = p + SpecPos /*688*/ + rep0 - posSlot - 1;
497 add $687, AX
498 jmp lzd24 // cx = numDirectBits levels for the reverse bit tree
500 // }
501 // else {
502 lzd23:
503 // numDirectBits -= kNumAlignBits /*4*/;
504 // do {
505 // RC_NORMALIZE; Range >>= 1; rep0 <<= 1;
506 // if (Code >= Range) { Code -= Range; rep0 |= 1; }
508 lzd23z:
509 call RC_NORMALIZE
510 shrl $1, Range(BP)
511 movl Range(BP), %eax
512 cmpl Code(BP), %eax // flags from Range - Code
513 ja lzd25 // Range > Code: CF = 0, shift a 0 into rep0
514 subl %eax, Code(BP)
515 stc // shift a 1 into rep0
516 lzd25:
517 rcll $1, (BP, DI)
519 // } while (--numDirectBits != 0);
521 cmpb $4+1, %cl
522 loopne lzd23z // stops when cl was 5, leaving cx = 4 = kNumAlignBits
524 // prob = p + Align /* 802 */; numDirectBits = kNumAlignBits /*4*/;
525 // rep0 <<= numDirectBits;
527 shll %cl, (BP, DI)
528 mov $802, AX
529 // }
531 lzd24:
532 call RangeDecoder // %ax=probs, %cx=numLevels, %ax=res
534 // {
535 // int i = 1, mi = 1;
536 // do {
537 // CProb *prob3 = prob + mi;
538 // RC_GET_BIT2(prob3, mi, ; , rep0 |= i);
540 orb %dh, (BP, DI) // update rep0 with DirectBits
542 // i <<= 1;
543 // } while(--numDirectBits != 0);
544 // }
545 // } else rep0 = posSlot;
546 lzd22:
547 // if (++rep0 == (UInt32)(0)) break; /* EOF */
549 incl (BP, DI) // ZF = 1 when rep0 wraps to 0: end-of-stream marker
551 lzd19:
552 pop CX // cx = len saved at lzd13string; pop leaves ZF untouched
553 jz lzdone // only taken when the incl above wrapped to 0
555 // }
556 // len += kMatchMinLen;/*2*/
558 inc CX // cx = number of bytes to copy
560 // string: // if (rep0 > nowPos) return LZMA_RESULT_DATA_ERROR;
561 // do {
562 lzd13z:
563 // previousByte = outStream[nowPos - rep0];
564 // outStream[nowPos++] = previousByte;
566 call outcharDico // %bl = outStream[nowPos++] = outStream[nowPos - rep0]
568 // } while(--len != 0);
570 loop lzd13z // relies on outcharDico leaving cx untouched
572 // } /* char/string */
573 // }
575 jmp lzdmainlp
577 lzdone:
578 // //RC_NORMALIZE;
579 // //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
580 // return LZMA_RESULT_OK;
581 call Dico2ESDI // set es & di (rep0 = 0)
582 lea ws2(BP), SP // dealloc
583 ret // returns to the caller of LzmaDecode: sp is back above the probability table
584 // }
586 // bl = outStream[nowPos - rep0];
588 /*
589 * output es:di (di only in the flat models), bl
590 * scratch bh, flags (cl is listed as scratch in older notes but the code below never touches it)
591 */
593 DicoRep02ESDI:
594 stc // CF = 1 asks Dico2ESDI to subtract rep0; falls through
596 // bl = outStream[nowPos];
598 /*
599 * input CF = 0: address nowPos, CF = 1: address nowPos - rep0
600 * output es:di (di only in the flat models), bl; scratch bh, flags
601 */
603 Dico2ESDI:
604 #if !defined(FLAT32) && !defined(FLAT16)
605 movl nowPos(%bp), %ebx // ebx = linear output address; mov leaves CF untouched
606 jnc Dico2ESDIz
607 subl rep0(%bp), %ebx
608 Dico2ESDIz:
609 movw %bx, %di // di = low 16 bits of the address
610 xorw %bx, %bx
611 shrl $4, %ebx // bx = (address & 0xFFFF0000) >> 4: segment for the upper bits
612 movw %bx, %es
613 movb %es:(%di), %bl
614 #else
615 mov nowPos(BP), DI
616 jnc Dico2ESDIz
617 sub rep0(BP), DI
618 Dico2ESDIz:
619 movb (DI), %bl
620 #endif
621 ret
623 outcharDico:
625 // bl = outStream[nowPos++] = outStream[nowPos - rep0]
627 /*
628 * output es:di, bl
629 * update nowPos
630 * scratch ax, dx, bh, flags (cx is left untouched: the copy loop at lzd13z counts on it)
631 */
633 call DicoRep02ESDI // %bl = outStream[nowPos - rep0]
634 xchg AX, BX // al = byte to copy; falls through into outchar
635 outchar:
637 // bl = outStream[nowPos++] = previousByte = al;
639 /*
640 * input al = byte to emit
641 * output bl = that byte (bh = former ah); update nowPos, posState (and posState2 when PROP_LP != 0)
642 * scratch ax, dx, bh, di, flags
643 */
645 clc // CF = 0: Dico2ESDI must address nowPos, not nowPos - rep0
646 call Dico2ESDI // es:di -> outStream[nowPos]
647 stosb // store al there
648 xchg AX, BX // previous byte
650 // int posState = (int)((nowPos) & posStateMask);
652 #if PROP_PB != 0 && PROP_LP != 0
653 addw $0x0101, posState2(BP) // bump posState2 (low byte) and posState (high byte) in one word add
654 andw $(((1 << PROP_PB) -1)<<8)+((1 << PROP_LP) -1), posState2(BP) // word-sized AND: the pb mask lands on posState, the lp mask on posState2
655 #else
656 # if PROP_PB != 0
657 incb posState(BP)
658 andb $((1 << PROP_PB) -1), posState(BP)
659 # endif
660 # if PROP_LP != 0
661 incb posState2(BP)
662 andb $((1 << PROP_LP) -1), posState2(BP)
663 # endif
664 #endif
665 INC nowPos(BP)
666 ret
668 //
669 // #define RC_NORMALIZE if (Range < kTopValue)
670 // { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
672 /*
673 * update Range, Code, ds:si
674 * scratch flags (ax is saved and restored by RC_LOAD_BYTE)
675 */
677 RC_NORMALIZE:
678 cmpb $0, Range+3(BP) // top byte of Range non-zero: Range >= 1 << 24, nothing to do
679 jne RC_NORMALIZE_1
680 RC_LOAD_BYTE:
681 push AX // callers (Bit1) keep a probability index in ax
682 shll $8, Range(BP)
683 shll $8, Code(BP) // low byte of Code is now 0
684 #if !defined(FLAT32) && !defined(FLAT16)
685 testw %si, %si
686 jns RC_READ_BYTE // si < 0x8000: no adjustment needed
687 movw %ds, %ax
688 incw %ax // ds += 1 paragraph (16 bytes) ...
689 movw %ax, %ds
690 addw $-16, %si // ... and si -= 16: same ds:si address, si kept away from wrapping
691 RC_READ_BYTE:
692 #endif
693 lodsb
694 movb %al, Code(BP) // Code = (Code << 8) | input byte
695 pop AX
696 RC_NORMALIZE_1:
697 ret
699 // Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
701 Bit1state:
702 movb $16, %al
703 mulb state(BP) // ax = state << 4
704 # if PROP_PB != 0
705 addb posState(BP), %al
706 # endif
707 Bit1axdx:
708 add DX, AX // ax = probability index = ax + dx
709 jmp Bit1
711 // prob += 12; Bit1(prob)
713 Bit1dx12:
714 add $12, DX // the advanced index stays in dx for the caller
715 Bit1dx:
716 mov DX, AX
718 // static int Bit1(CProb *p)
720 Bit1:
721 /*
722 * input ax=p (index of the 16-bit probability entry, not a byte offset)
723 * output C = decoded bit, ax = -C (0 or -1); every other register is restored by popal
724 * update bound, Range, Code, ds:si
725 * scratch flags
726 */
728 // {
729 // RC_NORMALIZE;
731 call RC_NORMALIZE // may update %si; %ax is preserved (RC_LOAD_BYTE saves and restores it)
733 pushal
735 xchg AX, DI
736 add DI, DI // short *
739 // bound = (Range>>kNumBitModelTotalBits /*11*/) * *(p);
741 movl Range(BP), %eax
742 shrl $11, %eax
743 movzwl (BP, DI), %edx
744 mull %edx // eax = bound (edx is overwritten with the high half)
746 // if (Code < bound) {
748 cmpl Code(BP), %eax // flags from bound - Code
749 jbe Bit1_1 // bound <= Code: the bit is 1
751 // Range = bound;
753 movl %eax, Range(BP) // CF is still 0 from the compare
755 // *(p) += (kBitModelTotal /*2048*/ - *(p)) >> kNumMoveBits /*5*/;
757 movw $2048, %ax
759 // return 0;
761 jmp Bit1_2
763 // }
764 // else {
766 Bit1_1:
768 // Range -= bound; Code -= bound;
770 subl %eax, Range(BP)
771 subl %eax, Code(BP)
773 // *(p) -= (*(p)) >> kNumMoveBits /*5*/;
775 movw $31, %ax // (31 - p) >> 5 with an arithmetic shift equals -(p >> 5)
777 // return 1;
779 stc
780 Bit1_2:
781 pushf // keep CF (the decoded bit) across the probability update
782 subw (BP, DI), %ax // ax = 2048 - p (bit 0) or 31 - p (bit 1)
783 sarw $5, %ax
784 addw %ax, (BP, DI)
785 popf
786 popal
787 sbb AX, AX // ax = -C, CF unchanged
789 // }
790 // }
792 ret
794 RangeDecoder:
796 /*
797 * input ax=probs cx=numLevels (<= 8: the length decoder calls this with cx=8) bx=1
798 * output ax=res (backward), dh (forward: first decoded bit in bit 0)
799 * update bound, Range, Code, ds:si
800 * scratch flags, cx=0, dl; bx is restored
801 */
803 push BX
805 // { int i = numLevels; res = 1;
806 mov BX, DX // res = 1
808 // do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0);
810 RangeDecoder_1:
811 push AX // keep probs: Bit1 returns -C in ax
812 call Bit1axdx // C,%ax = Bit1(prob+%ax)
813 rclb $1, %dl // res <<= 1; res |= C
814 andb %bl, %al // current bit
815 orb %al, %bh // store in bh
816 shlb $1, %bl // update max
817 pop AX
818 loop RangeDecoder_1
820 // res -= (1 << numLevels); }
822 xchg AX, BX // move bh to dh
823 xchg AX, DX // and dl to al
824 sub %dl, %al // sub max
825 pop BX
826 ret