wok diff syslinux/stuff/iso2exe/unlzma.S @ rev 13713

syslinux/iso2exe: add loram support
author Pascal Bellard <pascal.bellard@slitaz.org>
date Tue Dec 18 16:09:07 2012 +0100 (2012-12-18)
parents
children
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/syslinux/stuff/iso2exe/unlzma.S	Tue Dec 18 16:09:07 2012 +0100
     1.3 @@ -0,0 +1,826 @@
     1.4 +// #define RC_NORMALIZE if (Range < kTopValue) { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
     1.5 +//
     1.6 +// #define IfBit0(p) RC_NORMALIZE; bound = (Range >> kNumBitModelTotalBits) * *(p); if (Code < bound)
     1.7 +// #define UpdateBit0(p) Range = bound; *(p) += (kBitModelTotal - *(p)) >> kNumMoveBits;
     1.8 +// #define UpdateBit1(p) Range -= bound; Code -= bound; *(p) -= (*(p)) >> kNumMoveBits;
     1.9 +//
    1.10 +//#define RC_GET_BIT2(p, mi, A0, A1) IfBit0(p) \
    1.11 +//  { UpdateBit0(p); mi <<= 1; A0; } else \
    1.12 +//  { UpdateBit1(p); mi = (mi + mi) + 1; A1; }
    1.13 +//
    1.14 +// #define RC_GET_BIT(p, mi) RC_GET_BIT2(p, mi, ; , ;)
    1.15 +//
    1.16 +// #define RangeDecoderBitTreeDecode(probs, numLevels, res) \
    1.17 +//  { int i = numLevels; res = 1; \
    1.18 +//  do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); \
    1.19 +//  res -= (1 << numLevels); }
    1.20 +/*
    1.21 + * Compress with: lzma e src dst -eos -pb2 -lp0 -lc3 (must match PROP_* below, the stream header is skipped)
    1.22 + */
    1.23 +
    1.24 +#define PROP_PB 2
    1.25 +#define PROP_LP 0
    1.26 +#define PROP_LC 3
    1.27 +#define PROPS (PROP_LC+(PROP_LP*9)+(PROP_PB*45))
    1.28 +
    1.29 +// static const Byte *Buffer;
    1.30 +// static UInt32 bound, Code, Range;
    1.31 +
    1.32 +/*
    1.33 + * Buffer register DS:SI
    1.34 + * all var based ws=ss:bp
    1.35 + */
    1.36 +
    1.37 +rep0		=	-4		// long, distance used for outStream[nowPos - rep0]
    1.38 +rep1		=	rep0-4		// long, older distance (see rotreplp)
    1.39 +rep2		=	rep0-8		// long, older distance (see rotreplp)
    1.40 +rep3		=	rep0-12		// long, oldest distance (see rotreplp)
    1.41 +state		=	-17		// byte, 0..11
    1.42 +posState 	=	state-1		// byte, nowPos & ((1 << PROP_PB) - 1), updated in outchar
    1.43 +posState2 	=	posState-1	// byte, nowPos & ((1 << PROP_LP) - 1), only updated when PROP_LP != 0
    1.44 +scratched	=	rep0-16		// byte = 1 (low byte of the long that also holds posState2/posState/state)
    1.45 +Code		=	-24		// long, range coder Code
    1.46 +outStream	=	-28		// long, output position (linear address when neither FLAT16 nor FLAT32)
    1.47 +nowPos		=	outStream	// long, same slot as outStream
    1.48 +Range		=	Code-8		// long, range coder Range
    1.49 +#define LOCALS		32
    1.50 +
    1.51 +// int LzmaDecode(CLzmaDecoderState *vs,
    1.52 +//     const unsigned char *inStream, 
    1.53 +//     unsigned char *outStream)
    1.54 +// {
    1.55 +//   CProb *p = vs->Probs;
    1.56 +//   SizeT nowPos = 0;
    1.57 +//   #define posStateMask = (1 << (vs->Properties.pb)) - 1;
    1.58 +//   #define literalPosMask = (1 << (vs->Properties.lp)) - 1;
    1.59 +//   int lc = vs->Properties.lc, state = 0, len = 0;
    1.60 +//   UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1;
    1.61 +// 
    1.62 +//   {
    1.63 +//     UInt32 i, numProbs = Literal /*1846*/
    1.64 +// 	    + ((UInt32)LZMA_LIT_SIZE /*768*/ << (lc + vs->Properties.lp));
    1.65 +//     for (i = 0; i < numProbs; i++) p[i] = kBitModelTotal /*2048*/ >> 1;
    1.66 +
    1.67 +#define WS (1846+(768<<(PROP_LC+PROP_LP)))
    1.68 +#if (WS+WS+LOCALS) >= 65000
    1.69 +/* WS = number of 16-bit probabilities; MAX WS = (1846+(768<<(8+4))) > 3M entries! */
    1.70 +#error invalid (lc,lp,pb) : out of memory
    1.71 +#endif
    1.72 +
    1.73 +ws1	=	WS		// probability count, loop counter at lzd1
    1.74 +ws2	=	ws1*2		// size of the probability array in bytes, released at lzdone
    1.75 +ws	=	ws2+LOCALS+15	// NOTE(review): not referenced in the code visible here
    1.76 +
    1.77 +#ifndef FLAT16	/* INC is the instruction used to advance nowPos in outchar */
    1.78 +#define INC	incl
    1.79 +#else	/* FLAT16 */
    1.80 +#define INC	incw
    1.81 +#endif	/* FLAT16 */
    1.82 +#ifndef FLAT32	/* 16-bit register names */
    1.83 +#define	AX	%ax
    1.84 +#define	BX	%bx
    1.85 +#define	CX	%cx
    1.86 +#define	DX	%dx
    1.87 +#define	SI	%si
    1.88 +#define	DI	%di
    1.89 +#define	BP	%bp
    1.90 +#define	SP	%sp
    1.91 +#define CWD	cwd
    1.92 +#else	/* FLAT32: 32-bit register names */
    1.93 +#define	AX	%eax
    1.94 +#define	BX	%ebx
    1.95 +#define	CX	%ecx
    1.96 +#define	DX	%edx
    1.97 +#define	SI	%esi
    1.98 +#define	DI	%edi
    1.99 +#define	BP	%ebp
   1.100 +#define	SP	%esp
   1.101 +#define CWD	cdq
   1.102 +#endif	/* FLAT32 */
   1.103 +/*
   1.104 + * LzmaDecode:
   1.105 +#ifndef FLAT32
   1.106 + *   input   ds:si=inStream, es:di=outStream
   1.107 + *   output  outStream[], ds:si, es:di
   1.108 + 	.code 16
   1.109 +#else
   1.110 + *   input   esi=inStream, edi=outStream
   1.111 + *   output  outStream[], esi, edi
   1.112 + 	.code 32
   1.113 +#endif
   1.114 + */
   1.115 + 
   1.116 +	mov	$ws1, CX		// cx = number of probabilities to allocate
   1.117 +lzd1:
   1.118 +	pushw	$2048/2		// p[i] = kBitModelTotal /*2048*/ >> 1, allocated on the stack
   1.119 +	loop	lzd1
   1.120 +	mov	SP, BP		// BP = base of the probabilities; locals live at negative offsets
   1.121 +	movb	$((LOCALS+3)/4)*2, %cl	// cx = 0 after the loop above, so this sets the whole count
   1.122 +initlocals:
   1.123 +	pushl	$1		// every local long = 1: rep0..rep3 = 1, Code = Range = 1, state = posState = 0
   1.124 +	loop	initlocals
   1.125 +
   1.126 +#if !defined(FLAT32) && !defined(FLAT16)
   1.127 +	movb	$4, %cl
   1.128 +	movw	%es, %bx
   1.129 +	shrw	%cl, %bx		// bh = es >> 12 (bits 16..19 of es*16)
   1.130 +	movw	%es, %dx
   1.131 +	shlw	%cl, %dx		// dx = low 16 bits of es*16
   1.132 +	addw	%dx, %di
   1.133 +	movw	%di, outStream(%bp)	// low word of the linear output address
   1.134 +	adcb	%bh, outStream+2(%bp)	// high part plus carry (that byte was 0)
   1.135 +	incw	%cx		// cx = 5 bytes to preload below
   1.136 +#else
   1.137 +	movb	$5, %cl		// cx = 5 bytes to preload below
   1.138 +	mov	DI, outStream(BP)
   1.139 +#endif
   1.140 +
   1.141 +//   Byte previousByte = 0;
   1.142 +	xor	BX, BX
   1.143 +
   1.144 +// #define RC_INIT(buffer) 
   1.145 +//    Buffer = buffer; Code = 0; Range = 0xFFFFFFFF; 
   1.146 +//    { int i; for(i=0; i<5; i++) { Code = (Code<<8) | RC_READ_BYTE; }}
   1.147 +//   }
   1.148 +//   RC_INIT(inStream);
   1.149 +
   1.150 +	add	$13, SI		// skip header
   1.151 +setrep:
   1.152 +	call	RC_LOAD_BYTE	// Range <<= 8; Code = (Code << 8) | next input byte
   1.153 +	decb	Range(BP)	// low byte 0x00 -> 0xFF, so Range (initially 1) ends as 0xFFFFFFFF
   1.154 +	loop	setrep		// 5 bytes: the initial 1 in Code is shifted out
   1.155 +
   1.156 +lzdmainlp:
   1.157 +//   while(1) {
   1.158 +//     CProb *prob;
   1.159 +//     int posState = (int)((nowPos) & posStateMask);
   1.160 +// 
   1.161 +//     prob = p + IsMatch /*0*/ + (state << kNumPosBitsMax /*4*/) + posState;
   1.162 +//     if (Bit0(prob)) { /* char */
   1.163 +
   1.164 +	xor	DX, DX
   1.165 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.166 +	mov	$state, DI	// (BP, DI) now addresses state; mov leaves C untouched
   1.167 +	jc	lzdstring
   1.168 +
   1.169 +//       prob = p + Literal /*1846*/ + (LZMA_LIT_SIZE /*768*/ * 
   1.170 +// 	((((nowPos) & literalPosMask) << lc) + (previousByte >> (8 - lc))));
   1.171 +
   1.172 +#if PROP_LC != 0
   1.173 +	shrb	$8-PROP_LC, %bl	// bl = previousByte >> (8 - lc)
   1.174 +#endif
   1.175 +
   1.176 +#if PROP_LP != 0
   1.177 +	movb	posState2(BP), %dl
   1.178 +	shl	$PROP_LC, DX
   1.179 +	movb	$0, %bh
   1.180 +	add	BX, DX		// NOTE(review): the sum lands in DX but the mul below uses BX - confirm before enabling PROP_LP
   1.181 +#endif
   1.182 +
   1.183 +	movb	$3, %ah		// ax = 0x0300 = 768 (Bit1 returned ax = 0 on this path)
   1.184 +	mul	BX		// ax = 768 * bx (LZMA_LIT_SIZE * literal context)
   1.185 +	add	$1846, AX	// ax = prob index of this literal coder
   1.186 +
   1.187 +//       int symbol = 1;
   1.188 +
   1.189 +	CWD			// dx = sign extension of ax, i.e. 0 while ax < 0x8000
   1.190 +	inc	DX		// symbol = 1
   1.191 +	xchg	AX, CX		// save prob
   1.192 +
   1.193 +//       if (state >= kNumLitStates /*7*/) { /* previous was string */
   1.194 +//       (the state update below is done first, the test above follows it)
   1.195 +
   1.196 +lzd6z:
   1.197 +	subb	$3, (BP, DI)	// state -= 3
   1.198 +
   1.199 +//       if (state < 4) state = 0;
   1.200 +
   1.201 +	jnc	lzd6
   1.202 +	movb	%dh, (BP, DI)	// %dh = 0
   1.203 +
   1.204 +lzd6:
   1.205 +//       else if (state < 10) state -= 3;
   1.206 +
   1.207 +	cmpb	$10-3, (BP, DI)	// still >= 7 means the old state was >= 10
   1.208 +	
   1.209 +//       else state -= 6;
   1.210 +
   1.211 +	jnb	lzd6z		// subtract 3 a second time
   1.212 +	cmpb	$7-3-1, (BP, DI)	// new state <= 3 <=> old state < kNumLitStates /*7*/
   1.213 +	jbe	lzd3		// previous was a literal: plain literal decode
   1.214 +	
   1.215 +//         int matchByte = outStream[nowPos - rep0];
   1.216 +
   1.217 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0];
   1.218 +	
   1.219 +//         do {
   1.220 +//           int bit;
   1.221 +//           CProb *probLit;
   1.222 +//           matchByte <<= 1; bit = (matchByte & 0x100);
   1.223 +
   1.224 +	movb	$1, %bh			// bh = 1: matched mode, adds 0x100 to prob below
   1.225 +lzd4:
   1.226 +	shlb	$1, %bl			// matchByte <<= 1
   1.227 +	sbb	DI, DI			// save bit=C
   1.228 +
   1.229 +//           probLit = prob + 0x100 + bit + symbol;
   1.230 +
   1.231 +	mov	CX, AX			// restore prob
   1.232 +	adcb	%bh, %ah		// + bit + 0x100
   1.233 +	
   1.234 +//           RC_GET_BIT2(probLit, symbol, if (bit) break, if (!bit) break)
   1.235 +
   1.236 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
   1.237 +	rclb	$1, %dl			// symbol <<= 1; symbol |= C
   1.238 +	jc	lzd5			// if symbol >= 0x100
   1.239 +	cmp	DI, AX			// both are 0 or -1
   1.240 +	jz	lzd4			// if bit == Bit1(prob+%ax)
   1.241 +
   1.242 +//         } while (symbol < 0x100);
   1.243 +//       }
   1.244 +lzd3:
   1.245 +//       while (symbol < 0x100) {
   1.246 +//         CProb *probLit = prob + symbol;
   1.247 +//         RC_GET_BIT(probLit, symbol)
   1.248 +//       }
   1.249 +
   1.250 +	xor	BX, BX			// bh = bl = 0: lzd4 now adds nothing to prob
   1.251 +	jmp	lzd4
   1.252 +lzd5:
   1.253 +
   1.254 +//       outStream[nowPos++] = previousByte = (Byte)symbol;
   1.255 +
   1.256 +	xchg	AX, DX		// al = symbol
   1.257 +	call	outchar		// %bl = outStream[nowPos++] = %al;
   1.258 +	jmp	lzdmainlp
   1.259 +
   1.260 +//     }
   1.261 +
   1.262 +lzdstring:
   1.263 +	mov	$1, CX		// cx = 1: rep index, grown by adcb at lzd11a
   1.264 +
   1.265 +//     else { /* string */
   1.266 +//       prob = p + IsRep /*192*/ + state;
   1.267 +
   1.268 +	movb	$192, %dl	// dh = 0 (DX was cleared at lzdmainlp)
   1.269 +	addb	(BP, DI), %dl	// (BP, DI) = state
   1.270 +	mov	$rep0, DI	// (BP, DI) now addresses rep0
   1.271 +
   1.272 +//       if (Bit0(prob)) {
   1.273 +
   1.274 +	call	Bit1dx		// Bit1(prob)
   1.275 + 	jc	lzd8
   1.276 +
   1.277 +//         rep3 = rep2; rep2 = rep1; rep1 = rep0;
   1.278 +//         state = (state < kNumLitStates /*7*/) ? 0 : 3;
   1.279 +
   1.280 +	stc			// C=1 so that adcb at lzd11a gives cl = 3 (rotate all reps)
   1.281 +
   1.282 +//         prob = p + LenCoder /*818*/;
   1.283 +
   1.284 +	mov	$818, DX	// dh != 0 also marks "new match" for the testb below
   1.285 +
   1.286 +//       }
   1.287 +
   1.288 +	jmp	lzd11a
   1.289 +
   1.290 +//       else {
   1.291 +lzd8:
   1.292 +//         prob += kNumStates /*12*/;
   1.293 +//         if (Bit0(prob)) {
   1.294 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.295 +	jc	lzd11
   1.296 +//           prob = p + IsRep0Long /*240*/ + (state << kNumPosBitsMax /*4*/) 
   1.297 +// 		   + posState;
   1.298 +	movb	$240, %dl	// dh=0
   1.299 +
   1.300 +//           if (Bit0(prob)) {
   1.301 +
   1.302 +	call	Bit1state	// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.303 +	jc	lzd12
   1.304 +
   1.305 +//             // if (nowPos == 0) return LZMA_RESULT_DATA_ERROR;
   1.306 +//             state = (state < kNumLitStates /*7*/) ? 9 : 11;
   1.307 +
   1.308 +	movb	$9, %dl		// state base 9, turned into 11 by orb $3 at lzd13string
   1.309 +
   1.310 +//             len++; goto string;
   1.311 +	jmp	lzd13string	// ax = 0
   1.312 +//           }
   1.313 +//         }
   1.314 +//         else {
   1.315 +lzd11:
   1.316 +//           UInt32 distance = rep1;
   1.317 +//           prob += kNumStates /*12*/;
   1.318 +//           if (!Bit0(prob)) {
   1.319 +
   1.320 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.321 +	jnc	lzd11z		// cx = 1: distance = rep1
   1.322 +
   1.323 +//             prob += kNumStates /*12*/;
   1.324 +//             if (Bit0(prob)) distance = rep2;
   1.325 +
   1.326 +	call	Bit1dx12	// prob += 12; Bit1(prob)
   1.327 +lzd11a:
   1.328 +	adcb	%cl, %cl	// cl = 2 + C: 2 -> rep2, 3 -> rep3
   1.329 +	
   1.330 +//             else { distance = rep3; rep3 = rep2; }
   1.331 +//             rep2 = rep1;
   1.332 +//           }
   1.333 +//           rep1 = rep0; rep0 = distance;
   1.334 +
   1.335 +lzd11z:
   1.336 +	shl	$2, CX		// 8->32 bits
   1.337 +	sub	CX, DI		// &rep[cx]
   1.338 +	movl	(BP, DI), %eax	// eax = selected distance
   1.339 +rotreplp:
   1.340 +	movb	4(BP, DI), %bl	// byte-wise move of each younger rep into the next older slot
   1.341 +	movb	%bl, (BP, DI)
   1.342 +	inc	DI
   1.343 +	loop	rotreplp	// ends with DI = rep0 and cx = 0
   1.344 +	testb	%dh, %dh	// dh != 0 only when DX = 818 (new match)
   1.345 +	jnz	lzd10		// new match: rep0 is decoded later
   1.346 +	movl	%eax, (BP, DI)	// rep0 = distance
   1.347 +
   1.348 +//         }
   1.349 +lzd12:
   1.350 +//         state = (state < kNumLitStates /*7*/) ? 8 : 11;
   1.351 +
   1.352 +	movb	$0x08, %cl	// state base 8, turned into 11 by orb $3 at lzd13string
   1.353 +
   1.354 +//         prob = p + RepLenCoder /*1332*/;
   1.355 +
   1.356 +	mov	$1332, DX
   1.357 +
   1.358 +//       }
   1.359 +lzd10:
   1.360 +	push	CX		// state base: CX = 0 for a new match, 8 when coming through lzd12
   1.361 +
   1.362 +//       { /* get len */
   1.363 +//         int numBits, offset;
   1.364 +//         CProb *probLen = prob + LenChoice /*0*/;
   1.365 +//         numBits = kLenNumLowBits /*3*/;
   1.366 +
   1.367 +	movb	$8, %cl		// numBits : 3,3,8
   1.368 +
   1.369 +//         if (Bit0(probLen)) {
   1.370 +
   1.371 +	call	Bit1dx		// Bit1(prob)
   1.372 +	xchg	AX, BX		// bx = 0 or -1 (C is not modified by xchg/inc)
   1.373 +	inc	DX		// dx = prob + LenChoice2 /*1*/
   1.374 +	jnc	lzd15		// bx=0
   1.375 +
   1.376 +//           probLen = prob + LenLow/*2*/ + (posState << kLenNumLowBits/*3*/);
   1.377 +//           offset = 0;
   1.378 +//         }
   1.379 +//         else {
   1.380 +//           probLen = prob + LenChoice2 /*1*/;
   1.381 +
   1.382 +	call	Bit1dx		// Bit1(prob)
   1.383 +	add	AX, BX		// bx = -1 or -2; C set only when both bits were 1
   1.384 +
   1.385 +#if PROP_PB != 0
   1.386 +	inc	AX		// ah=0
   1.387 +#endif
   1.388 +	jc	lzd16		// %ax=0, %bx=-2 
   1.389 +lzd15:
   1.390 +#if PROP_PB != 0
   1.391 +	movb	$8, %al
   1.392 +	mulb	posState(BP)	// ax = posState << 3
   1.393 +#endif
   1.394 +
   1.395 +//           if (Bit0(probLen)) {
   1.396 +//             probLen = prob + LenMid/*130*/ + (posState << kLenNumMidBits/*3*/);
   1.397 +
   1.398 +	movb	$3, %cl		// numBits : 3,3,8
   1.399 +lzd16:
   1.400 +#if PROP_PB != 0
   1.401 +	add	$2-128-1, AX	// probLen : 2,130,258
   1.402 +#else
   1.403 +	mov	$2-128-1, AX	// probLen : 2,130,258
   1.404 +#endif
   1.405 +	add	DX, AX		// dx = prob + 1, hence the -1 above
   1.406 +	mov	$-8+1, DX	// offset  : 0,8,16 (kept biased by +1)
   1.407 +lzdargslp:
   1.408 +	add	$8, DX
   1.409 +	add	$128, AX
   1.410 +	inc	BX		// runs 1, 2 or 3 times for bx = 0, -1, -2
   1.411 +	jle	lzdargslp	// leave with bx=1
   1.412 +
   1.413 +//             offset = kLenNumLowSymbols /*8*/;
   1.414 +//             //numBits = kLenNumMidBits /*3*/;
   1.415 +//           }
   1.416 +//           else {
   1.417 +//             probLen = prob + LenHigh /*258*/;
   1.418 +//             offset = kLenNumLowSymbols /*8*/ + kLenNumMidSymbols /*8*/;
   1.419 +//             numBits = kLenNumHighBits /*8*/;
   1.420 +//           }
   1.421 +//         }
   1.422 +//         RangeDecoderBitTreeDecode(probLen, numBits, len); len += offset;
   1.423 +
   1.424 +	push	DX
   1.425 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.426 +	pop	DX
   1.427 +	add	DX, AX		// offset (ax = len + 1 because of the +1 bias)
   1.428 +	pop	DX		// dl = state base pushed at lzd10 (0 or 8)
   1.429 +lzd13string:
   1.430 +	push	AX		// save the length, popped into CX at lzd19
   1.431 +
   1.432 +// state = (state < kNumLitStates /*7*/) ? dl : dl|3;
   1.433 +
   1.434 +	movb	$7, %cl
   1.435 +	cmpb	%cl, state(BP)
   1.436 +	jb	new_state
   1.437 +	orb	$3, %dl		// 0 -> 3, 8 -> 11, 9 -> 11
   1.438 +new_state:
   1.439 +	movb	%dl, state(BP)
   1.440 +
   1.441 +//       } /* get len */
   1.442 +//       if (state < 4) {
   1.443 +
   1.444 +	cmpb	$4-1, %dl
   1.445 +	ja	lzd19		// taken with Z clear, so the jz at lzd19 falls through
   1.446 +
   1.447 +//         int posSlot;
   1.448 +//         state += kNumLitStates /*7*/;
   1.449 +
   1.450 +	addb	%cl, state(BP)	// cl = 7
   1.451 +
   1.452 +//         prob = p + PosSlot /*432*/ + (((len < kNumLenToPosStates /*4*/) ? 
   1.453 +// 		len : kNumLenToPosStates - 1) << kNumPosSlotBits /*6*/);
   1.454 +
   1.455 +	cmp	$4+1, AX	// ax = len + 1 (see the -8+1 offset bias at lzd16)
   1.456 +	jb	lzd21
   1.457 +	mov	$3+1, AX
   1.458 +
   1.459 +lzd21:
   1.460 +
   1.461 +	dec	CX		// cx = 6
   1.462 +	shl	%cl, AX
   1.463 +	add	$432-64, AX	// -64 removes the +1 bias after the shift by 6
   1.464 +
   1.465 +//         RangeDecoderBitTreeDecode(prob, kNumPosSlotBits /*6*/, posSlot);
   1.466 +
   1.467 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.468 +
   1.469 +//         if (posSlot >= kStartPosModelIndex /*4*/) {
   1.470 +//           int numDirectBits = ((posSlot >> 1) - 1);
   1.471 +
   1.472 +#ifndef FLAT32
   1.473 +	movw	%cx, 2(%bp, %di)	// %cx = 0
   1.474 +#endif
   1.475 +	mov	AX, (BP, DI)	// rep0 = posSlot
   1.476 +	mov	AX, CX
   1.477 +	shrw	$1, CX
   1.478 +	dec	CX		// cx = numDirectBits
   1.479 +	cmpb	$4, %al
   1.480 +	jb	lzd22
   1.481 +
   1.482 +//           rep0 = (2 | ((UInt32)posSlot & 1));
   1.483 +
   1.484 +	andb	%bl, (BP, DI)		// %bx=1
   1.485 +	orb	$2, (BP, DI)
   1.486 +
   1.487 +//           if (posSlot < kEndPosModelIndex /*14*/) {
   1.488 +
   1.489 +	cmpb	$14, %al
   1.490 +	jnb	lzd23
   1.491 +
   1.492 +//             rep0 <<= numDirectBits;
   1.493 +
   1.494 +	neg	AX		// ax = -posSlot
   1.495 +	shll	%cl, (BP, DI)
   1.496 +	add	(BP, DI), AX	// ax = rep0 - posSlot
   1.497 +
   1.498 +//             prob = p + SpecPos /*688*/ + rep0 - posSlot - 1;
   1.499 +
   1.500 +	add	$687, AX
   1.501 +	jmp	lzd24
   1.502 +
   1.503 +//           }
   1.504 +//           else {
   1.505 +lzd23:
   1.506 +//             numDirectBits -= kNumAlignBits /*4*/;
   1.507 +//             do {
   1.508 +//               RC_NORMALIZE; Range >>= 1; rep0 <<= 1;
   1.509 +//               if (Code >= Range) { Code -= Range; rep0 |= 1; }
   1.510 +
   1.511 +lzd23z:
   1.512 +	call	RC_NORMALIZE
   1.513 +	shrl	$1, Range(BP)
   1.514 +	movl	Range(BP), %eax
   1.515 +	cmpl	Code(BP), %eax
   1.516 +	ja	lzd25		// Range > Code: bit 0, C is clear here
   1.517 +	subl	%eax, Code(BP)
   1.518 +	stc			// bit 1
   1.519 +lzd25:
   1.520 +	rcll	$1, (BP, DI)	// rep0 = (rep0 << 1) | C
   1.521 +
   1.522 +//             } while (--numDirectBits != 0);
   1.523 +
   1.524 +	cmpb	$4+1, %cl
   1.525 +	loopne	lzd23z		// stops once cl was 5, leaving cx = 4 = kNumAlignBits
   1.526 +
   1.527 +//             prob = p + Align /* 802 */; numDirectBits = kNumAlignBits /*4*/;
   1.528 +//             rep0 <<= numDirectBits;
   1.529 +
   1.530 +	shll	%cl, (BP, DI)
   1.531 +	mov	$802, AX
   1.532 +//           }
   1.533 +
   1.534 +lzd24:
   1.535 +	call	RangeDecoder	// %ax=probs, %cx=numLevels, %ax=res
   1.536 +
   1.537 +//           {
   1.538 +//             int i = 1, mi = 1;
   1.539 +//             do {
   1.540 +//               CProb *prob3 = prob + mi;
   1.541 +//               RC_GET_BIT2(prob3, mi, ; , rep0 |= i);
   1.542 +
   1.543 +	orb	%dh, (BP, DI)	// update rep0 with DirectBits
   1.544 +
   1.545 +//               i <<= 1;
   1.546 +//             } while(--numDirectBits != 0);
   1.547 +//           }
   1.548 +//         } else rep0 = posSlot;
   1.549 +lzd22:
   1.550 +//         if (++rep0 == (UInt32)(0)) break; /* EOF */
   1.551 +
   1.552 +	incl	(BP, DI)	// Z set when rep0 wraps to 0 (end marker)
   1.553 +
   1.554 +lzd19:
   1.555 +	pop	CX		// length saved at lzd13string; pop keeps the flags
   1.556 +	jz	lzdone
   1.557 +
   1.558 +//       }
   1.559 +//       len += kMatchMinLen;/*2*/
   1.560 +
   1.561 +	inc	CX		// only +1 here: decoded lengths already carry a +1 bias
   1.562 +
   1.563 +//     string: // if (rep0 > nowPos) return LZMA_RESULT_DATA_ERROR;
   1.564 +//       do {
   1.565 +lzd13z:
   1.566 +//         previousByte = outStream[nowPos - rep0];
   1.567 +//         outStream[nowPos++] = previousByte;
   1.568 +
   1.569 +	call	outcharDico 	// %bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.570 +
   1.571 +//       } while(--len != 0);
   1.572 +
   1.573 +	loop	lzd13z
   1.574 +
   1.575 +//     } /* char/string */
   1.576 +//   }
   1.577 +
   1.578 +	jmp	lzdmainlp
   1.579 +
   1.580 +lzdone:
   1.581 +//   //RC_NORMALIZE;
   1.582 +//   //*inSizeProcessed = (SizeT)(Buffer - inStream); *outSizeProcessed = nowPos;
   1.583 +//   return LZMA_RESULT_OK;
   1.584 +	call	Dico2ESDI	// set es & di (rep0 = 0, so the state of C does not matter)
   1.585 +	lea	ws2(BP), SP	// dealloc locals and probabilities: SP = BP + ws2
   1.586 +	ret	
   1.587 +// }
   1.588 +
   1.589 +// bl = outStream[nowPos - rep0];
   1.590 +
   1.591 +/*
   1.592 + * output  es:di, bl
   1.593 + * scratch bh, cl, flags
   1.594 + */
   1.595 +
   1.596 +DicoRep02ESDI:
   1.597 +	stc			// C=1 selects "nowPos - rep0" in Dico2ESDI below
   1.598 +
   1.599 +// bl = outStream[nowPos]; (entered with C=0, or nowPos - rep0 with C=1)
   1.600 +
   1.601 +/*
   1.602 + * output  es:di, bl
   1.603 + * scratch bh, cl, flags
   1.604 + */
   1.605 + 
   1.606 +Dico2ESDI:
   1.607 +#if !defined(FLAT32) && !defined(FLAT16)
   1.608 +	movl	nowPos(%bp), %ebx	// linear address (mov keeps C)
   1.609 +	jnc	Dico2ESDIz
   1.610 +	subl	rep0(%bp), %ebx
   1.611 +Dico2ESDIz:
   1.612 +	movw	%bx, %di		// di = low 16 bits of the linear address
   1.613 +	xorw	%bx, %bx
   1.614 +	shrl	$4, %ebx		// bx = (address >> 16) << 12
   1.615 +	movw	%bx, %es		// es:di now addresses the byte
   1.616 +	movb	%es:(%di), %bl
   1.617 +#else
   1.618 +	mov	nowPos(BP), DI
   1.619 +	jnc	Dico2ESDIz
   1.620 +	sub	rep0(BP), DI
   1.621 +Dico2ESDIz:
   1.622 +	movb	(DI), %bl
   1.623 +#endif
   1.624 +	ret
   1.625 +
   1.626 +outcharDico:
   1.627 +
   1.628 +// bl = outStream[nowPos++] = outStream[nowPos - rep0]
   1.629 +
   1.630 +/*
   1.631 + * output  es:di, bl
   1.632 + * update  nowPos
   1.633 + * scratch ax, dx, bh, cl, flags
   1.634 + */
   1.635 +
   1.636 +	call	DicoRep02ESDI	// %bl = outStream[nowPos - rep0]
   1.637 +	xchg	AX, BX		// al = byte to copy, then fall through into outchar
   1.638 +outchar:
   1.639 +
   1.640 +// bl = outStream[nowPos++] = previousByte = al;
   1.641 +
   1.642 +/*
   1.643 + * output  bl
   1.644 + * update  nowPos
   1.645 + * scratch ax, dx, bh, di, cl, flags
   1.646 + */
   1.647 +
   1.648 +	clc			// C=0: Dico2ESDI points at outStream[nowPos]
   1.649 +	call	Dico2ESDI
   1.650 +	stosb			// store al at the output position
   1.651 +	xchg	AX, BX		// previous byte
   1.652 +
   1.653 +//	int posState = (int)((nowPos) & posStateMask);
   1.654 +
   1.655 +#if PROP_PB != 0 && PROP_LP != 0
   1.656 +	addw	$0x0101, posState2(BP)	// bumps posState2 (low byte) and posState (high byte)
   1.657 +	andb	$(((1 << PROP_PB) -1)<<8)+((1 << PROP_LP) -1), posState2(BP)	// NOTE(review): 16-bit mask used with a byte-sized andb - confirm (andw?) before enabling PROP_LP
   1.658 +#else
   1.659 +# if PROP_PB != 0
   1.660 +	incb	posState(BP)
   1.661 +	andb	$((1 << PROP_PB) -1), posState(BP)
   1.662 +# endif
   1.663 +# if PROP_LP != 0
   1.664 +	incb	posState2(BP)
   1.665 +	andb	$((1 << PROP_LP) -1), posState2(BP)
   1.666 +# endif
   1.667 +#endif
   1.668 +	INC	nowPos(BP)	// nowPos++
   1.669 +	ret
   1.670 +
   1.671 +//  
   1.672 +// #define RC_NORMALIZE if (Range < kTopValue) 
   1.673 +//    { Range <<= 8; Code = (Code << 8) | RC_READ_BYTE; }
   1.674 +
   1.675 +/*
   1.676 + * update  Range, Code, ds:si
   1.677 + * scratch flags (ax is saved and restored)
   1.678 + */
   1.679 +
   1.680 +RC_NORMALIZE:
   1.681 +	cmpb	$0, Range+3(BP)	// top byte != 0: Range >= 1 << 24, nothing to do
   1.682 +	jne	RC_NORMALIZE_1
   1.683 +RC_LOAD_BYTE:
   1.684 +	push	AX
   1.685 +	shll	$8, Range(BP)
   1.686 +	shll	$8, Code(BP)	// low byte of Code is now 0
   1.687 +#if !defined(FLAT32) && !defined(FLAT16)
   1.688 +	testw	%si, %si
   1.689 +	jns	RC_READ_BYTE	// si < 0x8000: no adjustment needed
   1.690 +	movw	%ds, %ax
   1.691 +	incw	%ax
   1.692 +	movw	%ax, %ds	// ds += 1 and si -= 16: same address, smaller offset
   1.693 +	addw	$-16, %si
   1.694 +RC_READ_BYTE:
   1.695 +#endif
   1.696 +	lodsb			// al = next input byte, si++
   1.697 +	movb	%al, Code(BP)	// Code |= byte
   1.698 +	pop	AX
   1.699 +RC_NORMALIZE_1:
   1.700 +	ret
   1.701 +
   1.702 +// Bit1(dx + (state << kNumPosBitsMax /*4*/) + posState)
   1.703 +
   1.704 +Bit1state:
   1.705 +	movb	$16, %al
   1.706 +	mulb	state(BP)	// ax = state << 4
   1.707 +# if PROP_PB != 0
   1.708 +	addb	posState(BP), %al
   1.709 +# endif
   1.710 +Bit1axdx:			// Bit1(ax + dx)
   1.711 +	add	DX, AX
   1.712 +	jmp	Bit1
   1.713 +
   1.714 +// prob += 12; Bit1(prob)
   1.715 +
   1.716 +Bit1dx12:
   1.717 +	add	$12, DX
   1.718 +Bit1dx:				// Bit1(dx)
   1.719 +	mov	DX, AX
   1.720 +
   1.721 +// static int Bit1(CProb *p)
   1.722 +
   1.723 +Bit1:
   1.724 +/*
   1.725 + * input   ax=p (index of the probability, not a byte offset)
   1.726 + * output  C = decoded bit, ax = 0 or -1 accordingly
   1.727 + * update  bound, Range, Code, ds:si
   1.728 + * scratch flags (other registers are kept by pushal/popal)
   1.729 + */
   1.730 + 
   1.731 +// {
   1.732 +// 	RC_NORMALIZE;
   1.733 +
   1.734 +	call  RC_NORMALIZE		// keeps %ax, may update %si
   1.735 +
   1.736 +	pushal
   1.737 +
   1.738 +	xchg	AX, DI
   1.739 +	add	DI, DI			// short *
   1.740 +	
   1.741 +
   1.742 +// 	bound = (Range>>kNumBitModelTotalBits /*11*/) * *(p);
   1.743 +
   1.744 +	movl	Range(BP), %eax
   1.745 +	shrl	$11, %eax
   1.746 +	movzwl	(BP, DI), %edx
   1.747 +	mull	%edx			// eax = bound
   1.748 +
   1.749 +// 	if (Code < bound) {
   1.750 +
   1.751 +	cmpl	Code(BP), %eax
   1.752 +	jbe	Bit1_1			// bound <= Code: bit is 1
   1.753 +
   1.754 +//    		Range = bound;
   1.755 +
   1.756 +	movl	%eax, Range(BP)
   1.757 +
   1.758 +// 		*(p) += (kBitModelTotal /*2048*/ - *(p)) >> kNumMoveBits /*5*/;
   1.759 +
   1.760 +	movw	$2048, %ax		// C is still clear from the cmpl
   1.761 +
   1.762 +// 		return 0;
   1.763 +
   1.764 +	jmp	Bit1_2
   1.765 +
   1.766 +//	}
   1.767 +// 	else {
   1.768 +
   1.769 +Bit1_1:
   1.770 +
   1.771 +//    		Range -= bound; Code -= bound;
   1.772 +
   1.773 +	subl	%eax, Range(BP)
   1.774 +	subl	%eax, Code(BP)
   1.775 +
   1.776 +// 		*(p) -= (*(p)) >> kNumMoveBits /*5*/;
   1.777 +
   1.778 +	movw	$31, %ax		// (31 - p) >> 5 equals -(p >> 5) with the sarw below
   1.779 +
   1.780 +// 		return 1;
   1.781 +
   1.782 +	stc
   1.783 +Bit1_2:
   1.784 +	pushf				// keep C across the probability update
   1.785 +	subw	(BP, DI), %ax
   1.786 +	sarw	$5, %ax
   1.787 +	addw	%ax, (BP, DI)
   1.788 +	popf
   1.789 +	popal
   1.790 +	sbb	AX, AX			// ax = -C, C unchanged
   1.791 +
   1.792 +// 	}
   1.793 +// }
   1.794 +
   1.795 +	ret
   1.796 +
   1.797 +RangeDecoder:
   1.798 +
   1.799 +/*
   1.800 + * input   ax=probs cx=numLevels (< 8) bx=1 -- NOTE(review): the caller at lzd10 also passes cl = 8
   1.801 + * output  ax=res (backward), dh (forward)
   1.802 + * update  bound, Range, Code, ds:si
   1.803 + * scratch flags, cx=0, dl (bx is saved and restored)
   1.804 + */
   1.805 + 
   1.806 +	push	BX
   1.807 +	
   1.808 +//   { int i = numLevels; res = 1; 
   1.809 +	mov	BX, DX		// res = 1
   1.810 +	
   1.811 +//   do { CProb *p = probs + res; RC_GET_BIT(p, res) } while(--i != 0); 
   1.812 +
   1.813 +RangeDecoder_1:
   1.814 +	push	AX			// keep probs across the call
   1.815 +	call	Bit1axdx		// C,%ax = Bit1(prob+%ax)
   1.816 +	rclb	$1, %dl			// res <<= 1; res |= C
   1.817 +	andb	%bl, %al		// current bit
   1.818 +	orb	%al, %bh		// store in bh
   1.819 +	shlb	$1, %bl			// update max
   1.820 +	pop	AX
   1.821 +	loop	RangeDecoder_1
   1.822 +
   1.823 +//   res -= (1 << numLevels); }
   1.824 +
   1.825 +	xchg	AX, BX			// move bh to dh
   1.826 +	xchg	AX, DX			// and dl to al
   1.827 +	sub	%dl, %al		// sub max
   1.828 +	pop	BX
   1.829 +	ret