| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290 |
- /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
- 2023-03-01 : Igor Pavlov : Public domain */
- #include "Precomp.h"
- #include "Bcj2.h"
- #include "CpuArch.h"
- #define kTopValue ((UInt32)1 << 24)
- #define kNumBitModelTotalBits 11
- #define kBitModelTotal (1 << kNumBitModelTotalBits)
- #define kNumMoveBits 5
- // UInt32 bcj2_stats[256 + 2][2];
- void Bcj2Dec_Init(CBcj2Dec *p)
- {
- unsigned i;
- p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
- p->ip = 0;
- p->temp = 0;
- p->range = 0;
- p->code = 0;
- for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
- p->probs[i] = kBitModelTotal >> 1;
- }
- SRes Bcj2Dec_Decode(CBcj2Dec *p)
- {
- UInt32 v = p->temp;
- // const Byte *src;
- if (p->range <= 5)
- {
- UInt32 code = p->code;
- p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
- for (; p->range != 5; p->range++)
- {
- if (p->range == 1 && code != 0)
- return SZ_ERROR_DATA;
- if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
- {
- p->state = BCJ2_STREAM_RC;
- return SZ_OK;
- }
- code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
- p->code = code;
- }
- if (code == 0xffffffff)
- return SZ_ERROR_DATA;
- p->range = 0xffffffff;
- }
- // else
- {
- unsigned state = p->state;
- // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
- if (BCJ2_IS_32BIT_STREAM(state))
- {
- const Byte *cur = p->bufs[state];
- if (cur == p->lims[state])
- return SZ_OK;
- p->bufs[state] = cur + 4;
- {
- const UInt32 ip = p->ip + 4;
- v = GetBe32a(cur) - ip;
- p->ip = ip;
- }
- state = BCJ2_DEC_STATE_ORIG_0;
- }
- if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
- {
- Byte *dest = p->dest;
- for (;;)
- {
- if (dest == p->destLim)
- {
- p->state = state;
- p->temp = v;
- return SZ_OK;
- }
- *dest++ = (Byte)v;
- p->dest = dest;
- if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
- break;
- v >>= 8;
- }
- }
- }
- // src = p->bufs[BCJ2_STREAM_MAIN];
- for (;;)
- {
- /*
- if (BCJ2_IS_32BIT_STREAM(p->state))
- p->state = BCJ2_DEC_STATE_OK;
- else
- */
- {
- if (p->range < kTopValue)
- {
- if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
- {
- p->state = BCJ2_STREAM_RC;
- p->temp = v;
- return SZ_OK;
- }
- p->range <<= 8;
- p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
- }
- {
- const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
- const Byte *srcLim;
- Byte *dest = p->dest;
- {
- const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
- SizeT num = (SizeT)(p->destLim - dest);
- if (num >= rem)
- num = rem;
- #define NUM_ITERS 4
- #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
- num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
- #else
- num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
- #endif
- srcLim = src + num;
- }
- #define NUM_SHIFT_BITS 24
- #define ONE_ITER(indx) { \
- const unsigned b = src[indx]; \
- *dest++ = (Byte)b; \
- v = (v << NUM_SHIFT_BITS) | b; \
- if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
- if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
- ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
- /* ++dest */; /* v = b; */ }
-
- if (src != srcLim)
- for (;;)
- {
- /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
- But we can remove dependency chain with v = b in the end of loop. */
- ONE_ITER(0)
- #if (NUM_ITERS > 1)
- ONE_ITER(1)
- #if (NUM_ITERS > 2)
- ONE_ITER(2)
- #if (NUM_ITERS > 3)
- ONE_ITER(3)
- #if (NUM_ITERS > 4)
- ONE_ITER(4)
- #if (NUM_ITERS > 5)
- ONE_ITER(5)
- #if (NUM_ITERS > 6)
- ONE_ITER(6)
- #if (NUM_ITERS > 7)
- ONE_ITER(7)
- #endif
- #endif
- #endif
- #endif
- #endif
- #endif
- #endif
-
- src += NUM_ITERS;
- if (src == srcLim)
- break;
- }
- if (src == srcLim)
- #if (NUM_ITERS > 1)
- for (;;)
- #endif
- {
- #if (NUM_ITERS > 1)
- if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
- #endif
- {
- const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
- p->bufs[BCJ2_STREAM_MAIN] = src;
- p->dest = dest;
- p->ip += (UInt32)num;
- /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
- p->state =
- src == p->lims[BCJ2_STREAM_MAIN] ?
- (unsigned)BCJ2_STREAM_MAIN :
- (unsigned)BCJ2_DEC_STATE_ORIG;
- p->temp = v;
- return SZ_OK;
- }
- #if (NUM_ITERS > 1)
- ONE_ITER(0)
- src++;
- #endif
- }
- {
- const SizeT num = (SizeT)(dest - p->dest);
- p->dest = dest; // p->dest += num;
- p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
- p->ip += (UInt32)num;
- }
- {
- UInt32 bound, ttt;
- CBcj2Prob *prob; // unsigned index;
- /*
- prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
- 2 + (Byte)(v >> 8) :
- ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
- */
- {
- const unsigned c = ((v + 0x17) >> 6) & 1;
- prob = p->probs + (unsigned)
- (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
- // (Byte)
- // 8x->0 : e9->1 : xxe8->xx+2
- // 8x->0x100 : e9->0x101 : xxe8->xx
- // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
- // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
- }
- ttt = *prob;
- bound = (p->range >> kNumBitModelTotalBits) * ttt;
- if (p->code < bound)
- {
- // bcj2_stats[prob - p->probs][0]++;
- p->range = bound;
- *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
- continue;
- }
- {
- // bcj2_stats[prob - p->probs][1]++;
- p->range -= bound;
- p->code -= bound;
- *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
- }
- }
- }
- }
- {
- /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
- // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
- const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
- const Byte *cur = p->bufs[cj];
- Byte *dest;
- SizeT rem;
- if (cur == p->lims[cj])
- {
- p->state = cj;
- break;
- }
- v = GetBe32a(cur);
- p->bufs[cj] = cur + 4;
- {
- const UInt32 ip = p->ip + 4;
- v -= ip;
- p->ip = ip;
- }
- dest = p->dest;
- rem = (SizeT)(p->destLim - dest);
- if (rem < 4)
- {
- if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
- if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
- if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
- p->temp = v;
- p->dest = dest + rem;
- p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
- break;
- }
- SetUi32(dest, v)
- v >>= 24;
- p->dest = dest + 4;
- }
- }
- if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
- {
- p->range <<= 8;
- p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
- }
- return SZ_OK;
- }
- #undef NUM_ITERS
- #undef ONE_ITER
- #undef NUM_SHIFT_BITS
- #undef kTopValue
- #undef kNumBitModelTotalBits
- #undef kBitModelTotal
- #undef kNumMoveBits
|