Bcj2.c 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. /* Bcj2.c -- BCJ2 Decoder (Converter for x86 code)
  2. 2023-03-01 : Igor Pavlov : Public domain */
  3. #include "Precomp.h"
  4. #include "Bcj2.h"
  5. #include "CpuArch.h"
  6. #define kTopValue ((UInt32)1 << 24)
  7. #define kNumBitModelTotalBits 11
  8. #define kBitModelTotal (1 << kNumBitModelTotalBits)
  9. #define kNumMoveBits 5
  10. // UInt32 bcj2_stats[256 + 2][2];
  11. void Bcj2Dec_Init(CBcj2Dec *p)
  12. {
  13. unsigned i;
  14. p->state = BCJ2_STREAM_RC; // BCJ2_DEC_STATE_OK;
  15. p->ip = 0;
  16. p->temp = 0;
  17. p->range = 0;
  18. p->code = 0;
  19. for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
  20. p->probs[i] = kBitModelTotal >> 1;
  21. }
  22. SRes Bcj2Dec_Decode(CBcj2Dec *p)
  23. {
  24. UInt32 v = p->temp;
  25. // const Byte *src;
  26. if (p->range <= 5)
  27. {
  28. UInt32 code = p->code;
  29. p->state = BCJ2_DEC_STATE_ERROR; /* for case if we return SZ_ERROR_DATA; */
  30. for (; p->range != 5; p->range++)
  31. {
  32. if (p->range == 1 && code != 0)
  33. return SZ_ERROR_DATA;
  34. if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
  35. {
  36. p->state = BCJ2_STREAM_RC;
  37. return SZ_OK;
  38. }
  39. code = (code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
  40. p->code = code;
  41. }
  42. if (code == 0xffffffff)
  43. return SZ_ERROR_DATA;
  44. p->range = 0xffffffff;
  45. }
  46. // else
  47. {
  48. unsigned state = p->state;
  49. // we check BCJ2_IS_32BIT_STREAM() here instead of check in the main loop
  50. if (BCJ2_IS_32BIT_STREAM(state))
  51. {
  52. const Byte *cur = p->bufs[state];
  53. if (cur == p->lims[state])
  54. return SZ_OK;
  55. p->bufs[state] = cur + 4;
  56. {
  57. const UInt32 ip = p->ip + 4;
  58. v = GetBe32a(cur) - ip;
  59. p->ip = ip;
  60. }
  61. state = BCJ2_DEC_STATE_ORIG_0;
  62. }
  63. if ((unsigned)(state - BCJ2_DEC_STATE_ORIG_0) < 4)
  64. {
  65. Byte *dest = p->dest;
  66. for (;;)
  67. {
  68. if (dest == p->destLim)
  69. {
  70. p->state = state;
  71. p->temp = v;
  72. return SZ_OK;
  73. }
  74. *dest++ = (Byte)v;
  75. p->dest = dest;
  76. if (++state == BCJ2_DEC_STATE_ORIG_3 + 1)
  77. break;
  78. v >>= 8;
  79. }
  80. }
  81. }
  82. // src = p->bufs[BCJ2_STREAM_MAIN];
  83. for (;;)
  84. {
  85. /*
  86. if (BCJ2_IS_32BIT_STREAM(p->state))
  87. p->state = BCJ2_DEC_STATE_OK;
  88. else
  89. */
  90. {
  91. if (p->range < kTopValue)
  92. {
  93. if (p->bufs[BCJ2_STREAM_RC] == p->lims[BCJ2_STREAM_RC])
  94. {
  95. p->state = BCJ2_STREAM_RC;
  96. p->temp = v;
  97. return SZ_OK;
  98. }
  99. p->range <<= 8;
  100. p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
  101. }
  102. {
  103. const Byte *src = p->bufs[BCJ2_STREAM_MAIN];
  104. const Byte *srcLim;
  105. Byte *dest = p->dest;
  106. {
  107. const SizeT rem = (SizeT)(p->lims[BCJ2_STREAM_MAIN] - src);
  108. SizeT num = (SizeT)(p->destLim - dest);
  109. if (num >= rem)
  110. num = rem;
  111. #define NUM_ITERS 4
  112. #if (NUM_ITERS & (NUM_ITERS - 1)) == 0
  113. num &= ~((SizeT)NUM_ITERS - 1); // if (NUM_ITERS == (1 << x))
  114. #else
  115. num -= num % NUM_ITERS; // if (NUM_ITERS != (1 << x))
  116. #endif
  117. srcLim = src + num;
  118. }
  119. #define NUM_SHIFT_BITS 24
  120. #define ONE_ITER(indx) { \
  121. const unsigned b = src[indx]; \
  122. *dest++ = (Byte)b; \
  123. v = (v << NUM_SHIFT_BITS) | b; \
  124. if (((b + (0x100 - 0xe8)) & 0xfe) == 0) break; \
  125. if (((v - (((UInt32)0x0f << (NUM_SHIFT_BITS)) + 0x80)) & \
  126. ((((UInt32)1 << (4 + NUM_SHIFT_BITS)) - 0x1) << 4)) == 0) break; \
  127. /* ++dest */; /* v = b; */ }
  128. if (src != srcLim)
  129. for (;;)
  130. {
  131. /* The dependency chain of 2-cycle for (v) calculation is not big problem here.
  132. But we can remove dependency chain with v = b in the end of loop. */
  133. ONE_ITER(0)
  134. #if (NUM_ITERS > 1)
  135. ONE_ITER(1)
  136. #if (NUM_ITERS > 2)
  137. ONE_ITER(2)
  138. #if (NUM_ITERS > 3)
  139. ONE_ITER(3)
  140. #if (NUM_ITERS > 4)
  141. ONE_ITER(4)
  142. #if (NUM_ITERS > 5)
  143. ONE_ITER(5)
  144. #if (NUM_ITERS > 6)
  145. ONE_ITER(6)
  146. #if (NUM_ITERS > 7)
  147. ONE_ITER(7)
  148. #endif
  149. #endif
  150. #endif
  151. #endif
  152. #endif
  153. #endif
  154. #endif
  155. src += NUM_ITERS;
  156. if (src == srcLim)
  157. break;
  158. }
  159. if (src == srcLim)
  160. #if (NUM_ITERS > 1)
  161. for (;;)
  162. #endif
  163. {
  164. #if (NUM_ITERS > 1)
  165. if (src == p->lims[BCJ2_STREAM_MAIN] || dest == p->destLim)
  166. #endif
  167. {
  168. const SizeT num = (SizeT)(src - p->bufs[BCJ2_STREAM_MAIN]);
  169. p->bufs[BCJ2_STREAM_MAIN] = src;
  170. p->dest = dest;
  171. p->ip += (UInt32)num;
  172. /* state BCJ2_STREAM_MAIN has more priority than BCJ2_STATE_ORIG */
  173. p->state =
  174. src == p->lims[BCJ2_STREAM_MAIN] ?
  175. (unsigned)BCJ2_STREAM_MAIN :
  176. (unsigned)BCJ2_DEC_STATE_ORIG;
  177. p->temp = v;
  178. return SZ_OK;
  179. }
  180. #if (NUM_ITERS > 1)
  181. ONE_ITER(0)
  182. src++;
  183. #endif
  184. }
  185. {
  186. const SizeT num = (SizeT)(dest - p->dest);
  187. p->dest = dest; // p->dest += num;
  188. p->bufs[BCJ2_STREAM_MAIN] += num; // = src;
  189. p->ip += (UInt32)num;
  190. }
  191. {
  192. UInt32 bound, ttt;
  193. CBcj2Prob *prob; // unsigned index;
  194. /*
  195. prob = p->probs + (unsigned)((Byte)v == 0xe8 ?
  196. 2 + (Byte)(v >> 8) :
  197. ((v >> 5) & 1)); // ((Byte)v < 0xe8 ? 0 : 1));
  198. */
  199. {
  200. const unsigned c = ((v + 0x17) >> 6) & 1;
  201. prob = p->probs + (unsigned)
  202. (((0 - c) & (Byte)(v >> NUM_SHIFT_BITS)) + c + ((v >> 5) & 1));
  203. // (Byte)
  204. // 8x->0 : e9->1 : xxe8->xx+2
  205. // 8x->0x100 : e9->0x101 : xxe8->xx
  206. // (((0x100 - (e & ~v)) & (0x100 | (v >> 8))) + (e & v));
  207. // (((0x101 + (~e | v)) & (0x100 | (v >> 8))) + (e & v));
  208. }
  209. ttt = *prob;
  210. bound = (p->range >> kNumBitModelTotalBits) * ttt;
  211. if (p->code < bound)
  212. {
  213. // bcj2_stats[prob - p->probs][0]++;
  214. p->range = bound;
  215. *prob = (CBcj2Prob)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
  216. continue;
  217. }
  218. {
  219. // bcj2_stats[prob - p->probs][1]++;
  220. p->range -= bound;
  221. p->code -= bound;
  222. *prob = (CBcj2Prob)(ttt - (ttt >> kNumMoveBits));
  223. }
  224. }
  225. }
  226. }
  227. {
  228. /* (v == 0xe8 ? 0 : 1) uses setcc instruction with additional zero register usage in x64 MSVC. */
  229. // const unsigned cj = ((Byte)v == 0xe8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
  230. const unsigned cj = (((v + 0x57) >> 6) & 1) + BCJ2_STREAM_CALL;
  231. const Byte *cur = p->bufs[cj];
  232. Byte *dest;
  233. SizeT rem;
  234. if (cur == p->lims[cj])
  235. {
  236. p->state = cj;
  237. break;
  238. }
  239. v = GetBe32a(cur);
  240. p->bufs[cj] = cur + 4;
  241. {
  242. const UInt32 ip = p->ip + 4;
  243. v -= ip;
  244. p->ip = ip;
  245. }
  246. dest = p->dest;
  247. rem = (SizeT)(p->destLim - dest);
  248. if (rem < 4)
  249. {
  250. if ((unsigned)rem > 0) { dest[0] = (Byte)v; v >>= 8;
  251. if ((unsigned)rem > 1) { dest[1] = (Byte)v; v >>= 8;
  252. if ((unsigned)rem > 2) { dest[2] = (Byte)v; v >>= 8; }}}
  253. p->temp = v;
  254. p->dest = dest + rem;
  255. p->state = BCJ2_DEC_STATE_ORIG_0 + (unsigned)rem;
  256. break;
  257. }
  258. SetUi32(dest, v)
  259. v >>= 24;
  260. p->dest = dest + 4;
  261. }
  262. }
  263. if (p->range < kTopValue && p->bufs[BCJ2_STREAM_RC] != p->lims[BCJ2_STREAM_RC])
  264. {
  265. p->range <<= 8;
  266. p->code = (p->code << 8) | *(p->bufs[BCJ2_STREAM_RC])++;
  267. }
  268. return SZ_OK;
  269. }
  270. #undef NUM_ITERS
  271. #undef ONE_ITER
  272. #undef NUM_SHIFT_BITS
  273. #undef kTopValue
  274. #undef kNumBitModelTotalBits
  275. #undef kBitModelTotal
  276. #undef kNumMoveBits