/* Bra86.c -- Branch converter for X86 code (BCJ)
2023-04-02 : Igor Pavlov : Public domain */
#include "Precomp.h"
#include "Bra.h"
#include "CpuArch.h"
  /* Build-time configuration and helper macros for the x86 branch converter.
     The macros below expand inside the converter function and refer to its
     locals by name: p, lim, pc, size, encoding (and v for the scan macros). */

/* Select the pointer-based position arithmetic when the pointer size is
   known to be 4 or 8 bytes. */
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

/* BR_PC_INIT : one-time bias applied to (pc) before scanning starts.
   BR_PC_GET  : position value for the current (p), derived from the biased (pc).
   Pointer variant    : the low 32 bits of the pointer value are subtracted
                        once and added back on every use.
   Fallback variant   : (pc) is advanced by (size) and the remaining distance
                        (lim - p) is subtracted on every use. */
#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/* Adds (c) to (v) when (encoding) is nonzero, subtracts it otherwise.
   NOTE: expands to a bare if/else statement that already ends with ';',
   so it is written without a trailing semicolon at the use site. */
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

/* Builds the function name: z7_BranchConvSt_<name>. */
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name

/* True when the low byte of (b) is 0x00 or 0xFF:
   ((b) + 1) then has none of bits 1..7 set. */
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)

/* Opcode scan helpers. A byte matches when (byte & 0xfe) == 0xe8,
   that is: the byte is 0xE8 or 0xE9.
   BR86_PREPARE_BCJ_SCAN is expanded while (p) points to the 4-byte group,
   BR86_IS_BCJ_BYTE(n) is expanded after (p += 4), and tests byte (n)
   of that group. */
#ifdef MY_CPU_LE_UNALIGN
  /* One 32-bit load; XOR with 0xe8e8e8e8 turns each matching byte
     into 0x00 or 0x01, which the 0xfe byte mask then tests. */
  #define BR86_PREPARE_BCJ_SCAN  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
  #define BR86_IS_BCJ_BYTE(n)    ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
  #define BR86_PREPARE_BCJ_SCAN
  // bad for MSVC X86 (partial write to byte reg):
  #define BR86_IS_BCJ_BYTE(n)    ((p[n - 4] & 0xfe) == 0xe8)
  // bad for old MSVC (partial write to byte reg):
  // #define BR86_IS_BCJ_BYTE(n)    (((*p ^ 0xe8) & 0xfe) == 0)
#endif
  34. static
  35. Z7_FORCE_INLINE
  36. Z7_ATTRIB_NO_VECTOR
  37. Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
  38. {
  39. if (size < 5)
  40. return p;
  41. {
  42. // Byte *p = data;
  43. const Byte *lim = p + size - 4;
  44. unsigned mask = (unsigned)*state; // & 7;
  45. #ifdef BR_CONV_USE_OPT_PC_PTR
  46. /* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
  47. because call/jump offset is relative to the next instruction.
  48. if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
  49. because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
  50. */
  51. pc += 4;
  52. #endif
  53. BR_PC_INIT
  54. goto start;
  55. for (;; mask |= 4)
  56. {
  57. // cont: mask |= 4;
  58. start:
  59. if (p >= lim)
  60. goto fin;
  61. {
  62. BR86_PREPARE_BCJ_SCAN
  63. p += 4;
  64. if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
  65. if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
  66. if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
  67. if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
  68. }
  69. goto main_loop;
  70. m0: p--;
  71. m1: p--;
  72. m2: p--;
  73. if (mask == 0)
  74. goto a3;
  75. if (p > lim)
  76. goto fin_p;
  77. // if (((0x17u >> mask) & 1) == 0)
  78. if (mask > 4 || mask == 3)
  79. {
  80. mask >>= 1;
  81. continue; // goto cont;
  82. }
  83. mask >>= 1;
  84. if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
  85. continue; // goto cont;
  86. // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
  87. {
  88. UInt32 v = GetUi32(p);
  89. UInt32 c;
  90. v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
  91. c = BR_PC_GET;
  92. BR_CONVERT_VAL(v, c)
  93. {
  94. mask <<= 3;
  95. if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
  96. {
  97. v ^= (((UInt32)0x100 << mask) - 1);
  98. #ifdef MY_CPU_X86
  99. // for X86 : we can recalculate (c) to reduce register pressure
  100. c = BR_PC_GET;
  101. #endif
  102. BR_CONVERT_VAL(v, c)
  103. }
  104. mask = 0;
  105. }
  106. // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
  107. v &= (1 << 25) - 1; v -= (1 << 24);
  108. SetUi32(p, v)
  109. p += 4;
  110. goto main_loop;
  111. }
  112. main_loop:
  113. if (p >= lim)
  114. goto fin;
  115. for (;;)
  116. {
  117. BR86_PREPARE_BCJ_SCAN
  118. p += 4;
  119. if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
  120. if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
  121. if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
  122. if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
  123. if (p >= lim)
  124. goto fin;
  125. }
  126. a0: p--;
  127. a1: p--;
  128. a2: p--;
  129. a3:
  130. if (p > lim)
  131. goto fin_p;
  132. // if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
  133. {
  134. UInt32 v = GetUi32(p);
  135. UInt32 c;
  136. v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
  137. c = BR_PC_GET;
  138. BR_CONVERT_VAL(v, c)
  139. // v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
  140. v &= (1 << 25) - 1; v -= (1 << 24);
  141. SetUi32(p, v)
  142. p += 4;
  143. goto main_loop;
  144. }
  145. }
  146. fin_p:
  147. p--;
  148. fin:
  149. // the following processing for tail is optional and can be commented
  150. /*
  151. lim += 4;
  152. for (; p < lim; p++, mask >>= 1)
  153. if ((*p & 0xfe) == 0xe8)
  154. break;
  155. */
  156. *state = (UInt32)mask;
  157. return p;
  158. }
  159. }
  160. #define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
  161. Z7_NO_INLINE \
  162. Z7_ATTRIB_NO_VECTOR \
  163. Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
  164. { return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
  165. Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
  166. #ifndef Z7_EXTRACT_ONLY
  167. Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
  168. #endif