/* Bra.c -- Branch converters for RISC code
2024-01-20 : Igor Pavlov : Public domain */

#include "Precomp.h"

#include "Bra.h"
#include "RotateDefs.h"
#include "CpuArch.h"

#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
  #define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
  #define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
  #define BR_PC_INIT  pc += (UInt32)size;
  #define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
  // #define BR_PC_INIT
  // #define BR_PC_GET  (pc + (UInt32)(SizeT)(p - data))
#endif
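
/* Annotation (not in the original source): the converters track the
   absolute address (pc) of the instruction being processed without a
   separate counter. When a pointer fits in 4 or 8 bytes, BR_PC_INIT
   rebases pc once by subtracting the buffer address, so BR_PC_GET needs
   only one add per instruction: (pc + (UInt32)(SizeT)p) is the current
   address however far p has advanced. The fallback derives the same
   value from the distance to (lim). */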
#define BR_CONVERT_VAL(v, c)  if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c)  if (!encoding) c = (UInt32)0 - c;  v += c;
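
/* Annotation (not in the original source): on encode the pc-relative
   displacement becomes an absolute target (v += pc), so repeated calls
   to one function turn into identical byte sequences that compress
   better; decode (v -= pc) is the exact inverse. */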
#define Z7_BRANCH_CONV(name)  z7_ ## name

#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)

#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
  { return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \

#ifdef Z7_EXTRACT_ONLY
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif
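
/* Usage sketch (annotation, not in the original source; the public
   wrapper names are produced by the Z7_BRANCH_CONV_DEC_2 /
   Z7_BRANCH_CONV_ENC_2 macros declared in Bra.h):

     // Byte *buf;  SizeT size;  UInt32 ip;  // absolute address of buf[0]
     // Byte *end = <ENC wrapper>(buf, size, ip);  // before compression
     // ...
     // <DEC wrapper>(buf, size, ip);               // after decompression

   Each converter returns a pointer just past the last fully processed
   instruction; a short unaligned tail is left untouched. */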
#if defined(__clang__)
  #define BR_EXTERNAL_FOR
  #define BR_NEXT_ITERATION  continue;
#else
  #define BR_EXTERNAL_FOR    for (;;)
  #define BR_NEXT_ITERATION  break;
#endif
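
/* Annotation (assumption, not stated in the source): the two expansions
   are behaviorally identical; the clang variant drops the outer wrapper
   loop and re-enters the scan loop with `continue`, presumably because
   that shape produces better loop codegen with clang than the
   break-to-outer-loop form used for other compilers. */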
#if defined(__clang__) && (__clang_major__ >= 8) \
  || defined(__GNUC__) && (__GNUC__ >= 1000) \
    // GCC is not good for __builtin_expect() here
    /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
  // #define Z7_unlikely  [[unlikely]]
  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
  #define Z7_UNLIKELY(x)  (__builtin_expect((x), 0))
  // #define Z7_likely  [[likely]]
#else
  // #define Z7_LIKELY(x)   (x)
  #define Z7_UNLIKELY(x)  (x)
  // #define Z7_likely
#endif
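
/* Annotation (not in the original source):
   ARM64 (AArch64) converter. Two instruction forms are rewritten:
     BL:   (v & 0xfc000000) == 0x94000000, imm26 is a word offset;
     ADRP: (v & 0x9f000000) == 0x90000000, a 21-bit page (4 KiB) offset
           split across immlo (bits 30..29) and immhi (bits 23..5).
   ADRP is converted only when the offset passes the (flag)/(mask) range
   test, which keeps the transform reversible. */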
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << (24 - 4);
  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1);
  size &= ~(SizeT)3;
  // if (size == 0) return p;
  lim = p + size;
  BR_PC_INIT
  pc -= 4;  // because (p) will point to next instruction

  BR_EXTERNAL_FOR
  {
    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
    for (;;)
    {
      UInt32 v;
      if Z7_UNLIKELY(p == lim)
        return p;
      v = GetUi32a(p);
      p += 4;
      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
      {
        UInt32 c = BR_PC_GET >> 2;
        BR_CONVERT_VAL(v, c)
        v &= 0x03ffffff;
        v |= 0x94000000;
        SetUi32a(p - 4, v)
        BR_NEXT_ITERATION
      }
      // v = rotlFixed(v, 8);  v += (flag << 8) - 0x90;  if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
      v -= 0x90000000;  if Z7_UNLIKELY((v & 0x9f000000) == 0)
      {
        UInt32 z, c;
        // v = rotrFixed(v, 8);
        v += flag;  if Z7_UNLIKELY(v & mask) continue;
        z = (v & 0xffffffe0) | (v >> 26);
        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7;
        BR_CONVERT_VAL(z, c)
        v &= 0x1f;
        v |= 0x90000000;
        v |= z << 26;
        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag);
        SetUi32a(p - 4, v)
      }
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
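
/* Annotation (not in the original source):
   ARM (32-bit) converter. Only the unconditional BL form is rewritten:
   0xeb000000 | imm24, i.e. cond = AL (0b1110) plus opcode 0b101 with
   L = 1, so the scan just tests the top byte of the little-endian word
   (p[-1]) against 0xeb. imm24 is a word offset taken relative to
   pc + 8. */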
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)3;
  lim = p + size;
  BR_PC_INIT
  /* in ARM: the branch offset is relative to the address two instructions
     ahead of the current instruction; (p) will point to the next instruction */
  pc += 8 - 4;

  for (;;)
  {
    for (;;)
    {
      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
      if Z7_UNLIKELY(p >= lim) { return p; }  p += 4;  if Z7_UNLIKELY(p[-1] == 0xeb) break;
    }
    {
      UInt32 v = GetUi32a(p - 4);
      UInt32 c = BR_PC_GET >> 2;
      BR_CONVERT_VAL(v, c)
      v &= 0x00ffffff;
      v |= 0xeb000000;
      SetUi32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
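
/* Annotation (not in the original source):
   PowerPC converter. Matches the big-endian `bl` instruction: primary
   opcode 18 with AA = 0 and LK = 1, i.e.
   (word & 0xfc000003) == 0x48000001. The 24-bit displacement occupies
   bits 25..2 as a byte offset, so BR_PC_GET is applied without
   shifting. */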
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)3;
  lim = p + size;
  BR_PC_INIT
  pc -= 4;  // because (p) will point to next instruction

  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      // v = GetBe32a(p);
      v = *(UInt32 *)(void *)p;
      p += 4;
      // if ((v & 0xfc000003) == 0x48000001) break;
      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
      if Z7_UNLIKELY(
          ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
              & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
    }
    {
      v = Z7_CONV_NATIVE_TO_BE_32(v);
      {
        UInt32 c = BR_PC_GET;
        BR_CONVERT_VAL(v, c)
      }
      v &= 0x03ffffff;
      v |= 0x48000000;
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
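
/* Annotation (not in the original source):
   SPARC converter. CALL is op = 01 in the top two bits followed by a
   30-bit word displacement. Only calls whose upper displacement bits
   are all zeros or all ones (|disp| < 1 << 22 words) are converted;
   that is what both variants of the (flag)-based range test encode. */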
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
  #define BR_SPARC_USE_ROTATE
#endif
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << 22;
  size &= ~(SizeT)3;
  lim = p + size;
  BR_PC_INIT
  pc -= 4;  // because (p) will point to next instruction

  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      /* // the code without GetBe32a():
      { const UInt32 v = GetUi16a(p) & 0xc0ff;  p += 4;  if (v == 0x40 || v == 0xc07f) break; }
      */
      v = GetBe32a(p);
      p += 4;
#ifdef BR_SPARC_USE_ROTATE
      v = rotlFixed(v, 2);
      v += (flag << 2) - 1;
      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
#else
      v += (UInt32)5 << 29;
      v ^= (UInt32)7 << 29;
      v += flag;
      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
#endif
        break;
    }
    {
      // UInt32 v = GetBe32a(p - 4);
#ifndef BR_SPARC_USE_ROTATE
      v <<= 2;
#endif
      {
        UInt32 c = BR_PC_GET;
        BR_CONVERT_VAL(v, c)
      }
      v &= (flag << 3) - 1;
#ifdef BR_SPARC_USE_ROTATE
      v -= (flag << 2) - 1;
      v = rotrFixed(v, 2);
#else
      v -= (flag << 2);
      v >>= 2;
      v |= (UInt32)1 << 30;
#endif
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
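
/* Annotation (not in the original source):
   ARM Thumb converter. The Thumb BL encoding is a pair of 16-bit
   halfwords, 0xF000 | imm11_hi followed by 0xF800 | imm11_lo, giving a
   halfword offset ((hi << 11) | lo) << 1 relative to pc + 4; the byte
   scan below matches the 0xF0/0xF8 marker bits in p[1] and p[3]. */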
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
  // Byte *p = data;
  Byte *lim;
  size &= ~(SizeT)1;
  // if (size == 0) return p;
  if (size <= 2) return p;
  size -= 2;
  lim = p + size;
  BR_PC_INIT
  /* in ARM (Thumb): the branch offset is relative to the address two
     instructions ahead of the current instruction; here (p) will point
     two instructions past the current instruction */
  // pc += 4 - 4;
  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;

  // #define ARMT_TAIL_PROC { goto armt_tail; }
  #define ARMT_TAIL_PROC { return p; }

  do
  {
    /* in MSVC 32-bit x86 compilers:
       UInt32 version : it loads value from memory with movzx
       Byte   version : it loads value to 8-bit register (AL/CL)
       movzx version is slightly faster in some cpus
    */
    unsigned b1;
    // Byte / unsigned
    b1 = p[1];
    // optimized version to reduce one (p >= lim) check:
    // unsigned a1 = p[1];  b1 = p[3];  p += 2;  if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
    for (;;)
    {
      unsigned b3;  // Byte / UInt32
      /* (Byte)(b3) normalization can use low-byte computations in MSVC.
         It gives smaller code, with no loss of speed in some compilers/cpus.
         But newer MSVC 32-bit x86 compilers use a slower load from memory
         to a low-byte register in that case.
         So we try to use full 32-bit computations for faster code.
      */
      // if (p >= lim) { ARMT_TAIL_PROC }  b3 = b1 + 8;  b1 = p[3];  p += 2;  if ((b3 & b1) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b3 = p[3];  p += 2;  if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC }  b1 = p[3];  p += 2;  if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
    }
    {
      /* we can adjust pc for (0xf800) to get rid of the (& 0x7FF) operation.
         But gcc/clang for arm64 can use the bfi instruction for the full code here */
      UInt32 v =
          ((UInt32)GetUi16a(p - 2) << 11) |
          ((UInt32)GetUi16a(p) & 0x7FF);
      /*
      UInt32 v =
            ((UInt32)p[1 - 2] << 19)
          + (((UInt32)p[1] & 0x7) << 8)
          + (((UInt32)p[-2] << 11))
          + (p[0]);
      */
      p += 2;
      {
        UInt32 c = BR_PC_GET >> 1;
        BR_CONVERT_VAL(v, c)
      }
      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
      SetUi16a(p - 2, (UInt16)(v | 0xf800))
      /*
      p[-4] = (Byte)(v >> 11);
      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
      p[-2] = (Byte)v;
      p[-1] = (Byte)(0xf8 | (v >> 8));
      */
    }
  }
  while (p < lim);
  return p;
  // armt_tail:
  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; }  return lim;
  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
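
/* Annotation (not in the original source):
   IA-64 (Itanium) converter. Code comes in 16-byte bundles: a 5-bit
   template in the first byte plus three 41-bit slots. The constant
   0x334b0000, shifted right by the even-masked template (*p & 0x1e),
   acts as a packed 2-bit table marking which bundles carry convertible
   branch slots; (m) selects the first such slot and the 5-byte steps
   below walk the rest. pc is pre-scaled because IP-relative branch
   targets on IA-64 are counted in bundles, not bytes (assumption for
   the exact scaling). */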
// #define BR_IA64_NO_INLINE

Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)15;
  lim = p + size;
  pc -= 1 << 4;
  pc >>= 4 - 1;
  // pc -= 1 << 1;

  for (;;)
  {
    unsigned m;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
      p += 16;
      pc += 1 << 1;
      if (m &= 3)
        break;
    }
    {
      p += (ptrdiff_t)m * 5 - 20;  // negative value is expected here.
      do
      {
        const UInt32 t =
#if defined(MY_CPU_X86_OR_AMD64)
            // we use 32-bit load here to reduce code size on x86:
            GetUi32(p);
#else
            GetUi16(p);
#endif
        UInt32 z = GetUi32(p + 1) >> m;
        p += 5;
        if (((t >> m) & (0x70 << 1)) == 0
            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
        {
          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z;
          z ^= v;
#ifdef BR_IA64_NO_INLINE
          v |= (v & ((UInt32)1 << (23 + 1))) >> 3;
          {
            UInt32 c = pc;
            BR_CONVERT_VAL(v, c)
          }
          v &= (0x1fffff << 1) | 1;
#else
          {
            if (encoding)
            {
              // pc &= ~(0xc00000 << 1);  // we just need to clear at least 2 bits
              pc &= (0x1fffff << 1) | 1;
              v += pc;
            }
            else
            {
              // pc |= 0xc00000 << 1;  // we need to set at least 2 bits
              pc |= ~(UInt32)((0x1fffff << 1) | 1);
              v -= pc;
            }
          }
          v &= ~(UInt32)(0x600000 << 1);
#endif
          v += (0x700000 << 1);
          v &= (0x8fffff << 1) | 1;
          z |= v;
          z <<= m;
          SetUi32(p + 1 - 5, z)
        }
        m++;
      }
      while (m &= 3);  // while (m < 4);
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
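
/* Annotation (not in the original source):
   RISC-V converter (little-endian, 2-byte instruction alignment).
   Two patterns are rewritten: JAL (opcode 0x6f, including the x1 link
   form 0xef) with its shuffled 20-bit J-immediate, and AUIPC (opcode
   0x17/0x97) paired with the 12-bit immediate of the following
   instruction, so a full 32-bit pc-relative address is converted at
   once. */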
#define BR_CONVERT_VAL_ENC(v)  v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v)  v -= BR_PC_GET;

#if 1 && defined(MY_CPU_LE_UNALIGN)
  #define RISCV_USE_UNALIGNED_LOAD
#endif

#ifdef RISCV_USE_UNALIGNED_LOAD
  #define RISCV_GET_UI32(p)     GetUi32(p)
  #define RISCV_SET_UI32(p, v)  { SetUi32(p, v) }
#else
  #define RISCV_GET_UI32(p) \
    ((UInt32)GetUi16a(p) + \
    ((UInt32)GetUi16a((p) + 2) << 16))
  #define RISCV_SET_UI32(p, v) { \
    SetUi16a(p, (UInt16)(v)) \
    SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif

#if 1 && defined(MY_CPU_LE)
  #define RISCV_USE_16BIT_LOAD
#endif

#ifdef RISCV_USE_16BIT_LOAD
  #define RISCV_LOAD_VAL(p)  GetUi16a(p)
#else
  #define RISCV_LOAD_VAL(p)  (*(p))
#endif

#define RISCV_INSTR_SIZE  2
#define RISCV_STEP_1      (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2      4
#define RISCV_REG_VAL     (2 << 7)
#define RISCV_CMD_VAL     3

#if 1
  // for code size optimization:
  #define RISCV_DELTA_7F  0x7f
#else
  #define RISCV_DELTA_7F  0
#endif

#define RISCV_CHECK_1(v, b) \
  (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)

#if 1
#define RISCV_CHECK_2(v, r) \
  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
      << 18) \
      < ((r) & 0x1d))
#else
// this branch gives larger code, because
// compilers generate larger code for big constants.
#define RISCV_CHECK_2(v, r) \
  ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
      & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
      < ((r) & 0x1d))
#endif
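
/* Annotation (assumption; inferred from the expressions above, not
   documented in the source): RISCV_CHECK_1(v, b) appears to test that
   the instruction (b) following an AUIPC is a 32-bit instruction (low
   bits 11) whose rs1 field (bits 19..15) matches the AUIPC rd field,
   which (v << 8) moves into the same bit positions. RISCV_CHECK_2
   appears to recognize the reserved AUIPC form (rd = x2 with particular
   immediate bits) that the encoder emits as a marker for converted
   pairs, so such patterns can be swapped reversibly in both
   directions. */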
#define RISCV_SCAN_LOOP \
  Byte *lim; \
  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
  if (size <= 6) return p; \
  size -= 6; \
  lim = p + size; \
  BR_PC_INIT \
  for (;;) \
  { \
    UInt32 a, v; \
    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
    for (;;) \
    { \
      if Z7_UNLIKELY(p >= lim) { return p; } \
      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
      if ((a & 0x77) == 0) break; \
      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
      p += RISCV_INSTR_SIZE * 2; \
      if ((a & 0x77) == 0) \
      { \
        p -= RISCV_INSTR_SIZE; \
        if Z7_UNLIKELY(p >= lim) { return p; } \
        break; \
      } \
    }

// (xx6f ^ 10) + 1 = xx7f + 1 = xx80        : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100  : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08        : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88        : AUIPC
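
/* Annotation (not in the original source): after the (^ 0x10) + 1 step
   shown above, exactly the four opcode bytes of interest clear every
   bit in the 0x77 mask, so one AND filters the instruction stream;
   bit 3 of the transformed value then separates JAL (0) from AUIPC (1),
   which is what the (v & 8) / (a & 8) tests below rely on. */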
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
  v = a;
  a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
  v += (UInt32)p[1] << 8;
#endif

  if ((v & 8) == 0)  // JAL
  {
    if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      v = ((a & 1u << 31) >> 11)
        | ((a & 0x3ff << 21) >> 20)
        | ((a & 1 << 20) >> 9)
        | (a & 0xff << 12);
      BR_CONVERT_VAL_ENC(v)
      // ((v & 1) == 0)
      // v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
      a &= 0xfff;
      a |= ((UInt32)(v << 23))
        |  ((UInt32)(v <<  7) & ((UInt32)0xff << 16))
        |  ((UInt32)(v >>  5) & ((UInt32)0xf0 <<  8));
      RISCV_SET_UI32(p, a)
#else // aligned
#if 0
      SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
      p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif

#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      v <<= 15;
      v = Z7_BSWAP32(v);
      SetUi16a(p + 2, (UInt16)v)
#else
      p[2] = (Byte)(v >> 9);
      p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    if (v & 0xe80)  // (not x0) and (not x2)
    {
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (RISCV_CHECK_1(v, b))
      {
        {
          const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
          RISCV_SET_UI32(p, temp)
        }
        a &= 0xfffff000;
        {
#if 1
          const int t = -1 >> 1;
          if (t != -1)
            a += (b >> 20) - ((b >> 19) & 0x1000);  // arithmetic right shift emulation
          else
#endif
            a += (UInt32)((Int32)b >> 20);  // arithmetic right shift (sign-extension).
        }
        BR_CONVERT_VAL_ENC(a)
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        a = Z7_BSWAP32(a);
        RISCV_SET_UI32(p + 4, a)
#else
        SetBe32(p + 4, a)
#endif
        p += 8;
      }
      else
        p += RISCV_STEP_1;
    }
    else
    {
      UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        v = RISCV_GET_UI32(p + 4);
        r = (r << 7) + 0x17 + (v & 0xfffff000);
        a = (a >> 12) | (v << 20);
        RISCV_SET_UI32(p, r)
        RISCV_SET_UI32(p + 4, a)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
  }
  } // for
}
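
/* Annotation (not in the original source): the decoder below mirrors
   the encoder step for step: it re-derives the shuffled JAL immediate,
   applies BR_CONVERT_VAL_DEC instead of BR_CONVERT_VAL_ENC, and swaps
   converted AUIPC pairs back to their original instruction order. */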
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
  if ((a & 8) == 0)
  {
#else
  v = a;
  a += (UInt32)p[1] << 8;
  if ((v & 8) == 0)
  {
#endif
    // JAL
    a -= 0x100 - RISCV_DELTA_7F;
    if (a & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
      a = GetUi32(p);
      v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
        | (UInt32)(a >>  7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      v = GetUi16a(p + 2);
      v = Z7_BSWAP32(v) >> 15
#else
      v = (UInt32)p[3] << 1
        | (UInt32)p[2] << 9
#endif
        | (UInt32)((a & 0xf000) << 5);
      BR_CONVERT_VAL_DEC(v)
      a = a_old
        | (v << 11 & 1u << 31)
        | (v << 20 & 0x3ff << 21)
        | (v <<  9 & 1 << 20)
        | (v & 0xff << 12);
      RISCV_SET_UI32(p, a)
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
    a = GetUi32(p);
#else
    a |= (UInt32)GetUi16a(p + 2) << 16;
#endif
    if ((v & 0xe80) == 0)  // x0/x2
    {
      const UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        b = RISCV_GET_UI32(p + 4);
        b = Z7_BSWAP32(b);
#else
        b = GetBe32(p + 4);
#endif
        v = a >> 12;
        BR_CONVERT_VAL_DEC(b)
        a = (r << 7) + 0x17;
        a += (b + 0x800) & 0xfffff000;
        v |= b << 20;
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
    else
    {
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (!RISCV_CHECK_1(v, b))
        p += RISCV_STEP_1;
      else
      {
        v = (a & 0xfffff000) | (b >> 20);
        a = (b << 12) | (0x17 + RISCV_REG_VAL);
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
    }
  }
  } // for
}