gravity_optimizer.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. //
  2. // gravity_optimizer.c
  3. // gravity
  4. //
  5. // Created by Marco Bambini on 24/09/14.
  6. // Copyright (c) 2014 CreoLabs. All rights reserved.
  7. //
  8. // Some optimizations taken from: http://www.compileroptimizations.com/
  9. #include "gravity_hash.h"
  10. #include "gravity_optimizer.h"
  11. #include "gravity_opcodes.h"
  12. #include "gravity_ircode.h"
  13. #include "gravity_utils.h"
  14. #include "gravity_value.h"
  15. #define IS_MOVE(inst) ((inst) && (inst->op == MOVE))
  16. #define IS_RET(inst) ((inst) && (inst->op == RET))
  17. #define IS_NEG(inst) ((inst) && (inst->op == NEG))
  18. #define IS_NUM(inst) ((inst) && (inst->op == LOADI))
  19. #define IS_MATH(inst) ((inst) && (inst->op >= ADD) && (inst->op <= REM))
  20. #define IS_SKIP(inst) (inst->tag == SKIP_TAG)
  21. #define IS_LABEL(inst) (inst->tag == LABEL_TAG)
  22. #define IS_NOTNULL(inst) (inst)
  23. #define IS_PRAGMA_MOVE_OPT(inst) ((inst) && (inst->tag == PRAGMA_MOVE_OPTIMIZATION))
  24. // http://www.mathsisfun.com/binary-decimal-hexadecimal-converter.html
  25. #define OPCODE_SET(op,code) op = (code & 0x3F) << 26
  26. #define OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (b & 0xFF) << 10; op += (c & 0x3FF)
  27. #define OPCODE_SET_FOUR8bit(op,a,b,c,d) op = (a & 0xFF) << 24; op += (b & 0xFF) << 16; op += (c & 0xFF) << 8; op += (d & 0xFF)
  28. #define OPCODE_SET_ONE8bit_SIGN_ONE17bit(op,code,a,s,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (s & 0x01) << 17; op += (n & 0x1FFFF)
  29. #define OPCODE_SET_SIGN_ONE25bit(op,code,s,a) op = (code & 0x3F) << 26; op += (s & 0x01) << 25; op += (a & 0x1FFFFFF)
  30. #define OPCODE_SET_ONE8bit_ONE18bit(op,code,a,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (n & 0x3FFFF)
  31. #define OPCODE_SET_ONE26bit(op,code,a) op = (code & 0x3F) << 26; op += (a & 0x3FFFFFF)
  32. #define OPCODE_SET_THREE8bit(op,code,a,b,c) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c)
  33. #define OPCODE_SET_ONE8bit_ONE10bit(op,code,a,b) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,b)
  34. #define OPCODE_SET_ONE8bit(op,code,a) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,0)
  35. #define OPCODE_SET_THREE8bit_ONE2bit(op,code,a,b,c,f) op =(code & 0x3F)<<26; op+=(a & 0xFF)<<18; op+=(b & 0xFF)<<10; op+=(c & 0xFF)<<2; op+=(f & 0x03)
  36. // MARK: -
  37. static bool hash_isequal (gravity_value_t v1, gravity_value_t v2) {
  38. return (v1.n == v2.n);
  39. }
  40. static uint32_t hash_compute (gravity_value_t v) {
  41. return gravity_hash_compute_int(v.n);
  42. }
  43. static void finalize_function (gravity_function_t *f, bool add_debug) {
  44. ircode_t *code = (ircode_t *)f->bytecode;
  45. uint32_t ninst = 0, count = ircode_count(code);
  46. uint32_t notpure = 0;
  47. uint32_t *bytecode = NULL;
  48. uint32_t *lineno = NULL;
  49. gravity_hash_t *labels = gravity_hash_create(0, hash_compute, hash_isequal, NULL, NULL);
  50. // determine how big bytecode buffer must be
  51. // and collect all LABEL instructions
  52. for (uint32_t i=0; i<count; ++i) {
  53. inst_t *inst = ircode_get(code, i);
  54. if (IS_SKIP(inst)) continue;
  55. if (IS_PRAGMA_MOVE_OPT(inst)) continue;
  56. if (IS_LABEL(inst)) {
  57. // insert key inst->p1 into hash table labels with value ninst (next instruction)
  58. gravity_hash_insert(labels, VALUE_FROM_INT(inst->p1), VALUE_FROM_INT(ninst));
  59. continue;
  60. }
  61. ++ninst;
  62. }
  63. // +1 is just a trick so the VM switch loop terminates with an implicit RET0 instruction (RET0 has opcode 0)
  64. f->ninsts = ninst;
  65. bytecode = (uint32_t *)mem_alloc(NULL, (ninst+1) * sizeof(uint32_t));
  66. if (add_debug) lineno = (uint32_t *)mem_alloc(NULL, (ninst+1) * sizeof(uint32_t));
  67. assert(bytecode);
  68. uint32_t j=0;
  69. for (uint32_t i=0; i<count; ++i) {
  70. inst_t *inst = ircode_get(code, i);
  71. if (IS_SKIP(inst)) continue;
  72. if (IS_LABEL(inst)) continue;
  73. if (IS_PRAGMA_MOVE_OPT(inst)) continue;
  74. uint32_t op = 0x0;
  75. switch (inst->op) {
  76. case HALT:
  77. case RET0:
  78. case NOP:
  79. OPCODE_SET(op, inst->op);
  80. break;
  81. case LOAD:
  82. case STORE:
  83. ++notpure; // not sure here
  84. case LOADS:
  85. case LOADAT:
  86. case STOREAT:
  87. case EQQ:
  88. case NEQQ:
  89. case ISA:
  90. case MATCH:
  91. case LSHIFT:
  92. case RSHIFT:
  93. case BOR:
  94. case BAND:
  95. case BNOT:
  96. case BXOR:
  97. case ADD:
  98. case SUB:
  99. case DIV:
  100. case MUL:
  101. case REM:
  102. case AND:
  103. case OR:
  104. case LT:
  105. case GT:
  106. case EQ:
  107. case LEQ:
  108. case GEQ:
  109. case NEQ:
  110. case NEG:
  111. case NOT:
  112. OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
  113. break;
  114. case LOADI:
  115. OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, (inst->n < 0) ? 1 : 0, inst->n);
  116. break;
  117. case JUMPF: {
  118. gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p2));
  119. assert(v); // key MUST exists!
  120. uint32_t njump = (uint32_t)v->n;
  121. uint32_t bflag = inst->p3;
  122. OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, bflag, njump);
  123. //OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, njump);
  124. break;
  125. }
  126. case RET:
  127. OPCODE_SET_ONE8bit(op, inst->op, inst->p1);
  128. break;
  129. case JUMP: {
  130. gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p1));
  131. assert(v); // key MUST exists!
  132. uint32_t njump = (uint32_t)v->n;
  133. OPCODE_SET_ONE26bit(op, inst->op, njump);
  134. break;
  135. }
  136. case LOADG:
  137. case STOREG:
  138. ++notpure;
  139. case MOVE:
  140. case LOADK:
  141. OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
  142. break;
  143. case CALL:
  144. OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
  145. break;
  146. case SETLIST:
  147. OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
  148. break;
  149. case LOADU:
  150. case STOREU:
  151. ++notpure;
  152. OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
  153. break;
  154. case RANGENEW: {
  155. uint8_t flag = (inst->tag == RANGE_INCLUDE_TAG) ? 0 : 1;
  156. OPCODE_SET_THREE8bit_ONE2bit(op, inst->op, inst->p1, inst->p2, inst->p3, flag);
  157. break;
  158. }
  159. case MAPNEW:
  160. case LISTNEW:
  161. OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
  162. break;
  163. case SWITCH:
  164. assert(0);
  165. break;
  166. case CLOSURE:
  167. case CLOSE:
  168. OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
  169. break;
  170. case CHECK:
  171. OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
  172. break;
  173. case RESERVED2:
  174. case RESERVED3:
  175. case RESERVED4:
  176. case RESERVED5:
  177. case RESERVED6:
  178. assert(0);
  179. break;
  180. }
  181. // add debug information
  182. if (add_debug) lineno[j] = inst->lineno;
  183. // store encoded instruction
  184. bytecode[j++] = op;
  185. }
  186. ircode_free(code);
  187. gravity_hash_free(labels);
  188. f->bytecode = bytecode;
  189. f->lineno = lineno;
  190. f->purity = (notpure == 0) ? 1.0f : ((float)(notpure * 100) / (float)ninst) / 100.0f;
  191. }
  192. // MARK: -
  193. inline static bool pop1_instruction (ircode_t *code, uint32_t index, inst_t **inst1) {
  194. *inst1 = NULL;
  195. for (int32_t i=index-1; i>=0; --i) {
  196. inst_t *inst = ircode_get(code, i);
  197. if ((inst != NULL) && (inst->tag != SKIP_TAG)) {
  198. *inst1 = inst;
  199. return true;
  200. }
  201. }
  202. return false;
  203. }
  204. inline static bool pop2_instructions (ircode_t *code, uint32_t index, inst_t **inst1, inst_t **inst2) {
  205. *inst1 = NULL;
  206. *inst2 = NULL;
  207. for (int32_t i=index-1; i>=0; --i) {
  208. inst_t *inst = ircode_get(code, i);
  209. if ((inst != NULL) && (inst->tag != SKIP_TAG)) {
  210. if (*inst1 == NULL) *inst1 = inst;
  211. else if (*inst2 == NULL) {
  212. *inst2 = inst;
  213. return true;
  214. }
  215. }
  216. }
  217. return false;
  218. }
  219. inline static inst_t *current_instruction (ircode_t *code, uint32_t i) {
  220. while (1) {
  221. inst_t *inst = ircode_get(code, i);
  222. if (inst == NULL) return NULL;
  223. if (inst->tag != SKIP_TAG) return inst;
  224. ++i;
  225. }
  226. return NULL;
  227. }
  228. // MARK: -
  229. static bool optimize_const_instruction (inst_t *inst, inst_t *inst1, inst_t *inst2) {
  230. if (!inst2) return false;
  231. // select type algorithm:
  232. // two numeric types are supported here, int64 or double
  233. // if types are equals then set the first one
  234. // if types are not equals then set to double
  235. optag_t type;
  236. double d = 0.0, d1 = 0.0, d2 = 0.0;
  237. int64_t n = 0, n1 = 0, n2 = 0;
  238. // compute types
  239. if (inst1->tag == inst2->tag) type = inst1->tag;
  240. else type = DOUBLE_TAG;
  241. // check registers
  242. // code like:
  243. // var i = 13;
  244. // return 20 + i*100;
  245. // produces the following bytecode
  246. // 00000 LOADI 2 13
  247. // 00001 MOVE 1 2
  248. // 00002 LOADI 2 20
  249. // 00003 LOADI 3 100
  250. // 00004 MUL 3 1 3
  251. // 00005 ADD 2 2 3
  252. // inst points to a MATH instruction but registers are not the same as the LOADI instructions
  253. // so no optimizations must be performed
  254. if (!(inst->p2 == inst1->p1 && inst->p3 == inst2->p2)) return false;
  255. // compute operands
  256. if (type == DOUBLE_TAG) {
  257. d1 = (inst1->tag == INT_TAG) ? (double)inst1->n : inst1->d;
  258. d2 = (inst2->tag == INT_TAG) ? (double)inst2->n : inst2->d;
  259. } else {
  260. n1 = (inst1->tag == INT_TAG) ? inst1->n : (int64_t)inst1->d;
  261. n2 = (inst2->tag == INT_TAG) ? inst2->n : (int64_t)inst2->d;
  262. }
  263. // perform operation
  264. switch (inst->op) {
  265. case ADD:
  266. if (type == DOUBLE_TAG) d = d1 + d2;
  267. else n = n1 + n2;
  268. break;
  269. case SUB:
  270. if (type == DOUBLE_TAG) d = d1 - d2;
  271. else n = n1 - n2;
  272. break;
  273. case MUL:
  274. if (type == DOUBLE_TAG) d = d1 * d2;
  275. else n = n1 * n2;
  276. break;
  277. case DIV:
  278. // don't optimize in case of division by 0
  279. if ((int64_t)d2 == 0) return false;
  280. if (type == DOUBLE_TAG) d = d1 / d2;
  281. else n = n1 / n2;
  282. break;
  283. case REM:
  284. if ((int64_t)d2 == 0) return false;
  285. if (type == DOUBLE_TAG) d = (double)((int64_t)d1 % (int64_t)d2);
  286. else n = n1 % n2;
  287. break;
  288. default:
  289. assert(0);
  290. }
  291. // adjust IRCODE
  292. inst_setskip(inst1);
  293. inst_setskip(inst2);
  294. // convert an ADD instruction to a LOADI instruction
  295. // ADD A B C => R(A) = R(B) + R(C)
  296. // LOADI A B => R(A) = N
  297. inst->op = LOADI;
  298. inst->tag = type;
  299. inst->p2 = inst->p3 = 0;
  300. if (type == DOUBLE_TAG) inst->d = d;
  301. else inst->n = n;
  302. return true;
  303. }
  304. static bool optimize_neg_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
  305. inst_t *inst1 = NULL;
  306. pop1_instruction(code, i, &inst1);
  307. if ((inst1 == NULL) || (inst1->op != LOADI) || (inst1->p1 != inst->p2)) return false;
  308. if (!ircode_register_istemp(code, inst1->p1)) return false;
  309. if (inst1->tag == INT_TAG) {
  310. int64_t n = inst1->n;
  311. if (n > 131072) return false;
  312. inst1->p1 = inst->p2;
  313. inst1->n = -(int64_t)n;
  314. } else if (inst1->tag == DOUBLE_TAG) {
  315. double d = inst1->d;
  316. inst1->p1 = inst->p2;
  317. inst1->d = -d;
  318. } else {
  319. return false;
  320. }
  321. inst_setskip(inst);
  322. return true;
  323. }
  324. static bool optimize_math_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
  325. uint8_t count = opcode_numop(inst->op) - 1;
  326. inst_t *inst1 = NULL, *inst2 = NULL;
  327. bool flag = false;
  328. if (count == 2) {
  329. pop2_instructions(code, i, &inst2, &inst1);
  330. if (IS_NUM(inst1) && IS_NUM(inst2)) flag = optimize_const_instruction(inst, inst1, inst2);
  331. // process inst2
  332. if (IS_MOVE(inst2)) {
  333. bool b1 = ircode_register_istemp(code, inst->p3);
  334. bool b2 = ((inst2) && ircode_register_istemp(code, inst2->p1));
  335. if ((b1) && (b2) && (inst->p3 == inst2->p1)) {
  336. inst->p3 = inst2->p2;
  337. inst_setskip(inst2);
  338. flag = true;
  339. }
  340. }
  341. // process inst1
  342. if (IS_MOVE(inst1)) {
  343. bool b1 = ircode_register_istemp(code, inst->p2);
  344. bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1));
  345. if ((b1) && (b2) && (inst->p2 == inst1->p1)) {
  346. inst->p2 = inst1->p2;
  347. inst_setskip(inst1);
  348. flag = true;
  349. }
  350. }
  351. }
  352. else {
  353. pop1_instruction(code, i, &inst1);
  354. if (IS_NUM(inst1)) flag = optimize_const_instruction(inst, inst1, NULL);
  355. }
  356. return flag;
  357. }
  358. static bool optimize_move_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
  359. inst_t *inst1 = NULL;
  360. pop1_instruction(code, i, &inst1);
  361. if (inst1 == NULL) return false;
  362. if ((inst1->op != LOADI) && (inst1->op != LOADG) && (inst1->op != LOADK)) return false;
  363. bool b1 = ircode_register_istemp(code, inst->p2);
  364. bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1));
  365. if ((b1) && (b2) && (inst->p2 == inst1->p1)) {
  366. inst1->p1 = inst->p1;
  367. inst_setskip(inst);
  368. return true;
  369. }
  370. return false;
  371. }
  372. static bool optimize_return_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
  373. inst_t *inst1 = NULL;
  374. pop1_instruction(code, i, &inst1);
  375. if (!ircode_register_istemp(code, inst->p1)) return false;
  376. if ((IS_MOVE(inst1)) && (inst->p1 == inst1->p1)) {
  377. inst->p1 = inst1->p2;
  378. inst_setskip(inst1);
  379. return true;
  380. }
  381. return false;
  382. }
  383. static bool optimize_num_instruction (inst_t *inst, gravity_function_t *f) {
  384. // double values always added to constant pool
  385. bool add_cpool = (inst->tag == DOUBLE_TAG);
  386. // LOADI is a 32bit instruction
  387. // 32 - 6 (OPCODE) - 8 (register) - 1 bit sign = 17
  388. // range is from MAX_INLINE_INT-1 to MAX_INLINE_INT
  389. // so max/min values are in the range -(2^17)-1/+2^17
  390. // 2^17 = 131072 (MAX_INLINE_INT)
  391. if (inst->tag == INT_TAG) {
  392. int64_t n = inst->n;
  393. add_cpool = ((n < -MAX_INLINE_INT + 1) || (n > MAX_INLINE_INT));
  394. }
  395. if (add_cpool) {
  396. uint16_t index = 0;
  397. if (inst->tag == INT_TAG) {
  398. int64_t n = inst->n;
  399. index = gravity_function_cpool_add(NULL, f, VALUE_FROM_INT(n));
  400. } else {
  401. // always add floating point values as double in constant pool (then VM will be configured to interpret it as float or double)
  402. index = gravity_function_cpool_add(NULL, f, VALUE_FROM_FLOAT(inst->d));
  403. }
  404. // replace LOADI with a LOADK instruction
  405. inst->op = LOADK;
  406. inst->p2 = index;
  407. inst->tag = NO_TAG;
  408. }
  409. return true;
  410. }
  411. // MARK: -
  412. gravity_function_t *gravity_optimizer(gravity_function_t *f, bool add_debug) {
  413. if (f->bytecode == NULL) return f;
  414. ircode_t *code = (ircode_t *)f->bytecode;
  415. uint32_t count = ircode_count(code);
  416. bool optimizer = true;
  417. f->ntemps = ircode_ntemps(code);
  418. loop_neg:
  419. for (uint32_t i=0; i<count; ++i) {
  420. inst_t *inst = current_instruction(code, i);
  421. if (IS_NEG(inst)) {
  422. bool b = optimize_neg_instruction (code, inst, i);
  423. if (b) goto loop_neg;
  424. }
  425. }
  426. loop_math:
  427. for (uint32_t i=0; i<count; ++i) {
  428. inst_t *inst = current_instruction(code, i);
  429. if (IS_MATH(inst)) {
  430. bool b = optimize_math_instruction (code, inst, i);
  431. if (b) goto loop_math;
  432. }
  433. }
  434. loop_move:
  435. optimizer = true;
  436. for (uint32_t i=0; i<count; ++i) {
  437. inst_t *inst = current_instruction(code, i);
  438. if (IS_PRAGMA_MOVE_OPT(inst)) optimizer = (bool)inst->p1;
  439. if (optimizer && IS_MOVE(inst)) {
  440. bool b = optimize_move_instruction (code, inst, i);
  441. if (b) goto loop_move;
  442. }
  443. }
  444. loop_ret:
  445. for (uint32_t i=0; i<count; ++i) {
  446. inst_t *inst = current_instruction(code, i);
  447. if (IS_RET(inst)) {
  448. bool b = optimize_return_instruction (code, inst, i);
  449. if (b) goto loop_ret;
  450. }
  451. }
  452. for (uint32_t i=0; i<count; ++i) {
  453. inst_t *inst = current_instruction(code, i);
  454. if (IS_NUM(inst)) optimize_num_instruction (inst, f);
  455. }
  456. // dump optimized version
  457. #if GRAVITY_BYTECODE_DEBUG
  458. gravity_function_dump(f, ircode_dump);
  459. #endif
  460. // finalize function
  461. finalize_function(f, add_debug);
  462. return f;
  463. }