// // gravity_optimizer.c // gravity // // Created by Marco Bambini on 24/09/14. // Copyright (c) 2014 CreoLabs. All rights reserved. // // Some optimizations taken from: http://www.compileroptimizations.com/ #include "gravity_hash.h" #include "gravity_optimizer.h" #include "gravity_opcodes.h" #include "gravity_ircode.h" #include "gravity_utils.h" #include "gravity_value.h" #define IS_MOVE(inst) ((inst) && (inst->op == MOVE)) #define IS_RET(inst) ((inst) && (inst->op == RET)) #define IS_NEG(inst) ((inst) && (inst->op == NEG)) #define IS_NUM(inst) ((inst) && (inst->op == LOADI)) #define IS_MATH(inst) ((inst) && (inst->op >= ADD) && (inst->op <= REM)) #define IS_SKIP(inst) (inst->tag == SKIP_TAG) #define IS_LABEL(inst) (inst->tag == LABEL_TAG) #define IS_NOTNULL(inst) (inst) #define IS_PRAGMA_MOVE_OPT(inst) ((inst) && (inst->tag == PRAGMA_MOVE_OPTIMIZATION)) // http://www.mathsisfun.com/binary-decimal-hexadecimal-converter.html #define OPCODE_SET(op,code) op = (code & 0x3F) << 26 #define OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (b & 0xFF) << 10; op += (c & 0x3FF) #define OPCODE_SET_FOUR8bit(op,a,b,c,d) op = (a & 0xFF) << 24; op += (b & 0xFF) << 16; op += (c & 0xFF) << 8; op += (d & 0xFF) #define OPCODE_SET_ONE8bit_SIGN_ONE17bit(op,code,a,s,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (s & 0x01) << 17; op += (n & 0x1FFFF) #define OPCODE_SET_SIGN_ONE25bit(op,code,s,a) op = (code & 0x3F) << 26; op += (s & 0x01) << 25; op += (a & 0x1FFFFFF) #define OPCODE_SET_ONE8bit_ONE18bit(op,code,a,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (n & 0x3FFFF) #define OPCODE_SET_ONE26bit(op,code,a) op = (code & 0x3F) << 26; op += (a & 0x3FFFFFF) #define OPCODE_SET_THREE8bit(op,code,a,b,c) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c) #define OPCODE_SET_ONE8bit_ONE10bit(op,code,a,b) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,b) #define OPCODE_SET_ONE8bit(op,code,a) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,0) #define OPCODE_SET_THREE8bit_ONE2bit(op,code,a,b,c,f) op =(code & 0x3F)<<26; op+=(a & 0xFF)<<18; op+=(b & 0xFF)<<10; op+=(c & 0xFF)<<2; op+=(f & 0x03) // MARK: - static bool hash_isequal (gravity_value_t v1, gravity_value_t v2) { return (v1.n == v2.n); } static uint32_t hash_compute (gravity_value_t v) { return gravity_hash_compute_int(v.n); } static void finalize_function (gravity_function_t *f, bool add_debug) { ircode_t *code = (ircode_t *)f->bytecode; uint32_t ninst = 0, count = ircode_count(code); uint32_t notpure = 0; uint32_t *bytecode = NULL; uint32_t *lineno = NULL; gravity_hash_t *labels = gravity_hash_create(0, hash_compute, hash_isequal, NULL, NULL); // determine how big bytecode buffer must be // and collect all LABEL instructions for (uint32_t i=0; ip1 into hash table labels with value ninst (next instruction) gravity_hash_insert(labels, VALUE_FROM_INT(inst->p1), VALUE_FROM_INT(ninst)); continue; } ++ninst; } // +1 is just a trick so the VM switch loop terminates with an implicit RET0 instruction (RET0 has opcode 0) f->ninsts = ninst; bytecode = (uint32_t *)mem_alloc(NULL, (ninst+1) * sizeof(uint32_t)); if (add_debug) lineno = (uint32_t *)mem_alloc(NULL, (ninst+1) * sizeof(uint32_t)); assert(bytecode); uint32_t j=0; for (uint32_t i=0; iop) { case HALT: case RET0: case NOP: OPCODE_SET(op, inst->op); break; case LOAD: case STORE: ++notpure; // not sure here case LOADS: case LOADAT: case STOREAT: case EQQ: case NEQQ: case ISA: case MATCH: case LSHIFT: case RSHIFT: case BOR: case BAND: case BNOT: case BXOR: case ADD: case SUB: case DIV: case MUL: case REM: case AND: case OR: case LT: case GT: case EQ: case LEQ: case GEQ: case NEQ: case NEG: case NOT: OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3); break; case LOADI: OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, (inst->n < 0) ? 1 : 0, inst->n); break; case JUMPF: { gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p2)); assert(v); // key MUST exists! uint32_t njump = (uint32_t)v->n; uint32_t bflag = inst->p3; OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, bflag, njump); //OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, njump); break; } case RET: OPCODE_SET_ONE8bit(op, inst->op, inst->p1); break; case JUMP: { gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p1)); assert(v); // key MUST exists! uint32_t njump = (uint32_t)v->n; OPCODE_SET_ONE26bit(op, inst->op, njump); break; } case LOADG: case STOREG: ++notpure; case MOVE: case LOADK: OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2); break; case CALL: OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3); break; case SETLIST: OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3); break; case LOADU: case STOREU: ++notpure; OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2); break; case RANGENEW: { uint8_t flag = (inst->tag == RANGE_INCLUDE_TAG) ? 0 : 1; OPCODE_SET_THREE8bit_ONE2bit(op, inst->op, inst->p1, inst->p2, inst->p3, flag); break; } case MAPNEW: case LISTNEW: OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2); break; case SWITCH: assert(0); break; case CLOSURE: case CLOSE: OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2); break; case CHECK: OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2); break; case RESERVED2: case RESERVED3: case RESERVED4: case RESERVED5: case RESERVED6: assert(0); break; } // add debug information if (add_debug) lineno[j] = inst->lineno; // store encoded instruction bytecode[j++] = op; } ircode_free(code); gravity_hash_free(labels); f->bytecode = bytecode; f->lineno = lineno; f->purity = (notpure == 0) ? 1.0f : ((float)(notpure * 100) / (float)ninst) / 100.0f; } // MARK: - inline static bool pop1_instruction (ircode_t *code, uint32_t index, inst_t **inst1) { *inst1 = NULL; for (int32_t i=index-1; i>=0; --i) { inst_t *inst = ircode_get(code, i); if ((inst != NULL) && (inst->tag != SKIP_TAG)) { *inst1 = inst; return true; } } return false; } inline static bool pop2_instructions (ircode_t *code, uint32_t index, inst_t **inst1, inst_t **inst2) { *inst1 = NULL; *inst2 = NULL; for (int32_t i=index-1; i>=0; --i) { inst_t *inst = ircode_get(code, i); if ((inst != NULL) && (inst->tag != SKIP_TAG)) { if (*inst1 == NULL) *inst1 = inst; else if (*inst2 == NULL) { *inst2 = inst; return true; } } } return false; } inline static inst_t *current_instruction (ircode_t *code, uint32_t i) { while (1) { inst_t *inst = ircode_get(code, i); if (inst == NULL) return NULL; if (inst->tag != SKIP_TAG) return inst; ++i; } return NULL; } // MARK: - static bool optimize_const_instruction (inst_t *inst, inst_t *inst1, inst_t *inst2) { if (!inst2) return false; // select type algorithm: // two numeric types are supported here, int64 or double // if types are equals then set the first one // if types are not equals then set to double optag_t type; double d = 0.0, d1 = 0.0, d2 = 0.0; int64_t n = 0, n1 = 0, n2 = 0; // compute types if (inst1->tag == inst2->tag) type = inst1->tag; else type = DOUBLE_TAG; // check registers // code like: // var i = 13; // return 20 + i*100; // produces the following bytecode // 00000 LOADI 2 13 // 00001 MOVE 1 2 // 00002 LOADI 2 20 // 00003 LOADI 3 100 // 00004 MUL 3 1 3 // 00005 ADD 2 2 3 // inst points to a MATH instruction but registers are not the same as the LOADI instructions // so no optimizations must be performed if (!(inst->p2 == inst1->p1 && inst->p3 == inst2->p2)) return false; // compute operands if (type == DOUBLE_TAG) { d1 = (inst1->tag == INT_TAG) ? (double)inst1->n : inst1->d; d2 = (inst2->tag == INT_TAG) ? (double)inst2->n : inst2->d; } else { n1 = (inst1->tag == INT_TAG) ? inst1->n : (int64_t)inst1->d; n2 = (inst2->tag == INT_TAG) ? inst2->n : (int64_t)inst2->d; } // perform operation switch (inst->op) { case ADD: if (type == DOUBLE_TAG) d = d1 + d2; else n = n1 + n2; break; case SUB: if (type == DOUBLE_TAG) d = d1 - d2; else n = n1 - n2; break; case MUL: if (type == DOUBLE_TAG) d = d1 * d2; else n = n1 * n2; break; case DIV: // don't optimize in case of division by 0 if ((int64_t)d2 == 0) return false; if (type == DOUBLE_TAG) d = d1 / d2; else n = n1 / n2; break; case REM: if ((int64_t)d2 == 0) return false; if (type == DOUBLE_TAG) d = (double)((int64_t)d1 % (int64_t)d2); else n = n1 % n2; break; default: assert(0); } // adjust IRCODE inst_setskip(inst1); inst_setskip(inst2); // convert an ADD instruction to a LOADI instruction // ADD A B C => R(A) = R(B) + R(C) // LOADI A B => R(A) = N inst->op = LOADI; inst->tag = type; inst->p2 = inst->p3 = 0; if (type == DOUBLE_TAG) inst->d = d; else inst->n = n; return true; } static bool optimize_neg_instruction (ircode_t *code, inst_t *inst, uint32_t i) { inst_t *inst1 = NULL; pop1_instruction(code, i, &inst1); if ((inst1 == NULL) || (inst1->op != LOADI) || (inst1->p1 != inst->p2)) return false; if (!ircode_register_istemp(code, inst1->p1)) return false; if (inst1->tag == INT_TAG) { int64_t n = inst1->n; if (n > 131072) return false; inst1->p1 = inst->p2; inst1->n = -(int64_t)n; } else if (inst1->tag == DOUBLE_TAG) { double d = inst1->d; inst1->p1 = inst->p2; inst1->d = -d; } else { return false; } inst_setskip(inst); return true; } static bool optimize_math_instruction (ircode_t *code, inst_t *inst, uint32_t i) { uint8_t count = opcode_numop(inst->op) - 1; inst_t *inst1 = NULL, *inst2 = NULL; bool flag = false; if (count == 2) { pop2_instructions(code, i, &inst2, &inst1); if (IS_NUM(inst1) && IS_NUM(inst2)) flag = optimize_const_instruction(inst, inst1, inst2); // process inst2 if (IS_MOVE(inst2)) { bool b1 = ircode_register_istemp(code, inst->p3); bool b2 = ((inst2) && ircode_register_istemp(code, inst2->p1)); if ((b1) && (b2) && (inst->p3 == inst2->p1)) { inst->p3 = inst2->p2; inst_setskip(inst2); flag = true; } } // process inst1 if (IS_MOVE(inst1)) { bool b1 = ircode_register_istemp(code, inst->p2); bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1)); if ((b1) && (b2) && (inst->p2 == inst1->p1)) { inst->p2 = inst1->p2; inst_setskip(inst1); flag = true; } } } else { pop1_instruction(code, i, &inst1); if (IS_NUM(inst1)) flag = optimize_const_instruction(inst, inst1, NULL); } return flag; } static bool optimize_move_instruction (ircode_t *code, inst_t *inst, uint32_t i) { inst_t *inst1 = NULL; pop1_instruction(code, i, &inst1); if (inst1 == NULL) return false; if ((inst1->op != LOADI) && (inst1->op != LOADG) && (inst1->op != LOADK)) return false; bool b1 = ircode_register_istemp(code, inst->p2); bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1)); if ((b1) && (b2) && (inst->p2 == inst1->p1)) { inst1->p1 = inst->p1; inst_setskip(inst); return true; } return false; } static bool optimize_return_instruction (ircode_t *code, inst_t *inst, uint32_t i) { inst_t *inst1 = NULL; pop1_instruction(code, i, &inst1); if (!ircode_register_istemp(code, inst->p1)) return false; if ((IS_MOVE(inst1)) && (inst->p1 == inst1->p1)) { inst->p1 = inst1->p2; inst_setskip(inst1); return true; } return false; } static bool optimize_num_instruction (inst_t *inst, gravity_function_t *f) { // double values always added to constant pool bool add_cpool = (inst->tag == DOUBLE_TAG); // LOADI is a 32bit instruction // 32 - 6 (OPCODE) - 8 (register) - 1 bit sign = 17 // range is from MAX_INLINE_INT-1 to MAX_INLINE_INT // so max/min values are in the range -(2^17)-1/+2^17 // 2^17 = 131072 (MAX_INLINE_INT) if (inst->tag == INT_TAG) { int64_t n = inst->n; add_cpool = ((n < -MAX_INLINE_INT + 1) || (n > MAX_INLINE_INT)); } if (add_cpool) { uint16_t index = 0; if (inst->tag == INT_TAG) { int64_t n = inst->n; index = gravity_function_cpool_add(NULL, f, VALUE_FROM_INT(n)); } else { // always add floating point values as double in constant pool (then VM will be configured to interpret it as float or double) index = gravity_function_cpool_add(NULL, f, VALUE_FROM_FLOAT(inst->d)); } // replace LOADI with a LOADK instruction inst->op = LOADK; inst->p2 = index; inst->tag = NO_TAG; } return true; } // MARK: - gravity_function_t *gravity_optimizer(gravity_function_t *f, bool add_debug) { if (f->bytecode == NULL) return f; ircode_t *code = (ircode_t *)f->bytecode; uint32_t count = ircode_count(code); bool optimizer = true; f->ntemps = ircode_ntemps(code); loop_neg: for (uint32_t i=0; ip1; if (optimizer && IS_MOVE(inst)) { bool b = optimize_move_instruction (code, inst, i); if (b) goto loop_move; } } loop_ret: for (uint32_t i=0; i