#include "gravity_hash.h"
#include "gravity_optimizer.h"
#include "gravity_opcodes.h"
#include "gravity_ircode.h"
#include "gravity_utils.h"
#include "gravity_value.h"
#define IS_MOVE(inst) ((inst) && (inst->op == MOVE))
#define IS_RET(inst) ((inst) && (inst->op == RET))
#define IS_NEG(inst) ((inst) && (inst->op == NEG))
#define IS_NUM(inst) ((inst) && (inst->op == LOADI))
#define IS_MATH(inst) ((inst) && (inst->op >= ADD) && (inst->op <= REM))
#define IS_SKIP(inst) (inst->tag == SKIP_TAG)
#define IS_LABEL(inst) (inst->tag == LABEL_TAG)
#define IS_NOTNULL(inst) (inst)
#define OPCODE_SET(op,code) op = (code & 0x3F) << 26
#define OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (b & 0xFF) << 10; op += (c & 0x3FF)
#define OPCODE_SET_FOUR8bit(op,a,b,c,d) op = (a & 0xFF) << 24; op += (b & 0xFF) << 16; op += (c & 0xFF) << 8; op += (d & 0xFF)
#define OPCODE_SET_ONE8bit_SIGN_ONE17bit(op,code,a,s,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (s & 0x01) << 17; op += (n & 0x1FFFF)
#define OPCODE_SET_SIGN_ONE25bit(op,code,s,a) op = (code & 0x3F) << 26; op += (s & 0x01) << 25; op += (a & 0x1FFFFFF)
#define OPCODE_SET_ONE8bit_ONE18bit(op,code,a,n) op = (code & 0x3F) << 26; op += (a & 0xFF) << 18; op += (n & 0x3FFFF)
#define OPCODE_SET_ONE26bit(op,code,a) op = (code & 0x3F) << 26; op += (a & 0x3FFFFFF)
#define OPCODE_SET_THREE8bit(op,code,a,b,c) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,b,c)
#define OPCODE_SET_ONE8bit_ONE10bit(op,code,a,b) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,b)
#define OPCODE_SET_ONE8bit(op,code,a) OPCODE_SET_TWO8bit_ONE10bit(op,code,a,0,0)
#define OPCODE_SET_THREE8bit_ONE2bit(op,code,a,b,c,f) op =(code & 0x3F)<<26; op+=(a & 0xFF)<<18; op+=(b & 0xFF)<<10; op+=(c & 0xFF)<<2; op+=(f & 0x03)
static bool hash_isequal (gravity_value_t v1, gravity_value_t v2) {
return (v1.n == v2.n);
}
static uint32_t hash_compute (gravity_value_t v) {
return gravity_hash_compute_int(v.n);
}
static void finalize_function (gravity_function_t *f) {
ircode_t *code = (ircode_t *)f->bytecode;
uint32_t ninst = 0, count = ircode_count(code);
uint32_t notpure = 0;
uint32_t *bytecode = NULL;
gravity_hash_t *labels = gravity_hash_create(0, hash_compute, hash_isequal, NULL, NULL);
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = ircode_get(code, i);
if (IS_SKIP(inst)) continue;
if (IS_LABEL(inst)) {
gravity_hash_insert(labels, VALUE_FROM_INT(inst->p1), VALUE_FROM_INT(ninst));
continue;
}
++ninst;
}
f->ninsts = ninst;
bytecode = (uint32_t *)mem_alloc((ninst+1) * sizeof(uint32_t));
assert(bytecode);
uint32_t j=0;
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = ircode_get(code, i);
if (IS_SKIP(inst)) continue;
if (IS_LABEL(inst)) continue;
uint32_t op = 0x0;
switch (inst->op) {
case HALT:
case RET0:
case NOP:
OPCODE_SET(op, inst->op);
break;
case LOAD:
case STORE:
++notpure;
case LOADS:
case LOADAT:
case STOREAT:
case EQQ:
case NEQQ:
case ISA:
case MATCH:
case LSHIFT:
case RSHIFT:
case BOR:
case BAND:
case BNOT:
case BXOR:
case ADD:
case SUB:
case DIV:
case MUL:
case REM:
case AND:
case OR:
case LT:
case GT:
case EQ:
case LEQ:
case GEQ:
case NEQ:
case NEG:
case NOT:
OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
break;
case LOADI:
OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, (inst->n < 0) ? 1 : 0, inst->n);
break;
case JUMPF: {
gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p2));
assert(v);
uint32_t njump = (uint32_t)v->n;
uint32_t bflag = inst->p3;
OPCODE_SET_ONE8bit_SIGN_ONE17bit(op, inst->op, inst->p1, bflag, njump);
break;
}
case RET:
OPCODE_SET_ONE8bit(op, inst->op, inst->p1);
break;
case JUMP: {
gravity_value_t *v = gravity_hash_lookup(labels, VALUE_FROM_INT(inst->p1));
assert(v);
uint32_t njump = (uint32_t)v->n;
OPCODE_SET_ONE26bit(op, inst->op, njump);
break;
}
case LOADG:
case STOREG:
++notpure;
case MOVE:
case LOADK:
OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
break;
case CALL:
OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
break;
case SETLIST:
OPCODE_SET_TWO8bit_ONE10bit(op, inst->op, inst->p1, inst->p2, inst->p3);
break;
case LOADU:
case STOREU:
++notpure;
OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
break;
case RANGENEW: {
uint8_t flag = (inst->tag == RANGE_INCLUDE_TAG) ? 0 : 1;
OPCODE_SET_THREE8bit_ONE2bit(op, inst->op, inst->p1, inst->p2, inst->p3, flag);
break;
}
case MAPNEW:
case LISTNEW:
OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
break;
case SWITCH:
assert(0);
break;
case CLOSURE:
case CLOSE:
OPCODE_SET_ONE8bit_ONE18bit(op, inst->op, inst->p1, inst->p2);
break;
case RESERVED1:
case RESERVED2:
case RESERVED3:
case RESERVED4:
case RESERVED5:
case RESERVED6:
assert(0);
break;
}
bytecode[j++] = op;
}
ircode_free(code);
gravity_hash_free(labels);
f->bytecode = bytecode;
f->purity = (notpure == 0) ? 1.0 : ((float)(notpure * 100) / (float)ninst) / 100.0f;
}
inline static bool pop1_instruction (ircode_t *code, uint32_t index, inst_t **inst1) {
*inst1 = NULL;
for (int32_t i=index-1; i>=0; --i) {
inst_t *inst = ircode_get(code, i);
if ((inst != NULL) && (inst->tag != SKIP_TAG)) {
*inst1 = inst;
return true;
}
}
return false;
}
inline static bool pop2_instructions (ircode_t *code, uint32_t index, inst_t **inst1, inst_t **inst2) {
*inst1 = NULL;
*inst2 = NULL;
for (int32_t i=index-1; i>=0; --i) {
inst_t *inst = ircode_get(code, i);
if ((inst != NULL) && (inst->tag != SKIP_TAG)) {
if (*inst1 == NULL) *inst1 = inst;
else if (*inst2 == NULL) {
*inst2 = inst;
return true;
}
}
}
return false;
}
inline static inst_t *current_instruction (ircode_t *code, uint32_t i) {
while (1) {
inst_t *inst = ircode_get(code, i);
if (inst == NULL) return NULL;
if (inst->tag != SKIP_TAG) return inst;
++i;
}
return NULL;
}
static bool optimize_const_instruction (inst_t *inst, inst_t *inst1, inst_t *inst2) {
optag_t type;
double d = 0.0, d1 = 0.0, d2 = 0.0;
int64_t n = 0, n1 = 0, n2 = 0;
if (inst1->tag == inst2->tag) type = inst1->tag;
else type = DOUBLE_TAG;
if (type == DOUBLE_TAG) {
d1 = (inst1->tag == INT_TAG) ? (double)inst1->n : inst1->d;
d2 = (inst2->tag == INT_TAG) ? (double)inst2->n : inst2->d;
} else {
n1 = (inst1->tag == INT_TAG) ? inst1->n : (int64_t)inst1->d;
n2 = (inst2->tag == INT_TAG) ? inst2->n : (int64_t)inst2->d;
}
switch (inst->op) {
case ADD:
if (type == DOUBLE_TAG) d = d1 + d2;
else n = n1 + n2;
break;
case SUB:
if (type == DOUBLE_TAG) d = d1 - d2;
else n = n1 - n2;
break;
case MUL:
if (type == DOUBLE_TAG) d = d1 * d2;
else n = n1 * n2;
break;
case DIV:
if (d2 == 0) return false;
if (type == DOUBLE_TAG) d = d1 / d2;
else n = n1 / n2;
break;
case REM:
if (type == DOUBLE_TAG) d = (int64_t)d1 % (int64_t)d2;
else n = n1 % n2;
break;
default:
assert(0);
}
inst_setskip(inst1);
inst_setskip(inst2);
inst->op = LOADI;
inst->tag = type;
inst->p2 = inst->p3 = 0;
if (type == DOUBLE_TAG) inst->d = d;
else inst->n = n;
return true;
}
static bool optimize_neg_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
inst_t *inst1 = NULL;
pop1_instruction(code, i, &inst1);
if (inst1 == NULL) return false;
if (inst1->op != LOADI) return false;
if (inst1->p1 != inst->p2) return false;
if (!ircode_register_istemp(code, inst1->p1)) return false;
uint64_t n = inst1->n;
if (n>131072) return false;
inst1->p1 = inst->p2;
inst1->n = -n;
inst_setskip(inst);
return true;
}
static bool optimize_math_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
uint8_t count = opcode_numop(inst->op) - 1;
inst_t *inst1 = NULL, *inst2 = NULL;
bool flag = false;
if (count == 2) {
pop2_instructions(code, i, &inst2, &inst1);
if (IS_NUM(inst1) && IS_NUM(inst2)) flag = optimize_const_instruction(inst, inst1, inst2);
if (IS_MOVE(inst2)) {
bool b1 = ircode_register_istemp(code, inst->p3);
bool b2 = ((inst2) && ircode_register_istemp(code, inst2->p1));
if ((b1) && (b2) && (inst->p3 == inst2->p1)) {
inst->p3 = inst2->p2;
inst_setskip(inst2);
flag = true;
}
}
if (IS_MOVE(inst1)) {
bool b1 = ircode_register_istemp(code, inst->p2);
bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1));
if ((b1) && (b2) && (inst->p2 == inst1->p1)) {
inst->p2 = inst1->p2;
inst_setskip(inst1);
flag = true;
}
}
}
else {
pop1_instruction(code, i, &inst1);
if (IS_NUM(inst1)) flag = optimize_const_instruction(inst, inst1, NULL);
}
return flag;
}
static bool optimize_move_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
return false;
inst_t *inst1 = NULL;
pop1_instruction(code, i, &inst1);
if (inst1 == NULL) return false;
if ((inst1->op != LOADI) && (inst1->op != LOADG) && (inst1->op != LOADK)) return false;
bool b1 = ircode_register_istemp(code, inst->p2);
bool b2 = ((inst1) && ircode_register_istemp(code, inst1->p1));
if ((b1) && (b2) && (inst->p2 == inst1->p1)) {
inst1->p1 = inst->p1;
inst_setskip(inst);
return true;
}
return false;
}
static bool optimize_return_instruction (ircode_t *code, inst_t *inst, uint32_t i) {
inst_t *inst1 = NULL;
pop1_instruction(code, i, &inst1);
if (!ircode_register_istemp(code, inst->p1)) return false;
if ((IS_MOVE(inst1)) && (inst->p1 == inst1->p1)) {
inst->p1 = inst1->p2;
inst_setskip(inst1);
return true;
}
return false;
}
static bool optimize_num_instruction (inst_t *inst, gravity_function_t *f) {
bool add_cpool = (inst->tag == DOUBLE_TAG);
if (inst->tag == INT_TAG) {
int64_t n = inst->n;
add_cpool = ((n < -MAX_INLINE_INT + 1) || (n > MAX_INLINE_INT));
}
if (add_cpool) {
uint16_t index = 0;
if (inst->tag == INT_TAG) {
int64_t n = inst->n;
index = gravity_function_cpool_add(NULL, f, VALUE_FROM_INT(n));
} else {
index = gravity_function_cpool_add(NULL, f, VALUE_FROM_FLOAT(inst->d));
}
inst->op = LOADK;
inst->p2 = index;
inst->tag = NO_TAG;
}
return true;
}
gravity_function_t *gravity_optimizer(gravity_function_t *f) {
if (f->bytecode == NULL) return f;
ircode_t *code = (ircode_t *)f->bytecode;
uint32_t count = ircode_count(code);
f->ntemps = ircode_ntemps(code);
loop_neg:
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = current_instruction(code, i);
if (IS_NEG(inst)) {
bool b = optimize_neg_instruction (code, inst, i);
if (b) goto loop_neg;
}
}
loop_math:
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = current_instruction(code, i);
if (IS_MATH(inst)) {
bool b = optimize_math_instruction (code, inst, i);
if (b) goto loop_math;
}
}
loop_move:
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = current_instruction(code, i);
if (IS_MOVE(inst)) {
bool b = optimize_move_instruction (code, inst, i);
if (b) goto loop_move;
}
}
loop_ret:
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = current_instruction(code, i);
if (IS_RET(inst)) {
bool b = optimize_return_instruction (code, inst, i);
if (b) goto loop_ret;
}
}
for (uint32_t i=0; i<count; ++i) {
inst_t *inst = current_instruction(code, i);
if (IS_NUM(inst)) optimize_num_instruction (inst, f);
}
#if GRAVITY_BYTECODE_DEBUG
gravity_function_dump(f, ircode_dump);
#endif
finalize_function(f);
return f;
}