Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_32.c
9913 views
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526/* x86 32-bit arch dependent functions. */2728/* --------------------------------------------------------------------- */29/* Operators */30/* --------------------------------------------------------------------- */3132static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)33{34sljit_u8 *inst;3536inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));37FAIL_IF(!inst);38INC_SIZE(1 + sizeof(sljit_sw));39*inst++ = opcode;40sljit_unaligned_store_sw(inst, imm);41return SLJIT_SUCCESS;42}4344/* Size contains the flags as well. */45static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,46/* The register or immediate operand. */47sljit_s32 a, sljit_sw imma,48/* The general operand (not immediate). */49sljit_s32 b, sljit_sw immb)50{51sljit_u8 *inst;52sljit_u8 *buf_ptr;53sljit_u8 reg_map_b;54sljit_uw flags = size;55sljit_uw inst_size;5657/* Both cannot be switched on. */58SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));59/* Size flags not allowed for typed instructions. */60SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);61/* Both size flags cannot be switched on. */62SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));63/* SSE2 and immediate is not possible. */64SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));65SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))66& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);67SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);6869size &= 0xf;70/* The mod r/m byte is always present. */71inst_size = size + 1;7273if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))74inst_size++;7576/* Calculate size of b. */77if (b & SLJIT_MEM) {78if (!(b & REG_MASK))79inst_size += sizeof(sljit_sw);80else {81if (immb != 0 && !(b & OFFS_REG_MASK)) {82/* Immediate operand. */83if (immb <= 127 && immb >= -128)84inst_size += sizeof(sljit_s8);85else86inst_size += sizeof(sljit_sw);87} else if (reg_map[b & REG_MASK] == 5) {88/* Swap registers if possible. */89if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)90b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);91else92inst_size += sizeof(sljit_s8);93}9495if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))96b |= TO_OFFS_REG(SLJIT_SP);9798if (b & OFFS_REG_MASK)99inst_size += 1; /* SIB byte. */100}101}102103/* Calculate size of a. */104if (a == SLJIT_IMM) {105if (flags & EX86_BIN_INS) {106if (imma <= 127 && imma >= -128) {107inst_size += 1;108flags |= EX86_BYTE_ARG;109} else110inst_size += 4;111} else if (flags & EX86_SHIFT_INS) {112SLJIT_ASSERT(imma <= 0x1f);113if (imma != 1) {114inst_size++;115flags |= EX86_BYTE_ARG;116}117} else if (flags & EX86_BYTE_ARG)118inst_size++;119else if (flags & EX86_HALF_ARG)120inst_size += sizeof(short);121else122inst_size += sizeof(sljit_sw);123} else124SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);125126inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);127PTR_FAIL_IF(!inst);128129/* Encoding the byte. */130INC_SIZE(inst_size);131if (flags & EX86_PREF_F2)132*inst++ = 0xf2;133else if (flags & EX86_PREF_F3)134*inst++ = 0xf3;135else if (flags & EX86_PREF_66)136*inst++ = 0x66;137138buf_ptr = inst + size;139140/* Encode mod/rm byte. */141if (!(flags & EX86_SHIFT_INS)) {142if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)143*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;144145if (a == SLJIT_IMM)146*buf_ptr = 0;147else if (!(flags & EX86_SSE2_OP1))148*buf_ptr = U8(reg_map[a] << 3);149else150*buf_ptr = U8(freg_map[a] << 3);151} else {152if (a == SLJIT_IMM) {153if (imma == 1)154*inst = GROUP_SHIFT_1;155else156*inst = GROUP_SHIFT_N;157} else158*inst = GROUP_SHIFT_CL;159*buf_ptr = 0;160}161162if (!(b & SLJIT_MEM)) {163*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : freg_map[b]));164buf_ptr++;165} else if (b & REG_MASK) {166reg_map_b = reg_map[b & REG_MASK];167168if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {169if (immb != 0 || reg_map_b == 5) {170if (immb <= 127 && immb >= -128)171*buf_ptr |= 0x40;172else173*buf_ptr |= 0x80;174}175176if (!(b & OFFS_REG_MASK))177*buf_ptr++ |= reg_map_b;178else {179buf_ptr[0] |= 0x04;180buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));181buf_ptr += 2;182}183184if (immb != 0 || reg_map_b == 5) {185if (immb <= 127 && immb >= -128)186*buf_ptr++ = U8(immb); /* 8 bit displacement. */187else {188sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */189buf_ptr += sizeof(sljit_sw);190}191}192} else {193if (reg_map_b == 5)194*buf_ptr |= 0x40;195196buf_ptr[0] |= 0x04;197buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));198buf_ptr += 2;199200if (reg_map_b == 5)201*buf_ptr++ = 0;202}203} else {204*buf_ptr++ |= 0x05;205sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */206buf_ptr += sizeof(sljit_sw);207}208209if (a == SLJIT_IMM) {210if (flags & EX86_BYTE_ARG)211*buf_ptr = U8(imma);212else if (flags & EX86_HALF_ARG)213sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);214else if (!(flags & EX86_SHIFT_INS))215sljit_unaligned_store_sw(buf_ptr, imma);216}217218return inst;219}220221static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,222/* The first and second register operand. */223sljit_s32 a, sljit_s32 v,224/* The general operand (not immediate). */225sljit_s32 b, sljit_sw immb)226{227sljit_u8 *inst;228sljit_u8 vex = 0;229sljit_u8 vex_m = 0;230sljit_uw size;231232SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))233& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);234235if (op & VEX_OP_0F38)236vex_m = 0x2;237else if (op & VEX_OP_0F3A)238vex_m = 0x3;239240if (op & VEX_W) {241if (vex_m == 0)242vex_m = 0x1;243244vex |= 0x80;245}246247if (op & EX86_PREF_66)248vex |= 0x1;249else if (op & EX86_PREF_F2)250vex |= 0x3;251else if (op & EX86_PREF_F3)252vex |= 0x2;253254op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);255256if (op & VEX_256)257vex |= 0x4;258259vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));260261size = op & ~(sljit_uw)0xff;262size |= (vex_m == 0) ? 3 : 4;263264inst = emit_x86_instruction(compiler, size, a, 0, b, immb);265FAIL_IF(!inst);266267if (vex_m == 0) {268inst[0] = 0xc5;269inst[1] = U8(vex | 0x80);270inst[2] = U8(op);271return SLJIT_SUCCESS;272}273274inst[0] = 0xc4;275inst[1] = U8(vex_m | 0xe0);276inst[2] = vex;277inst[3] = U8(op);278return SLJIT_SUCCESS;279}280281/* --------------------------------------------------------------------- */282/* Enter / return */283/* --------------------------------------------------------------------- */284285static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)286{287sljit_uw type = jump->flags >> TYPE_SHIFT;288289if (type == SLJIT_JUMP) {290*code_ptr++ = JMP_i32;291} else if (type >= SLJIT_FAST_CALL) {292*code_ptr++ = CALL_i32;293} else {294*code_ptr++ = GROUP_0F;295*code_ptr++ = get_jump_code(type);296}297298jump->addr = (sljit_uw)code_ptr;299300if (jump->flags & JUMP_ADDR)301sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));302else303jump->flags |= PATCH_MW;304code_ptr += 4;305306return code_ptr;307}308309#define ENTER_TMP_TO_R4 0x00001310#define ENTER_TMP_TO_S 0x00002311312SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,313sljit_s32 options, sljit_s32 arg_types,314sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)315{316sljit_s32 word_arg_count, saved_arg_count, float_arg_count;317sljit_s32 size, args_size, types, status;318sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);319sljit_u8 *inst;320#ifdef _WIN32321sljit_s32 r2_offset = -1;322#endif323324CHECK_ERROR();325CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));326set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);327328scratches = ENTER_GET_REGS(scratches);329330/* Emit ENDBR32 at function entry if needed. */331FAIL_IF(emit_endbranch(compiler));332333SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);334335arg_types >>= SLJIT_ARG_SHIFT;336word_arg_count = 0;337status = 0;338339if (options & SLJIT_ENTER_REG_ARG) {340args_size = 3 * SSIZE_OF(sw);341342while (arg_types) {343if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {344word_arg_count++;345if (word_arg_count >= 4)346status |= ENTER_TMP_TO_R4;347}348349arg_types >>= SLJIT_ARG_SHIFT;350}351352compiler->args_size = 0;353} else {354types = arg_types;355saved_arg_count = 0;356float_arg_count = 0;357args_size = SSIZE_OF(sw);358while (types) {359switch (types & SLJIT_ARG_MASK) {360case SLJIT_ARG_TYPE_F64:361float_arg_count++;362FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));363args_size += SSIZE_OF(f64);364break;365case SLJIT_ARG_TYPE_F32:366float_arg_count++;367FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));368args_size += SSIZE_OF(f32);369break;370default:371word_arg_count++;372373if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))374saved_arg_count++;375376if (word_arg_count == 4) {377if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {378status |= ENTER_TMP_TO_R4;379arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);380} else if (saved_arg_count == 4) {381status |= ENTER_TMP_TO_S;382arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);383}384}385386args_size += SSIZE_OF(sw);387break;388}389types >>= SLJIT_ARG_SHIFT;390}391392args_size -= SSIZE_OF(sw);393compiler->args_size = args_size;394}395396size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;397if (!(options & SLJIT_ENTER_REG_ARG))398size++;399400if (size != 0) {401inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));402FAIL_IF(!inst);403404INC_SIZE((sljit_uw)size);405406if (!(options & SLJIT_ENTER_REG_ARG))407PUSH_REG(reg_map[TMP_REG1]);408409if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)410PUSH_REG(reg_map[SLJIT_S2]);411if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)412PUSH_REG(reg_map[SLJIT_S1]);413if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)414PUSH_REG(reg_map[SLJIT_S0]);415416size *= SSIZE_OF(sw);417}418419if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))420EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);421422size += SSIZE_OF(sw);423424local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size;425compiler->local_size = local_size;426427word_arg_count = 0;428saved_arg_count = 0;429args_size = size;430while (arg_types) {431switch (arg_types & SLJIT_ARG_MASK) {432case SLJIT_ARG_TYPE_F64:433args_size += SSIZE_OF(f64);434break;435case SLJIT_ARG_TYPE_F32:436args_size += SSIZE_OF(f32);437break;438default:439word_arg_count++;440SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));441442if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {443#ifdef _WIN32444if (word_arg_count == 3 && local_size > 4 * 4096)445r2_offset = local_size + args_size;446else447#endif448EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);449450} else {451EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);452saved_arg_count++;453}454455args_size += SSIZE_OF(sw);456break;457}458arg_types >>= SLJIT_ARG_SHIFT;459}460461SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);462463#ifdef _WIN32464SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);465466if (local_size > 4096) {467if (local_size <= 4 * 4096) {468BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);469470if (local_size > 2 * 4096)471BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);472if (local_size > 3 * 4096)473BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);474}475else {476if (options & SLJIT_ENTER_REG_ARG) {477SLJIT_ASSERT(r2_offset == -1);478479inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));480FAIL_IF(!inst);481INC_SIZE(1);482PUSH_REG(reg_map[SLJIT_R2]);483484local_size -= SSIZE_OF(sw);485r2_offset = local_size;486}487488EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);489490BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);491BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);492493inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);494FAIL_IF(!inst);495496INC_SIZE(2);497inst[0] = LOOP_i8;498inst[1] = (sljit_u8)-16;499local_size &= 0xfff;500}501}502503if (local_size > 0) {504BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);505BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);506}507508if (r2_offset != -1)509EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);510511#else /* !_WIN32 */512513SLJIT_ASSERT(local_size > 0);514515BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);516517#endif /* _WIN32 */518519size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw);520kept_saveds_count = SLJIT_R3 - kept_saveds_count;521522while (saved_arg_count > 3) {523EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0);524kept_saveds_count++;525size -= SSIZE_OF(sw);526saved_arg_count--;527}528529if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {530if (status & ENTER_TMP_TO_R4)531size = 2 * SSIZE_OF(sw);532533EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);534}535536return SLJIT_SUCCESS;537}538539SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,540sljit_s32 options, sljit_s32 arg_types,541sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)542{543sljit_s32 args_size;544545CHECK_ERROR();546CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));547set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);548549scratches = ENTER_GET_REGS(scratches);550551arg_types >>= SLJIT_ARG_SHIFT;552args_size = 0;553554if (!(options & SLJIT_ENTER_REG_ARG)) {555while (arg_types) {556switch (arg_types & SLJIT_ARG_MASK) {557case SLJIT_ARG_TYPE_F64:558args_size += SSIZE_OF(f64);559break;560case SLJIT_ARG_TYPE_F32:561args_size += SSIZE_OF(f32);562break;563default:564args_size += SSIZE_OF(sw);565break;566}567arg_types >>= SLJIT_ARG_SHIFT;568}569}570571compiler->args_size = args_size;572573/* [esp+0] for saving temporaries and for function calls. */574575saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);576577/* Saving ebp. */578if (!(options & SLJIT_ENTER_REG_ARG))579saveds += SSIZE_OF(sw);580581compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds;582return SLJIT_SUCCESS;583}584585static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)586{587sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);588sljit_s32 local_size, saveds;589sljit_uw size;590sljit_u8 *inst;591592size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +593(compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);594595local_size = compiler->local_size;596597if (!(compiler->options & SLJIT_ENTER_REG_ARG))598size++;599else if (is_return_to && size == 0) {600local_size += SSIZE_OF(sw);601is_return_to = 0;602}603604if (local_size > 0)605BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);606607if (size == 0)608return SLJIT_SUCCESS;609610inst = (sljit_u8*)ensure_buf(compiler, 1 + size);611FAIL_IF(!inst);612613INC_SIZE(size);614615saveds = compiler->saveds;616617if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)618POP_REG(reg_map[SLJIT_S0]);619if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)620POP_REG(reg_map[SLJIT_S1]);621if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)622POP_REG(reg_map[SLJIT_S2]);623624if (!(compiler->options & SLJIT_ENTER_REG_ARG))625POP_REG(reg_map[TMP_REG1]);626627if (is_return_to)628BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);629630return SLJIT_SUCCESS;631}632633SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)634{635CHECK_ERROR();636CHECK(check_sljit_emit_return_void(compiler));637638SLJIT_ASSERT(compiler->args_size >= 0);639SLJIT_ASSERT(compiler->local_size > 0);640641FAIL_IF(emit_stack_frame_release(compiler, 0));642643return emit_byte(compiler, RET_near);644}645646SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,647sljit_s32 src, sljit_sw srcw)648{649sljit_s32 src_r;650651CHECK_ERROR();652CHECK(check_sljit_emit_return_to(compiler, src, srcw));653654if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {655ADJUST_LOCAL_OFFSET(src, srcw);656CHECK_EXTRA_REGS(src, srcw, (void)0);657658src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? TMP_REG1 : SLJIT_R1;659660EMIT_MOV(compiler, src_r, 0, src, srcw);661src = src_r;662srcw = 0;663}664665FAIL_IF(emit_stack_frame_release(compiler, 1));666667SLJIT_SKIP_CHECKS(compiler);668return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);669}670671/* --------------------------------------------------------------------- */672/* Call / return instructions */673/* --------------------------------------------------------------------- */674675static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)676{677sljit_sw stack_size = 0;678sljit_s32 word_arg_count = 0;679680arg_types >>= SLJIT_ARG_SHIFT;681682while (arg_types) {683switch (arg_types & SLJIT_ARG_MASK) {684case SLJIT_ARG_TYPE_F64:685stack_size += SSIZE_OF(f64);686break;687case SLJIT_ARG_TYPE_F32:688stack_size += SSIZE_OF(f32);689break;690default:691word_arg_count++;692stack_size += SSIZE_OF(sw);693break;694}695696arg_types >>= SLJIT_ARG_SHIFT;697}698699if (word_arg_count_ptr)700*word_arg_count_ptr = word_arg_count;701702if (stack_size <= 4 * SSIZE_OF(sw))703return 0;704705return ((stack_size - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf);706}707708static sljit_s32 call_with_args(struct sljit_compiler *compiler,709sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1)710{711sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset;712sljit_u8 *inst;713714if (word_arg_count >= 4) {715arg4_reg = SLJIT_R0;716717if (!keep_tmp1) {718EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));719arg4_reg = TMP_REG1;720}721}722723if (stack_size > 0)724BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);725726arg_offset = 0;727word_arg_count = 0;728arg_types >>= SLJIT_ARG_SHIFT;729730while (arg_types) {731switch (arg_types & SLJIT_ARG_MASK) {732case SLJIT_ARG_TYPE_F64:733float_arg_count++;734FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));735arg_offset += SSIZE_OF(f64);736break;737case SLJIT_ARG_TYPE_F32:738float_arg_count++;739FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));740arg_offset += SSIZE_OF(f32);741break;742default:743word_arg_count++;744EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0);745746if (word_arg_count == 1 && arg4_reg == SLJIT_R0)747EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size);748749arg_offset += SSIZE_OF(sw);750break;751}752753arg_types >>= SLJIT_ARG_SHIFT;754}755756return SLJIT_SUCCESS;757}758759static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,760sljit_s32 arg_types, sljit_s32 stack_size)761{762sljit_u8 *inst;763sljit_s32 single;764765if (stack_size > 0)766BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);767768if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)769return SLJIT_SUCCESS;770771single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);772773inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);774FAIL_IF(!inst);775INC_SIZE(3);776inst[0] = single ? FSTPS : FSTPD;777inst[1] = (0x03 << 3) | 0x04;778inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];779780return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);781}782783static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,784sljit_s32 *extra_space, sljit_s32 arg_types,785sljit_s32 src, sljit_sw srcw)786{787sljit_sw args_size, saved_regs_size;788sljit_sw types, word_arg_count, float_arg_count;789sljit_sw stack_size, prev_stack_size, min_size, offset;790sljit_sw word_arg4_offset;791sljit_u8 r2_offset = 0;792sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);793sljit_u8* inst;794795ADJUST_LOCAL_OFFSET(src, srcw);796CHECK_EXTRA_REGS(src, srcw, (void)0);797798saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)799+ (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);800801word_arg_count = 0;802float_arg_count = 0;803arg_types >>= SLJIT_ARG_SHIFT;804types = 0;805args_size = 0;806807while (arg_types != 0) {808types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);809810switch (arg_types & SLJIT_ARG_MASK) {811case SLJIT_ARG_TYPE_F64:812args_size += SSIZE_OF(f64);813float_arg_count++;814break;815case SLJIT_ARG_TYPE_F32:816args_size += SSIZE_OF(f32);817float_arg_count++;818break;819default:820word_arg_count++;821args_size += SSIZE_OF(sw);822break;823}824arg_types >>= SLJIT_ARG_SHIFT;825}826827if (args_size <= compiler->args_size) {828*extra_space = 0;829stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;830831offset = stack_size + compiler->local_size;832833if (src != SLJIT_IMM && src != SLJIT_R0) {834if (word_arg_count >= 1) {835EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);836r2_offset = sizeof(sljit_sw);837}838EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);839}840841while (types != 0) {842switch (types & SLJIT_ARG_MASK) {843case SLJIT_ARG_TYPE_F64:844offset -= SSIZE_OF(f64);845FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));846float_arg_count--;847break;848case SLJIT_ARG_TYPE_F32:849offset -= SSIZE_OF(f32);850FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));851float_arg_count--;852break;853default:854switch (word_arg_count) {855case 1:856offset -= SSIZE_OF(sw);857if (r2_offset != 0) {858EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);859EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);860} else861EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);862break;863case 2:864offset -= SSIZE_OF(sw);865EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);866break;867case 3:868offset -= SSIZE_OF(sw);869break;870case 4:871offset -= SSIZE_OF(sw);872EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));873EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);874break;875}876word_arg_count--;877break;878}879types >>= SLJIT_ARG_SHIFT;880}881882return emit_stack_frame_release(compiler, 0);883}884885stack_size = args_size + SSIZE_OF(sw);886887if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) {888r2_offset = SSIZE_OF(sw);889stack_size += SSIZE_OF(sw);890}891892if (word_arg_count >= 3)893stack_size += SSIZE_OF(sw);894895prev_stack_size = SSIZE_OF(sw) + saved_regs_size;896min_size = prev_stack_size + compiler->local_size;897898word_arg4_offset = 2 * SSIZE_OF(sw);899900if (stack_size > min_size) {901BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);902if (src == SLJIT_MEM1(SLJIT_SP))903srcw += stack_size - min_size;904word_arg4_offset += stack_size - min_size;905}906else907stack_size = min_size;908909if (word_arg_count >= 3) {910EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);911912if (word_arg_count >= 4)913EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);914}915916if (src != SLJIT_IMM && src != SLJIT_R0) {917if (word_arg_count >= 1) {918SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));919EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);920}921EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);922}923924/* Restore saved registers. */925offset = stack_size - 2 * SSIZE_OF(sw);926EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);927928if (compiler->saveds > 2 || compiler->scratches > 9) {929offset -= SSIZE_OF(sw);930EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);931}932if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {933offset -= SSIZE_OF(sw);934EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);935}936if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {937offset -= SSIZE_OF(sw);938EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);939}940941/* Copy fourth argument and return address. */942offset = stack_size - SSIZE_OF(sw);943*extra_space = args_size;944945if (word_arg_count >= 4) {946offset -= SSIZE_OF(sw);947EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);948}949950while (types != 0) {951switch (types & SLJIT_ARG_MASK) {952case SLJIT_ARG_TYPE_F64:953offset -= SSIZE_OF(f64);954FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));955float_arg_count--;956break;957case SLJIT_ARG_TYPE_F32:958offset -= SSIZE_OF(f32);959FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));960float_arg_count--;961break;962default:963switch (word_arg_count) {964case 1:965offset -= SSIZE_OF(sw);966if (r2_offset != 0) {967EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);968EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);969} else970EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);971break;972case 2:973offset -= SSIZE_OF(sw);974EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);975break;976case 3:977offset -= SSIZE_OF(sw);978EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);979EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);980break;981}982word_arg_count--;983break;984}985types >>= SLJIT_ARG_SHIFT;986}987988SLJIT_ASSERT(offset >= 0);989990if (offset == 0)991return SLJIT_SUCCESS;992993BINARY_IMM32(ADD, offset, SLJIT_SP, 0);994return SLJIT_SUCCESS;995}996997static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)998{999/* Called when stack consumption cannot be reduced to 0. */1000sljit_u8 *inst;10011002BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);1003return emit_byte(compiler, RET_near);1004}10051006static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)1007{1008sljit_s32 word_arg_count = 0;1009sljit_s32 kept_saveds_count, offset;10101011arg_types >>= SLJIT_ARG_SHIFT;10121013while (arg_types) {1014if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)1015word_arg_count++;10161017arg_types >>= SLJIT_ARG_SHIFT;1018}10191020if (word_arg_count < 4)1021return SLJIT_SUCCESS;10221023EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));10241025kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);1026offset = compiler->local_size + 3 * SSIZE_OF(sw);10271028if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)1029offset += SSIZE_OF(sw);1030if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)1031offset += SSIZE_OF(sw);1032if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)1033offset += SSIZE_OF(sw);10341035return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);1036}10371038SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,1039sljit_s32 arg_types)1040{1041struct sljit_jump *jump;1042sljit_sw stack_size = 0;1043sljit_s32 word_arg_count;10441045CHECK_ERROR_PTR();1046CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));10471048if (type & SLJIT_CALL_RETURN) {1049if ((type & 0xff) == SLJIT_CALL_REG_ARG) {1050PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));1051PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));10521053SLJIT_SKIP_CHECKS(compiler);1054return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));1055}10561057stack_size = type;1058PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));10591060SLJIT_SKIP_CHECKS(compiler);10611062if (stack_size == 0)1063return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));10641065jump = sljit_emit_jump(compiler, type);1066PTR_FAIL_IF(jump == NULL);10671068PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));1069return jump;1070}10711072if ((type & 0xff) == SLJIT_CALL_REG_ARG) {1073SLJIT_SKIP_CHECKS(compiler);1074return sljit_emit_jump(compiler, type);1075}10761077stack_size = call_get_stack_size(arg_types, &word_arg_count);1078PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));10791080SLJIT_SKIP_CHECKS(compiler);1081jump = sljit_emit_jump(compiler, type);1082PTR_FAIL_IF(jump == NULL);10831084PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));1085return jump;1086}10871088SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,1089sljit_s32 arg_types,1090sljit_s32 src, sljit_sw srcw)1091{1092sljit_sw stack_size = 0;1093sljit_s32 word_arg_count;10941095CHECK_ERROR();1096CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));10971098if (type & SLJIT_CALL_RETURN) {1099if ((type & 0xff) == SLJIT_CALL_REG_ARG) {1100FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));11011102if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {1103ADJUST_LOCAL_OFFSET(src, srcw);1104CHECK_EXTRA_REGS(src, srcw, (void)0);11051106EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1107src = TMP_REG1;1108srcw = 0;1109}11101111FAIL_IF(emit_stack_frame_release(compiler, 0));11121113SLJIT_SKIP_CHECKS(compiler);1114return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);1115}11161117stack_size = type;1118FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));11191120if (src != SLJIT_IMM) {1121src = SLJIT_R0;1122srcw = 0;1123}11241125SLJIT_SKIP_CHECKS(compiler);11261127if (stack_size == 0)1128return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);11291130FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));1131return emit_tail_call_end(compiler, stack_size);1132}11331134if ((type & 0xff) == SLJIT_CALL_REG_ARG) {1135SLJIT_SKIP_CHECKS(compiler);1136return sljit_emit_ijump(compiler, type, src, srcw);1137}11381139ADJUST_LOCAL_OFFSET(src, srcw);1140CHECK_EXTRA_REGS(src, srcw, (void)0);11411142if (src & SLJIT_MEM) {1143EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1144src = TMP_REG1;1145srcw = 0;1146}11471148stack_size = call_get_stack_size(arg_types, &word_arg_count);1149FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1));11501151if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP))1152srcw += stack_size;11531154SLJIT_SKIP_CHECKS(compiler);1155FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));11561157return post_call_with_args(compiler, arg_types, stack_size);1158}11591160static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)1161{1162sljit_u8* inst;11631164if (compiler->options & SLJIT_ENTER_REG_ARG) {1165if (src == SLJIT_FR0)1166return SLJIT_SUCCESS;11671168SLJIT_SKIP_CHECKS(compiler);1169return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);1170}11711172if (FAST_IS_REG(src)) {1173FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src));11741175src = SLJIT_MEM1(SLJIT_SP);1176srcw = 0;1177} else {1178ADJUST_LOCAL_OFFSET(src, srcw);1179}11801181inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw);1182*inst = (op & SLJIT_32) ? FLDS : FLDL;11831184return SLJIT_SUCCESS;1185}11861187static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)1188{1189sljit_u8 *inst;11901191CHECK_EXTRA_REGS(dst, dstw, (void)0);11921193/* Unused dest is possible here. */1194if (FAST_IS_REG(dst))1195return emit_byte(compiler, U8(POP_r + reg_map[dst]));11961197/* Memory. */1198inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);1199FAIL_IF(!inst);1200*inst = POP_rm;1201return SLJIT_SUCCESS;1202}12031204static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)1205{1206sljit_u8 *inst;12071208CHECK_EXTRA_REGS(src, srcw, (void)0);12091210if (FAST_IS_REG(src)) {1211inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);1212FAIL_IF(!inst);12131214INC_SIZE(1 + 1);1215PUSH_REG(reg_map[src]);1216}1217else {1218inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);1219FAIL_IF(!inst);1220inst[0] = GROUP_FF;1221inst[1] |= PUSH_rm;12221223inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);1224FAIL_IF(!inst);1225INC_SIZE(1);1226}12271228RET();1229return SLJIT_SUCCESS;1230}12311232static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,1233sljit_s32 dst, sljit_sw dstw)1234{1235sljit_s32 options = compiler->options;1236sljit_s32 saveds = compiler->saveds;1237sljit_s32 scratches = compiler->scratches;12381239saveds = ((scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);12401241/* Saving ebp. */1242if (!(options & SLJIT_ENTER_REG_ARG))1243saveds += SSIZE_OF(sw);12441245return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saveds);1246}12471248/* --------------------------------------------------------------------- */1249/* Other operations */1250/* --------------------------------------------------------------------- */12511252SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,1253sljit_s32 dst_reg,1254sljit_s32 src1, sljit_sw src1w,1255sljit_s32 src2_reg)1256{1257sljit_s32 dst = dst_reg;1258sljit_sw dstw = 0;1259sljit_sw src2w = 0;12601261CHECK_ERROR();1262CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));12631264ADJUST_LOCAL_OFFSET(src1, src1w);12651266CHECK_EXTRA_REGS(dst, dstw, (void)0);1267CHECK_EXTRA_REGS(src1, src1w, (void)0);1268CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);12691270type &= ~SLJIT_32;12711272if (dst & SLJIT_MEM) {1273if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {1274EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);1275src1 = src2_reg;1276src1w = src2w;1277type ^= 0x1;1278} else1279EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);12801281dst_reg = TMP_REG1;1282} else {1283if (dst_reg != src2_reg) {1284if (dst_reg == src1) {1285src1 = src2_reg;1286src1w = src2w;1287type ^= 0x1;1288} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {1289EMIT_MOV(compiler, dst_reg, 0, src1, src1w);1290src1 = src2_reg;1291src1w = src2w;1292type ^= 0x1;1293} else1294EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w);1295}1296}12971298if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && (src1 != SLJIT_IMM || dst_reg != TMP_REG1)) {1299if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {1300EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);1301src1 = TMP_REG1;1302src1w = 0;1303}13041305FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w));1306} else1307FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));13081309if (dst & SLJIT_MEM)1310return emit_mov(compiler, dst, dstw, TMP_REG1, 0);1311return SLJIT_SUCCESS;1312}13131314SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,1315sljit_s32 reg,1316sljit_s32 mem, sljit_sw memw)1317{1318sljit_u8* inst;1319sljit_s32 i, next, reg_idx, offset;1320sljit_u8 regs[2];13211322CHECK_ERROR();1323CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));13241325if (!(reg & REG_PAIR_MASK))1326return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);13271328ADJUST_LOCAL_OFFSET(mem, memw);13291330regs[0] = U8(REG_PAIR_FIRST(reg));1331regs[1] = U8(REG_PAIR_SECOND(reg));13321333next = SSIZE_OF(sw);13341335if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {1336if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {1337/* None of them are virtual register so TMP_REG1 will not be used. */1338EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);13391340if (regs[1] == OFFS_REG(mem))1341next = -SSIZE_OF(sw);13421343mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);1344} else {1345next = -SSIZE_OF(sw);13461347if (!(mem & OFFS_REG_MASK))1348memw += SSIZE_OF(sw);1349}1350}13511352for (i = 0; i < 2; i++) {1353reg_idx = next > 0 ? i : (i ^ 0x1);1354reg = regs[reg_idx];13551356offset = -1;13571358if (reg >= SLJIT_R3 && reg <= SLJIT_S3) {1359offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw);1360reg = TMP_REG1;13611362if (type & SLJIT_MEM_STORE)1363EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);1364}13651366if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {1367inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4));1368FAIL_IF(!inst);13691370INC_SIZE(4);13711372inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;1373inst[1] = 0x44 | U8(reg_map[reg] << 3);1374inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK];1375inst[3] = sizeof(sljit_sw);1376} else if (type & SLJIT_MEM_STORE) {1377EMIT_MOV(compiler, mem, memw, reg, 0);1378} else {1379EMIT_MOV(compiler, reg, 0, mem, memw);1380}13811382if (!(mem & OFFS_REG_MASK))1383memw += next;13841385if (!(type & SLJIT_MEM_STORE) && offset != -1)1386EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);1387}13881389return SLJIT_SUCCESS;1390}13911392static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,1393sljit_s32 dst, sljit_sw dstw,1394sljit_s32 src, sljit_sw srcw)1395{1396sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;1397sljit_u8 *inst, *jump_inst1, *jump_inst2;1398sljit_uw size1, size2;13991400/* Binary representation of 0x80000000. */1401static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul;14021403CHECK_EXTRA_REGS(src, srcw, (void)0);14041405if (!(op & SLJIT_32)) {1406EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);14071408inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);1409FAIL_IF(!inst);1410inst[1] |= ROL;14111412inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);1413FAIL_IF(!inst);1414inst[1] |= SHR;14151416FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));14171418inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1419FAIL_IF(!inst);1420INC_SIZE(2);1421inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10);14221423size1 = compiler->size;1424FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));14251426inst[1] = U8(compiler->size - size1);14271428if (dst_r == TMP_FREG)1429return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG);1430return SLJIT_SUCCESS;1431}14321433if (!FAST_IS_REG(src)) {1434EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1435src = TMP_REG1;1436}14371438BINARY_IMM32(CMP, 0, src, 0);14391440inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1441FAIL_IF(!inst);1442INC_SIZE(2);1443inst[0] = JL_i8;1444jump_inst1 = inst;14451446size1 = compiler->size;14471448FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));14491450inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1451FAIL_IF(!inst);1452INC_SIZE(2);1453inst[0] = JMP_i8;1454jump_inst2 = inst;14551456size2 = compiler->size;14571458jump_inst1[1] = U8(size2 - size1);14591460if (src != TMP_REG1)1461EMIT_MOV(compiler, TMP_REG1, 0, src, 0);14621463inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);1464FAIL_IF(!inst);1465inst[1] |= SHR;14661467inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1468FAIL_IF(!inst);1469INC_SIZE(2);1470inst[0] = JNC_i8;1471jump_inst1 = inst;14721473size1 = compiler->size;14741475BINARY_IMM32(OR, 1, TMP_REG1, 0);1476jump_inst1[1] = U8(compiler->size - size1);14771478FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));1479FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));14801481jump_inst2[1] = U8(compiler->size - size2);14821483if (dst_r == TMP_FREG)1484return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);1485return SLJIT_SUCCESS;1486}14871488SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,1489sljit_s32 freg, sljit_f32 value)1490{1491sljit_u8 *inst;1492union {1493sljit_s32 imm;1494sljit_f32 value;1495} u;14961497CHECK_ERROR();1498CHECK(check_sljit_emit_fset32(compiler, freg, value));14991500u.value = value;15011502if (u.imm != 0)1503EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);15041505inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);1506FAIL_IF(!inst);1507INC_SIZE(4);15081509inst[0] = GROUP_66;1510inst[1] = GROUP_0F;15111512if (u.imm == 0) {1513inst[2] = PXOR_x_xm;1514inst[3] = U8(freg_map[freg] | (freg_map[freg] << 3) | MOD_REG);1515} else {1516inst[2] = MOVD_x_rm;1517inst[3] = U8(reg_map[TMP_REG1] | (freg_map[freg] << 3) | MOD_REG);1518}15191520return SLJIT_SUCCESS;1521}15221523SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,1524sljit_s32 freg, sljit_f64 value)1525{1526sljit_u8 *inst;1527union {1528sljit_s32 imm[2];1529sljit_f64 value;1530} u;15311532CHECK_ERROR();1533CHECK(check_sljit_emit_fset64(compiler, freg, value));15341535u.value = value;15361537if (u.imm[0] == 0) {1538if (u.imm[1] == 0)1539return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);15401541EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);1542} else {1543SLJIT_ASSERT(cpu_feature_list != 0);15441545if (!(cpu_feature_list & CPU_FEATURE_SSE41) && u.imm[1] != 0 && u.imm[0] != u.imm[1]) {1546EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_IMM, u.imm[0]);1547EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_IMM, u.imm[1]);15481549return emit_groupf(compiler, MOVLPD_x_m | EX86_SSE2, freg, SLJIT_MEM1(SLJIT_SP), 0);1550}15511552EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);1553}15541555FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));15561557if (u.imm[1] == 0)1558return SLJIT_SUCCESS;15591560if (u.imm[0] == 0) {1561inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);1562FAIL_IF(!inst);1563INC_SIZE(4);15641565inst[0] = GROUP_0F;1566inst[1] = SHUFPS_x_xm;1567inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);1568inst[3] = 0x51;1569return SLJIT_SUCCESS;1570}15711572if (u.imm[0] != u.imm[1]) {1573SLJIT_ASSERT(cpu_feature_list & CPU_FEATURE_SSE41);1574EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);15751576FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));1577return emit_byte(compiler, 1);1578}15791580inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);1581FAIL_IF(!inst);1582INC_SIZE(3);15831584inst[0] = GROUP_0F;1585inst[1] = UNPCKLPS_x_xm;1586inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[freg]);1587return SLJIT_SUCCESS;1588}15891590SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,1591sljit_s32 freg, sljit_s32 reg)1592{1593sljit_u8 *inst;1594sljit_s32 reg2;1595sljit_sw regw, reg2w;15961597CHECK_ERROR();1598CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));15991600regw = 0;1601reg2 = 0;1602reg2w = 0;16031604SLJIT_ASSERT(cpu_feature_list != 0);16051606if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) {1607if (reg & REG_PAIR_MASK) {1608reg2 = REG_PAIR_FIRST(reg);1609reg = REG_PAIR_SECOND(reg);16101611CHECK_EXTRA_REGS(reg, regw, (void)0);16121613FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)1614| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw));1615} else1616reg2 = reg;16171618CHECK_EXTRA_REGS(reg2, reg2w, (void)0);16191620FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8)1621| EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w));1622return emit_byte(compiler, 1);1623}16241625if (reg & REG_PAIR_MASK) {1626reg2 = REG_PAIR_SECOND(reg);1627reg = REG_PAIR_FIRST(reg);16281629if (reg == reg2)1630reg = 0;16311632CHECK_EXTRA_REGS(reg2, reg2w, (void)0);1633}16341635CHECK_EXTRA_REGS(reg, regw, (void)0);16361637if (op & SLJIT_32)1638return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)1639| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw);16401641if (op == SLJIT_COPY_FROM_F64) {1642inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);1643FAIL_IF(!inst);1644INC_SIZE(5);16451646inst[0] = GROUP_66;1647inst[1] = GROUP_0F;1648inst[2] = PSHUFD_x_xm;1649inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg_map[freg]);1650inst[4] = 1;1651} else if (reg != 0)1652FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));16531654if (reg2 != 0)1655FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)1656| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w));16571658if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {1659inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);1660FAIL_IF(!inst);1661INC_SIZE(3);16621663inst[0] = GROUP_0F;1664inst[1] = UNPCKLPS_x_xm;1665inst[2] = U8(MOD_REG | (freg_map[freg] << 3) | freg_map[reg == 0 ? freg : TMP_FREG]);1666} else1667FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));16681669return SLJIT_SUCCESS;1670}16711672static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)1673{1674sljit_sw size;16751676/* Don't adjust shadow stack if it isn't enabled. */1677if (!cpu_has_shadow_stack())1678return SLJIT_SUCCESS;16791680SLJIT_ASSERT(compiler->args_size >= 0);1681SLJIT_ASSERT(compiler->local_size > 0);16821683size = compiler->local_size;1684size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)1685+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);16861687return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);1688}168916901691