Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_64.c
9913 views
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526/* x86 64-bit arch dependent functions. */2728/* --------------------------------------------------------------------- */29/* Operators */30/* --------------------------------------------------------------------- */3132static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)33{34sljit_u8 *inst;3536inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));37FAIL_IF(!inst);38INC_SIZE(2 + sizeof(sljit_sw));39inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);40inst[1] = U8(MOV_r_i32 | reg_lmap[reg]);41sljit_unaligned_store_sw(inst + 2, imm);42return SLJIT_SUCCESS;43}4445static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)46{47sljit_u8 *inst;48sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);4950inst = (sljit_u8*)ensure_buf(compiler, 1 + length);51FAIL_IF(!inst);52INC_SIZE(length);53if (rex)54*inst++ = rex;55*inst++ = opcode;56sljit_unaligned_store_s32(inst, (sljit_s32)imm);57return SLJIT_SUCCESS;58}5960static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,61/* The register or immediate operand. */62sljit_s32 a, sljit_sw imma,63/* The general operand (not immediate). */64sljit_s32 b, sljit_sw immb)65{66sljit_u8 *inst;67sljit_u8 *buf_ptr;68sljit_u8 rex = 0;69sljit_u8 reg_lmap_b;70sljit_uw flags = size;71sljit_uw inst_size;7273/* The immediate operand must be 32 bit. */74SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));75/* Both cannot be switched on. */76SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));77/* Size flags not allowed for typed instructions. */78SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);79/* Both size flags cannot be switched on. */80SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));81/* SSE2 and immediate is not possible. */82SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));83SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))84& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);85SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);8687size &= 0xf;88/* The mod r/m byte is always present. */89inst_size = size + 1;9091if (!compiler->mode32 && !(flags & EX86_NO_REXW))92rex |= REX_W;93else if (flags & EX86_REX)94rex |= REX;9596if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))97inst_size++;9899/* Calculate size of b. */100if (b & SLJIT_MEM) {101if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {102PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));103immb = 0;104if (b & REG_MASK)105b |= TO_OFFS_REG(TMP_REG2);106else107b |= TMP_REG2;108}109110if (!(b & REG_MASK))111inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */112else {113if (immb != 0 && !(b & OFFS_REG_MASK)) {114/* Immediate operand. */115if (immb <= 127 && immb >= -128)116inst_size += sizeof(sljit_s8);117else118inst_size += sizeof(sljit_s32);119} else if (reg_lmap[b & REG_MASK] == 5) {120/* Swap registers if possible. */121if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)122b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);123else124inst_size += sizeof(sljit_s8);125}126127if (reg_map[b & REG_MASK] >= 8)128rex |= REX_B;129130if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))131b |= TO_OFFS_REG(SLJIT_SP);132133if (b & OFFS_REG_MASK) {134inst_size += 1; /* SIB byte. */135if (reg_map[OFFS_REG(b)] >= 8)136rex |= REX_X;137}138}139} else if (!(flags & EX86_SSE2_OP2)) {140if (reg_map[b] >= 8)141rex |= REX_B;142} else if (freg_map[b] >= 8)143rex |= REX_B;144145if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {146SLJIT_ASSERT(size == 2);147size++;148inst_size++;149}150151if (a == SLJIT_IMM) {152if (flags & EX86_BIN_INS) {153if (imma <= 127 && imma >= -128) {154inst_size += 1;155flags |= EX86_BYTE_ARG;156} else157inst_size += 4;158} else if (flags & EX86_SHIFT_INS) {159SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));160if (imma != 1) {161inst_size++;162flags |= EX86_BYTE_ARG;163}164} else if (flags & EX86_BYTE_ARG)165inst_size++;166else if (flags & EX86_HALF_ARG)167inst_size += sizeof(short);168else169inst_size += sizeof(sljit_s32);170} else {171SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);172/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */173if (!(flags & EX86_SSE2_OP1)) {174if (reg_map[a] >= 8)175rex |= REX_R;176}177else if (freg_map[a] >= 8)178rex |= REX_R;179}180181if (rex)182inst_size++;183184inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);185PTR_FAIL_IF(!inst);186187/* Encoding prefixes. */188INC_SIZE(inst_size);189if (flags & EX86_PREF_F2)190*inst++ = 0xf2;191else if (flags & EX86_PREF_F3)192*inst++ = 0xf3;193else if (flags & EX86_PREF_66)194*inst++ = 0x66;195196/* Rex is always the last prefix. */197if (rex)198*inst++ = rex;199200buf_ptr = inst + size;201202/* Encode mod/rm byte. */203if (!(flags & EX86_SHIFT_INS)) {204if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)205*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;206207if (a == SLJIT_IMM)208*buf_ptr = 0;209else if (!(flags & EX86_SSE2_OP1))210*buf_ptr = U8(reg_lmap[a] << 3);211else212*buf_ptr = U8(freg_lmap[a] << 3);213} else {214if (a == SLJIT_IMM) {215if (imma == 1)216*inst = GROUP_SHIFT_1;217else218*inst = GROUP_SHIFT_N;219} else220*inst = GROUP_SHIFT_CL;221*buf_ptr = 0;222}223224if (!(b & SLJIT_MEM)) {225*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));226buf_ptr++;227} else if (b & REG_MASK) {228reg_lmap_b = reg_lmap[b & REG_MASK];229230if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {231if (immb != 0 || reg_lmap_b == 5) {232if (immb <= 127 && immb >= -128)233*buf_ptr |= 0x40;234else235*buf_ptr |= 0x80;236}237238if (!(b & OFFS_REG_MASK))239*buf_ptr++ |= reg_lmap_b;240else {241buf_ptr[0] |= 0x04;242buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));243buf_ptr += 2;244}245246if (immb != 0 || reg_lmap_b == 5) {247if (immb <= 127 && immb >= -128)248*buf_ptr++ = U8(immb); /* 8 bit displacement. */249else {250sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */251buf_ptr += sizeof(sljit_s32);252}253}254} else {255if (reg_lmap_b == 5)256*buf_ptr |= 0x40;257258buf_ptr[0] |= 0x04;259buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));260buf_ptr += 2;261262if (reg_lmap_b == 5)263*buf_ptr++ = 0;264}265} else {266buf_ptr[0] |= 0x04;267buf_ptr[1] = 0x25;268buf_ptr += 2;269sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */270buf_ptr += sizeof(sljit_s32);271}272273if (a == SLJIT_IMM) {274if (flags & EX86_BYTE_ARG)275*buf_ptr = U8(imma);276else if (flags & EX86_HALF_ARG)277sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);278else if (!(flags & EX86_SHIFT_INS))279sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);280}281282return inst;283}284285static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,286/* The first and second register operand. */287sljit_s32 a, sljit_s32 v,288/* The general operand (not immediate). */289sljit_s32 b, sljit_sw immb)290{291sljit_u8 *inst;292sljit_u8 vex = 0;293sljit_u8 vex_m = 0;294sljit_uw size;295296SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))297& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);298299op |= EX86_REX;300301if (op & VEX_OP_0F38)302vex_m = 0x2;303else if (op & VEX_OP_0F3A)304vex_m = 0x3;305306if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {307if (vex_m == 0)308vex_m = 0x1;309310vex |= 0x80;311}312313if (op & EX86_PREF_66)314vex |= 0x1;315else if (op & EX86_PREF_F2)316vex |= 0x3;317else if (op & EX86_PREF_F3)318vex |= 0x2;319320op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);321322if (op & VEX_256)323vex |= 0x4;324325vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));326327size = op & ~(sljit_uw)0xff;328size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;329330inst = emit_x86_instruction(compiler, size, a, 0, b, immb);331FAIL_IF(!inst);332333SLJIT_ASSERT((inst[-1] & 0xf0) == REX);334335/* If X or B is present in REX prefix. */336if (vex_m == 0 && inst[-1] & 0x3)337vex_m = 0x1;338339if (vex_m == 0) {340vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);341342inst[-1] = 0xc5;343inst[0] = vex;344inst[1] = U8(op);345return SLJIT_SUCCESS;346}347348vex_m |= U8((inst[-1] ^ 0x7) << 5);349inst[-1] = 0xc4;350inst[0] = vex_m;351inst[1] = vex;352inst[2] = U8(op);353return SLJIT_SUCCESS;354}355356/* --------------------------------------------------------------------- */357/* Enter / return */358/* --------------------------------------------------------------------- */359360static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr)361{362sljit_uw type = jump->flags >> TYPE_SHIFT;363364int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff);365366/* The relative jump below specialized for this case. */367SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG);368369if (type < SLJIT_JUMP) {370/* Invert type. */371code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10);372code_ptr[1] = short_addr ? (6 + 3) : (10 + 3);373code_ptr += 2;374}375376code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B);377code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2];378code_ptr += 2;379jump->addr = (sljit_uw)code_ptr;380381if (!(jump->flags & JUMP_ADDR))382jump->flags |= PATCH_MD;383else if (short_addr)384sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);385else386sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);387388code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);389390code_ptr[0] = REX_B;391code_ptr[1] = GROUP_FF;392code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);393394return code_ptr + 3;395}396397static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)398{399sljit_uw addr;400sljit_sw diff;401SLJIT_UNUSED_ARG(executable_offset);402403SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10);404if (jump->flags & JUMP_ADDR)405addr = jump->u.target;406else407addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size;408409if (addr > 0xffffffffl) {410diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);411412if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) {413SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7);414code_ptr -= SSIZE_OF(s32) - 1;415416SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W);417SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32);418419code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2));420code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5);421code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m;422423jump->flags |= PATCH_MW;424return code_ptr;425}426427jump->flags |= PATCH_MD;428return code_ptr;429}430431code_ptr -= 2 + sizeof(sljit_uw);432433SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);434SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);435436if ((code_ptr[0] & 0x07) != 0) {437SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6);438code_ptr[0] = U8(code_ptr[0] & ~0x08);439code_ptr += 2 + sizeof(sljit_s32);440} else {441SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5);442code_ptr[0] = code_ptr[1];443code_ptr += 1 + sizeof(sljit_s32);444}445446return code_ptr;447}448449#ifdef _WIN64450typedef struct {451sljit_sw regs[2];452} sljit_sse2_reg;453#endif /* _WIN64 */454455SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,456sljit_s32 options, sljit_s32 arg_types,457sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)458{459sljit_uw size;460sljit_s32 word_arg_count = 0;461sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);462sljit_s32 saved_regs_size, tmp, i;463#ifdef _WIN64464sljit_s32 fscratches;465sljit_s32 fsaveds;466sljit_s32 saved_float_regs_size;467sljit_s32 saved_float_regs_offset = 0;468sljit_s32 float_arg_count = 0;469#endif /* _WIN64 */470sljit_u8 *inst;471472CHECK_ERROR();473CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));474set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);475476scratches = ENTER_GET_REGS(scratches);477#ifdef _WIN64478saveds = ENTER_GET_REGS(saveds);479fscratches = compiler->fscratches;480fsaveds = compiler->fsaveds;481#endif /* _WIN64 */482483if (options & SLJIT_ENTER_REG_ARG)484arg_types = 0;485486/* Emit ENDBR64 at function entry if needed. */487FAIL_IF(emit_endbranch(compiler));488489compiler->mode32 = 0;490491/* Including the return address saved by the call instruction. */492saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);493494tmp = SLJIT_S0 - saveds;495for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {496size = reg_map[i] >= 8 ? 2 : 1;497inst = (sljit_u8*)ensure_buf(compiler, 1 + size);498FAIL_IF(!inst);499INC_SIZE(size);500if (reg_map[i] >= 8)501*inst++ = REX_B;502PUSH_REG(reg_lmap[i]);503}504505for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {506size = reg_map[i] >= 8 ? 2 : 1;507inst = (sljit_u8*)ensure_buf(compiler, 1 + size);508FAIL_IF(!inst);509INC_SIZE(size);510if (reg_map[i] >= 8)511*inst++ = REX_B;512PUSH_REG(reg_lmap[i]);513}514515#ifdef _WIN64516local_size += SLJIT_LOCALS_OFFSET;517saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);518519if (saved_float_regs_size > 0) {520saved_float_regs_offset = ((local_size + 0xf) & ~0xf);521local_size = saved_float_regs_offset + saved_float_regs_size;522}523#else /* !_WIN64 */524SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);525#endif /* _WIN64 */526527arg_types >>= SLJIT_ARG_SHIFT;528529while (arg_types > 0) {530if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {531tmp = 0;532#ifndef _WIN64533switch (word_arg_count) {534case 0:535tmp = SLJIT_R2;536break;537case 1:538tmp = SLJIT_R1;539break;540case 2:541tmp = TMP_REG1;542break;543default:544tmp = SLJIT_R3;545break;546}547#else /* !_WIN64 */548switch (word_arg_count + float_arg_count) {549case 0:550tmp = SLJIT_R3;551break;552case 1:553tmp = SLJIT_R1;554break;555case 2:556tmp = SLJIT_R2;557break;558default:559tmp = TMP_REG1;560break;561}562#endif /* _WIN64 */563if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {564if (tmp != SLJIT_R0 + word_arg_count)565EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);566} else {567EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);568saved_arg_count++;569}570word_arg_count++;571} else {572#ifdef _WIN64573SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);574float_arg_count++;575if (float_arg_count != float_arg_count + word_arg_count)576FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,577float_arg_count, float_arg_count + word_arg_count, 0));578#endif /* _WIN64 */579}580arg_types >>= SLJIT_ARG_SHIFT;581}582583local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;584compiler->local_size = local_size;585586#ifdef _WIN64587if (local_size > 0) {588if (local_size <= 4 * 4096) {589if (local_size > 4096)590EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);591if (local_size > 2 * 4096)592EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);593if (local_size > 3 * 4096)594EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);595}596else {597EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);598599EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);600BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);601BINARY_IMM32(SUB, 1, TMP_REG1, 0);602603inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);604FAIL_IF(!inst);605606INC_SIZE(2);607inst[0] = JNE_i8;608inst[1] = (sljit_u8)-21;609local_size &= 0xfff;610}611612if (local_size > 0)613EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);614}615#endif /* _WIN64 */616617if (local_size > 0)618BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);619620#ifdef _WIN64621if (saved_float_regs_size > 0) {622compiler->mode32 = 1;623624tmp = SLJIT_FS0 - fsaveds;625for (i = SLJIT_FS0; i > tmp; i--) {626FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));627saved_float_regs_offset += 16;628}629630for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {631FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));632saved_float_regs_offset += 16;633}634}635#endif /* _WIN64 */636637return SLJIT_SUCCESS;638}639640SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,641sljit_s32 options, sljit_s32 arg_types,642sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)643{644sljit_s32 saved_regs_size;645#ifdef _WIN64646sljit_s32 fscratches;647sljit_s32 fsaveds;648sljit_s32 saved_float_regs_size;649#endif /* _WIN64 */650651CHECK_ERROR();652CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));653set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);654655scratches = ENTER_GET_REGS(scratches);656657#ifdef _WIN64658saveds = ENTER_GET_REGS(saveds);659fscratches = compiler->fscratches;660fsaveds = compiler->fsaveds;661662local_size += SLJIT_LOCALS_OFFSET;663saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);664665if (saved_float_regs_size > 0)666local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;667#else /* !_WIN64 */668SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);669#endif /* _WIN64 */670671/* Including the return address saved by the call instruction. */672saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);673compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;674return SLJIT_SUCCESS;675}676677static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)678{679sljit_uw size;680sljit_s32 local_size, i, tmp;681sljit_u8 *inst;682#ifdef _WIN64683sljit_s32 saved_float_regs_offset;684sljit_s32 fscratches = compiler->fscratches;685sljit_s32 fsaveds = compiler->fsaveds;686#endif /* _WIN64 */687688#ifdef _WIN64689saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);690691if (saved_float_regs_offset > 0) {692compiler->mode32 = 1;693saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;694695tmp = SLJIT_FS0 - fsaveds;696for (i = SLJIT_FS0; i > tmp; i--) {697FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));698saved_float_regs_offset += 16;699}700701for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {702FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));703saved_float_regs_offset += 16;704}705706compiler->mode32 = 0;707}708#endif /* _WIN64 */709710local_size = compiler->local_size;711712if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {713local_size += SSIZE_OF(sw);714is_return_to = 0;715}716717if (local_size > 0)718BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);719720tmp = compiler->scratches;721for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {722size = reg_map[i] >= 8 ? 2 : 1;723inst = (sljit_u8*)ensure_buf(compiler, 1 + size);724FAIL_IF(!inst);725INC_SIZE(size);726if (reg_map[i] >= 8)727*inst++ = REX_B;728POP_REG(reg_lmap[i]);729}730731tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);732for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {733size = reg_map[i] >= 8 ? 2 : 1;734inst = (sljit_u8*)ensure_buf(compiler, 1 + size);735FAIL_IF(!inst);736INC_SIZE(size);737if (reg_map[i] >= 8)738*inst++ = REX_B;739POP_REG(reg_lmap[i]);740}741742if (is_return_to)743BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);744745return SLJIT_SUCCESS;746}747748SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)749{750CHECK_ERROR();751CHECK(check_sljit_emit_return_void(compiler));752753compiler->mode32 = 0;754755FAIL_IF(emit_stack_frame_release(compiler, 0));756return emit_byte(compiler, RET_near);757}758759SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,760sljit_s32 src, sljit_sw srcw)761{762CHECK_ERROR();763CHECK(check_sljit_emit_return_to(compiler, src, srcw));764765compiler->mode32 = 0;766767if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {768ADJUST_LOCAL_OFFSET(src, srcw);769770EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);771src = TMP_REG2;772srcw = 0;773}774775FAIL_IF(emit_stack_frame_release(compiler, 1));776777SLJIT_SKIP_CHECKS(compiler);778return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);779}780781/* --------------------------------------------------------------------- */782/* Call / return instructions */783/* --------------------------------------------------------------------- */784785#ifndef _WIN64786787static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)788{789sljit_s32 src = src_ptr ? (*src_ptr) : 0;790sljit_s32 word_arg_count = 0;791792SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);793SLJIT_ASSERT(!(src & SLJIT_MEM));794795/* Remove return value. */796arg_types >>= SLJIT_ARG_SHIFT;797798while (arg_types) {799if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)800word_arg_count++;801arg_types >>= SLJIT_ARG_SHIFT;802}803804if (word_arg_count == 0)805return SLJIT_SUCCESS;806807if (word_arg_count >= 3) {808if (src == SLJIT_R2)809*src_ptr = TMP_REG1;810EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);811}812813return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);814}815816#else817818static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)819{820sljit_s32 src = src_ptr ? (*src_ptr) : 0;821sljit_s32 arg_count = 0;822sljit_s32 word_arg_count = 0;823sljit_s32 float_arg_count = 0;824sljit_s32 types = 0;825sljit_s32 data_trandfer = 0;826static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };827828SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);829SLJIT_ASSERT(!(src & SLJIT_MEM));830831arg_types >>= SLJIT_ARG_SHIFT;832833while (arg_types) {834types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);835836switch (arg_types & SLJIT_ARG_MASK) {837case SLJIT_ARG_TYPE_F64:838case SLJIT_ARG_TYPE_F32:839arg_count++;840float_arg_count++;841842if (arg_count != float_arg_count)843data_trandfer = 1;844break;845default:846arg_count++;847word_arg_count++;848849if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {850data_trandfer = 1;851852if (src == word_arg_regs[arg_count]) {853EMIT_MOV(compiler, TMP_REG2, 0, src, 0);854*src_ptr = TMP_REG2;855}856}857break;858}859860arg_types >>= SLJIT_ARG_SHIFT;861}862863if (!data_trandfer)864return SLJIT_SUCCESS;865866while (types) {867switch (types & SLJIT_ARG_MASK) {868case SLJIT_ARG_TYPE_F64:869if (arg_count != float_arg_count)870FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));871arg_count--;872float_arg_count--;873break;874case SLJIT_ARG_TYPE_F32:875if (arg_count != float_arg_count)876FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));877arg_count--;878float_arg_count--;879break;880default:881if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])882EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);883arg_count--;884word_arg_count--;885break;886}887888types >>= SLJIT_ARG_SHIFT;889}890891return SLJIT_SUCCESS;892}893894#endif895896SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,897sljit_s32 arg_types)898{899CHECK_ERROR_PTR();900CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));901902compiler->mode32 = 0;903904if ((type & 0xff) != SLJIT_CALL_REG_ARG)905PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));906907if (type & SLJIT_CALL_RETURN) {908PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));909type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);910}911912SLJIT_SKIP_CHECKS(compiler);913return sljit_emit_jump(compiler, type);914}915916SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,917sljit_s32 arg_types,918sljit_s32 src, sljit_sw srcw)919{920CHECK_ERROR();921CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));922923compiler->mode32 = 0;924925if (src & SLJIT_MEM) {926ADJUST_LOCAL_OFFSET(src, srcw);927EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);928src = TMP_REG2;929}930931if (type & SLJIT_CALL_RETURN) {932if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {933EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);934src = TMP_REG2;935}936937FAIL_IF(emit_stack_frame_release(compiler, 0));938}939940if ((type & 0xff) != SLJIT_CALL_REG_ARG)941FAIL_IF(call_with_args(compiler, arg_types, &src));942943if (type & SLJIT_CALL_RETURN)944type = SLJIT_JUMP;945946SLJIT_SKIP_CHECKS(compiler);947return sljit_emit_ijump(compiler, type, src, srcw);948}949950static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)951{952sljit_u8 *inst;953954if (FAST_IS_REG(dst)) {955if (reg_map[dst] < 8)956return emit_byte(compiler, U8(POP_r + reg_lmap[dst]));957958inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);959FAIL_IF(!inst);960INC_SIZE(2);961*inst++ = REX_B;962POP_REG(reg_lmap[dst]);963return SLJIT_SUCCESS;964}965966/* REX_W is not necessary (src is not immediate). */967compiler->mode32 = 1;968inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);969FAIL_IF(!inst);970*inst = POP_rm;971return SLJIT_SUCCESS;972}973974static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)975{976sljit_u8 *inst;977978if (FAST_IS_REG(src)) {979if (reg_map[src] < 8) {980inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);981FAIL_IF(!inst);982983INC_SIZE(1 + 1);984PUSH_REG(reg_lmap[src]);985}986else {987inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);988FAIL_IF(!inst);989990INC_SIZE(2 + 1);991*inst++ = REX_B;992PUSH_REG(reg_lmap[src]);993}994}995else {996/* REX_W is not necessary (src is not immediate). */997compiler->mode32 = 1;998inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);999FAIL_IF(!inst);1000inst[0] = GROUP_FF;1001inst[1] |= PUSH_rm;10021003inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);1004FAIL_IF(!inst);1005INC_SIZE(1);1006}10071008RET();1009return SLJIT_SUCCESS;1010}10111012static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,1013sljit_s32 dst, sljit_sw dstw)1014{1015sljit_s32 saved_regs_size;10161017compiler->mode32 = 0;1018saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);1019return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size);1020}10211022/* --------------------------------------------------------------------- */1023/* Other operations */1024/* --------------------------------------------------------------------- */10251026SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,1027sljit_s32 dst_reg,1028sljit_s32 src1, sljit_sw src1w,1029sljit_s32 src2_reg)1030{1031CHECK_ERROR();1032CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));10331034ADJUST_LOCAL_OFFSET(src1, src1w);10351036compiler->mode32 = type & SLJIT_32;1037type &= ~SLJIT_32;10381039if (dst_reg != src2_reg) {1040if (dst_reg == src1) {1041src1 = src2_reg;1042src1w = 0;1043type ^= 0x1;1044} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {1045EMIT_MOV(compiler, dst_reg, 0, src1, src1w);1046src1 = src2_reg;1047src1w = 0;1048type ^= 0x1;1049} else1050EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0);1051}10521053if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {1054if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {1055EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w);1056src1 = TMP_REG2;1057src1w = 0;1058}10591060return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w);1061}10621063return emit_cmov_generic(compiler, type, dst_reg, src1, src1w);1064}10651066SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,1067sljit_s32 reg,1068sljit_s32 mem, sljit_sw memw)1069{1070sljit_u8* inst;1071sljit_s32 i, next, reg_idx;1072sljit_u8 regs[2];10731074CHECK_ERROR();1075CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));10761077if (!(reg & REG_PAIR_MASK))1078return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);10791080ADJUST_LOCAL_OFFSET(mem, memw);10811082compiler->mode32 = 0;10831084if ((mem & REG_MASK) == 0) {1085EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);10861087mem = SLJIT_MEM1(TMP_REG1);1088memw = 0;1089} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {1090EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);10911092mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);1093memw = 0;1094}10951096regs[0] = U8(REG_PAIR_FIRST(reg));1097regs[1] = U8(REG_PAIR_SECOND(reg));10981099next = SSIZE_OF(sw);11001101if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {1102if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {1103/* Base and offset cannot be TMP_REG1. */1104EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);11051106if (regs[1] == OFFS_REG(mem))1107next = -SSIZE_OF(sw);11081109mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);1110} else {1111next = -SSIZE_OF(sw);11121113if (!(mem & OFFS_REG_MASK))1114memw += SSIZE_OF(sw);1115}1116}11171118for (i = 0; i < 2; i++) {1119reg_idx = next > 0 ? i : (i ^ 0x1);1120reg = regs[reg_idx];11211122if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {1123inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));1124FAIL_IF(!inst);11251126INC_SIZE(5);11271128inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));1129inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;1130inst[2] = 0x44 | U8(reg_lmap[reg] << 3);1131inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];1132inst[4] = sizeof(sljit_sw);1133} else if (type & SLJIT_MEM_STORE) {1134EMIT_MOV(compiler, mem, memw, reg, 0);1135} else {1136EMIT_MOV(compiler, reg, 0, mem, memw);1137}11381139if (!(mem & OFFS_REG_MASK))1140memw += next;1141}11421143return SLJIT_SUCCESS;1144}11451146static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,1147sljit_s32 dst, sljit_sw dstw,1148sljit_s32 src, sljit_sw srcw)1149{1150sljit_u8* inst;1151sljit_s32 dst_r;11521153compiler->mode32 = 0;11541155if (src == SLJIT_IMM) {1156if (FAST_IS_REG(dst)) {1157if (!sign || ((sljit_u32)srcw <= 0x7fffffff))1158return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);11591160inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);1161FAIL_IF(!inst);1162*inst = MOV_rm_i32;1163return SLJIT_SUCCESS;1164}1165compiler->mode32 = 1;1166inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);1167FAIL_IF(!inst);1168*inst = MOV_rm_i32;1169compiler->mode32 = 0;1170return SLJIT_SUCCESS;1171}11721173dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;11741175if ((dst & SLJIT_MEM) && FAST_IS_REG(src))1176dst_r = src;1177else {1178if (sign) {1179inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);1180FAIL_IF(!inst);1181*inst = MOVSXD_r_rm;1182} else {1183compiler->mode32 = 1;1184EMIT_MOV(compiler, dst_r, 0, src, srcw);1185compiler->mode32 = 0;1186}1187}11881189if (dst & SLJIT_MEM) {1190compiler->mode32 = 1;1191inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);1192FAIL_IF(!inst);1193*inst = MOV_rm_r;1194compiler->mode32 = 0;1195}11961197return SLJIT_SUCCESS;1198}11991200static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,1201sljit_s32 dst, sljit_sw dstw,1202sljit_s32 src, sljit_sw srcw)1203{1204sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;1205sljit_u8 *inst, *jump_inst1, *jump_inst2;1206sljit_uw size1, size2;12071208compiler->mode32 = 0;12091210if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {1211if (src != SLJIT_IMM) {1212compiler->mode32 = 1;1213EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1214compiler->mode32 = 0;1215} else1216FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw));12171218FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));12191220compiler->mode32 = 1;12211222if (dst_r == TMP_FREG)1223return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);1224return SLJIT_SUCCESS;1225}12261227if (!FAST_IS_REG(src)) {1228EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1229src = TMP_REG1;1230}12311232BINARY_IMM32(CMP, 0, src, 0);12331234inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1235FAIL_IF(!inst);1236INC_SIZE(2);1237inst[0] = JL_i8;1238jump_inst1 = inst;12391240size1 = compiler->size;12411242compiler->mode32 = 0;1243FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));12441245inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1246FAIL_IF(!inst);1247INC_SIZE(2);1248inst[0] = JMP_i8;1249jump_inst2 = inst;12501251size2 = compiler->size;12521253jump_inst1[1] = U8(size2 - size1);12541255if (src != TMP_REG1)1256EMIT_MOV(compiler, TMP_REG1, 0, src, 0);12571258EMIT_MOV(compiler, TMP_REG2, 0, src, 0);12591260inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);1261FAIL_IF(!inst);1262inst[1] |= SHR;12631264compiler->mode32 = 1;1265BINARY_IMM32(AND, 1, TMP_REG2, 0);12661267compiler->mode32 = 0;1268inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0);1269FAIL_IF(!inst);1270inst[0] = OR_r_rm;12711272FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));1273compiler->mode32 = 1;1274FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));12751276jump_inst2[1] = U8(compiler->size - size2);12771278if (dst_r == TMP_FREG)1279return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);1280return SLJIT_SUCCESS;1281}12821283static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler,1284sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero)1285{1286sljit_u8 *inst;1287sljit_u32 size;12881289if (is_zero) {1290rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0;1291} else {1292if (freg_map[freg] >= 8)1293rex |= REX_R;1294if (reg_map[TMP_REG1] >= 8)1295rex |= REX_B;1296}12971298size = (rex != 0) ? 5 : 4;12991300inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1301FAIL_IF(!inst);1302INC_SIZE(size);13031304*inst++ = GROUP_66;1305if (rex != 0)1306*inst++ = rex;1307inst[0] = GROUP_0F;13081309if (is_zero) {1310inst[1] = PXOR_x_xm;1311inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG);1312} else {1313inst[1] = MOVD_x_rm;1314inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG);1315}13161317return SLJIT_SUCCESS;1318}13191320SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,1321sljit_s32 freg, sljit_f32 value)1322{1323union {1324sljit_s32 imm;1325sljit_f32 value;1326} u;13271328CHECK_ERROR();1329CHECK(check_sljit_emit_fset32(compiler, freg, value));13301331u.value = value;13321333if (u.imm != 0) {1334compiler->mode32 = 1;1335EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);1336}13371338return sljit_emit_fset(compiler, freg, 0, u.imm == 0);1339}13401341SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,1342sljit_s32 freg, sljit_f64 value)1343{1344union {1345sljit_sw imm;1346sljit_f64 value;1347} u;13481349CHECK_ERROR();1350CHECK(check_sljit_emit_fset64(compiler, freg, value));13511352u.value = value;13531354if (u.imm != 0) {1355compiler->mode32 = 0;1356EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);1357}13581359return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0);1360}13611362SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,1363sljit_s32 freg, sljit_s32 reg)1364{1365sljit_u8 *inst;1366sljit_u32 size;1367sljit_u8 rex = 0;13681369CHECK_ERROR();1370CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));13711372if (!(op & SLJIT_32))1373rex = REX_W;13741375if (freg_map[freg] >= 8)1376rex |= REX_R;13771378if (reg_map[reg] >= 8)1379rex |= REX_B;13801381size = (rex != 0) ? 5 : 4;13821383inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1384FAIL_IF(!inst);1385INC_SIZE(size);13861387*inst++ = GROUP_66;1388if (rex != 0)1389*inst++ = rex;1390inst[0] = GROUP_0F;1391inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;1392inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG);13931394return SLJIT_SUCCESS;1395}13961397static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)1398{1399sljit_s32 tmp, size;14001401/* Don't adjust shadow stack if it isn't enabled. */1402if (!cpu_has_shadow_stack())1403return SLJIT_SUCCESS;14041405size = compiler->local_size;1406tmp = compiler->scratches;1407if (tmp >= SLJIT_FIRST_SAVED_REG)1408size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);1409tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;1410if (SLJIT_S0 >= tmp)1411size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);14121413return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);1414}141514161417