/* Path: thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c */
/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Returns the human-readable target name; SLJIT_CPUINFO is appended at compile time. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - ESP
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - RSP
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_FREG	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Maps sljit register indices to x86 machine register encodings.
   NOTE(review): zero entries appear to correspond to registers accessed
   through the stack (see CHECK_EXTRA_REGS below) - confirm against callers. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 1, 2, 3, 4, 5, 6, 7, 0
};

/* On x86-32, registers R3..S3 live in the stack frame; rewrite such operands
   into an [ESP + offset] memory access before emitting the instruction. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
		w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
   Note: avoid to use r12 and r13 for memory addressing
   therefore r12 is better to be a higher saved register. */
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
};
/* low-map. reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
};
#endif

/* Args: xmm0-xmm3 */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
};
/* low-map. freg_map & 0x7. */
static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
};

/* REX prefix bits (64-bit mode). */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

#ifndef _WIN64
#define HALFWORD_MAX	0x7fffffffl
#define HALFWORD_MIN	-0x80000000l
#else
#define HALFWORD_MAX	0x7fffffffll
#define HALFWORD_MIN	-0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define U8(v)			((sljit_u8)(v))

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		((sljit_uw)0x000010)
#define EX86_SHIFT_INS		((sljit_uw)0x000020)
#define EX86_BYTE_ARG		((sljit_uw)0x000040)
#define EX86_HALF_ARG		((sljit_uw)0x000080)
/* Size flags for both emit_x86_instruction and emit_vex_instruction: */
#define EX86_REX		((sljit_uw)0x000100)
#define EX86_NO_REXW		((sljit_uw)0x000200)
#define EX86_PREF_66		((sljit_uw)0x000400)
#define EX86_PREF_F2		((sljit_uw)0x000800)
#define EX86_PREF_F3		((sljit_uw)0x001000)
#define EX86_SSE2_OP1		((sljit_uw)0x002000)
#define EX86_SSE2_OP2		((sljit_uw)0x004000)
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
#define EX86_VEX_EXT		((sljit_uw)0x008000)
/* Op flags for emit_vex_instruction: */
#define VEX_OP_0F38		((sljit_uw)0x010000)
#define VEX_OP_0F3A		((sljit_uw)0x020000)
#define VEX_SSE2_OPV		((sljit_uw)0x040000)
#define VEX_AUTO_W		((sljit_uw)0x080000)
#define VEX_W			((sljit_uw)0x100000)
#define VEX_256			((sljit_uw)0x200000)

#define EX86_SELECT_66(op)	(((op) & SLJIT_32) ? 0 : EX86_PREF_66)
#define EX86_SELECT_F2_F3(op)	(((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define BSF_r_rm	(/* GROUP_0F */ 0xbc)
#define BSWAP_r		(/* GROUP_0F */ 0xc8)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CMPS_x_xm	0xc2
#define CMPXCHG_rm_r	0xb1
#define CMPXCHG_rm8_r	0xb0
#define CVTPD2PS_x_xm	0x5a
#define CVTPS2PD_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define EXTRACTPS_x_xm	0x17
#define FLDS		0xd9
#define FLDL		0xdd
#define FSTPS		0xd9
#define FSTPD		0xdd
#define INSERTPS_x_xm	0x21
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JL_i8		0x7c
#define JE_i8		0x74
#define JNC_i8		0x73
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define LOOP_i8		0xe2
#define LZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVAPS_x_xm	0x28
#define MOVAPS_xm_x	0x29
#define MOVD_x_rm	0x6e
#define MOVD_rm_x	0x7e
#define MOVDDUP_x_xm	0x12
#define MOVDQA_x_xm	0x6f
#define MOVDQA_xm_x	0x7f
#define MOVDQU_x_xm	0x6f
#define MOVHLPS_x_x	0x12
#define MOVHPD_m_x	0x17
#define MOVHPD_x_m	0x16
#define MOVLHPS_x_x	0x16
#define MOVLPD_m_x	0x13
#define MOVLPD_x_m	0x12
#define MOVMSKPS_r_x	(/* GROUP_0F */ 0x50)
#define MOVQ_x_xm	(/* GROUP_0F */ 0x7e)
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSHDUP_x_xm	0x16
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVUPS_x_xm	0x10
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define ORPD_x_xm	0x56
#define PACKSSWB_x_xm	(/* GROUP_0F */ 0x63)
#define PAND_x_xm	0xdb
#define PCMPEQD_x_xm	0x76
#define PINSRB_x_rm_i8	0x20
#define PINSRW_x_rm_i8	0xc4
#define PINSRD_x_rm_i8	0x22
#define PEXTRB_rm_x_i8	0x14
#define PEXTRW_rm_x_i8	0x15
#define PEXTRD_rm_x_i8	0x16
#define PMOVMSKB_r_x	(/* GROUP_0F */ 0xd7)
#define PMOVSXBD_x_xm	0x21
#define PMOVSXBQ_x_xm	0x22
#define PMOVSXBW_x_xm	0x20
#define PMOVSXDQ_x_xm	0x25
#define PMOVSXWD_x_xm	0x23
#define PMOVSXWQ_x_xm	0x24
#define PMOVZXBD_x_xm	0x31
#define PMOVZXBQ_x_xm	0x32
#define PMOVZXBW_x_xm	0x30
#define PMOVZXDQ_x_xm	0x35
#define PMOVZXWD_x_xm	0x33
#define PMOVZXWQ_x_xm	0x34
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define POR_x_xm	0xeb
#define PREFETCH	0x18
#define PSHUFB_x_xm	0x00
#define PSHUFD_x_xm	0x70
#define PSHUFLW_x_xm	0x70
#define PSRLDQ_x	0x73
#define PSLLD_x_i8	0x72
#define PSLLQ_x_i8	0x73
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define PXOR_x_xm	0xef
#define ROL		(/* SHIFT */ 0 << 3)
#define ROR		(/* SHIFT */ 1 << 3)
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHLD		(/* GROUP_0F */ 0xa5)
#define SHRD		(/* GROUP_0F */ 0xad)
#define SHR		(/* SHIFT */ 5 << 3)
#define SHUFPS_x_xm	0xc6
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define TZCNT_r_rm	(/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define UNPCKLPS_x_xm	0x14
#define VBROADCASTSD_x_xm	0x19
#define VBROADCASTSS_x_xm	0x18
#define VEXTRACTF128_x_ym	0x19
#define VEXTRACTI128_x_ym	0x39
#define VINSERTF128_y_y_xm	0x18
#define VINSERTI128_y_y_xm	0x38
#define VPBROADCASTB_x_xm	0x78
#define VPBROADCASTD_x_xm	0x58
#define VPBROADCASTQ_x_xm	0x59
#define VPBROADCASTW_x_xm	0x79
#define VPERMPD_y_ym	0x01
#define VPERMQ_y_ym	0x00
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

/* Prefix / group bytes and ModRM helpers. */
#define GROUP_0F	0x0f
#define GROUP_66	0x66
#define GROUP_F3	0xf3
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3
#define GROUP_LOCK	0xf0

#define MOD_REG		0xc0
#define MOD_DISP8	0x40

/* Appends s bytes to the instruction buffer accounting. */
#define INC_SIZE(s)	(*inst++ = U8(s), compiler->size += (s))

#define PUSH_REG(r)	(*inst++ = U8(PUSH_r + (r)))
#define \
POP_REG(r)	(*inst++ = U8(POP_r + (r)))
#define RET()		(*inst++ = RET_near)
#define RET_I16(n)	(*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)

/* In-buffer marker bytes for the second code-generation pass. */
#define SLJIT_INST_LABEL	255
#define SLJIT_INST_JUMP		254
#define SLJIT_INST_MOV_ADDR	253
#define SLJIT_INST_CONST	252

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. Therefore they can be overwritten by different threads
   if they detect the CPU features in the same time. */
#define CPU_FEATURE_DETECTED	0x001
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
#define CPU_FEATURE_SSE2	0x002
#endif
#define CPU_FEATURE_SSE41	0x004
#define CPU_FEATURE_LZCNT	0x008
#define CPU_FEATURE_TZCNT	0x010
#define CPU_FEATURE_CMOV	0x020
#define CPU_FEATURE_AVX		0x040
#define CPU_FEATURE_AVX2	0x080
#define CPU_FEATURE_OSXSAVE	0x100

static sljit_u32 cpu_feature_list = 0;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#elif defined(__INTEL_COMPILER)
#include <cpuid.h>
#endif

#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
	|| (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))
#include <immintrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
	SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

/* Executes CPUID with EAX = info[0] and ECX = info[2]; the resulting
   EAX/EBX/ECX/EDX are stored back into info[0..3]. */
static void execute_cpu_id(sljit_u32 info[4])
{
#if (defined(_MSC_VER) && _MSC_VER >= 1400) \
	|| (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7)

	__cpuidex((int*)info, (int)info[0], (int)info[2]);

#elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900)

	__get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3);

#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \
	&& (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)

	/* Intel syntax. */
	__asm {
		mov esi, info
		mov eax, [esi]
		mov ecx, [esi + 8]
		cpuid
		mov [esi], eax
		mov [esi + 4], ebx
		mov [esi + 8], ecx
		mov [esi + 12], edx
	}

#else

	__asm__ __volatile__ (
		"cpuid\n"
		: "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
		: "0" (info[0]), "2" (info[2])
	);

#endif
}

/* Returns the low 32 bits of XCR0 (xgetbv with ECX = 0); used to verify
   that the OS enables the YMM state before trusting AVX/AVX2 bits. */
static sljit_u32 execute_get_xcr0_low(void)
{
	sljit_u32 xcr0;

#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \
	|| (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))

	xcr0 = (sljit_u32)_xgetbv(0);

#elif defined(__TINYC__)

	/* xgetbv emitted as raw bytes: tcc does not know the mnemonic. */
	__asm__ (
		"xorl %%ecx, %%ecx\n"
		".byte 0x0f\n"
		".byte 0x01\n"
		".byte 0xd0\n"
		: "=a" (xcr0)
		:
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
		: "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
		: "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \
	|| (defined(__clang__) && __clang_major__ < 14) \
	|| (defined(__GNUC__) && __GNUC__ < 3) \
	|| defined(__SUNPRO_C) || defined(__SUNPRO_CC)

	/* AT&T syntax. */
	__asm__ (
		"xorl %%ecx, %%ecx\n"
		"xgetbv\n"
		: "=a" (xcr0)
		:
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
		: "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
		: "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#elif defined(_MSC_VER)

	/* Intel syntax. */
	__asm {
		xor ecx, ecx
		xgetbv
		mov xcr0, eax
	}

#else

	__asm__ (
		"xor{l %%ecx, %%ecx | ecx, ecx}\n"
		"xgetbv\n"
		: "=a" (xcr0)
		:
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
		: "ecx", "edx"
#else /* !SLJIT_CONFIG_X86_32 */
		: "rcx", "rdx"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#endif
	return xcr0;
}

/* Populates cpu_feature_list from CPUID leaves 7, 1 and 0x80000001.
   AVX/AVX2 are cleared again when OSXSAVE is set but XCR0 bit 2 (YMM
   state) is not enabled by the OS. */
static void get_cpu_features(void)
{
	sljit_u32 feature_list = CPU_FEATURE_DETECTED;
	sljit_u32 info[4] = {0};
	sljit_u32 max_id;

	execute_cpu_id(info);
	max_id = info[0];

	if (max_id >= 7) {
		info[0] = 7;
		info[2] = 0;
		execute_cpu_id(info);

		if (info[1] & 0x8)
			feature_list |= CPU_FEATURE_TZCNT;
		if (info[1] & 0x20)
			feature_list |= CPU_FEATURE_AVX2;
	}

	if (max_id >= 1) {
		info[0] = 1;
#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32
		/* Winchip 2 and Cyrix MII bugs */
		info[1] = info[2] = 0;
#endif
		execute_cpu_id(info);

		if (info[2] & 0x80000)
			feature_list |= CPU_FEATURE_SSE41;
		if (info[2] & 0x8000000)
			feature_list |= CPU_FEATURE_OSXSAVE;
		if (info[2] & 0x10000000)
			feature_list |= CPU_FEATURE_AVX;
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (info[3] & 0x4000000)
			feature_list |= CPU_FEATURE_SSE2;
#endif
		if (info[3] & 0x8000)
			feature_list |= CPU_FEATURE_CMOV;
	}

	info[0] = 0x80000000;
	execute_cpu_id(info);
	max_id = info[0];

	if (max_id >= 0x80000001) {
		info[0] = 0x80000001;
		execute_cpu_id(info);

		if (info[2] & 0x20)
			feature_list |= CPU_FEATURE_LZCNT;
	}

	if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)
		feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);

	cpu_feature_list = feature_list;
}

/* Maps an sljit condition type to the second byte of the near
   conditional jump opcode (0x0f 0x8x); callers derive the short
   form by subtracting 0x10. Returns 0 for unknown types. */
static sljit_u8 get_jump_code(sljit_uw type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_CARRY:
	case SLJIT_F_LESS:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_NOT_CARRY:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED:
	case SLJIT_ORDERED_EQUAL: /* NaN. */
		return 0x8a /* jp */;

	case SLJIT_ORDERED:
	case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
		return 0x8b /* jpo */;
	}
	return 0;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);
#endif /* SLJIT_CONFIG_X86_32 */

/* Emits the opcode byte(s) of a non-rewritable jump, choosing the
   shortest displacement form (8-bit vs 32-bit); on 64-bit targets
   falls back to detect_far_jump_type when the distance exceeds a
   signed 32-bit offset. Sets PATCH_MB/PATCH_MW so the displacement
   is filled in later by generate_jump_or_mov_addr. */
static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_s32 short_jump;
	sljit_uw label_addr;
	sljit_uw jump_addr;

	jump_addr = (sljit_uw)code_ptr;
	if (!(jump->flags & JUMP_ADDR)) {
		label_addr = (sljit_uw)(code + jump->u.label->size);

		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	} else
		label_addr = jump->u.target - (sljit_uw)executable_offset;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN)
		return detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_64 */

	short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f;

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
	} else if (type > SLJIT_JUMP) {
		/* Calls have no short form. */
		short_jump = 0;
		*code_ptr++ = CALL_i32;
	} else if (short_jump) {
		*code_ptr++ = U8(get_jump_code(type) - 0x10);
	} else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
	}

	jump->addr = (sljit_uw)code_ptr;

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return
code_ptr;
}

/* Writes the final displacement / absolute address into an already
   emitted jump, call or mov_addr sequence, according to the PATCH_*
   flags set during code generation. */
static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_uw flags = jump->flags;
	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
	sljit_uw jump_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
#else /* SLJIT_CONFIG_X86_32 */
		if (flags & PATCH_MD) {
			/* Full 64-bit immediate. */
			SLJIT_ASSERT(addr > HALFWORD_MAX);
			sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);
			return;
		}

		if (flags & PATCH_MW) {
			/* RIP-relative lea. */
			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);
			SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		} else {
			SLJIT_ASSERT(addr <= HALFWORD_MAX);
		}
		sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);
#endif /* !SLJIT_CONFIG_X86_32 */
		return;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (SLJIT_UNLIKELY(flags & PATCH_MD)) {
		SLJIT_ASSERT(!(flags & JUMP_ADDR));
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
		return;
	}
#endif /* SLJIT_CONFIG_X86_64 */

	addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);

	if (flags & PATCH_MB) {
		/* 8-bit displacement, relative to the end of the instruction. */
		addr -= sizeof(sljit_s8);
		SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);
		*(sljit_u8*)jump_addr = U8(addr);
		return;
	} else if (flags & PATCH_MW) {
		/* 32-bit displacement. */
		addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);
#else /* !SLJIT_CONFIG_X86_32 */
		SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);
		sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);
#endif /* SLJIT_CONFIG_X86_32 */
	}
}

/* Pads the code with NOPs up to the alignment stored in the extended
   label (ext_label->data is an alignment mask). */
static sljit_u8 *process_extended_label(sljit_u8 *code_ptr, struct sljit_extended_label *ext_label)
{
	sljit_uw mask;
	sljit_u8 *ptr = code_ptr;

	SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);
	mask = ext_label->data;

	code_ptr = (sljit_u8*)(((sljit_uw)code_ptr + mask) & ~mask);

	while (ptr < code_ptr)
		*ptr++ = NOP;

	return code_ptr;
}

/* First pass of code generation: walks labels and jumps in address
   order and computes how many bytes can be saved by using short
   jump / 32-bit mov_addr encodings, shifting all later label sizes
   and jump addresses down accordingly. The estimate is conservative:
   a jump is only shortened when it fits even before later reductions
   are applied. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	sljit_uw next_label_size;
	sljit_uw next_jump_addr;
	sljit_uw next_min_addr;
	sljit_uw size_reduce = 0;
	sljit_sw diff;
	sljit_uw type;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	sljit_uw size_reduce_max;
#endif /* SLJIT_DEBUG */

	label = compiler->labels;
	jump = compiler->jumps;

	next_label_size = SLJIT_GET_NEXT_SIZE(label);
	next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);

	while (1) {
		/* Process whichever of the next label / next jump comes first. */
		next_min_addr = next_label_size;
		if (next_jump_addr < next_min_addr)
			next_min_addr = next_jump_addr;

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);
#endif /* SLJIT_DEBUG */

			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
				if (jump->flags & JUMP_ADDR) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					if (jump->u.target <= 0xffffffffl)
						size_reduce += sizeof(sljit_s32);
#endif /* SLJIT_CONFIG_X86_64 */
				} else {
					/* Unit size: instruction. */
					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
					if (jump->u.label->size > jump->addr) {
						SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
						diff -= (sljit_sw)size_reduce;
					}
					type = jump->flags >> TYPE_SHIFT;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					if (type == SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += JUMP_MAX_SIZE - 2;
						else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
							size_reduce += JUMP_MAX_SIZE - 5;
					} else if (type < SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += CJUMP_MAX_SIZE - 2;
						else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)
							size_reduce += CJUMP_MAX_SIZE - 6;
					} else {
						if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)
							size_reduce += JUMP_MAX_SIZE - 5;
					}
#else /* !SLJIT_CONFIG_X86_64 */
					if (type == SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += JUMP_MAX_SIZE - 2;
					} else if (type < SLJIT_JUMP) {
						if (diff <= 0x7f + 2 && diff >= -0x80 + 2)
							size_reduce += CJUMP_MAX_SIZE - 2;
					}
#endif /* SLJIT_CONFIG_X86_64 */
				}
			}

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		} else {
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			size_reduce_max = size_reduce + 10;
#endif /* SLJIT_DEBUG */

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3);

				if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)
					size_reduce += 3;
			} else if (jump->u.target <= 0xffffffffl)
				size_reduce += (jump->flags & MOV_ADDR_HI) ?
4 : 5;

#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
			jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;
#endif /* SLJIT_DEBUG */
#endif /* SLJIT_CONFIG_X86_64 */
		}

		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}

/* Final code generation: shrinks the size estimate, allocates the
   executable buffer, copies the recorded instruction stream while
   resolving the SLJIT_INST_* markers (labels, jumps, mov_addr,
   consts), then patches every jump/mov_addr displacement. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
	sljit_uw addr;
#endif /* SLJIT_DEBUG */

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler, options));

	reduce_code_size(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			SLJIT_ASSERT(len > 0);
			if (len < SLJIT_INST_CONST) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			} else {
				switch (len) {
				case SLJIT_INST_LABEL:
					if (label->u.index >= SLJIT_LABEL_ALIGNED)
						code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);

					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					break;
				case SLJIT_INST_JUMP:
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
					addr = (sljit_uw)code_ptr;
#endif /* SLJIT_DEBUG */
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);
#else /* !SLJIT_CONFIG_X86_32 */
						code_ptr = detect_far_jump_type(jump, code_ptr);
#endif /* SLJIT_CONFIG_X86_32 */
					}

					SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0xff));
					jump = jump->next;
					break;
				case SLJIT_INST_MOV_ADDR:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);
#endif /* SLJIT_CONFIG_X86_64 */
					jump->addr = (sljit_uw)code_ptr;
					jump = jump->next;
					break;
				default:
					SLJIT_ASSERT(len == SLJIT_INST_CONST);
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					break;
				}
			}
		} while (buf_ptr < buf_end);

		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr <= code + compiler->size);

	jump = compiler->jumps;
	while (jump) {
		generate_jump_or_mov_addr(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code);

	code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

	SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
	return (void*)code;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
		return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	case SLJIT_HAS_VIRTUAL_REGISTERS:
		return 1;
#endif /* SLJIT_CONFIG_X86_32 */

	case SLJIT_HAS_CLZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		/* 2: emulated with BSR when LZCNT is unavailable. */
		return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;

	case SLJIT_HAS_CTZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		/* 2: emulated with BSF when TZCNT is unavailable. */
		return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;

	case SLJIT_HAS_CMOV:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;

	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
	case SLJIT_HAS_MEMORY_BARRIER:
		return 1;

#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
	case SLJIT_HAS_AVX:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
	case SLJIT_HAS_AVX2:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
	case SLJIT_HAS_SIMD:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
#endif /* SLJIT_IS_FPU_AVAILABLE */
	default:
		return 0;
	}
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	switch (type) {
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
		return 2;
	}

	return 0;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* Packs the four encodings of a binary ALU op into one word. */
#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))

#define BINARY_IMM32(op_imm, immw, arg, argw) \
	do { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} while (0)

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Immediates wider than 32 bits must be loaded into a temporary
   register first (x86-64 ALU ops take at most a sign-extended imm32). */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	do { \
		if (IS_HALFWORD(immw) || compiler->mode32) { \
			BINARY_IMM32(op_imm, immw, arg, argw); \
		} \
		else { \
			FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \
			inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
			FAIL_IF(!inst); \
			*inst = (op_mr); \
		} \
	} while (0)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else /* !SLJIT_CONFIG_X86_64 */

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	BINARY_IMM32(op_imm, immw, arg, argw)

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif /* SLJIT_CONFIG_X86_64 */

/* Appends a single literal byte to the instruction stream. */
static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
{
	sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	*inst = byte;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);

static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw);

static SLJIT_INLINE sljit_s32
emit_endbranch(struct sljit_compiler *compiler)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
	/* Emit endbr32/endbr64 when CET is enabled. */
	sljit_u8 *inst;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4);
	inst[0] = GROUP_F3;
	inst[1] = GROUP_0F;
	inst[2] = 0x1e;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst[3] = 0xfb; /* endbr32 */
#else /* !SLJIT_CONFIG_X86_32 */
	inst[3] = 0xfa; /* endbr64 */
#endif /* SLJIT_CONFIG_X86_32 */
#else /* !SLJIT_CONFIG_X86_CET */
	/* No-op when CET support is not compiled in. */
	SLJIT_UNUSED_ARG(compiler);
#endif /* SLJIT_CONFIG_X86_CET */
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)

/* Emits "rdssp reg": reads the CET shadow stack pointer into reg
   (f3 [REX.W] 0f 1e /1). */
static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	size = 5; /* extra byte for the mandatory REX.W prefix */
#else
	size = 4;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	*inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
	inst[0] = GROUP_0F;
	inst[1] = 0x1e;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
#else
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
#endif
	return SLJIT_SUCCESS;
}

/* Emits "incsspd/incsspq reg": pops reg entries off the CET shadow
   stack (f3 [REX.W] 0f ae /5). */
static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	size = 5; /* extra byte for the mandatory REX.W prefix */
#else
	size = 4;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	*inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
	inst[0] = GROUP_0F;
	inst[1] = 0xae;
	inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
	return SLJIT_SUCCESS;
}

#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */

/* Runtime check: non-zero when a CET shadow stack is active (the
   shadow stack pointer reads as non-zero). */
static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	return _get_ssp() != 0;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	return 0;
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
}

/* Emits code which pops shadow stack entries until the top of the
   shadow stack matches the return address passed in src/srcw.
   Needed before returns that skip frames. */
static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	sljit_u8 *inst, *jz_after_cmp_inst;
	sljit_uw size_jz_after_cmp_inst;

	sljit_uw size_before_rdssp_inst = compiler->size;

	/* Generate "RDSSP TMP_REG1". */
	FAIL_IF(emit_rdssp(compiler, TMP_REG1));

	/* Load return address on shadow stack into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);

	/* Compare return address against TMP_REG1. */
	FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));

	/* Generate JZ to skip shadow stack adjustment when shadow
	   stack matches normal stack. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	*inst++ = get_jump_code(SLJIT_EQUAL) - 0x10; /* short-form jcc */
	size_jz_after_cmp_inst = compiler->size;
	jz_after_cmp_inst = inst; /* displacement patched below */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* REX_W is not necessary. */
	compiler->mode32 = 1;
#endif
	/* Load 1 into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);

	/* Generate "INCSSP TMP_REG1". */
	FAIL_IF(emit_incssp(compiler, TMP_REG1));

	/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. 
*/
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	inst[1] = size_before_rdssp_inst - compiler->size; /* backward displacement */

	/* Patch the forward jz displacement now that the loop body size is known. */
	*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	SLJIT_UNUSED_ARG(compiler);
	SLJIT_UNUSED_ARG(src);
	SLJIT_UNUSED_ARG(srcw);
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
	return SLJIT_SUCCESS;
}

/* Pull in the architecture specific half of the backend. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

/* Emits a machine-word sized move between any combination of register,
   memory and immediate operands.  Memory-to-memory and (on 64 bit)
   wide immediate-to-memory moves go through TMP_REG1. */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					/* Needs the full 64 bit immediate form. */
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only SLJIT_MOV operation copies
			   an immediate directly into memory so TMP_REG1 can be used. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Only SLJIT_MOV operation copies
	   data from memory to memory so TMP_REG1 can be used. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

/* Emulates a conditional move without the cmov instruction: emits a
   short conditional jump with the inverted condition (type ^ 0x1)
   around a plain mov, then patches the jump displacement. */
static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_uw size;

	SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10); /* short jcc, inverted */

	size = compiler->size;
	EMIT_MOV(compiler, dst_reg, 0, src, srcw);

	inst[1] = U8(compiler->size - size); /* jump over the mov */
	return SLJIT_SUCCESS;
}

/* Emits a zero-operand operation: breakpoint, nop, the implicit
   eax/edx multiply/divide group, memory barrier, endbr and frame
   skipping.  See the per-case comments below. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_uw size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		return emit_byte(compiler, INT3);
	case SLJIT_NOP:
		return emit_byte(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case
SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		/* x86 mul/div implicitly use eax (and edx for the high word /
		   remainder); the asserts document the register mapping this
		   code relies on. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
#else
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
#endif
		compiler->mode32 = op & SLJIT_32;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
			/* Unsigned divide: clear the high word (edx / TMP_REG1). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
			/* Signed divide: sign extend eax into edx with cdq/cqo. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			FAIL_IF(emit_byte(compiler, CDQ));
#else
			if (!compiler->mode32) {
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				inst[0] = REX_W; /* REX.W + cdq == cqo */
				inst[1] = CDQ;
			} else
				FAIL_IF(emit_byte(compiler, CDQ));
#endif
		}

		/* Emit the F7 group opcode; the concrete operation (mul/imul/
		   div/idiv) is selected via the ModR/M reg field below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else /* !SLJIT_CONFIG_X86_32 */
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else /* !_WIN64 */
		size = (!compiler->mode32) ? 3 : 2;
#endif /* _WIN64 */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else /* !_WIN64 */
		if (!compiler->mode32)
			*inst++ = REX_W;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | reg_map[SLJIT_R1];
#endif /* _WIN64 */
#endif /* SLJIT_CONFIG_X86_32 */
		switch (op) {
		case SLJIT_LMUL_UW:
			inst[1] |= MUL;
			break;
		case SLJIT_LMUL_SW:
			inst[1] |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			inst[1] |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			inst[1] |= IDIV;
			break;
		}
		/* Move the secondary result/saved value back to SLJIT_R1. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	case SLJIT_MEMORY_BARRIER:
		/* mfence (0f ae f0). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3);
		inst[0] = GROUP_0F;
		inst[1] = 0xae;
		inst[2] = 0xf0;
		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return emit_endbranch(compiler);
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return skip_frames_before_return(compiler);
	}

	return SLJIT_SUCCESS;
}

/* Emits an 8 bit move with zero (sign == 0) or sign (sign != 0)
   extension to word size.  On x86-32 only eax/ecx/edx/ebx have byte
   accessible forms (reg_map < 4), hence the extra shuffling. */
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* The immediate is already truncated/extended by the
			   caller, so a full-width register load is fine. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* No byte form for this register: copy through TMP_REG1. */
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (FAST_IS_REG(src) && reg_map[src] >= 4) {
			/* Both src and dst are registers. */
			SLJIT_ASSERT(FAST_IS_REG(dst));

			if (src == dst && !sign) {
				/* Zero extension in place: mask with 0xff. */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
				return SLJIT_SUCCESS;
			}

			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
			srcw = 0;
		}
#endif /* !SLJIT_CONFIG_X86_32 */

		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
	}

	return SLJIT_SUCCESS;
}

/* Emits a prefetch hint (0f 18 /hint); the cache-level hint is encoded
   in the reg field of the ModR/M byte at inst[2]. */
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = PREFETCH;

	if (op == SLJIT_PREFETCH_L1)
		inst[2] |= (1 << 3);
	else if (op == SLJIT_PREFETCH_L2)
		inst[2] |= (2 << 3);
	else if (op == SLJIT_PREFETCH_L3)
		inst[2] |= (3 << 3);

	return SLJIT_SUCCESS;
}

/* Emits a 16 bit move with zero (sign == 0) or sign (sign != 0)
   extension to word size; 66-prefixed forms are used for stores. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else
		FAIL_IF(emit_groupf(compiler, sign ?
MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));

	if (dst & SLJIT_MEM) {
		/* 16 bit store needs the 66 operand-size prefix. */
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

/* Emits a one-operand F7-group operation (e.g. not/neg); opcode is the
   /digit extension ORed into the ModR/M byte. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (dst == src && dstw == srcw) {
		/* Same input and output: operate in place. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		inst[0] = GROUP_F7;
		inst[1] |= opcode;
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(dst)) {
		/* Copy into the destination register, then operate there. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
		FAIL_IF(!inst);
		inst[0] = GROUP_F7;
		inst[1] |= opcode;
		return SLJIT_SUCCESS;
	}

	/* Memory destination: go through TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Fallback constants loaded via cmov when the bsr/bsf input is zero
   (see emit_clz_ctz); clz additionally xors with 31 afterwards. */
static const sljit_sw emit_clz_arg = 32 + 31;
static const sljit_sw emit_ctz_arg = 32;
#endif

/* Emits count-leading-zeros (is_clz != 0) or count-trailing-zeros.
   Uses lzcnt/tzcnt when the CPU has them; otherwise emulates with
   bsr/bsf plus a (possibly emulated) cmov for the zero-input case,
   and for clz a final "xor 31/63" to convert the bsr bit index. */
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
	sljit_sw max;

	SLJIT_ASSERT(cpu_feature_list != 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
		/* Native instruction handles the zero input itself. */
		FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));

		if (dst & SLJIT_MEM)
			EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* bsr/bsf leave the destination undefined for zero input and set
	   the zero flag; the cmov below patches in the fallback value. */
	FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	max = is_clz ? (32 + 31) : 32;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		if (dst_r != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		}
		else
			/* dst_r is the only scratch: cmov from a static constant. */
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);

		FAIL_IF(!inst);
		inst[0] = GROUP_0F;
		inst[1] = CMOVE_r_rm;
	}
	else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		/* Convert bsr's bit index into a leading-zero count. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#else
	if (is_clz)
		max = compiler->mode32 ? (32 + 31) : (64 + 63);
	else
		max = compiler->mode32 ? 32 : 64;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
		FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
	} else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		/* max >> 1 is 31 or 63: converts the bsr bit index. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#endif

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

/* Emits the SLJIT_REV* family via the bswap instruction; 16 bit
   variants shift the result back down (shr/sar for U16/S16) and
   REV_S32 re-sign-extends with movsxd on 64 bit. */
static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_uw size;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_u8 rex = 0;
#else /* !SLJIT_CONFIG_X86_64 */
	/* On 32 bit the SLJIT_32 bit is reused as the "ereg" marker
	   (see sljit_emit_op1). */
	sljit_s32 dst_is_ereg = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
		compiler->mode32 = 1;
#else /* !SLJIT_CONFIG_X86_64 */
	op &= ~SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	if (src != dst_r) {
		/* Only the lower 16 bit is read for eregs. */
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
			FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
		else
			EMIT_MOV(compiler, dst_r, 0, src, srcw);
	}

	size = 2;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (!compiler->mode32)
		rex = REX_W;

	if (reg_map[dst_r] >= 8)
		rex |= REX_B;

	if (rex != 0)
		size++;
#endif /* SLJIT_CONFIG_X86_64 */

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (rex != 0)
		*inst++ = rex;

	inst[0] = GROUP_0F;
	inst[1] = BSWAP_r | reg_lmap[dst_r];
#else /* !SLJIT_CONFIG_X86_64 */
	inst[0] = GROUP_0F;
	inst[1] = BSWAP_r | reg_map[dst_r];
#endif /* SLJIT_CONFIG_X86_64 */

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
		/* bswap swapped the whole word: shift the 16 bit result back
		   into the low half (arithmetic shift keeps the sign for S16). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		size = compiler->mode32 ? 16 : 48;
#else /* !SLJIT_CONFIG_X86_64 */
		size = 16;
#endif /* SLJIT_CONFIG_X86_64 */

		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
		FAIL_IF(!inst);
		if (op == SLJIT_REV_U16)
			inst[1] |= SHR;
		else
			inst[1] |= SAR;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_is_ereg)
			op = SLJIT_REV;
#endif /* SLJIT_CONFIG_X86_32 */
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
			return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);

		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_REV_S32) {
		/* Re-establish the sign extended 64 bit value. */
		compiler->mode32 = 0;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = MOVSXD_r_rm;
	}
#endif /* SLJIT_CONFIG_X86_64 */

	return SLJIT_SUCCESS;
}

/* Emits a single-operand operation: the MOV family (with optional
   zero/sign extension), CLZ/CTZ and the REV byte-swap family.
   Dispatches to the emit_* helpers above. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
#else /* !SLJIT_CONFIG_X86_32 */
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	op = GET_OPCODE(op);

	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */

		if (FAST_IS_REG(src) && src == dst) {
			/* Register-to-itself move: only type casts need code. */
			if (!TYPE_CAST_NEEDED(op))
				return
SLJIT_SUCCESS;
		}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (op_flags & SLJIT_32) {
			if (src & SLJIT_MEM) {
				/* 32 bit memory load: plain U32 load suffices. */
				if (op == SLJIT_MOV_S32)
					op = SLJIT_MOV_U32;
			}
			else if (src == SLJIT_IMM) {
				/* Immediates are emitted sign extended. */
				if (op == SLJIT_MOV_U32)
					op = SLJIT_MOV_S32;
			}
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src == SLJIT_IMM) {
			/* Pre-truncate/extend the immediate to the move width. */
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif /* SLJIT_CONFIG_X86_64 */
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif /* SLJIT_CONFIG_X86_32 */
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Extended ("ereg") destinations live in the stack frame; the
		   value is produced in TMP_REG1 and stored at the end. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif /* SLJIT_CONFIG_X86_32 */

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
#endif /* SLJIT_CONFIG_X86_32 */
			EMIT_MOV(compiler, dst, dstw, src, srcw);
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV32:
			compiler->mode32 = 1;
			EMIT_MOV(compiler, dst, dstw, src, srcw);
			compiler->mode32 = 0;
			break;
#endif /* SLJIT_CONFIG_X86_64 */
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Store the TMP_REG1 result back to the ereg stack slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_32 */
		return SLJIT_SUCCESS;
	}

	switch (op) {
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Reuse the SLJIT_32 bit to tell emit_bswap about eregs. */
		if (dst_is_ereg)
			op |= SLJIT_32;
#endif /* SLJIT_CONFIG_X86_32 */
		return emit_bswap(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;
}

/* Emits a commutative binary ALU operation (add/or/xor/and/...).
   op_types packs the four opcode forms (see BINARY_OPCODE).  Fast
   paths handle dst==src1 and dst==src2 in place; otherwise the result
   is built in dst or TMP_REG1. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	/* Unpack the four encodings (layout documented at BINARY_OPCODE). */
	sljit_u8 op_eax_imm = U8(op_types >> 24);
	sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
	sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
	sljit_u8 op_imm = U8(op_types & 0xff);

	if (dst == src1 && dstw == src1w) {
		/* Operate on dst in place. */
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				/* Short "op eax, imm32" form. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a non-commutative binary ALU operation (e.g. sub): identical
   to emit_cum_binary except that the dst==src2 in-place shortcut is
   not applicable. */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_u8 op_eax_imm = U8(op_types >> 24);
	sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
	sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
	sljit_u8 op_imm = U8(op_types & 0xff);

	if (dst == src1 && dstw == src1w) {
		/* Operate on dst in place. */
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. 
*/
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a signed multiply via imul, choosing among the two-operand
   "imul r, r/m" form and the immediate "imul r, r/m, imm8/imm32"
   forms; 64 bit immediates that do not fit in 32 bits are loaded into
   TMP_REG2 first. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && src2 != SLJIT_IMM) {
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	} else if (dst_r == src2 && src1 != SLJIT_IMM) {
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
	} else if (src1 == SLJIT_IMM) {
		if (src2 == SLJIT_IMM) {
			/* Fold the two-immediate case into imm * reg. */
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* Short imm8 form. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src1w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* 64 bit immediate: materialize in TMP_REG2. */
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	}
	else if (src2 == SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src2w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		} else {
			/* 64 bit immediate: materialize in TMP_REG2. */
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	} else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	}

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

/* Tries to emit an addition as a single lea (reg+reg or reg+imm).
   Returns SLJIT_ERR_UNSUPPORTED when the operand combination cannot
   profit from lea, so the caller falls back to the normal add path. */
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			/* lea dst, [src1 + src2] */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return
SLJIT_ERR_UNSUPPORTED;2374}23752376static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,2377sljit_s32 src1, sljit_sw src1w,2378sljit_s32 src2, sljit_sw src2w)2379{2380sljit_u8* inst;23812382#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2383if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {2384#else2385if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {2386#endif2387BINARY_EAX_IMM(CMP_EAX_i32, src2w);2388return SLJIT_SUCCESS;2389}23902391if (FAST_IS_REG(src1)) {2392if (src2 == SLJIT_IMM) {2393BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);2394}2395else {2396inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);2397FAIL_IF(!inst);2398*inst = CMP_r_rm;2399}2400return SLJIT_SUCCESS;2401}24022403if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {2404inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);2405FAIL_IF(!inst);2406*inst = CMP_rm_r;2407return SLJIT_SUCCESS;2408}24092410if (src2 == SLJIT_IMM) {2411if (src1 == SLJIT_IMM) {2412EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2413src1 = TMP_REG1;2414src1w = 0;2415}2416BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);2417}2418else {2419EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2420inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);2421FAIL_IF(!inst);2422*inst = CMP_r_rm;2423}2424return SLJIT_SUCCESS;2425}24262427static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,2428sljit_s32 src1, sljit_sw src1w,2429sljit_s32 src2, sljit_sw src2w)2430{2431sljit_u8* inst;24322433#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2434if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {2435#else2436if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {2437#endif2438BINARY_EAX_IMM(TEST_EAX_i32, src2w);2439return SLJIT_SUCCESS;2440}24412442#if (defined SLJIT_CONFIG_X86_64 && 
SLJIT_CONFIG_X86_64)2443if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {2444#else2445if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {2446#endif2447BINARY_EAX_IMM(TEST_EAX_i32, src1w);2448return SLJIT_SUCCESS;2449}24502451if (src1 != SLJIT_IMM) {2452if (src2 == SLJIT_IMM) {2453#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2454if (IS_HALFWORD(src2w) || compiler->mode32) {2455inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);2456FAIL_IF(!inst);2457*inst = GROUP_F7;2458} else {2459FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));2460inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);2461FAIL_IF(!inst);2462*inst = TEST_rm_r;2463}2464#else2465inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);2466FAIL_IF(!inst);2467*inst = GROUP_F7;2468#endif2469return SLJIT_SUCCESS;2470}2471else if (FAST_IS_REG(src1)) {2472inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);2473FAIL_IF(!inst);2474*inst = TEST_rm_r;2475return SLJIT_SUCCESS;2476}2477}24782479if (src2 != SLJIT_IMM) {2480if (src1 == SLJIT_IMM) {2481#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2482if (IS_HALFWORD(src1w) || compiler->mode32) {2483inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);2484FAIL_IF(!inst);2485*inst = GROUP_F7;2486}2487else {2488FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));2489inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);2490FAIL_IF(!inst);2491*inst = TEST_rm_r;2492}2493#else2494inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);2495FAIL_IF(!inst);2496*inst = GROUP_F7;2497#endif2498return SLJIT_SUCCESS;2499}2500else if (FAST_IS_REG(src2)) {2501inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);2502FAIL_IF(!inst);2503*inst = TEST_rm_r;2504return 
SLJIT_SUCCESS;2505}2506}25072508EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2509if (src2 == SLJIT_IMM) {2510#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2511if (IS_HALFWORD(src2w) || compiler->mode32) {2512inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);2513FAIL_IF(!inst);2514*inst = GROUP_F7;2515}2516else {2517FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));2518inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);2519FAIL_IF(!inst);2520*inst = TEST_rm_r;2521}2522#else2523inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);2524FAIL_IF(!inst);2525*inst = GROUP_F7;2526#endif2527}2528else {2529inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);2530FAIL_IF(!inst);2531*inst = TEST_rm_r;2532}2533return SLJIT_SUCCESS;2534}25352536static sljit_s32 emit_shift(struct sljit_compiler *compiler,2537sljit_u8 mode,2538sljit_s32 dst, sljit_sw dstw,2539sljit_s32 src1, sljit_sw src1w,2540sljit_s32 src2, sljit_sw src2w)2541{2542#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2543sljit_s32 mode32;2544#endif2545sljit_u8* inst;25462547if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {2548if (dst == src1 && dstw == src1w) {2549inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);2550FAIL_IF(!inst);2551inst[1] |= mode;2552return SLJIT_SUCCESS;2553}2554if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {2555EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2556inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2557FAIL_IF(!inst);2558inst[1] |= mode;2559EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2560return SLJIT_SUCCESS;2561}2562if (FAST_IS_REG(dst)) {2563EMIT_MOV(compiler, dst, 0, src1, src1w);2564inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);2565FAIL_IF(!inst);2566inst[1] |= mode;2567return SLJIT_SUCCESS;2568}25692570EMIT_MOV(compiler, TMP_REG1, 0, src1, 
src1w);2571inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);2572FAIL_IF(!inst);2573inst[1] |= mode;2574EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);2575return SLJIT_SUCCESS;2576}25772578if (dst == SLJIT_PREF_SHIFT_REG) {2579EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2580EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);2581inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2582FAIL_IF(!inst);2583inst[1] |= mode;2584return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2585}25862587if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {2588if (src1 != dst)2589EMIT_MOV(compiler, dst, 0, src1, src1w);2590#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2591mode32 = compiler->mode32;2592compiler->mode32 = 0;2593#endif2594EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);2595#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2596compiler->mode32 = mode32;2597#endif2598EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);2599inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);2600FAIL_IF(!inst);2601inst[1] |= mode;2602#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2603compiler->mode32 = 0;2604#endif2605EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2606#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2607compiler->mode32 = mode32;2608#endif2609return SLJIT_SUCCESS;2610}26112612/* This case is complex since ecx itself may be used for2613addressing, and this case must be supported as well. 
*/2614EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2615#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2616EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);2617#else /* !SLJIT_CONFIG_X86_32 */2618mode32 = compiler->mode32;2619compiler->mode32 = 0;2620EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);2621compiler->mode32 = mode32;2622#endif /* SLJIT_CONFIG_X86_32 */26232624EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);2625inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2626FAIL_IF(!inst);2627inst[1] |= mode;26282629#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2630EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);2631#else2632compiler->mode32 = 0;2633EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);2634compiler->mode32 = mode32;2635#endif /* SLJIT_CONFIG_X86_32 */26362637if (dst != TMP_REG1)2638return emit_mov(compiler, dst, dstw, TMP_REG1, 0);26392640return SLJIT_SUCCESS;2641}26422643static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,2644sljit_u8 mode, sljit_s32 set_flags,2645sljit_s32 dst, sljit_sw dstw,2646sljit_s32 src1, sljit_sw src1w,2647sljit_s32 src2, sljit_sw src2w)2648{2649/* The CPU does not set flags if the shift count is 0. */2650if (src2 == SLJIT_IMM) {2651#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2652src2w &= compiler->mode32 ? 
0x1f : 0x3f;2653#else /* !SLJIT_CONFIG_X86_64 */2654src2w &= 0x1f;2655#endif /* SLJIT_CONFIG_X86_64 */2656if (src2w != 0)2657return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);26582659if (!set_flags)2660return emit_mov(compiler, dst, dstw, src1, src1w);2661/* OR dst, src, 0 */2662return emit_cum_binary(compiler, BINARY_OPCODE(OR),2663dst, dstw, src1, src1w, SLJIT_IMM, 0);2664}26652666if (!set_flags)2667return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);26682669if (!FAST_IS_REG(dst))2670FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));26712672FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));26732674if (FAST_IS_REG(dst))2675return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);2676return SLJIT_SUCCESS;2677}26782679SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,2680sljit_s32 dst, sljit_sw dstw,2681sljit_s32 src1, sljit_sw src1w,2682sljit_s32 src2, sljit_sw src2w)2683{2684CHECK_ERROR();2685CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));2686ADJUST_LOCAL_OFFSET(dst, dstw);2687ADJUST_LOCAL_OFFSET(src1, src1w);2688ADJUST_LOCAL_OFFSET(src2, src2w);26892690CHECK_EXTRA_REGS(dst, dstw, (void)0);2691CHECK_EXTRA_REGS(src1, src1w, (void)0);2692CHECK_EXTRA_REGS(src2, src2w, (void)0);2693#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2694compiler->mode32 = op & SLJIT_32;2695#endif26962697switch (GET_OPCODE(op)) {2698case SLJIT_ADD:2699if (!HAS_FLAGS(op)) {2700if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)2701return compiler->error;2702}2703return emit_cum_binary(compiler, BINARY_OPCODE(ADD),2704dst, dstw, src1, src1w, src2, src2w);2705case SLJIT_ADDC:2706return emit_cum_binary(compiler, BINARY_OPCODE(ADC),2707dst, dstw, src1, src1w, src2, src2w);2708case SLJIT_SUB:2709if (src1 == SLJIT_IMM && src1w == 0)2710return emit_unary(compiler, NEG_rm, dst, dstw, src2, 
src2w);27112712if (!HAS_FLAGS(op)) {2713if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)2714return compiler->error;2715if (FAST_IS_REG(dst) && src2 == dst) {2716FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));2717return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);2718}2719}27202721return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),2722dst, dstw, src1, src1w, src2, src2w);2723case SLJIT_SUBC:2724return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),2725dst, dstw, src1, src1w, src2, src2w);2726case SLJIT_MUL:2727return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);2728case SLJIT_AND:2729return emit_cum_binary(compiler, BINARY_OPCODE(AND),2730dst, dstw, src1, src1w, src2, src2w);2731case SLJIT_OR:2732return emit_cum_binary(compiler, BINARY_OPCODE(OR),2733dst, dstw, src1, src1w, src2, src2w);2734case SLJIT_XOR:2735if (!HAS_FLAGS(op)) {2736if (src2 == SLJIT_IMM && src2w == -1)2737return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);2738if (src1 == SLJIT_IMM && src1w == -1)2739return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);2740}27412742return emit_cum_binary(compiler, BINARY_OPCODE(XOR),2743dst, dstw, src1, src1w, src2, src2w);2744case SLJIT_SHL:2745case SLJIT_MSHL:2746return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),2747dst, dstw, src1, src1w, src2, src2w);2748case SLJIT_LSHR:2749case SLJIT_MLSHR:2750return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),2751dst, dstw, src1, src1w, src2, src2w);2752case SLJIT_ASHR:2753case SLJIT_MASHR:2754return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),2755dst, dstw, src1, src1w, src2, src2w);2756case SLJIT_ROTL:2757return emit_shift_with_flags(compiler, ROL, 0,2758dst, dstw, src1, src1w, src2, src2w);2759case SLJIT_ROTR:2760return emit_shift_with_flags(compiler, ROR, 0,2761dst, dstw, src1, src1w, src2, src2w);2762}27632764return 
SLJIT_SUCCESS;2765}27662767SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,2768sljit_s32 src1, sljit_sw src1w,2769sljit_s32 src2, sljit_sw src2w)2770{2771sljit_s32 opcode = GET_OPCODE(op);27722773CHECK_ERROR();2774CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));27752776if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {2777SLJIT_SKIP_CHECKS(compiler);2778return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);2779}27802781ADJUST_LOCAL_OFFSET(src1, src1w);2782ADJUST_LOCAL_OFFSET(src2, src2w);27832784CHECK_EXTRA_REGS(src1, src1w, (void)0);2785CHECK_EXTRA_REGS(src2, src2w, (void)0);2786#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2787compiler->mode32 = op & SLJIT_32;2788#endif27892790if (opcode == SLJIT_SUB)2791return emit_cmp_binary(compiler, src1, src1w, src2, src2w);27922793return emit_test_binary(compiler, src1, src1w, src2, src2w);2794}27952796SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,2797sljit_s32 dst_reg,2798sljit_s32 src1, sljit_sw src1w,2799sljit_s32 src2, sljit_sw src2w)2800{2801sljit_u8* inst;2802sljit_sw dstw = 0;28032804CHECK_ERROR();2805CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));2806ADJUST_LOCAL_OFFSET(src1, src1w);2807ADJUST_LOCAL_OFFSET(src2, src2w);28082809CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);2810CHECK_EXTRA_REGS(src1, src1w, (void)0);2811CHECK_EXTRA_REGS(src2, src2w, (void)0);2812#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2813compiler->mode32 = op & SLJIT_32;2814#endif28152816switch (GET_OPCODE(op)) {2817case SLJIT_MULADD:2818FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));2819inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);2820FAIL_IF(!inst);2821*inst = ADD_rm_r;2822return SLJIT_SUCCESS;2823}28242825return SLJIT_SUCCESS;2826}28272828SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct 
sljit_compiler *compiler, sljit_s32 op,2829sljit_s32 dst_reg,2830sljit_s32 src1_reg,2831sljit_s32 src2_reg,2832sljit_s32 src3, sljit_sw src3w)2833{2834sljit_s32 is_rotate, is_left, move_src1;2835sljit_u8* inst;2836sljit_sw src1w = 0;2837sljit_sw dstw = 0;2838/* The whole register must be saved even for 32 bit operations. */2839sljit_u8 restore_ecx = 0;2840#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2841sljit_sw src2w = 0;2842sljit_s32 restore_sp4 = 0;2843#endif /* SLJIT_CONFIG_X86_32 */28442845CHECK_ERROR();2846CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));2847ADJUST_LOCAL_OFFSET(src3, src3w);28482849CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);2850CHECK_EXTRA_REGS(src3, src3w, (void)0);28512852#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2853compiler->mode32 = op & SLJIT_32;2854#endif /* SLJIT_CONFIG_X86_64 */28552856if (src3 == SLJIT_IMM) {2857#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2858src3w &= 0x1f;2859#else /* !SLJIT_CONFIG_X86_32 */2860src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;2861#endif /* SLJIT_CONFIG_X86_32 */28622863if (src3w == 0)2864return SLJIT_SUCCESS;2865}28662867is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);28682869is_rotate = (src1_reg == src2_reg);2870CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);2871CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);28722873if (is_rotate)2874return emit_shift(compiler, is_left ? 
ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);28752876#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2877if (src2_reg & SLJIT_MEM) {2878EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);2879src2_reg = TMP_REG1;2880}2881#endif /* SLJIT_CONFIG_X86_32 */28822883if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {2884#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2885EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);2886src1_reg = TMP_REG1;2887src1w = 0;2888#else /* !SLJIT_CONFIG_X86_64 */2889if (src2_reg != TMP_REG1) {2890EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);2891src1_reg = TMP_REG1;2892src1w = 0;2893} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {2894restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;2895EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);2896EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);2897src1_reg = restore_sp4;2898src1w = 0;2899} else {2900EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);2901restore_sp4 = src1_reg;2902}2903#endif /* SLJIT_CONFIG_X86_64 */29042905if (src3 != SLJIT_PREF_SHIFT_REG)2906EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);2907} else {2908if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {2909#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2910compiler->mode32 = 0;2911#endif /* SLJIT_CONFIG_X86_64 */2912EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);2913#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2914compiler->mode32 = op & SLJIT_32;2915#endif /* SLJIT_CONFIG_X86_64 */2916src2_reg = TMP_REG1;2917restore_ecx = 1;2918}29192920move_src1 = 0;2921#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2922if (dst_reg != src1_reg) {2923if (dst_reg != src3) {2924EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);2925src1_reg = dst_reg;2926src1w = 0;2927} 
else2928move_src1 = 1;2929}2930#else /* !SLJIT_CONFIG_X86_64 */2931if (dst_reg & SLJIT_MEM) {2932if (src2_reg != TMP_REG1) {2933EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);2934src1_reg = TMP_REG1;2935src1w = 0;2936} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {2937restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;2938EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);2939EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);2940src1_reg = restore_sp4;2941src1w = 0;2942} else {2943EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);2944restore_sp4 = src1_reg;2945}2946} else if (dst_reg != src1_reg) {2947if (dst_reg != src3) {2948EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);2949src1_reg = dst_reg;2950src1w = 0;2951} else2952move_src1 = 1;2953}2954#endif /* SLJIT_CONFIG_X86_64 */29552956if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {2957if (!restore_ecx) {2958#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2959compiler->mode32 = 0;2960EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);2961compiler->mode32 = op & SLJIT_32;2962restore_ecx = 1;2963#else /* !SLJIT_CONFIG_X86_64 */2964if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {2965EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);2966restore_ecx = 1;2967} else {2968EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);2969restore_ecx = 2;2970}2971#endif /* SLJIT_CONFIG_X86_64 */2972}2973EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);2974}29752976if (move_src1) {2977EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);2978src1_reg = dst_reg;2979src1w = 0;2980}2981}29822983inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);2984FAIL_IF(!inst);2985inst[0] = GROUP_0F;29862987if (src3 == SLJIT_IMM) {2988inst[1] = U8((is_left ? SHLD : SHRD) - 1);29892990/* Immediate argument is added separately. 
*/2991FAIL_IF(emit_byte(compiler, U8(src3w)));2992} else2993inst[1] = U8(is_left ? SHLD : SHRD);29942995#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2996if (restore_ecx) {2997compiler->mode32 = 0;2998EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);2999}30003001if (src1_reg != dst_reg) {3002compiler->mode32 = op & SLJIT_32;3003return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);3004}3005#else /* !SLJIT_CONFIG_X86_64 */3006if (restore_ecx)3007EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);30083009if (src1_reg != dst_reg)3010EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);30113012if (restore_sp4)3013return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));3014#endif /* SLJIT_CONFIG_X86_32 */30153016return SLJIT_SUCCESS;3017}30183019SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,3020sljit_s32 dst, sljit_sw dstw,3021sljit_s32 src1, sljit_sw src1w,3022sljit_s32 src2, sljit_sw src2w,3023sljit_sw shift_arg)3024{3025sljit_s32 dst_r;3026int use_lea = 0;3027sljit_u8* inst;30283029CHECK_ERROR();3030CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));3031ADJUST_LOCAL_OFFSET(dst, dstw);3032ADJUST_LOCAL_OFFSET(src1, src1w);3033ADJUST_LOCAL_OFFSET(src2, src2w);30343035shift_arg &= (sljit_sw)((sizeof(sljit_sw) * 8) - 1);30363037if (src2 == SLJIT_IMM) {3038src2w = src2w << shift_arg;3039shift_arg = 0;3040}30413042if (shift_arg == 0) {3043SLJIT_SKIP_CHECKS(compiler);3044return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);3045}30463047CHECK_EXTRA_REGS(dst, dstw, (void)0);3048CHECK_EXTRA_REGS(src1, src1w, (void)0);3049CHECK_EXTRA_REGS(src2, src2w, (void)0);30503051#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3052compiler->mode32 = 0;3053#endif30543055#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3056if (shift_arg <= 3) {3057use_lea = 
1;3058if (!FAST_IS_REG(src2)) {3059EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);3060src2 = TMP_REG1;3061}30623063if (!FAST_IS_REG(src1)) {3064EMIT_MOV(compiler, src2 == TMP_REG1 ? TMP_REG2 : TMP_REG1, 0, src1, src1w);3065src1 = src2 == TMP_REG1 ? TMP_REG2 : TMP_REG1;3066}3067}3068#else /* !SLJIT_CONFIG_X86_64 */3069if (shift_arg <= 3 && (FAST_IS_REG(src1) || (FAST_IS_REG(src2) && src2 != TMP_REG1))) {3070use_lea = 1;3071if (!FAST_IS_REG(src2)) {3072EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);3073src2 = TMP_REG1;3074}30753076if (!FAST_IS_REG(src1)) {3077EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);3078src1 = TMP_REG1;3079}3080}3081#endif /* SLJIT_CONFIG_X86_64 */30823083if (use_lea) {3084dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;30853086inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), shift_arg);3087FAIL_IF(!inst);3088*inst = LEA_r_m;30893090if (!FAST_IS_REG(dst))3091return emit_mov(compiler, dst, dstw, dst_r, 0);30923093return SLJIT_SUCCESS;3094}30953096if ((op & SLJIT_SRC2_UNDEFINED) != 0 && FAST_IS_REG(src2) && src1 != src2)3097dst_r = src2;3098else {3099dst_r = FAST_IS_REG(dst) && (dst != src1) ? 
dst : TMP_REG1;31003101if (src2 != dst_r) {3102EMIT_MOV(compiler, dst_r, 0, src2, src2w);3103}3104}31053106inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, shift_arg, dst_r, 0);3107FAIL_IF(!inst);3108inst[1] |= SHL;31093110if (dst == src1 && dstw == src1w) {3111inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);3112FAIL_IF(!inst);3113*inst = ADD_rm_r;3114return SLJIT_SUCCESS;3115}31163117if (FAST_IS_REG(dst) && FAST_IS_REG(src1)) {3118inst = emit_x86_instruction(compiler, 1, dst, 0, SLJIT_MEM2(src1, dst_r), 0);3119FAIL_IF(!inst);3120*inst = LEA_r_m;3121return SLJIT_SUCCESS;3122}31233124if (src1 == SLJIT_IMM) {3125BINARY_IMM(ADD, ADD_rm_r, src1w, dst_r, 0);3126} else {3127inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);3128FAIL_IF(!inst);3129*inst = ADD_r_rm;3130}31313132if (dst != dst_r)3133return emit_mov(compiler, dst, dstw, dst_r, 0);31343135return SLJIT_SUCCESS;3136}31373138SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,3139sljit_s32 src, sljit_sw srcw)3140{3141CHECK_ERROR();3142CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));3143ADJUST_LOCAL_OFFSET(src, srcw);31443145CHECK_EXTRA_REGS(src, srcw, (void)0);31463147switch (op) {3148case SLJIT_FAST_RETURN:3149return emit_fast_return(compiler, src, srcw);3150case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:3151/* Don't adjust shadow stack if it isn't enabled. 
*/3152if (!cpu_has_shadow_stack ())3153return SLJIT_SUCCESS;3154return adjust_shadow_stack(compiler, src, srcw);3155case SLJIT_PREFETCH_L1:3156case SLJIT_PREFETCH_L2:3157case SLJIT_PREFETCH_L3:3158case SLJIT_PREFETCH_ONCE:3159return emit_prefetch(compiler, op, src, srcw);3160}31613162return SLJIT_SUCCESS;3163}31643165SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,3166sljit_s32 dst, sljit_sw dstw)3167{3168CHECK_ERROR();3169CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));3170ADJUST_LOCAL_OFFSET(dst, dstw);31713172CHECK_EXTRA_REGS(dst, dstw, (void)0);31733174switch (op) {3175case SLJIT_FAST_ENTER:3176return emit_fast_enter(compiler, dst, dstw);3177case SLJIT_GET_RETURN_ADDRESS:3178return sljit_emit_get_return_address(compiler, dst, dstw);3179}31803181return SLJIT_SUCCESS;3182}31833184SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)3185{3186CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));31873188if (type == SLJIT_GP_REGISTER) {3189#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)3190if (reg >= SLJIT_R3 && reg <= SLJIT_R8)3191return -1;3192#endif /* SLJIT_CONFIG_X86_32 */3193return reg_map[reg];3194}31953196if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)3197return -1;31983199return freg_map[reg];3200}32013202SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,3203void *instruction, sljit_u32 size)3204{3205sljit_u8 *inst;32063207CHECK_ERROR();3208CHECK(check_sljit_emit_op_custom(compiler, instruction, size));32093210inst = (sljit_u8*)ensure_buf(compiler, 1 + size);3211FAIL_IF(!inst);3212INC_SIZE(size);3213SLJIT_MEMCPY(inst, instruction, size);3214return SLJIT_SUCCESS;3215}32163217/* --------------------------------------------------------------------- */3218/* Floating point operators */3219/* 
--------------------------------------------------------------------- */32203221/* Alignment(3) + 4 * 16 bytes. */3222static sljit_u32 sse2_data[3 + (4 * 4)];3223static sljit_u32 *sse2_buffer;32243225static void init_compiler(void)3226{3227get_cpu_features();32283229/* Align to 16 bytes. */3230sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);32313232/* Single precision constants (each constant is 16 byte long). */3233sse2_buffer[0] = 0x80000000;3234sse2_buffer[4] = 0x7fffffff;3235/* Double precision constants (each constant is 16 byte long). */3236sse2_buffer[8] = 0;3237sse2_buffer[9] = 0x80000000;3238sse2_buffer[12] = 0xffffffff;3239sse2_buffer[13] = 0x7fffffff;3240}32413242static sljit_s32 emit_groupf(struct sljit_compiler *compiler,3243sljit_uw op,3244sljit_s32 dst, sljit_s32 src, sljit_sw srcw)3245{3246sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);3247FAIL_IF(!inst);3248inst[0] = GROUP_0F;3249inst[1] = op & 0xff;3250return SLJIT_SUCCESS;3251}32523253static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,3254sljit_uw op,3255sljit_s32 dst, sljit_s32 src, sljit_sw srcw)3256{3257sljit_u8 *inst;32583259SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));32603261inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);3262FAIL_IF(!inst);3263inst[0] = GROUP_0F;3264inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);3265inst[2] = op & 0xff;3266return SLJIT_SUCCESS;3267}32683269static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,3270sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)3271{3272return emit_groupf(compiler, MOVSD_x_xm | (single ? 
EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
}

/* Stores a scalar float/double from `src` to the destination operand
   (MOVSS/MOVSD store form). */
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
}

/* Float -> integer conversion (truncating, CVTTSD2SI/CVTTSS2SI). The
   result is produced in a GP register; memory destinations go through
   TMP_REG1. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64 bit destination requires a REX.W prefixed conversion. */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

/* Integer -> float conversion (CVTSI2SD/CVTSI2SS). Immediates are
   materialized in TMP_REG1 first; memory destinations go through
   TMP_FREG. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64 bit source requires a REX.W prefixed conversion. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Only the low 32 bits of the immediate are significant here. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* Emits a floating point compare that sets CPU flags. Depending on the
   requested flag type either a CMPSS/CMPSD (predicate compare) or an
   UCOMISS/UCOMISD (with operands possibly swapped) is used. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_ORDERED_EQUAL:
		/* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));

		/* EQ */
		FAIL_IF(emit_byte(compiler, 0));

		/* The predicate result is compared with itself below. */
		src1 = TMP_FREG;
		src2 = TMP_FREG;
		src2w = 0;
		break;

	case SLJIT_ORDERED_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
		if (!FAST_IS_REG(src2)) {
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
			src2 = TMP_FREG;
		}

		/* Operands are swapped to get the desired flag combination. */
		return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
	}

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}

	return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, 
src2, src2w);3370}33713372SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,3373sljit_s32 dst, sljit_sw dstw,3374sljit_s32 src, sljit_sw srcw)3375{3376sljit_s32 dst_r;3377sljit_u8 *inst;33783379#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3380compiler->mode32 = 1;3381#endif33823383CHECK_ERROR();3384SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);33853386if (GET_OPCODE(op) == SLJIT_MOV_F64) {3387if (FAST_IS_REG(dst))3388return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);3389if (FAST_IS_REG(src))3390return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);3391FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));3392return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3393}33943395if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {3396dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;3397if (FAST_IS_REG(src)) {3398/* We overwrite the high bits of source. From SLJIT point of view,3399this is not an issue.3400Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */3401FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));3402} else {3403FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));3404src = TMP_FREG;3405}34063407FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));3408if (dst_r == TMP_FREG)3409return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3410return SLJIT_SUCCESS;3411}34123413if (FAST_IS_REG(dst)) {3414dst_r = (dst == src) ? 
TMP_FREG : dst;34153416if (src & SLJIT_MEM)3417FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));34183419FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));34203421inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);3422inst[0] = GROUP_0F;3423/* Same as PSRLD_x / PSRLQ_x */3424inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;34253426if (GET_OPCODE(op) == SLJIT_ABS_F64) {3427inst[2] |= 2 << 3;3428FAIL_IF(emit_byte(compiler, 1));3429} else {3430inst[2] |= 6 << 3;3431FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));3432}34333434if (dst_r != TMP_FREG)3435dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;3436return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);3437}34383439FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));34403441switch (GET_OPCODE(op)) {3442case SLJIT_NEG_F64:3443FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));3444break;34453446case SLJIT_ABS_F64:3447FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer + 4 : sse2_buffer + 12)));3448break;3449}34503451return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3452}34533454SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,3455sljit_s32 dst, sljit_sw dstw,3456sljit_s32 src1, sljit_sw src1w,3457sljit_s32 src2, sljit_sw src2w)3458{3459sljit_s32 dst_r;34603461CHECK_ERROR();3462CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));3463ADJUST_LOCAL_OFFSET(dst, dstw);3464ADJUST_LOCAL_OFFSET(src1, src1w);3465ADJUST_LOCAL_OFFSET(src2, src2w);34663467#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3468compiler->mode32 = 1;3469#endif34703471if (FAST_IS_REG(dst)) {3472dst_r = dst;3473if (dst == src1)3474; /* Do nothing here. */3475else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {3476/* Swap arguments. */3477src2 = src1;3478src2w = src1w;3479} else if (dst != src2)3480FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));3481else {3482dst_r = TMP_FREG;3483FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));3484}3485} else {3486dst_r = TMP_FREG;3487FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));3488}34893490switch (GET_OPCODE(op)) {3491case SLJIT_ADD_F64:3492FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3493break;34943495case SLJIT_SUB_F64:3496FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3497break;34983499case SLJIT_MUL_F64:3500FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3501break;35023503case SLJIT_DIV_F64:3504FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3505break;3506}35073508if (dst_r != dst)3509return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3510return 
SLJIT_SUCCESS;
}

/* Copysign-style operation: combines the magnitude of src2 with the
   sign of src1 using an XOR / AND-mask / XOR sequence over the
   sse2_buffer sign-bit constants. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_uw pref;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst_freg == src1) {
		/* Work in TMP_FREG so src1 stays readable for the final XOR. */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
		pref = EX86_SELECT_66(op) | EX86_SSE2;
		FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
		FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
		return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
		src1w = 0;
	}

	if (dst_freg != src2)
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));

	pref = EX86_SELECT_66(op) | EX86_SSE2;
	FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
	FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
	return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
}

/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */

/* Creates a label at the current position. A one-byte SLJIT_INST_LABEL
   marker is placed into the intermediate buffer; no machine code is
   generated here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* Reuse the previous label if nothing was emitted since. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF(!inst);
	inst[0] = SLJIT_INST_LABEL;

	return label;
}

/* Creates a label aligned to 1 << alignment bytes, optionally followed
   by a chain of read-only data buffers, each starting at its own label
   and filled with NOPs. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
	sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
{
	sljit_uw mask, size;
	sljit_u8 *inst;
	struct sljit_label *label;
	struct sljit_label *next_label;
	struct sljit_extended_label *ext_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));

	sljit_reset_read_only_buffers(buffers);

	if (alignment <= SLJIT_LABEL_ALIGN_1) {
		SLJIT_SKIP_CHECKS(compiler);
		label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!label);
	} else {
		/* The used space is filled with NOPs. 
*/
		/* Worst case: reserve alignment-1 extra bytes. */
		mask = ((sljit_uw)1 << alignment) - 1;
		compiler->size += mask;

		inst = (sljit_u8*)ensure_buf(compiler, 1);
		PTR_FAIL_IF(!inst);
		inst[0] = SLJIT_INST_LABEL;

		ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
		PTR_FAIL_IF(!ext_label);
		set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
		label = &ext_label->label;
	}

	if (buffers == NULL)
		return label;

	next_label = label;

	while (1) {
		buffers->u.label = next_label;
		size = buffers->size;

		/* Reserve the buffer space as NOPs, four bytes at a time. */
		while (size >= 4) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			PTR_FAIL_IF(!inst);
			INC_SIZE(4);
			inst[0] = NOP;
			inst[1] = NOP;
			inst[2] = NOP;
			inst[3] = NOP;
			size -= 4;
		}

		if (size > 0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
			PTR_FAIL_IF(!inst);
			INC_SIZE(size);

			do {
				*inst++ = NOP;
			} while (--size != 0);
		}

		buffers = buffers->next;

		if (buffers == NULL)
			break;

		/* Each subsequent buffer starts at its own label. */
		SLJIT_SKIP_CHECKS(compiler);
		next_label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!next_label);
	}

	return label;
}

/* Emits a (conditional) jump placeholder; the actual instruction is
   encoded later, so only the worst case size is reserved here. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
	type &= 0xff;

	jump->addr = compiler->size;
	/* Worst case size. */
	compiler->size += (type >= SLJIT_JUMP) ? JUMP_MAX_SIZE : CJUMP_MAX_SIZE;
	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF_NULL(inst);

	inst[0] = SLJIT_INST_JUMP;
	return jump;
}

/* Indirect jump or call. Immediate targets become placeholder jumps
   resolved later; register/memory sources emit FF /4 (jmp) or
   FF /2 (call) directly. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
		jump->u.target = (sljit_uw)srcw;

		jump->addr = compiler->size;
		/* Worst case size. */
		compiler->size += JUMP_MAX_SIZE;
		inst = (sljit_u8*)ensure_buf(compiler, 1);
		FAIL_IF_NULL(inst);

		inst[0] = SLJIT_INST_JUMP;
	} else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
	}
	return SLJIT_SUCCESS;
}

/* Materializes a condition flag into a register/memory destination
   (SETcc followed by MOVZX or an OR for flag accumulation). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
	sljit_uw size;
#endif /* SLJIT_CONFIG_X86_64 */
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. 
*/
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* setcc = jcc + 0x10. */
	cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path: SETcc into TMP_REG1 low byte, then OR into dst. */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
		size = 3 + 2;
		/* REX prefixes needed when byte registers are not directly
		   encodable (reg index >= 4). */
		if (reg_map[TMP_REG1] >= 4)
			size += 1 + 1;
		else if (reg_map[dst] >= 4)
			size++;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		/* Set low register to conditional flag. */
		if (reg_map[TMP_REG1] >= 4)
			*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;

		inst[0] = GROUP_0F;
		inst[1] = cond_set;
		inst[2] = MOD_REG | reg_lmap[TMP_REG1];
		inst += 3;

		if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4)
			*inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));

		inst[0] = OR_rm8_r8;
		inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	size = 3 + (reg_map[reg] >= 4) + 4;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	/* Set low register to conditional flag. */

	if (reg_map[reg] >= 4)
		*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;

	inst[0] = GROUP_0F;
	inst[1] = cond_set;
	inst[2] = MOD_REG | reg_lmap[reg];

	inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	inst[4] = GROUP_0F;
	inst[5] = MOVZX_r_rm8;
	inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	/* Combine the flag value with dst via the requested arithmetic op. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else /* !SLJIT_CONFIG_X86_64 */
	SLJIT_ASSERT(reg_map[TMP_REG1] < 4);

	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		/* Low byte is accessible. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3 + 3);
		/* Set low byte to conditional flag. */
		inst[0] = GROUP_0F;
		inst[1] = cond_set;
		inst[2] = U8(MOD_REG | reg_map[dst]);

		inst[3] = GROUP_0F;
		inst[4] = MOVZX_r_rm8;
		inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
		FAIL_IF(!inst);
		INC_SIZE(3 + 2);

		/* Set low byte to conditional flag. */
		inst[0] = GROUP_0F;
		inst[1] = cond_set;
		inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);

		inst[3] = OR_rm8_r8;
		inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3 + 3);
	/* Set low byte to conditional flag. */
	inst[0] = GROUP_0F;
	inst[1] = cond_set;
	inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);

	inst[3] = GROUP_0F;
	inst[4] = MOVZX_r_rm8;
	inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

/* Conditional floating point select: dst_freg = cond ? src1 : src2_freg,
   implemented as a short forward branch over a conditional load. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_u8* inst;
	sljit_uw size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap the sources and invert the condition instead. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
	}

	/* Jcc with an inverted condition skips the load below;
	   the 8 bit displacement is patched in afterwards. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);

	size = compiler->size;
	FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));

	inst[1] = U8(compiler->size - size);
	return SLJIT_SUCCESS;
}

/* SIMD register <-> memory move (load or store, 128/256 bit,
   aligned or unaligned forms). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_uw op;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, 
srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	switch (reg_size) {
	case 4:
		op = EX86_SSE2;
		break;
	case 5:
		/* 256 bit moves need AVX2. */
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		op = EX86_SSE2 | VEX_256;
		break;
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	/* Register-to-register moves can always use the aligned form. */
	if (!(srcdst & SLJIT_MEM))
		alignment = reg_size;

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 2 || elem_size == 3) {
			op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;

			if (elem_size == 3)
				op |= EX86_PREF_66;

			/* Store opcode is load opcode + 1. */
			if (type & SLJIT_SIMD_STORE)
				op += 1;
		} else
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		/* 66 prefix: MOVDQA (aligned); F3 prefix: MOVDQU (unaligned). */
		op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
			| (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))
		return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw);

	return emit_groupf(compiler, op, vreg, srcdst, srcdstw);
}

/* Broadcasts a scalar (register, memory or immediate) into every lane
   of a SIMD register, using VPBROADCAST* / VBROADCAST* when available
   and SSE shuffle sequences otherwise. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_u8 *inst;
	sljit_u8 opcode = 0;
	sljit_uw op;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		CHECK_EXTRA_REGS(src, srcw, (void)0);
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;
#else /* !SLJIT_CONFIG_X86_32 */
	compiler->mode32 = 1;

	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_X86_32 */

	if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 256 bit operations are always VEX encoded. */
	if (reg_size == 5)
		use_vex = 1;

	if (use_vex && src != SLJIT_IMM) {
		op = 0;

		switch (elem_size) {
		case 0:
			if (cpu_feature_list & CPU_FEATURE_AVX2)
				op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		case 1:
			if (cpu_feature_list & CPU_FEATURE_AVX2)
				op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		case 2:
			if (type & SLJIT_SIMD_FLOAT) {
				/* AVX1 VBROADCASTSS only accepts a memory source. */
				if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))
					op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			} else if (cpu_feature_list & CPU_FEATURE_AVX2)
				op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		default:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (!(type & SLJIT_SIMD_FLOAT)) {
				if (cpu_feature_list & CPU_FEATURE_AVX2)
					op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
				break;
			}
#endif /* SLJIT_CONFIG_X86_64 */

			if (reg_size == 5)
				op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		}

		if (op != 0) {
			/* Integer register sources are moved into vreg first. */
			if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				if (elem_size >= 3)
					compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */
				FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && 
SLJIT_CONFIG_X86_64)
				compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
				src = vreg;
				srcw = 0;
			}

			if (reg_size == 5)
				op |= VEX_256;

			return emit_vex_instruction(compiler, op, vreg, 0, src, srcw);
		}
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (src == SLJIT_IMM) {
			/* Only 0.0 can be an immediate here: clear the register. */
			if (use_vex)
				return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);

			return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0);
		}

		SLJIT_ASSERT(reg_size == 4);

		if (use_vex) {
			if (elem_size == 3)
				return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw);

			SLJIT_ASSERT(!(src & SLJIT_MEM));
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
			return emit_byte(compiler, 0);
		}

		if (elem_size == 2 && vreg != src) {
			FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw));
			src = vreg;
			srcw = 0;
		}

		/* SHUFPS imm 0 broadcasts lane 0; MOVDDUP duplicates the low double. */
		op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;
		FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw));

		if (elem_size == 2)
			return emit_byte(compiler, 0);
		return SLJIT_SUCCESS;
	}

	if (src == SLJIT_IMM) {
		/* Widen 8/16 bit immediates to a replicated 32 bit pattern. */
		if (elem_size == 0) {
			srcw = (sljit_u8)srcw;
			srcw |= srcw << 8;
			srcw |= srcw << 16;
			elem_size = 2;
		} else if (elem_size == 1) {
			srcw = (sljit_u16)srcw;
			srcw |= srcw << 16;
			elem_size = 2;
		}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Canonicalize a 32 bit all-ones pattern to -1. */
		if (elem_size == 2 && (sljit_s32)srcw == -1)
			srcw = -1;
#endif /* SLJIT_CONFIG_X86_64 */

		if (srcw == 0 || srcw == -1) {
			/* All zeros (PXOR) or all ones (PCMPEQD). */
			if (use_vex)
				return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);

			return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0);
		}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (elem_size == 3)
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		else
#endif /* SLJIT_CONFIG_X86_64 */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);

		src = TMP_REG1;
		srcw = 0;
	}

	op = 2;
	opcode = MOVD_x_rm;

	switch (elem_size) {
	case 0:
		if (!FAST_IS_REG(src)) {
			opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
			op = 3;
		}
		break;
	case 1:
		if (!FAST_IS_REG(src))
			opcode = PINSRW_x_rm_i8;
		break;
	case 2:
		break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case 3:
		/* MOVQ */
		compiler->mode32 = 0;
		break;
#endif /* SLJIT_CONFIG_X86_64 */
	}

	/* Move the scalar into lane 0 of vreg. */
	if (use_vex) {
		if (opcode != MOVD_x_rm) {
			op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;
			FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw));
		} else
			FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));
	} else {
		inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_0F;
		inst[1] = opcode;

		if (op == 3) {
			SLJIT_ASSERT(opcode == 0x3a);
			inst[2] = PINSRB_x_rm_i8;
		}
	}

	if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		op = VPBROADCASTD_x_xm;
#else /* !SLJIT_CONFIG_X86_32 */
		op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
#endif /* SLJIT_CONFIG_X86_32 */
		return emit_vex_instruction(compiler, op | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);
	}

	SLJIT_ASSERT(reg_size == 4);

	/* PINSRB/PINSRW need their lane-index immediate (0). */
	if (opcode != MOVD_x_rm)
		FAIL_IF(emit_byte(compiler, 0));

	/* Spread lane 0 to all lanes with shuffles. */
	switch (elem_size) {
	case 0:
		/* PSHUFB with an all-zero mask replicates byte 0. */
		if (use_vex) {
			FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
			return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0);
		}
		FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
		return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
	case 1:
		/* Replicate word 0 in the low half, then fall through to
		   replicate the low dword. */
		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0));
		FAIL_IF(emit_byte(compiler, 0));
		SLJIT_FALLTHROUGH
	default:
		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
		return emit_byte(compiler, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case 3:
		compiler->mode32 = 1;
		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));
		/* 0x44 = dwords 0,1,0,1: duplicates the low qword. */
		return emit_byte(compiler, 0x44);
#endif /* SLJIT_CONFIG_X86_64 */
	}
}

/* Moves a single lane between a SIMD register and a register/memory
   operand (insert or extract, optionally zeroing the other lanes). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = 
(cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_u8 *inst;
	sljit_u8 opcode = 0;
	sljit_uw op;
	/* Original 256 bit destination; vreg may be redirected to TMP_FREG below. */
	sljit_s32 vreg_orig = vreg;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 srcdst_is_ereg = 0;
	sljit_s32 srcdst_orig = 0;
	sljit_sw srcdstw_orig = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* reg_size == 5 is a 256 bit (ymm) register and needs AVX2;
	   reg_size == 4 (128 bit, xmm) is the only other supported size. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		use_vex = 1;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
		return SLJIT_ERR_UNSUPPORTED;
#else /* SLJIT_CONFIG_X86_32 */
	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_X86_32 */

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#else /* !SLJIT_CONFIG_X86_64 */
	if (!(type & SLJIT_SIMD_FLOAT)) {
		CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);

		/* On 32 bit x86 a byte store needs a register with an accessible
		   low byte (EAX..EBX); otherwise go through TMP_REG1. */
		if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
			srcdst_orig = srcdst;
			srcdstw_orig = srcdstw;
			srcdst = TMP_REG1;
			srcdstw = 0;
		}
	}
#endif /* SLJIT_CONFIG_X86_64 */

	if (type & SLJIT_SIMD_LANE_ZERO) {
		if (lane_index == 0) {
			if (!(type & SLJIT_SIMD_FLOAT)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				if (elem_size == 3) {
					compiler->mode32 = 0;
					elem_size = 2;
				}
#endif /* SLJIT_CONFIG_X86_64 */
				if (srcdst == SLJIT_IMM) {
					if (elem_size == 0)
						srcdstw = (sljit_u8)srcdstw;
					else if (elem_size == 1)
						srcdstw = (sljit_u16)srcdstw;

					EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
					srcdst = TMP_REG1;
					srcdstw = 0;
					elem_size = 2;
				}

				/* MOVD zeroes the upper lanes of the destination. */
				if (elem_size == 2) {
					if (use_vex)
						return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
					return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw);
				}
			} else if (srcdst & SLJIT_MEM) {
				SLJIT_ASSERT(elem_size == 2 || elem_size == 3);

				if (use_vex)
					return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw);
				return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw);
			} else if (elem_size == 3) {
				if (use_vex)
					return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0);
				return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0);
			} else if (use_vex) {
				FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));
				return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0);
			}
		}

		/* Lanes in the upper 128 bit half of a ymm register are accessed
		   through TMP_FREG with a rebased lane index. */
		if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
			vreg = TMP_FREG;
			lane_index -= (1 << (4 - elem_size));
		} else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
			if (use_vex)
				FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));
			else
				FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
			srcdst = TMP_FREG;
			srcdstw = 0;
		}

		/* Zero the whole destination register first. */
		op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
			| ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;

		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0));
	} else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
		/* Extract the upper 128 bit half into TMP_FREG and work there. */
		FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
		FAIL_IF(emit_byte(compiler, 1));

		vreg = TMP_FREG;
		lane_index -= (1 << (4 - elem_size));
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (srcdst & SLJIT_MEM) {
				/* MOVLPD/MOVHPD move the low/high double to or from memory. */
				if (type & SLJIT_SIMD_STORE)
					op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
				else
					op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;

				/* VEX prefix clears upper bits of the target register. */
				if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG))
					FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2
						| ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw));
				else
					FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw));

				/* In case of store, vreg is not TMP_FREG. */
			} else if (type & SLJIT_SIMD_STORE) {
				if (lane_index == 1) {
					if (use_vex)
						return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
					return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0);
				}
				if (use_vex)
					return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);
				return emit_sse2_load(compiler, 0, srcdst, vreg, 0);
			} else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) {
				if (lane_index == 1)
					FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
				else
					FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));
			} else {
				if (lane_index == 1)
					FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0));
				else
					FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0));
			}
		} else if (type & SLJIT_SIMD_STORE) {
			if (lane_index == 0) {
				if (use_vex)
					return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV),
						vreg, ((srcdst & SLJIT_MEM) ? 0 : srcdst), srcdst, srcdstw);
				return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg);
			}

			if (srcdst & SLJIT_MEM) {
				if (use_vex)
					FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw));
				else
					FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
				return emit_byte(compiler, U8(lane_index));
			}

			if (use_vex) {
				FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0));
				return emit_byte(compiler, U8(lane_index));
			}

			/* Pick the cheapest instruction that brings the lane to index 0. */
			if (srcdst == vreg)
				op = SHUFPS_x_xm | EX86_SSE2;
			else {
				switch (lane_index) {
				case 1:
					op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
					break;
				case 2:
					op = MOVHLPS_x_x | EX86_SSE2;
					break;
				default:
					SLJIT_ASSERT(lane_index == 3);
					op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
					break;
				}
			}

			FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0));

			/* SHUFPS and PSHUFD need an immediate selector byte. */
			op &= 0xff;
			if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)
				return emit_byte(compiler, U8(lane_index));

			return SLJIT_SUCCESS;
		} else {
			if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
				FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));
				FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
			} else
				FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst));
		}

		if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE))
			return SLJIT_SUCCESS;

		/* Merge the modified upper half back into the 256 bit register. */
		SLJIT_ASSERT(reg_size == 5);

		if (type & SLJIT_SIMD_LANE_ZERO) {
			FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
			return emit_byte(compiler, 0x4e);
		}

		FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
		return emit_byte(compiler, 1);
	}

	if (srcdst == SLJIT_IMM) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
		srcdst = TMP_REG1;
		srcdstw = 0;
	}

	/* op selects the opcode map length: 3 means a 0F 3A prefixed opcode. */
	op = 3;

	switch (elem_size) {
	case 0:
		opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
		break;
	case 1:
		if (!(type & SLJIT_SIMD_STORE)) {
			op = 2;
			opcode = PINSRW_x_rm_i8;
		} else
			opcode = PEXTRW_rm_x_i8;
		break;
	case 2:
		opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
		break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case 3:
		/* PINSRQ / PEXTRQ */
		opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
		compiler->mode32 = 0;
		break;
#endif /* SLJIT_CONFIG_X86_64 */
	}

	if (use_vex && (type & SLJIT_SIMD_STORE)) {
		op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);
		FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw));
	} else {
		inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);
		FAIL_IF(!inst);
		inst[0] = GROUP_0F;

		if (op == 3) {
			inst[1] = 0x3a;
			inst[2] = opcode;
		} else
			inst[1] = opcode;
	}

	FAIL_IF(emit_byte(compiler, U8(lane_index)));

	if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
		if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
			/* Merge the modified upper half back into the 256 bit register. */
			SLJIT_ASSERT(reg_size == 5);

			if (type & SLJIT_SIMD_LANE_ZERO) {
				FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));
				return emit_byte(compiler, 0x4e);
			}

			FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));
			return emit_byte(compiler, 1);
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (srcdst_orig & SLJIT_MEM)
			return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_32 */
		return SLJIT_SUCCESS;
	}

	/* Sign extend the extracted lane into the destination register. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (elem_size >= 3)
		return SLJIT_SUCCESS;

	compiler->mode32 = (type & SLJIT_32);

	op = 2;

	if (elem_size == 0)
		op |= EX86_REX;

	if (elem_size == 2) {
		if (type & SLJIT_32)
			return SLJIT_SUCCESS;

		SLJIT_ASSERT(!(compiler->mode32));
		op = 1;
	}

	inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
	FAIL_IF(!inst);

	if (op != 1) {
		inst[0] = GROUP_0F;
		inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
	} else
		inst[0] = MOVSXD_r_rm;
#else /* !SLJIT_CONFIG_X86_64 */
	if (elem_size >= 2)
		return SLJIT_SUCCESS;

	FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
		(srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));

	if (srcdst_orig & SLJIT_MEM)
		return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
	return SLJIT_SUCCESS;
}

/* Broadcasts the lane src_lane_index of src into every lane of vreg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_uw pref;
	sljit_u8 byte;
	/* opcode3 must map to machine register 3 (used as a /3 opcode extension). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 opcode3 = TMP_REG1;
#else /* !SLJIT_CONFIG_X86_32 */
	sljit_s32 opcode3 = SLJIT_S0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

#if (defined SLJIT_CONFIG_X86_64 && 
SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
	SLJIT_ASSERT(reg_map[opcode3] == 3);

	/* 256 bit registers need AVX2; 128 bit is the only other supported size. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		use_vex = 1;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_FLOAT) {
		pref = 0;
		byte = U8(src_lane_index);

		if (elem_size == 3) {
			if (type & SLJIT_SIMD_TEST)
				return SLJIT_SUCCESS;

			if (reg_size == 5) {
				if (src_lane_index == 0)
					return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);

				FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));

				/* Replicate the 2 bit lane selector into all four fields. */
				byte = U8(byte | (byte << 2));
				return emit_byte(compiler, U8(byte | (byte << 4)));
			}

			if (src_lane_index == 0) {
				if (use_vex)
					return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0);
				return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0);
			}

			/* Changes it to SHUFPD_x_xm. */
			pref = EX86_PREF_66;
		} else if (elem_size != 2)
			return SLJIT_ERR_UNSUPPORTED;
		else if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		if (reg_size == 5) {
			SLJIT_ASSERT(elem_size == 2);

			if (src_lane_index == 0)
				return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);

			/* First copy the 128 bit half containing the lane into both halves. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));

			byte = 0x44;
			if (src_lane_index >= 4) {
				byte = 0xee;
				src_lane_index -= 4;
			}

			FAIL_IF(emit_byte(compiler, byte));
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
			byte = U8(src_lane_index);
		} else if (use_vex) {
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
		} else {
			if (vreg != src)
				FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0));

			FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0));
		}

		/* Build the shuffle immediate: same lane selector in every field. */
		if (elem_size == 2) {
			byte = U8(byte | (byte << 2));
			byte = U8(byte | (byte << 4));
		} else
			byte = U8(byte | (byte << 1));

		return emit_byte(compiler, U8(byte));
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (elem_size == 0) {
		/* Byte lanes: move the selected lane into the low 8 bytes first. */
		if (reg_size == 5 && src_lane_index >= 16) {
			FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
			FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
			src_lane_index &= 0x7;
			src = vreg;
		}

		if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
			pref = 0;

			if ((src_lane_index & 0x3) == 0) {
				pref = EX86_PREF_66;
				byte = U8(src_lane_index >> 2);
			} else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
				pref = EX86_PREF_F2;
				byte = U8(src_lane_index >> 1);
			} else {
				/* Shift the selected byte down to position 0. */
				if (!use_vex) {
					if (vreg != src)
						FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));

					FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0));
				} else
					FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0));

				FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
			}

			if (pref != 0) {
				if (use_vex)
					FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
				else
					FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
				FAIL_IF(emit_byte(compiler, byte));
			}

			src = vreg;
		}

		if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
			return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? 
srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_u8 opcode;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	/* 256 bit registers need AVX2; 128 bit is the only other supported size. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		use_vex = 1;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_FLOAT) {
		/* Only float (32 bit) to double (64 bit) widening is available. */
		if (elem_size != 2 || elem2_size != 3)
			return SLJIT_ERR_UNSUPPORTED;

		if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		if (use_vex)
			return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw);
		return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw);
	}

	/* Select the PMOVSX* / PMOVZX* opcode by source and destination lane size. */
	switch (elem_size) {
	case 0:
		if (elem2_size == 1)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
		else if (elem2_size == 2)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
		else if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	case 1:
		if (elem2_size == 2)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
		else if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	case 2:
		if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (use_vex)
		return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw);
	return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw);
}

/* Collects the sign bit of every lane of vreg into the integer dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_s32 dst_r;
	sljit_uw op;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 4) {
		if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		op = EX86_PREF_66 | EX86_SSE2_OP2;

		switch (elem_size) {
		case 1:
			/* No 16 bit movemask exists: narrow words to bytes first. */
			if (use_vex)
				FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0));
			else
				FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0));
			vreg = TMP_FREG;
			break;
		case 2:
			op = EX86_SSE2_OP2;
			break;
		}

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
		op |= (elem_size < 2) ? 
PMOVMSKB_r_x : MOVMSKPS_r_x;

		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

		/* Words were packed to bytes: drop the duplicated upper 8 mask bits. */
		if (elem_size == 1) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
			FAIL_IF(!inst);
			inst[1] |= SHR;
		}

		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

		return SLJIT_SUCCESS;
	}

	/* 256 bit registers need AVX2. */
	if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (elem_size == 1) {
		/* Pack both halves to bytes, then take the byte movemask. */
		FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
		FAIL_IF(emit_byte(compiler, 1));
		FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0));
		FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
	} else {
		op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;

		if (elem_size == 0)
			op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
		else if (elem_size == 3)
			op |= EX86_PREF_66;

		FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
	}

	if (dst_r == TMP_REG1) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a 128 bit register to register move (MOVAPS or MOVDQA/MOVAPD). */
static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src_vreg)
{
	sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? 
MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;

	SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);

	/* 66 prefix turns MOVAPS into MOVAPD (doubles) and is required for MOVDQA. */
	if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
		op |= EX86_PREF_66;

	return emit_groupf(compiler, op, dst_vreg, src_vreg, 0);
}

/* Two operand SIMD operation: dst_vreg = src1_vreg OP src2/src2w. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_uw op = 0;
	sljit_uw mov_op = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	/* 256 bit registers need AVX2; 128 bit is the only other supported size. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;
	case SLJIT_SIMD_OP2_OR:
		op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;
	case SLJIT_SIMD_OP2_XOR:
		op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;

	case SLJIT_SIMD_OP2_SHUFFLE:
		if (reg_size != 4)
			return SLJIT_ERR_UNSUPPORTED;

		op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38;
		break;
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* A narrower memory operand is loaded into TMP_FREG with an
	   unaligned move before the operation. */
	if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) {
		mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2;
		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w));
		else
			FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w));

		src2 = TMP_FREG;
		src2w = 0;
	}

	if (reg_size == 5 || use_vex) {
		if (reg_size == 5)
			op |= VEX_256;

		return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w);
	}

	/* Non-VEX forms are destructive: dst must alias src1 first. */
	if (dst_vreg != src1_vreg) {
		if (dst_vreg == src2) {
			if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
				/* PSHUFB is not commutative: save src2 before overwriting dst. */
				FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2));
				FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
				src2 = TMP_FREG;
				src2w = 0;
			} else
				src2 = src1_vreg;
		} else
			FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
	}

	if (op & (VEX_OP_0F38 | VEX_OP_0F3A))
		return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
	return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
}

/* Atomic load: plain MOV, since x86 aligned loads are atomic and the
   store side uses LOCK CMPXCHG. Signed loads are not supported. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (op & 
SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
}

/* Atomic store via LOCK CMPXCHG: temp_reg holds the expected old value
   (CMPXCHG requires it in EAX/RAX), src_reg is the new value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_uw pref;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 saved_reg = TMP_REG1;
	sljit_s32 swap_tmp = 0;
	sljit_sw srcw = 0;
	sljit_sw tempw = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
	CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
	CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);

	SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
	SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));

	if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* Exchange EAX and TMP_REG1 so the expected value lands in EAX. */
	if (temp_reg == SLJIT_TMP_DEST_REG) {
		FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));

		if (src_reg == SLJIT_R0)
			src_reg = TMP_REG1;
		if (mem_reg == SLJIT_R0)
			mem_reg = TMP_REG1;

		temp_reg = SLJIT_R0;
		swap_tmp = 1;
	}

	/* Src is virtual register or its low byte is not accessible. */
	if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
		SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG);

		if (swap_tmp) {
			saved_reg = (mem_reg != SLJIT_R1) ? SLJIT_R1 : SLJIT_R2;

			/* Spill saved_reg to the stack while it holds the source value. */
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0);
			EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw);
		} else
			EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);

		src_reg = saved_reg;

		if (mem_reg == src_reg)
			mem_reg = saved_reg;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* CMPXCHG compares against EAX/RAX: move the expected value there. */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;

		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0);
		EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);

		if (src_reg == SLJIT_R0)
			src_reg = TMP_REG2;
		if (mem_reg == SLJIT_R0)
			mem_reg = TMP_REG2;
#else /* !SLJIT_CONFIG_X86_64 */
		SLJIT_ASSERT(!swap_tmp);

		if (src_reg == TMP_REG1) {
			if (mem_reg == SLJIT_R0) {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
				EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
				EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

				mem_reg = SLJIT_R1;
				saved_reg = SLJIT_R1;
			} else {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
				saved_reg = SLJIT_R0;
			}
		} else {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0);
			EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

			if (src_reg == SLJIT_R0)
				src_reg = TMP_REG1;
			if (mem_reg == SLJIT_R0)
				mem_reg = TMP_REG1;
		}
#endif /* SLJIT_CONFIG_X86_64 */
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
#endif /* SLJIT_CONFIG_X86_64 */

	/* Lock prefix. */
	FAIL_IF(emit_byte(compiler, GROUP_LOCK));

	pref = 0;
	if (op == SLJIT_MOV_U16)
		pref = EX86_HALF_ARG | EX86_PREF_66;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_MOV_U8)
		pref = EX86_REX;
#endif /* SLJIT_CONFIG_X86_64 */

	FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? 
CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* Undo the earlier EAX/TMP_REG1 exchange and restore the spilled register. */
	if (swap_tmp) {
		SLJIT_ASSERT(temp_reg == SLJIT_R0);
		FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));

		if (saved_reg != TMP_REG1)
			return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0);
		return SLJIT_SUCCESS;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* Restore the caller visible registers clobbered around CMPXCHG. */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
		return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0);
#else /* !SLJIT_CONFIG_X86_64 */
		EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0);
		if (saved_reg == SLJIT_R1)
			return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0);
#endif /* SLJIT_CONFIG_X86_64 */
	}
	return SLJIT_SUCCESS;
}

/* Computes dst = SP + offset (the address of a local variable). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Offsets outside the signed 32 bit range need a 64 bit immediate load. */
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 
0);
}

/* Emits a patchable constant load; the constant can later be updated
   in the generated code by sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
	sljit_s32 reg;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
#endif /* !SLJIT_CONFIG_X86_32 */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_U8:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = (op & SLJIT_32);
#endif /* SLJIT_CONFIG_X86_64 */

		/* Bit 8 of the initial value selects sign extension so later
		   byte patches keep a consistent encoding. */
		if ((init_value & 0x100) != 0)
			init_value = init_value | -(sljit_sw)0x100;
		else
			init_value = (sljit_u8)init_value;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_is_ereg) {
			if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))
				return NULL;
			dst = 0;
			break;
		}
#endif /* !SLJIT_CONFIG_X86_32 */

		reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

		if (emit_mov(compiler, reg, 0, SLJIT_IMM, init_value))
			return NULL;
		break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case SLJIT_MOV:
		compiler->mode32 = 0;
		reg = FAST_IS_REG(dst) ? 
dst : TMP_REG1;52935294if (emit_load_imm64(compiler, reg, init_value))5295return NULL;5296break;5297#endif /* SLJIT_CONFIG_X86_64 */5298default:5299#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5300compiler->mode32 = (op == SLJIT_MOV32);5301#endif /* SLJIT_CONFIG_X86_64 */53025303if (emit_mov(compiler, dst, dstw, SLJIT_IMM, (sljit_s32)init_value))5304return NULL;5305dst = 0;5306break;5307}53085309inst = (sljit_u8*)ensure_buf(compiler, 1);5310PTR_FAIL_IF(!inst);53115312inst[0] = SLJIT_INST_CONST;53135314if (dst & SLJIT_MEM) {5315#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5316if (op == SLJIT_MOV) {5317if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))5318return NULL;5319return const_;5320}5321#endif53225323if (emit_mov_byte(compiler, 0, dst, dstw, TMP_REG1, 0))5324return NULL;5325}53265327return const_;5328}53295330SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,5331sljit_s32 dst, sljit_sw dstw)5332{5333struct sljit_jump *jump;5334sljit_u8 *inst;5335#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5336sljit_s32 reg;5337#endif /* SLJIT_CONFIG_X86_64 */5338SLJIT_UNUSED_ARG(op);53395340CHECK_ERROR_PTR();5341CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));5342ADJUST_LOCAL_OFFSET(dst, dstw);53435344CHECK_EXTRA_REGS(dst, dstw, (void)0);53455346jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));5347PTR_FAIL_IF(!jump);5348set_mov_addr(jump, compiler, 0);53495350#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5351compiler->mode32 = 0;5352if (dst & SLJIT_MEM)5353reg = TMP_REG1;5354else5355reg = (op != SLJIT_ADD_ABS_ADDR) ? dst : TMP_REG2;53565357PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));5358jump->addr = compiler->size;53595360if (reg_map[reg] >= 8)5361jump->flags |= MOV_ADDR_HI;5362#else /* !SLJIT_CONFIG_X86_64 */5363if (op == SLJIT_ADD_ABS_ADDR) {5364if (dst != SLJIT_R0) {5365/* Must not be a signed byte argument. 
*/5366inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0x100, dst, dstw);5367PTR_FAIL_IF(!inst);5368*(inst + 1) |= ADD;5369} else5370PTR_FAIL_IF(emit_do_imm(compiler, ADD_EAX_i32, 0));5371} else {5372PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));5373}5374#endif /* SLJIT_CONFIG_X86_64 */53755376inst = (sljit_u8*)ensure_buf(compiler, 1);5377PTR_FAIL_IF(!inst);53785379inst[0] = SLJIT_INST_MOV_ADDR;53805381#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5382if (op == SLJIT_ADD_ABS_ADDR) {5383inst = emit_x86_instruction(compiler, 1, reg, 0, dst, dstw);5384PTR_FAIL_IF(!inst);5385*inst = ADD_rm_r;5386} else if (dst & SLJIT_MEM)5387PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));5388#endif /* SLJIT_CONFIG_X86_64 */53895390return jump;5391}53925393SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)5394{5395SLJIT_UNUSED_ARG(executable_offset);53965397SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);5398#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)5399sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));5400#else5401sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);5402#endif5403SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);5404}54055406SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)5407{5408void *start_addr;5409SLJIT_UNUSED_ARG(executable_offset);54105411#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)5412if (op == SLJIT_MOV) {5413start_addr = (void*)(addr - sizeof(sljit_sw));5414SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);5415sljit_unaligned_store_sw(start_addr, new_constant);5416SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);5417return;5418}5419#endif54205421start_addr = (void*)(addr - sizeof(sljit_s32));54225423if ((op | SLJIT_32) == SLJIT_MOV32_U8) 
{5424if ((new_constant & 0x100) != 0)5425new_constant = new_constant | -(sljit_sw)0x100;5426else5427new_constant = (sljit_u8)new_constant;5428}54295430SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 0);5431sljit_unaligned_store_s32(start_addr, (sljit_s32)new_constant);5432SLJIT_UPDATE_WX_FLAGS(start_addr, (void*)addr, 1);5433}543454355436