/* Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeX86_common.c */
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)27{28return "x86" SLJIT_CPUINFO;29}3031/*3232b register indexes:330 - EAX341 - ECX352 - EDX363 - EBX374 - ESP385 - EBP396 - ESI407 - EDI41*/4243/*4464b register indexes:450 - RAX461 - RCX472 - RDX483 - RBX494 - RSP505 - RBP516 - RSI527 - RDI538 - R8 - From now on REX prefix is required549 - R95510 - R105611 - R115712 - R125813 - R135914 - R146015 - R1561*/6263#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)64#define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)6566#if (defined SLJIT_CONFIG_X86_32 && 
SLJIT_CONFIG_X86_32)6768static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {690, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 370};7172static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {730, 1, 2, 3, 4, 5, 6, 7, 074};7576#define CHECK_EXTRA_REGS(p, w, do) \77if (p >= SLJIT_R3 && p <= SLJIT_S3) { \78w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \79p = SLJIT_MEM1(SLJIT_SP); \80do; \81}8283#else /* SLJIT_CONFIG_X86_32 */8485#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)8687/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present88Note: avoid to use r12 and r13 for memory addressing89therefore r12 is better to be a higher saved register. */90#ifndef _WIN6491/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */92static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {930, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 994};95/* low-map. reg_map & 0x7. */96static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {970, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 198};99#else100/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */101static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {1020, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10103};104/* low-map. reg_map & 0x7. */105static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {1060, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2107};108#endif109110/* Args: xmm0-xmm3 */111static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {1120, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4113};114/* low-map. freg_map & 0x7. 
*/115static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {1160, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4117};118119#define REX_W 0x48120#define REX_R 0x44121#define REX_X 0x42122#define REX_B 0x41123#define REX 0x40124125#ifndef _WIN64126#define HALFWORD_MAX 0x7fffffffl127#define HALFWORD_MIN -0x80000000l128#else129#define HALFWORD_MAX 0x7fffffffll130#define HALFWORD_MIN -0x80000000ll131#endif132133#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)134#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)135136#define CHECK_EXTRA_REGS(p, w, do)137138#endif /* SLJIT_CONFIG_X86_32 */139140#define U8(v) ((sljit_u8)(v))141142/* Size flags for emit_x86_instruction: */143#define EX86_BIN_INS ((sljit_uw)0x000010)144#define EX86_SHIFT_INS ((sljit_uw)0x000020)145#define EX86_BYTE_ARG ((sljit_uw)0x000040)146#define EX86_HALF_ARG ((sljit_uw)0x000080)147/* Size flags for both emit_x86_instruction and emit_vex_instruction: */148#define EX86_REX ((sljit_uw)0x000100)149#define EX86_NO_REXW ((sljit_uw)0x000200)150#define EX86_PREF_66 ((sljit_uw)0x000400)151#define EX86_PREF_F2 ((sljit_uw)0x000800)152#define EX86_PREF_F3 ((sljit_uw)0x001000)153#define EX86_SSE2_OP1 ((sljit_uw)0x002000)154#define EX86_SSE2_OP2 ((sljit_uw)0x004000)155#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)156#define EX86_VEX_EXT ((sljit_uw)0x008000)157/* Op flags for emit_vex_instruction: */158#define VEX_OP_0F38 ((sljit_uw)0x010000)159#define VEX_OP_0F3A ((sljit_uw)0x020000)160#define VEX_SSE2_OPV ((sljit_uw)0x040000)161#define VEX_AUTO_W ((sljit_uw)0x080000)162#define VEX_W ((sljit_uw)0x100000)163#define VEX_256 ((sljit_uw)0x200000)164165#define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66)166#define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? 
EX86_PREF_F3 : EX86_PREF_F2)167168/* --------------------------------------------------------------------- */169/* Instruction forms */170/* --------------------------------------------------------------------- */171172#define ADD (/* BINARY */ 0 << 3)173#define ADD_EAX_i32 0x05174#define ADD_r_rm 0x03175#define ADD_rm_r 0x01176#define ADDSD_x_xm 0x58177#define ADC (/* BINARY */ 2 << 3)178#define ADC_EAX_i32 0x15179#define ADC_r_rm 0x13180#define ADC_rm_r 0x11181#define AND (/* BINARY */ 4 << 3)182#define AND_EAX_i32 0x25183#define AND_r_rm 0x23184#define AND_rm_r 0x21185#define ANDPD_x_xm 0x54186#define BSR_r_rm (/* GROUP_0F */ 0xbd)187#define BSF_r_rm (/* GROUP_0F */ 0xbc)188#define BSWAP_r (/* GROUP_0F */ 0xc8)189#define CALL_i32 0xe8190#define CALL_rm (/* GROUP_FF */ 2 << 3)191#define CDQ 0x99192#define CMOVE_r_rm (/* GROUP_0F */ 0x44)193#define CMP (/* BINARY */ 7 << 3)194#define CMP_EAX_i32 0x3d195#define CMP_r_rm 0x3b196#define CMP_rm_r 0x39197#define CMPS_x_xm 0xc2198#define CMPXCHG_rm_r 0xb1199#define CMPXCHG_rm8_r 0xb0200#define CVTPD2PS_x_xm 0x5a201#define CVTPS2PD_x_xm 0x5a202#define CVTSI2SD_x_rm 0x2a203#define CVTTSD2SI_r_xm 0x2c204#define DIV (/* GROUP_F7 */ 6 << 3)205#define DIVSD_x_xm 0x5e206#define EXTRACTPS_x_xm 0x17207#define FLDS 0xd9208#define FLDL 0xdd209#define FSTPS 0xd9210#define FSTPD 0xdd211#define INSERTPS_x_xm 0x21212#define INT3 0xcc213#define IDIV (/* GROUP_F7 */ 7 << 3)214#define IMUL (/* GROUP_F7 */ 5 << 3)215#define IMUL_r_rm (/* GROUP_0F */ 0xaf)216#define IMUL_r_rm_i8 0x6b217#define IMUL_r_rm_i32 0x69218#define JL_i8 0x7c219#define JE_i8 0x74220#define JNC_i8 0x73221#define JNE_i8 0x75222#define JMP_i8 0xeb223#define JMP_i32 0xe9224#define JMP_rm (/* GROUP_FF */ 4 << 3)225#define LEA_r_m 0x8d226#define LOOP_i8 0xe2227#define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)228#define MOV_r_rm 0x8b229#define MOV_r_i32 0xb8230#define MOV_rm_r 0x89231#define MOV_rm_i32 0xc7232#define MOV_rm8_i8 0xc6233#define MOV_rm8_r8 
0x88234#define MOVAPS_x_xm 0x28235#define MOVAPS_xm_x 0x29236#define MOVD_x_rm 0x6e237#define MOVD_rm_x 0x7e238#define MOVDDUP_x_xm 0x12239#define MOVDQA_x_xm 0x6f240#define MOVDQA_xm_x 0x7f241#define MOVDQU_x_xm 0x6f242#define MOVHLPS_x_x 0x12243#define MOVHPD_m_x 0x17244#define MOVHPD_x_m 0x16245#define MOVLHPS_x_x 0x16246#define MOVLPD_m_x 0x13247#define MOVLPD_x_m 0x12248#define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)249#define MOVQ_x_xm (/* GROUP_0F */ 0x7e)250#define MOVSD_x_xm 0x10251#define MOVSD_xm_x 0x11252#define MOVSHDUP_x_xm 0x16253#define MOVSXD_r_rm 0x63254#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)255#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)256#define MOVUPS_x_xm 0x10257#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)258#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)259#define MUL (/* GROUP_F7 */ 4 << 3)260#define MULSD_x_xm 0x59261#define NEG_rm (/* GROUP_F7 */ 3 << 3)262#define NOP 0x90263#define NOT_rm (/* GROUP_F7 */ 2 << 3)264#define OR (/* BINARY */ 1 << 3)265#define OR_r_rm 0x0b266#define OR_EAX_i32 0x0d267#define OR_rm_r 0x09268#define OR_rm8_r8 0x08269#define ORPD_x_xm 0x56270#define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)271#define PAND_x_xm 0xdb272#define PCMPEQD_x_xm 0x76273#define PINSRB_x_rm_i8 0x20274#define PINSRW_x_rm_i8 0xc4275#define PINSRD_x_rm_i8 0x22276#define PEXTRB_rm_x_i8 0x14277#define PEXTRW_rm_x_i8 0x15278#define PEXTRD_rm_x_i8 0x16279#define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)280#define PMOVSXBD_x_xm 0x21281#define PMOVSXBQ_x_xm 0x22282#define PMOVSXBW_x_xm 0x20283#define PMOVSXDQ_x_xm 0x25284#define PMOVSXWD_x_xm 0x23285#define PMOVSXWQ_x_xm 0x24286#define PMOVZXBD_x_xm 0x31287#define PMOVZXBQ_x_xm 0x32288#define PMOVZXBW_x_xm 0x30289#define PMOVZXDQ_x_xm 0x35290#define PMOVZXWD_x_xm 0x33291#define PMOVZXWQ_x_xm 0x34292#define POP_r 0x58293#define POP_rm 0x8f294#define POPF 0x9d295#define POR_x_xm 0xeb296#define PREFETCH 0x18297#define PSHUFB_x_xm 0x00298#define PSHUFD_x_xm 0x70299#define PSHUFLW_x_xm 0x70300#define PSRLDQ_x 0x73301#define 
PSLLD_x_i8 0x72302#define PSLLQ_x_i8 0x73303#define PUSH_i32 0x68304#define PUSH_r 0x50305#define PUSH_rm (/* GROUP_FF */ 6 << 3)306#define PUSHF 0x9c307#define PXOR_x_xm 0xef308#define ROL (/* SHIFT */ 0 << 3)309#define ROR (/* SHIFT */ 1 << 3)310#define RET_near 0xc3311#define RET_i16 0xc2312#define SBB (/* BINARY */ 3 << 3)313#define SBB_EAX_i32 0x1d314#define SBB_r_rm 0x1b315#define SBB_rm_r 0x19316#define SAR (/* SHIFT */ 7 << 3)317#define SHL (/* SHIFT */ 4 << 3)318#define SHLD (/* GROUP_0F */ 0xa5)319#define SHRD (/* GROUP_0F */ 0xad)320#define SHR (/* SHIFT */ 5 << 3)321#define SHUFPS_x_xm 0xc6322#define SUB (/* BINARY */ 5 << 3)323#define SUB_EAX_i32 0x2d324#define SUB_r_rm 0x2b325#define SUB_rm_r 0x29326#define SUBSD_x_xm 0x5c327#define TEST_EAX_i32 0xa9328#define TEST_rm_r 0x85329#define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)330#define UCOMISD_x_xm 0x2e331#define UNPCKLPD_x_xm 0x14332#define UNPCKLPS_x_xm 0x14333#define VBROADCASTSD_x_xm 0x19334#define VBROADCASTSS_x_xm 0x18335#define VEXTRACTF128_x_ym 0x19336#define VEXTRACTI128_x_ym 0x39337#define VINSERTF128_y_y_xm 0x18338#define VINSERTI128_y_y_xm 0x38339#define VPBROADCASTB_x_xm 0x78340#define VPBROADCASTD_x_xm 0x58341#define VPBROADCASTQ_x_xm 0x59342#define VPBROADCASTW_x_xm 0x79343#define VPERMPD_y_ym 0x01344#define VPERMQ_y_ym 0x00345#define XCHG_EAX_r 0x90346#define XCHG_r_rm 0x87347#define XOR (/* BINARY */ 6 << 3)348#define XOR_EAX_i32 0x35349#define XOR_r_rm 0x33350#define XOR_rm_r 0x31351#define XORPD_x_xm 0x57352353#define GROUP_0F 0x0f354#define GROUP_66 0x66355#define GROUP_F3 0xf3356#define GROUP_F7 0xf7357#define GROUP_FF 0xff358#define GROUP_BINARY_81 0x81359#define GROUP_BINARY_83 0x83360#define GROUP_SHIFT_1 0xd1361#define GROUP_SHIFT_N 0xc1362#define GROUP_SHIFT_CL 0xd3363#define GROUP_LOCK 0xf0364365#define MOD_REG 0xc0366#define MOD_DISP8 0x40367368#define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))369370#define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))371#define 
POP_REG(r) (*inst++ = U8(POP_r + (r)))372#define RET() (*inst++ = RET_near)373#define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)374375#define SLJIT_INST_LABEL 255376#define SLJIT_INST_JUMP 254377#define SLJIT_INST_MOV_ADDR 253378#define SLJIT_INST_CONST 252379380/* Multithreading does not affect these static variables, since they store381built-in CPU features. Therefore they can be overwritten by different threads382if they detect the CPU features in the same time. */383#define CPU_FEATURE_DETECTED 0x001384#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)385#define CPU_FEATURE_SSE2 0x002386#endif387#define CPU_FEATURE_SSE41 0x004388#define CPU_FEATURE_LZCNT 0x008389#define CPU_FEATURE_TZCNT 0x010390#define CPU_FEATURE_CMOV 0x020391#define CPU_FEATURE_AVX 0x040392#define CPU_FEATURE_AVX2 0x080393#define CPU_FEATURE_OSXSAVE 0x100394395static sljit_u32 cpu_feature_list = 0;396397#ifdef _WIN32_WCE398#include <cmnintrin.h>399#elif defined(_MSC_VER) && _MSC_VER >= 1400400#include <intrin.h>401#elif defined(__INTEL_COMPILER)402#include <cpuid.h>403#endif404405#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \406|| (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))407#include <immintrin.h>408#endif409410/******************************************************/411/* Unaligned-store functions */412/******************************************************/413414static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)415{416SLJIT_MEMCPY(addr, &value, sizeof(value));417}418419static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)420{421SLJIT_MEMCPY(addr, &value, sizeof(value));422}423424static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)425{426SLJIT_MEMCPY(addr, &value, sizeof(value));427}428429/******************************************************/430/* Utility functions */431/******************************************************/432433static void 
execute_cpu_id(sljit_u32 info[4])434{435#if (defined(_MSC_VER) && _MSC_VER >= 1400) \436|| (defined(__INTEL_COMPILER) && __INTEL_COMPILER == 2021 && __INTEL_COMPILER_UPDATE >= 7)437438__cpuidex((int*)info, (int)info[0], (int)info[2]);439440#elif (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900)441442__get_cpuid_count(info[0], info[2], info, info + 1, info + 2, info + 3);443444#elif (defined(_MSC_VER) || defined(__INTEL_COMPILER)) \445&& (defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32)446447/* Intel syntax. */448__asm {449mov esi, info450mov eax, [esi]451mov ecx, [esi + 8]452cpuid453mov [esi], eax454mov [esi + 4], ebx455mov [esi + 8], ecx456mov [esi + 12], edx457}458459#else460461__asm__ __volatile__ (462"cpuid\n"463: "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])464: "0" (info[0]), "2" (info[2])465);466467#endif468}469470static sljit_u32 execute_get_xcr0_low(void)471{472sljit_u32 xcr0;473474#if (defined(_MSC_VER) && _MSC_VER >= 1400) || defined(__INTEL_COMPILER) \475|| (defined(__INTEL_LLVM_COMPILER) && defined(__XSAVE__))476477xcr0 = (sljit_u32)_xgetbv(0);478479#elif defined(__TINYC__)480481__asm__ (482"xorl %%ecx, %%ecx\n"483".byte 0x0f\n"484".byte 0x01\n"485".byte 0xd0\n"486: "=a" (xcr0)487:488#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32489: "ecx", "edx"490#else /* !SLJIT_CONFIG_X86_32 */491: "rcx", "rdx"492#endif /* SLJIT_CONFIG_X86_32 */493);494495#elif (defined(__INTEL_LLVM_COMPILER) && __INTEL_LLVM_COMPILER < 20220100) \496|| (defined(__clang__) && __clang_major__ < 14) \497|| (defined(__GNUC__) && __GNUC__ < 3) \498|| defined(__SUNPRO_C) || defined(__SUNPRO_CC)499500/* AT&T syntax. */501__asm__ (502"xorl %%ecx, %%ecx\n"503"xgetbv\n"504: "=a" (xcr0)505:506#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32507: "ecx", "edx"508#else /* !SLJIT_CONFIG_X86_32 */509: "rcx", "rdx"510#endif /* SLJIT_CONFIG_X86_32 */511);512513#elif defined(_MSC_VER)514515/* Intel syntax. 
*/516__asm {517xor ecx, ecx518xgetbv519mov xcr0, eax520}521522#else523524__asm__ (525"xor{l %%ecx, %%ecx | ecx, ecx}\n"526"xgetbv\n"527: "=a" (xcr0)528:529#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32530: "ecx", "edx"531#else /* !SLJIT_CONFIG_X86_32 */532: "rcx", "rdx"533#endif /* SLJIT_CONFIG_X86_32 */534);535536#endif537return xcr0;538}539540static void get_cpu_features(void)541{542sljit_u32 feature_list = CPU_FEATURE_DETECTED;543sljit_u32 info[4] = {0};544sljit_u32 max_id;545546execute_cpu_id(info);547max_id = info[0];548549if (max_id >= 7) {550info[0] = 7;551info[2] = 0;552execute_cpu_id(info);553554if (info[1] & 0x8)555feature_list |= CPU_FEATURE_TZCNT;556if (info[1] & 0x20)557feature_list |= CPU_FEATURE_AVX2;558}559560if (max_id >= 1) {561info[0] = 1;562#if defined(SLJIT_CONFIG_X86_32) && SLJIT_CONFIG_X86_32563/* Winchip 2 and Cyrix MII bugs */564info[1] = info[2] = 0;565#endif566execute_cpu_id(info);567568if (info[2] & 0x80000)569feature_list |= CPU_FEATURE_SSE41;570if (info[2] & 0x8000000)571feature_list |= CPU_FEATURE_OSXSAVE;572if (info[2] & 0x10000000)573feature_list |= CPU_FEATURE_AVX;574#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)575if (info[3] & 0x4000000)576feature_list |= CPU_FEATURE_SSE2;577#endif578if (info[3] & 0x8000)579feature_list |= CPU_FEATURE_CMOV;580}581582info[0] = 0x80000000;583execute_cpu_id(info);584max_id = info[0];585586if (max_id >= 0x80000001) {587info[0] = 0x80000001;588execute_cpu_id(info);589590if (info[2] & 0x20)591feature_list |= CPU_FEATURE_LZCNT;592}593594if ((feature_list & CPU_FEATURE_OSXSAVE) && (execute_get_xcr0_low() & 0x4) == 0)595feature_list &= ~(sljit_u32)(CPU_FEATURE_AVX | CPU_FEATURE_AVX2);596597cpu_feature_list = feature_list;598}599600static sljit_u8 get_jump_code(sljit_uw type)601{602switch (type) {603case SLJIT_EQUAL:604case SLJIT_ATOMIC_STORED:605case SLJIT_F_EQUAL:606case SLJIT_UNORDERED_OR_EQUAL:607return 0x84 /* je */;608609case SLJIT_NOT_EQUAL:610case SLJIT_ATOMIC_NOT_STORED:611case 
SLJIT_F_NOT_EQUAL:612case SLJIT_ORDERED_NOT_EQUAL:613return 0x85 /* jne */;614615case SLJIT_LESS:616case SLJIT_CARRY:617case SLJIT_F_LESS:618case SLJIT_UNORDERED_OR_LESS:619case SLJIT_UNORDERED_OR_GREATER:620return 0x82 /* jc */;621622case SLJIT_GREATER_EQUAL:623case SLJIT_NOT_CARRY:624case SLJIT_F_GREATER_EQUAL:625case SLJIT_ORDERED_GREATER_EQUAL:626case SLJIT_ORDERED_LESS_EQUAL:627return 0x83 /* jae */;628629case SLJIT_GREATER:630case SLJIT_F_GREATER:631case SLJIT_ORDERED_LESS:632case SLJIT_ORDERED_GREATER:633return 0x87 /* jnbe */;634635case SLJIT_LESS_EQUAL:636case SLJIT_F_LESS_EQUAL:637case SLJIT_UNORDERED_OR_GREATER_EQUAL:638case SLJIT_UNORDERED_OR_LESS_EQUAL:639return 0x86 /* jbe */;640641case SLJIT_SIG_LESS:642return 0x8c /* jl */;643644case SLJIT_SIG_GREATER_EQUAL:645return 0x8d /* jnl */;646647case SLJIT_SIG_GREATER:648return 0x8f /* jnle */;649650case SLJIT_SIG_LESS_EQUAL:651return 0x8e /* jle */;652653case SLJIT_OVERFLOW:654return 0x80 /* jo */;655656case SLJIT_NOT_OVERFLOW:657return 0x81 /* jno */;658659case SLJIT_UNORDERED:660case SLJIT_ORDERED_EQUAL: /* NaN. */661return 0x8a /* jp */;662663case SLJIT_ORDERED:664case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. 
*/665return 0x8b /* jpo */;666}667return 0;668}669670#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)671static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);672#else /* !SLJIT_CONFIG_X86_32 */673static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr);674static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset);675#endif /* SLJIT_CONFIG_X86_32 */676677static sljit_u8* detect_near_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)678{679sljit_uw type = jump->flags >> TYPE_SHIFT;680sljit_s32 short_jump;681sljit_uw label_addr;682sljit_uw jump_addr;683684jump_addr = (sljit_uw)code_ptr;685if (!(jump->flags & JUMP_ADDR)) {686label_addr = (sljit_uw)(code + jump->u.label->size);687688if (jump->u.label->size > jump->addr)689jump_addr = (sljit_uw)(code + jump->addr);690} else691label_addr = jump->u.target - (sljit_uw)executable_offset;692693#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)694if ((sljit_sw)(label_addr - (jump_addr + 6)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump_addr + 5)) < HALFWORD_MIN)695return detect_far_jump_type(jump, code_ptr);696#endif /* SLJIT_CONFIG_X86_64 */697698short_jump = (sljit_sw)(label_addr - (jump_addr + 2)) >= -0x80 && (sljit_sw)(label_addr - (jump_addr + 2)) <= 0x7f;699700if (type == SLJIT_JUMP) {701if (short_jump)702*code_ptr++ = JMP_i8;703else704*code_ptr++ = JMP_i32;705} else if (type > SLJIT_JUMP) {706short_jump = 0;707*code_ptr++ = CALL_i32;708} else if (short_jump) {709*code_ptr++ = U8(get_jump_code(type) - 0x10);710} else {711*code_ptr++ = GROUP_0F;712*code_ptr++ = get_jump_code(type);713}714715jump->addr = (sljit_uw)code_ptr;716717if (short_jump) {718jump->flags |= PATCH_MB;719code_ptr += sizeof(sljit_s8);720} else {721jump->flags |= PATCH_MW;722code_ptr += sizeof(sljit_s32);723}724725return 
code_ptr;726}727728static void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)729{730sljit_uw flags = jump->flags;731sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;732sljit_uw jump_addr = jump->addr;733SLJIT_UNUSED_ARG(executable_offset);734735if (SLJIT_UNLIKELY(flags & JUMP_MOV_ADDR)) {736#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)737sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);738#else /* SLJIT_CONFIG_X86_32 */739if (flags & PATCH_MD) {740SLJIT_ASSERT(addr > HALFWORD_MAX);741sljit_unaligned_store_sw((void*)(jump_addr - sizeof(sljit_sw)), (sljit_sw)addr);742return;743}744745if (flags & PATCH_MW) {746addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);747SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);748} else {749SLJIT_ASSERT(addr <= HALFWORD_MAX);750}751sljit_unaligned_store_s32((void*)(jump_addr - sizeof(sljit_s32)), (sljit_s32)addr);752#endif /* !SLJIT_CONFIG_X86_32 */753return;754}755756#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)757if (SLJIT_UNLIKELY(flags & PATCH_MD)) {758SLJIT_ASSERT(!(flags & JUMP_ADDR));759sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);760return;761}762#endif /* SLJIT_CONFIG_X86_64 */763764addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET((sljit_u8*)jump_addr, executable_offset);765766if (flags & PATCH_MB) {767addr -= sizeof(sljit_s8);768SLJIT_ASSERT((sljit_sw)addr <= 0x7f && (sljit_sw)addr >= -0x80);769*(sljit_u8*)jump_addr = U8(addr);770return;771} else if (flags & PATCH_MW) {772addr -= sizeof(sljit_s32);773#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)774sljit_unaligned_store_sw((void*)jump_addr, (sljit_sw)addr);775#else /* !SLJIT_CONFIG_X86_32 */776SLJIT_ASSERT((sljit_sw)addr <= HALFWORD_MAX && (sljit_sw)addr >= HALFWORD_MIN);777sljit_unaligned_store_s32((void*)jump_addr, (sljit_s32)addr);778#endif /* SLJIT_CONFIG_X86_32 */779}780}781782static 
void reduce_code_size(struct sljit_compiler *compiler)783{784struct sljit_label *label;785struct sljit_jump *jump;786sljit_uw next_label_size;787sljit_uw next_jump_addr;788sljit_uw next_min_addr;789sljit_uw size_reduce = 0;790sljit_sw diff;791sljit_uw type;792#if (defined SLJIT_DEBUG && SLJIT_DEBUG)793sljit_uw size_reduce_max;794#endif /* SLJIT_DEBUG */795796label = compiler->labels;797jump = compiler->jumps;798799next_label_size = SLJIT_GET_NEXT_SIZE(label);800next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);801802while (1) {803next_min_addr = next_label_size;804if (next_jump_addr < next_min_addr)805next_min_addr = next_jump_addr;806807if (next_min_addr == SLJIT_MAX_ADDRESS)808break;809810if (next_min_addr == next_label_size) {811label->size -= size_reduce;812813label = label->next;814next_label_size = SLJIT_GET_NEXT_SIZE(label);815}816817if (next_min_addr != next_jump_addr)818continue;819820jump->addr -= size_reduce;821if (!(jump->flags & JUMP_MOV_ADDR)) {822#if (defined SLJIT_DEBUG && SLJIT_DEBUG)823size_reduce_max = size_reduce + (((jump->flags >> TYPE_SHIFT) < SLJIT_JUMP) ? CJUMP_MAX_SIZE : JUMP_MAX_SIZE);824#endif /* SLJIT_DEBUG */825826if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {827if (jump->flags & JUMP_ADDR) {828#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)829if (jump->u.target <= 0xffffffffl)830size_reduce += sizeof(sljit_s32);831#endif /* SLJIT_CONFIG_X86_64 */832} else {833/* Unit size: instruction. 
*/834diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;835if (jump->u.label->size > jump->addr) {836SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);837diff -= (sljit_sw)size_reduce;838}839type = jump->flags >> TYPE_SHIFT;840841#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)842if (type == SLJIT_JUMP) {843if (diff <= 0x7f + 2 && diff >= -0x80 + 2)844size_reduce += JUMP_MAX_SIZE - 2;845else if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)846size_reduce += JUMP_MAX_SIZE - 5;847} else if (type < SLJIT_JUMP) {848if (diff <= 0x7f + 2 && diff >= -0x80 + 2)849size_reduce += CJUMP_MAX_SIZE - 2;850else if (diff <= HALFWORD_MAX + 6 && diff >= HALFWORD_MIN + 6)851size_reduce += CJUMP_MAX_SIZE - 6;852} else {853if (diff <= HALFWORD_MAX + 5 && diff >= HALFWORD_MIN + 5)854size_reduce += JUMP_MAX_SIZE - 5;855}856#else /* !SLJIT_CONFIG_X86_64 */857if (type == SLJIT_JUMP) {858if (diff <= 0x7f + 2 && diff >= -0x80 + 2)859size_reduce += JUMP_MAX_SIZE - 2;860} else if (type < SLJIT_JUMP) {861if (diff <= 0x7f + 2 && diff >= -0x80 + 2)862size_reduce += CJUMP_MAX_SIZE - 2;863}864#endif /* SLJIT_CONFIG_X86_64 */865}866}867868#if (defined SLJIT_DEBUG && SLJIT_DEBUG)869jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;870#endif /* SLJIT_DEBUG */871#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)872} else {873#if (defined SLJIT_DEBUG && SLJIT_DEBUG)874size_reduce_max = size_reduce + 10;875#endif /* SLJIT_DEBUG */876877if (!(jump->flags & JUMP_ADDR)) {878diff = (sljit_sw)jump->u.label->size - (sljit_sw)(jump->addr - 3);879880if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN)881size_reduce += 3;882} else if (jump->u.target <= 0xffffffffl)883size_reduce += (jump->flags & MOV_ADDR_HI) ? 
4 : 5;884885#if (defined SLJIT_DEBUG && SLJIT_DEBUG)886jump->flags |= (size_reduce_max - size_reduce) << JUMP_SIZE_SHIFT;887#endif /* SLJIT_DEBUG */888#endif /* SLJIT_CONFIG_X86_64 */889}890891jump = jump->next;892next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);893}894895compiler->size -= size_reduce;896}897898SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)899{900struct sljit_memory_fragment *buf;901sljit_u8 *code;902sljit_u8 *code_ptr;903sljit_u8 *buf_ptr;904sljit_u8 *buf_end;905sljit_u8 len;906sljit_sw executable_offset;907#if (defined SLJIT_DEBUG && SLJIT_DEBUG)908sljit_uw addr;909#endif /* SLJIT_DEBUG */910911struct sljit_label *label;912struct sljit_jump *jump;913struct sljit_const *const_;914915CHECK_ERROR_PTR();916CHECK_PTR(check_sljit_generate_code(compiler));917918reduce_code_size(compiler);919920/* Second code generation pass. */921code = (sljit_u8*)allocate_executable_memory(compiler->size, options, exec_allocator_data, &executable_offset);922PTR_FAIL_WITH_EXEC_IF(code);923924reverse_buf(compiler);925buf = compiler->buf;926927code_ptr = code;928label = compiler->labels;929jump = compiler->jumps;930const_ = compiler->consts;931932do {933buf_ptr = buf->memory;934buf_end = buf_ptr + buf->used_size;935do {936len = *buf_ptr++;937SLJIT_ASSERT(len > 0);938if (len < SLJIT_INST_CONST) {939/* The code is already generated. 
*/940SLJIT_MEMCPY(code_ptr, buf_ptr, len);941code_ptr += len;942buf_ptr += len;943} else {944switch (len) {945case SLJIT_INST_LABEL:946label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);947label->size = (sljit_uw)(code_ptr - code);948label = label->next;949break;950case SLJIT_INST_JUMP:951#if (defined SLJIT_DEBUG && SLJIT_DEBUG)952addr = (sljit_uw)code_ptr;953#endif /* SLJIT_DEBUG */954if (!(jump->flags & SLJIT_REWRITABLE_JUMP))955code_ptr = detect_near_jump_type(jump, code_ptr, code, executable_offset);956else {957#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)958code_ptr = detect_far_jump_type(jump, code_ptr, executable_offset);959#else /* !SLJIT_CONFIG_X86_32 */960code_ptr = detect_far_jump_type(jump, code_ptr);961#endif /* SLJIT_CONFIG_X86_32 */962}963964SLJIT_ASSERT((sljit_uw)code_ptr - addr <= ((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f));965jump = jump->next;966break;967case SLJIT_INST_MOV_ADDR:968#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)969code_ptr = generate_mov_addr_code(jump, code_ptr, code, executable_offset);970#endif /* SLJIT_CONFIG_X86_64 */971jump->addr = (sljit_uw)code_ptr;972jump = jump->next;973break;974default:975SLJIT_ASSERT(len == SLJIT_INST_CONST);976const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);977const_ = const_->next;978break;979}980}981} while (buf_ptr < buf_end);982983SLJIT_ASSERT(buf_ptr == buf_end);984buf = buf->next;985} while (buf);986987SLJIT_ASSERT(!label);988SLJIT_ASSERT(!jump);989SLJIT_ASSERT(!const_);990SLJIT_ASSERT(code_ptr <= code + compiler->size);991992jump = compiler->jumps;993while (jump) {994generate_jump_or_mov_addr(jump, executable_offset);995jump = jump->next;996}997998compiler->error = SLJIT_ERR_COMPILED;999compiler->executable_offset = executable_offset;1000compiler->executable_size = (sljit_uw)(code_ptr - code);10011002code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);10031004SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, 
executable_offset), 1);1005return (void*)code;1006}10071008SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)1009{1010switch (feature_type) {1011case SLJIT_HAS_FPU:1012#ifdef SLJIT_IS_FPU_AVAILABLE1013return (SLJIT_IS_FPU_AVAILABLE) != 0;1014#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)1015if (cpu_feature_list == 0)1016get_cpu_features();1017return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;1018#else /* SLJIT_DETECT_SSE2 */1019return 1;1020#endif /* SLJIT_DETECT_SSE2 */10211022#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1023case SLJIT_HAS_VIRTUAL_REGISTERS:1024return 1;1025#endif /* SLJIT_CONFIG_X86_32 */10261027case SLJIT_HAS_CLZ:1028if (cpu_feature_list == 0)1029get_cpu_features();10301031return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;10321033case SLJIT_HAS_CTZ:1034if (cpu_feature_list == 0)1035get_cpu_features();10361037return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;10381039case SLJIT_HAS_CMOV:1040if (cpu_feature_list == 0)1041get_cpu_features();1042return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;10431044case SLJIT_HAS_REV:1045case SLJIT_HAS_ROT:1046case SLJIT_HAS_PREFETCH:1047case SLJIT_HAS_COPY_F32:1048case SLJIT_HAS_COPY_F64:1049case SLJIT_HAS_ATOMIC:1050case SLJIT_HAS_MEMORY_BARRIER:1051return 1;10521053#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE1054case SLJIT_HAS_AVX:1055if (cpu_feature_list == 0)1056get_cpu_features();1057return (cpu_feature_list & CPU_FEATURE_AVX) != 0;1058case SLJIT_HAS_AVX2:1059if (cpu_feature_list == 0)1060get_cpu_features();1061return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;1062case SLJIT_HAS_SIMD:1063if (cpu_feature_list == 0)1064get_cpu_features();1065return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;1066#endif /* SLJIT_IS_FPU_AVAILABLE */1067default:1068return 0;1069}1070}10711072SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)1073{1074switch (type) {1075case SLJIT_ORDERED_EQUAL:1076case 
SLJIT_UNORDERED_OR_NOT_EQUAL:1077return 2;1078}10791080return 0;1081}10821083/* --------------------------------------------------------------------- */1084/* Operators */1085/* --------------------------------------------------------------------- */10861087#define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))10881089#define BINARY_IMM32(op_imm, immw, arg, argw) \1090do { \1091inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \1092FAIL_IF(!inst); \1093*(inst + 1) |= (op_imm); \1094} while (0)10951096#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)10971098#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \1099do { \1100if (IS_HALFWORD(immw) || compiler->mode32) { \1101BINARY_IMM32(op_imm, immw, arg, argw); \1102} \1103else { \1104FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, immw)); \1105inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(arg) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \1106FAIL_IF(!inst); \1107*inst = (op_mr); \1108} \1109} while (0)11101111#define BINARY_EAX_IMM(op_eax_imm, immw) \1112FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? 
REX_W : 0, (op_eax_imm), immw))11131114#else /* !SLJIT_CONFIG_X86_64 */11151116#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \1117BINARY_IMM32(op_imm, immw, arg, argw)11181119#define BINARY_EAX_IMM(op_eax_imm, immw) \1120FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))11211122#endif /* SLJIT_CONFIG_X86_64 */11231124static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)1125{1126sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);1127FAIL_IF(!inst);1128INC_SIZE(1);1129*inst = byte;1130return SLJIT_SUCCESS;1131}11321133static sljit_s32 emit_mov(struct sljit_compiler *compiler,1134sljit_s32 dst, sljit_sw dstw,1135sljit_s32 src, sljit_sw srcw);11361137#define EMIT_MOV(compiler, dst, dstw, src, srcw) \1138FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));11391140static sljit_s32 emit_groupf(struct sljit_compiler *compiler,1141sljit_uw op,1142sljit_s32 dst, sljit_s32 src, sljit_sw srcw);11431144static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,1145sljit_uw op,1146sljit_s32 dst, sljit_s32 src, sljit_sw srcw);11471148static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,1149sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);11501151static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,1152sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);11531154static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,1155sljit_s32 src1, sljit_sw src1w,1156sljit_s32 src2, sljit_sw src2w);11571158static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,1159sljit_s32 dst_reg,1160sljit_s32 src, sljit_sw srcw);11611162static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)1163{1164#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)1165/* Emit endbr32/endbr64 when CET is enabled. 
*/1166sljit_u8 *inst;1167inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);1168FAIL_IF(!inst);1169INC_SIZE(4);1170inst[0] = GROUP_F3;1171inst[1] = GROUP_0F;1172inst[2] = 0x1e;1173#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1174inst[3] = 0xfb;1175#else /* !SLJIT_CONFIG_X86_32 */1176inst[3] = 0xfa;1177#endif /* SLJIT_CONFIG_X86_32 */1178#else /* !SLJIT_CONFIG_X86_CET */1179SLJIT_UNUSED_ARG(compiler);1180#endif /* SLJIT_CONFIG_X86_CET */1181return SLJIT_SUCCESS;1182}11831184#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)11851186static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)1187{1188sljit_u8 *inst;1189sljit_s32 size;11901191#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1192size = 5;1193#else1194size = 4;1195#endif11961197inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1198FAIL_IF(!inst);1199INC_SIZE(size);1200*inst++ = GROUP_F3;1201#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1202*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);1203#endif1204inst[0] = GROUP_0F;1205inst[1] = 0x1e;1206#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1207inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);1208#else1209inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);1210#endif1211return SLJIT_SUCCESS;1212}12131214static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)1215{1216sljit_u8 *inst;1217sljit_s32 size;12181219#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1220size = 5;1221#else1222size = 4;1223#endif12241225inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1226FAIL_IF(!inst);1227INC_SIZE(size);1228*inst++ = GROUP_F3;1229#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1230*inst++ = REX_W | (reg_map[reg] <= 7 ? 
0 : REX_B);1231#endif1232inst[0] = GROUP_0F;1233inst[1] = 0xae;1234inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);1235return SLJIT_SUCCESS;1236}12371238#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */12391240static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)1241{1242#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)1243return _get_ssp() != 0;1244#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */1245return 0;1246#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */1247}12481249static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,1250sljit_s32 src, sljit_sw srcw)1251{1252#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)1253sljit_u8 *inst, *jz_after_cmp_inst;1254sljit_uw size_jz_after_cmp_inst;12551256sljit_uw size_before_rdssp_inst = compiler->size;12571258/* Generate "RDSSP TMP_REG1". */1259FAIL_IF(emit_rdssp(compiler, TMP_REG1));12601261/* Load return address on shadow stack into TMP_REG1. */1262EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);12631264/* Compare return address against TMP_REG1. */1265FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));12661267/* Generate JZ to skip shadow stack ajdustment when shadow1268stack matches normal stack. */1269inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1270FAIL_IF(!inst);1271INC_SIZE(2);1272*inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;1273size_jz_after_cmp_inst = compiler->size;1274jz_after_cmp_inst = inst;12751276#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1277/* REX_W is not necessary. */1278compiler->mode32 = 1;1279#endif1280/* Load 1 into TMP_REG1. */1281EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);12821283/* Generate "INCSSP TMP_REG1". */1284FAIL_IF(emit_incssp(compiler, TMP_REG1));12851286/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. 
*/1287inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1288FAIL_IF(!inst);1289INC_SIZE(2);1290inst[0] = JMP_i8;1291inst[1] = size_before_rdssp_inst - compiler->size;12921293*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;1294#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */1295SLJIT_UNUSED_ARG(compiler);1296SLJIT_UNUSED_ARG(src);1297SLJIT_UNUSED_ARG(srcw);1298#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */1299return SLJIT_SUCCESS;1300}13011302#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1303#include "sljitNativeX86_32.c"1304#else1305#include "sljitNativeX86_64.c"1306#endif13071308static sljit_s32 emit_mov(struct sljit_compiler *compiler,1309sljit_s32 dst, sljit_sw dstw,1310sljit_s32 src, sljit_sw srcw)1311{1312sljit_u8* inst;13131314if (FAST_IS_REG(src)) {1315inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);1316FAIL_IF(!inst);1317*inst = MOV_rm_r;1318return SLJIT_SUCCESS;1319}13201321if (src == SLJIT_IMM) {1322if (FAST_IS_REG(dst)) {1323#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1324return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);1325#else1326if (!compiler->mode32) {1327if (NOT_HALFWORD(srcw))1328return emit_load_imm64(compiler, dst, srcw);1329}1330else1331return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);1332#endif1333}1334#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1335if (!compiler->mode32 && NOT_HALFWORD(srcw)) {1336/* Immediate to memory move. Only SLJIT_MOV operation copies1337an immediate directly into memory so TMP_REG1 can be used. 
*/1338FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));1339inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);1340FAIL_IF(!inst);1341*inst = MOV_rm_r;1342return SLJIT_SUCCESS;1343}1344#endif1345inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);1346FAIL_IF(!inst);1347*inst = MOV_rm_i32;1348return SLJIT_SUCCESS;1349}1350if (FAST_IS_REG(dst)) {1351inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);1352FAIL_IF(!inst);1353*inst = MOV_r_rm;1354return SLJIT_SUCCESS;1355}13561357/* Memory to memory move. Only SLJIT_MOV operation copies1358data from memory to memory so TMP_REG1 can be used. */1359inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);1360FAIL_IF(!inst);1361*inst = MOV_r_rm;1362inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);1363FAIL_IF(!inst);1364*inst = MOV_rm_r;1365return SLJIT_SUCCESS;1366}13671368static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,1369sljit_s32 dst_reg,1370sljit_s32 src, sljit_sw srcw)1371{1372sljit_u8* inst;1373sljit_uw size;13741375SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);13761377inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1378FAIL_IF(!inst);1379INC_SIZE(2);1380inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);13811382size = compiler->size;1383EMIT_MOV(compiler, dst_reg, 0, src, srcw);13841385inst[1] = U8(compiler->size - size);1386return SLJIT_SUCCESS;1387}13881389SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)1390{1391sljit_u8 *inst;1392#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1393sljit_uw size;1394#endif13951396CHECK_ERROR();1397CHECK(check_sljit_emit_op0(compiler, op));13981399switch (GET_OPCODE(op)) {1400case SLJIT_BREAKPOINT:1401return emit_byte(compiler, INT3);1402case SLJIT_NOP:1403return emit_byte(compiler, NOP);1404case SLJIT_LMUL_UW:1405case SLJIT_LMUL_SW:1406case SLJIT_DIVMOD_UW:1407case SLJIT_DIVMOD_SW:1408case 
SLJIT_DIV_UW:1409case SLJIT_DIV_SW:1410#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1411#ifdef _WIN641412SLJIT_ASSERT(1413reg_map[SLJIT_R0] == 01414&& reg_map[SLJIT_R1] == 21415&& reg_map[TMP_REG1] > 7);1416#else1417SLJIT_ASSERT(1418reg_map[SLJIT_R0] == 01419&& reg_map[SLJIT_R1] < 71420&& reg_map[TMP_REG1] == 2);1421#endif1422compiler->mode32 = op & SLJIT_32;1423#endif1424SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);14251426op = GET_OPCODE(op);1427if ((op | 0x2) == SLJIT_DIV_UW) {1428#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)1429EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);1430inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);1431#else1432inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);1433#endif1434FAIL_IF(!inst);1435*inst = XOR_r_rm;1436}14371438if ((op | 0x2) == SLJIT_DIV_SW) {1439#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)1440EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);1441#endif14421443#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1444FAIL_IF(emit_byte(compiler, CDQ));1445#else1446if (!compiler->mode32) {1447inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1448FAIL_IF(!inst);1449INC_SIZE(2);1450inst[0] = REX_W;1451inst[1] = CDQ;1452} else1453FAIL_IF(emit_byte(compiler, CDQ));1454#endif1455}14561457#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1458inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);1459FAIL_IF(!inst);1460INC_SIZE(2);1461inst[0] = GROUP_F7;1462inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);1463#else /* !SLJIT_CONFIG_X86_32 */1464#ifdef _WIN641465size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;1466#else /* !_WIN64 */1467size = (!compiler->mode32) ? 
3 : 2;1468#endif /* _WIN64 */1469inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1470FAIL_IF(!inst);1471INC_SIZE(size);1472#ifdef _WIN641473if (!compiler->mode32)1474*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);1475else if (op >= SLJIT_DIVMOD_UW)1476*inst++ = REX_B;1477inst[0] = GROUP_F7;1478inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);1479#else /* !_WIN64 */1480if (!compiler->mode32)1481*inst++ = REX_W;1482inst[0] = GROUP_F7;1483inst[1] = MOD_REG | reg_map[SLJIT_R1];1484#endif /* _WIN64 */1485#endif /* SLJIT_CONFIG_X86_32 */1486switch (op) {1487case SLJIT_LMUL_UW:1488inst[1] |= MUL;1489break;1490case SLJIT_LMUL_SW:1491inst[1] |= IMUL;1492break;1493case SLJIT_DIVMOD_UW:1494case SLJIT_DIV_UW:1495inst[1] |= DIV;1496break;1497case SLJIT_DIVMOD_SW:1498case SLJIT_DIV_SW:1499inst[1] |= IDIV;1500break;1501}1502#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)1503if (op <= SLJIT_DIVMOD_SW)1504EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);1505#else1506if (op >= SLJIT_DIV_UW)1507EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);1508#endif1509break;1510case SLJIT_MEMORY_BARRIER:1511inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);1512FAIL_IF(!inst);1513INC_SIZE(3);1514inst[0] = GROUP_0F;1515inst[1] = 0xae;1516inst[2] = 0xf0;1517return SLJIT_SUCCESS;1518case SLJIT_ENDBR:1519return emit_endbranch(compiler);1520case SLJIT_SKIP_FRAMES_BEFORE_RETURN:1521return skip_frames_before_return(compiler);1522}15231524return SLJIT_SUCCESS;1525}15261527static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,1528sljit_s32 dst, sljit_sw dstw,1529sljit_s32 src, sljit_sw srcw)1530{1531sljit_u8* inst;1532sljit_s32 dst_r;15331534#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1535compiler->mode32 = 0;1536#endif15371538if (src == SLJIT_IMM) {1539if (FAST_IS_REG(dst)) {1540#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1541return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], 
srcw);1542#else1543inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);1544FAIL_IF(!inst);1545*inst = MOV_rm_i32;1546return SLJIT_SUCCESS;1547#endif1548}1549inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);1550FAIL_IF(!inst);1551*inst = MOV_rm8_i8;1552return SLJIT_SUCCESS;1553}15541555dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;15561557if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {1558#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1559if (reg_map[src] >= 4) {1560SLJIT_ASSERT(dst_r == TMP_REG1);1561EMIT_MOV(compiler, TMP_REG1, 0, src, 0);1562} else1563dst_r = src;1564#else1565dst_r = src;1566#endif1567} else {1568#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1569if (FAST_IS_REG(src) && reg_map[src] >= 4) {1570/* Both src and dst are registers. */1571SLJIT_ASSERT(FAST_IS_REG(dst));15721573if (src == dst && !sign) {1574inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);1575FAIL_IF(!inst);1576*(inst + 1) |= AND;1577return SLJIT_SUCCESS;1578}15791580EMIT_MOV(compiler, TMP_REG1, 0, src, 0);1581src = TMP_REG1;1582srcw = 0;1583}1584#endif /* !SLJIT_CONFIG_X86_32 */15851586/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */1587FAIL_IF(emit_groupf(compiler, sign ? 
MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));1588}15891590if (dst & SLJIT_MEM) {1591inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);1592FAIL_IF(!inst);1593*inst = MOV_rm8_r8;1594}15951596return SLJIT_SUCCESS;1597}15981599static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,1600sljit_s32 src, sljit_sw srcw)1601{1602sljit_u8* inst;16031604#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1605compiler->mode32 = 1;1606#endif16071608inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);1609FAIL_IF(!inst);1610inst[0] = GROUP_0F;1611inst[1] = PREFETCH;16121613if (op == SLJIT_PREFETCH_L1)1614inst[2] |= (1 << 3);1615else if (op == SLJIT_PREFETCH_L2)1616inst[2] |= (2 << 3);1617else if (op == SLJIT_PREFETCH_L3)1618inst[2] |= (3 << 3);16191620return SLJIT_SUCCESS;1621}16221623static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,1624sljit_s32 dst, sljit_sw dstw,1625sljit_s32 src, sljit_sw srcw)1626{1627sljit_u8* inst;1628sljit_s32 dst_r;16291630#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1631compiler->mode32 = 0;1632#endif16331634if (src == SLJIT_IMM) {1635if (FAST_IS_REG(dst)) {1636#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1637return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);1638#else1639inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);1640FAIL_IF(!inst);1641*inst = MOV_rm_i32;1642return SLJIT_SUCCESS;1643#endif1644}1645inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);1646FAIL_IF(!inst);1647*inst = MOV_rm_i32;1648return SLJIT_SUCCESS;1649}16501651dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;16521653if ((dst & SLJIT_MEM) && FAST_IS_REG(src))1654dst_r = src;1655else1656FAIL_IF(emit_groupf(compiler, sign ? 
MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));16571658if (dst & SLJIT_MEM) {1659inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);1660FAIL_IF(!inst);1661*inst = MOV_rm_r;1662}16631664return SLJIT_SUCCESS;1665}16661667static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,1668sljit_s32 dst, sljit_sw dstw,1669sljit_s32 src, sljit_sw srcw)1670{1671sljit_u8* inst;16721673if (dst == src && dstw == srcw) {1674/* Same input and output */1675inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);1676FAIL_IF(!inst);1677inst[0] = GROUP_F7;1678inst[1] |= opcode;1679return SLJIT_SUCCESS;1680}16811682if (FAST_IS_REG(dst)) {1683EMIT_MOV(compiler, dst, 0, src, srcw);1684inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);1685FAIL_IF(!inst);1686inst[0] = GROUP_F7;1687inst[1] |= opcode;1688return SLJIT_SUCCESS;1689}16901691EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);1692inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);1693FAIL_IF(!inst);1694inst[0] = GROUP_F7;1695inst[1] |= opcode;1696EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);1697return SLJIT_SUCCESS;1698}16991700#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1701static const sljit_sw emit_clz_arg = 32 + 31;1702static const sljit_sw emit_ctz_arg = 32;1703#endif17041705static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,1706sljit_s32 dst, sljit_sw dstw,1707sljit_s32 src, sljit_sw srcw)1708{1709sljit_u8* inst;1710sljit_s32 dst_r;1711sljit_sw max;17121713SLJIT_ASSERT(cpu_feature_list != 0);17141715dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;17161717if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {1718FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));17191720if (dst & SLJIT_MEM)1721EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);1722return SLJIT_SUCCESS;1723}17241725FAIL_IF(emit_groupf(compiler, is_clz ? 
BSR_r_rm : BSF_r_rm, dst_r, src, srcw));17261727#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1728max = is_clz ? (32 + 31) : 32;17291730if (cpu_feature_list & CPU_FEATURE_CMOV) {1731if (dst_r != TMP_REG1) {1732EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);1733inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);1734}1735else1736inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);17371738FAIL_IF(!inst);1739inst[0] = GROUP_0F;1740inst[1] = CMOVE_r_rm;1741}1742else1743FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));17441745if (is_clz) {1746inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);1747FAIL_IF(!inst);1748*(inst + 1) |= XOR;1749}1750#else1751if (is_clz)1752max = compiler->mode32 ? (32 + 31) : (64 + 63);1753else1754max = compiler->mode32 ? 32 : 64;17551756if (cpu_feature_list & CPU_FEATURE_CMOV) {1757EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);1758FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));1759} else1760FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));17611762if (is_clz) {1763inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);1764FAIL_IF(!inst);1765*(inst + 1) |= XOR;1766}1767#endif17681769if (dst & SLJIT_MEM)1770EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);1771return SLJIT_SUCCESS;1772}17731774static sljit_s32 emit_bswap(struct sljit_compiler *compiler,1775sljit_s32 op,1776sljit_s32 dst, sljit_sw dstw,1777sljit_s32 src, sljit_sw srcw)1778{1779sljit_u8 *inst;1780sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_REG1;1781sljit_uw size;1782#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1783sljit_u8 rex = 0;1784#else /* !SLJIT_CONFIG_X86_64 */1785sljit_s32 dst_is_ereg = op & SLJIT_32;1786#endif /* SLJIT_CONFIG_X86_64 */17871788#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1789if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)1790compiler->mode32 = 1;1791#else /* !SLJIT_CONFIG_X86_64 */1792op &= ~SLJIT_32;1793#endif /* SLJIT_CONFIG_X86_64 */17941795if (src != dst_r) {1796/* Only the lower 16 bit is read for eregs. */1797if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)1798FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));1799else1800EMIT_MOV(compiler, dst_r, 0, src, srcw);1801}18021803size = 2;1804#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1805if (!compiler->mode32)1806rex = REX_W;18071808if (reg_map[dst_r] >= 8)1809rex |= REX_B;18101811if (rex != 0)1812size++;1813#endif /* SLJIT_CONFIG_X86_64 */18141815inst = (sljit_u8*)ensure_buf(compiler, 1 + size);1816FAIL_IF(!inst);1817INC_SIZE(size);18181819#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1820if (rex != 0)1821*inst++ = rex;18221823inst[0] = GROUP_0F;1824inst[1] = BSWAP_r | reg_lmap[dst_r];1825#else /* !SLJIT_CONFIG_X86_64 */1826inst[0] = GROUP_0F;1827inst[1] = BSWAP_r | reg_map[dst_r];1828#endif /* SLJIT_CONFIG_X86_64 */18291830if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {1831#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1832size = compiler->mode32 ? 
16 : 48;1833#else /* !SLJIT_CONFIG_X86_64 */1834size = 16;1835#endif /* SLJIT_CONFIG_X86_64 */18361837inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);1838FAIL_IF(!inst);1839if (op == SLJIT_REV_U16)1840inst[1] |= SHR;1841else1842inst[1] |= SAR;1843}18441845if (dst & SLJIT_MEM) {1846#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1847if (dst_is_ereg)1848op = SLJIT_REV;1849#endif /* SLJIT_CONFIG_X86_32 */1850if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)1851return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);18521853return emit_mov(compiler, dst, dstw, TMP_REG1, 0);1854}18551856#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1857if (op == SLJIT_REV_S32) {1858compiler->mode32 = 0;1859inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);1860FAIL_IF(!inst);1861*inst = MOVSXD_r_rm;1862}1863#endif /* SLJIT_CONFIG_X86_64 */18641865return SLJIT_SUCCESS;1866}18671868SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,1869sljit_s32 dst, sljit_sw dstw,1870sljit_s32 src, sljit_sw srcw)1871{1872#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1873sljit_s32 dst_is_ereg = 0;1874#else /* !SLJIT_CONFIG_X86_32 */1875sljit_s32 op_flags = GET_ALL_FLAGS(op);1876#endif /* SLJIT_CONFIG_X86_32 */18771878CHECK_ERROR();1879CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));1880ADJUST_LOCAL_OFFSET(dst, dstw);1881ADJUST_LOCAL_OFFSET(src, srcw);18821883CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);1884CHECK_EXTRA_REGS(src, srcw, (void)0);1885#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1886compiler->mode32 = op_flags & SLJIT_32;1887#endif /* SLJIT_CONFIG_X86_64 */18881889op = GET_OPCODE(op);18901891if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {1892#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1893compiler->mode32 = 0;1894#endif /* SLJIT_CONFIG_X86_64 */18951896if (FAST_IS_REG(src) && src == dst) {1897if (!TYPE_CAST_NEEDED(op))1898return 
SLJIT_SUCCESS;1899}19001901#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1902if (op_flags & SLJIT_32) {1903if (src & SLJIT_MEM) {1904if (op == SLJIT_MOV_S32)1905op = SLJIT_MOV_U32;1906}1907else if (src == SLJIT_IMM) {1908if (op == SLJIT_MOV_U32)1909op = SLJIT_MOV_S32;1910}1911}1912#endif /* SLJIT_CONFIG_X86_64 */19131914if (src == SLJIT_IMM) {1915switch (op) {1916case SLJIT_MOV_U8:1917srcw = (sljit_u8)srcw;1918break;1919case SLJIT_MOV_S8:1920srcw = (sljit_s8)srcw;1921break;1922case SLJIT_MOV_U16:1923srcw = (sljit_u16)srcw;1924break;1925case SLJIT_MOV_S16:1926srcw = (sljit_s16)srcw;1927break;1928#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1929case SLJIT_MOV_U32:1930srcw = (sljit_u32)srcw;1931break;1932case SLJIT_MOV_S32:1933srcw = (sljit_s32)srcw;1934break;1935#endif /* SLJIT_CONFIG_X86_64 */1936}1937#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1938if (SLJIT_UNLIKELY(dst_is_ereg))1939return emit_mov(compiler, dst, dstw, src, srcw);1940#endif /* SLJIT_CONFIG_X86_32 */1941}19421943#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1944if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {1945SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));1946dst = TMP_REG1;1947}1948#endif /* SLJIT_CONFIG_X86_32 */19491950switch (op) {1951case SLJIT_MOV:1952case SLJIT_MOV_P:1953#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1954case SLJIT_MOV_U32:1955case SLJIT_MOV_S32:1956case SLJIT_MOV32:1957#endif /* SLJIT_CONFIG_X86_32 */1958EMIT_MOV(compiler, dst, dstw, src, srcw);1959break;1960case SLJIT_MOV_U8:1961FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));1962break;1963case SLJIT_MOV_S8:1964FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));1965break;1966case SLJIT_MOV_U16:1967FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));1968break;1969case SLJIT_MOV_S16:1970FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, 
srcw));1971break;1972#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)1973case SLJIT_MOV_U32:1974FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));1975break;1976case SLJIT_MOV_S32:1977FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));1978break;1979case SLJIT_MOV32:1980compiler->mode32 = 1;1981EMIT_MOV(compiler, dst, dstw, src, srcw);1982compiler->mode32 = 0;1983break;1984#endif /* SLJIT_CONFIG_X86_64 */1985}19861987#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)1988if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)1989return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);1990#endif /* SLJIT_CONFIG_X86_32 */1991return SLJIT_SUCCESS;1992}19931994switch (op) {1995case SLJIT_CLZ:1996case SLJIT_CTZ:1997return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);1998case SLJIT_REV:1999case SLJIT_REV_U16:2000case SLJIT_REV_S16:2001case SLJIT_REV_U32:2002case SLJIT_REV_S32:2003#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)2004if (dst_is_ereg)2005op |= SLJIT_32;2006#endif /* SLJIT_CONFIG_X86_32 */2007return emit_bswap(compiler, op, dst, dstw, src, srcw);2008}20092010return SLJIT_SUCCESS;2011}20122013static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,2014sljit_u32 op_types,2015sljit_s32 dst, sljit_sw dstw,2016sljit_s32 src1, sljit_sw src1w,2017sljit_s32 src2, sljit_sw src2w)2018{2019sljit_u8* inst;2020sljit_u8 op_eax_imm = U8(op_types >> 24);2021sljit_u8 op_rm = U8((op_types >> 16) & 0xff);2022sljit_u8 op_mr = U8((op_types >> 8) & 0xff);2023sljit_u8 op_imm = U8(op_types & 0xff);20242025if (dst == src1 && dstw == src1w) {2026if (src2 == SLJIT_IMM) {2027#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2028if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {2029#else2030if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {2031#endif2032BINARY_EAX_IMM(op_eax_imm, src2w);2033}2034else {2035BINARY_IMM(op_imm, op_mr, src2w, dst, 
dstw);2036}2037}2038else if (FAST_IS_REG(dst)) {2039inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);2040FAIL_IF(!inst);2041*inst = op_rm;2042}2043else if (FAST_IS_REG(src2)) {2044/* Special exception for sljit_emit_op_flags. */2045inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);2046FAIL_IF(!inst);2047*inst = op_mr;2048}2049else {2050EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);2051inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);2052FAIL_IF(!inst);2053*inst = op_mr;2054}2055return SLJIT_SUCCESS;2056}20572058/* Only for cumulative operations. */2059if (dst == src2 && dstw == src2w) {2060if (src1 == SLJIT_IMM) {2061#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2062if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {2063#else2064if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {2065#endif2066BINARY_EAX_IMM(op_eax_imm, src1w);2067}2068else {2069BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);2070}2071}2072else if (FAST_IS_REG(dst)) {2073inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);2074FAIL_IF(!inst);2075*inst = op_rm;2076}2077else if (FAST_IS_REG(src1)) {2078inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);2079FAIL_IF(!inst);2080*inst = op_mr;2081}2082else {2083EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2084inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);2085FAIL_IF(!inst);2086*inst = op_mr;2087}2088return SLJIT_SUCCESS;2089}20902091/* General version. */2092if (FAST_IS_REG(dst)) {2093EMIT_MOV(compiler, dst, 0, src1, src1w);2094if (src2 == SLJIT_IMM) {2095BINARY_IMM(op_imm, op_mr, src2w, dst, 0);2096}2097else {2098inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);2099FAIL_IF(!inst);2100*inst = op_rm;2101}2102}2103else {2104/* This version requires less memory writing. 
*/2105EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);2106if (src2 == SLJIT_IMM) {2107BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);2108}2109else {2110inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);2111FAIL_IF(!inst);2112*inst = op_rm;2113}2114EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);2115}21162117return SLJIT_SUCCESS;2118}21192120static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,2121sljit_u32 op_types,2122sljit_s32 dst, sljit_sw dstw,2123sljit_s32 src1, sljit_sw src1w,2124sljit_s32 src2, sljit_sw src2w)2125{2126sljit_u8* inst;2127sljit_u8 op_eax_imm = U8(op_types >> 24);2128sljit_u8 op_rm = U8((op_types >> 16) & 0xff);2129sljit_u8 op_mr = U8((op_types >> 8) & 0xff);2130sljit_u8 op_imm = U8(op_types & 0xff);21312132if (dst == src1 && dstw == src1w) {2133if (src2 == SLJIT_IMM) {2134#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)2135if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {2136#else2137if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {2138#endif2139BINARY_EAX_IMM(op_eax_imm, src2w);2140}2141else {2142BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);2143}2144}2145else if (FAST_IS_REG(dst)) {2146inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);2147FAIL_IF(!inst);2148*inst = op_rm;2149}2150else if (FAST_IS_REG(src2)) {2151inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);2152FAIL_IF(!inst);2153*inst = op_mr;2154}2155else {2156EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);2157inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);2158FAIL_IF(!inst);2159*inst = op_mr;2160}2161return SLJIT_SUCCESS;2162}21632164/* General version. 
*/
	/* dst differs from src1: build the result directly in dst when it is a
	   register and not aliased by src2, otherwise work in TMP_REG1. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Emits a signed multiply (IMUL). Immediates use the IMUL r, r/m, imm8/imm32
   forms; on 64 bit, immediates that do not fit in 32 bits are first loaded
   into TMP_REG2. A memory destination receives the result via TMP_REG1. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && src2 != SLJIT_IMM) {
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	} else if (dst_r == src2 && src1 != SLJIT_IMM) {
		/* Multiplication is commutative, so the operands can be swapped. */
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
	} else if (src1 == SLJIT_IMM) {
		if (src2 == SLJIT_IMM) {
			/* Both immediate: materialize src2 first, then multiply by src1. */
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* Sign-extended 8 bit immediate form. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src1w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			/* 32 bit immediate form (sign extended on 64 bit). */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* Immediate does not fit in 32 bits: load it into TMP_REG2. */
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	}
	else if (src2 == SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src2w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		} else {
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	} else {
		/* Neither argument is immediate. */
		/* If src2 addresses through dst_r, loading src1 into dst_r first
		   would corrupt the address; fall back to TMP_REG1. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	}

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

/* Tries to emit an addition as a single LEA (flag-free add). Returns
   SLJIT_ERR_UNSUPPORTED when no LEA addressing form matches the operands,
   in which case the caller falls back to the normal ALU path. */
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases are better handled by the normal (ALU) path. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			/* LEA dst, [src1 + src2] */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return
SLJIT_ERR_UNSUPPORTED;
}

/* Emits a CMP (compare, flags only) between src1 and src2. Chooses the
   shortest encoding: the EAX/imm32 short form, reg/imm, reg/mem, or loads
   src1 into TMP_REG1 when both operands are in memory or immediate. */
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
		/* Short CMP EAX, imm32 form. */
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
		/* Reversed direction: CMP r/m, r. */
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 == SLJIT_IMM) {
		if (src1 == SLJIT_IMM) {
			/* Both immediate: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

/* Emits a TEST (bitwise AND, flags only) between src1 and src2. TEST has no
   sign-extended imm8 form, so on 64 bit a non-halfword immediate must be
   loaded into a scratch register first. */
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
		/* Short TEST EAX, imm32 form. */
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
#endif
		/* TEST is symmetric, so the operands can be swapped. */
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (src1 != SLJIT_IMM) {
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			} else {
				/* TMP_REG1 may be needed for the memory address of src1. */
				FAIL_IF(emit_load_imm64(compiler, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, src2w));
				inst = emit_x86_instruction(compiler, 1, FAST_IS_REG(src1) ? TMP_REG2 : TMP_REG1, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (src2 != SLJIT_IMM) {
		if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	/* Both operands are in memory (or src2 is immediate): stage src1. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

/* Emits a shift/rotate (opcode selected by 'mode'). x86 variable shifts use
   CL implicitly, so ECX (SLJIT_PREF_SHIFT_REG) must be juggled when it is
   not already the shift count operand. */
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 mode32;
#endif
	sljit_u8* inst;

	if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
		/* The count is already encodable (immediate or in CL). */
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			inst[1] |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* Shift TMP_REG1 by CL, then move the result into ECX. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			inst[1] |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			inst[1] |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1,
src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* The destination is ECX itself: compute in TMP_REG1, move last. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
		return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}

	if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		/* Save ECX in TMP_REG1 (full register width on 64 bit), load the
		   count into ECX, shift, then restore ECX. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		mode32 = compiler->mode32;
		compiler->mode32 = 0;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = mode32;
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = mode32;
#endif
		return SLJIT_SUCCESS;
	}

	/* This case is complex since ecx itself may be used for
	addressing, and this case must be supported as well. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On 32 bit there is no second scratch register: spill ECX to the stack. */
	EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
#else /* !SLJIT_CONFIG_X86_32 */
	mode32 = compiler->mode32;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
	compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */

	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= mode;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#else
	compiler->mode32 = 0;
	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
	compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */

	if (dst != TMP_REG1)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

/* Wrapper around emit_shift that also guarantees the status flags are set
   when requested: the CPU leaves flags untouched for a shift count of 0,
   so a zero count is replaced by a MOV or an OR with 0. */
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		src2w &= compiler->mode32 ? 0x1f : 0x3f;
#else /* !SLJIT_CONFIG_X86_64 */
		src2w &= 0x1f;
#endif /* SLJIT_CONFIG_X86_64 */
		if (src2w != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	/* Variable count with flags: a CMP against 0 reproduces the flags of
	   the (possibly unshifted) value. */
	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

/* Public entry point for two-operand arithmetic: dispatches each sljit
   opcode to the matching x86 emitter, applying flag-free strength
   reductions (ADD->LEA, SUB 0->NEG, XOR -1->NOT) where possible. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!HAS_FLAGS(op)) {
			/* LEA does not clobber flags and can merge the addition. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		/* 0 - x is a single NEG. */
		if (src1 == SLJIT_IMM && src1w == 0)
			return emit_unary(compiler, NEG_rm, dst, dstw, src2,
SLJIT_SUCCESS;
}

/* Flag-only variant of op2: SUB becomes CMP and AND becomes TEST; every
   other opcode is forwarded to sljit_emit_op2 with TMP_REG1 as a dummy
   destination. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
	}

	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif

	if (opcode == SLJIT_SUB)
		return emit_cmp_binary(compiler, src1, src1w, src2, src2w);

	return emit_test_binary(compiler, src1, src1w, src2, src2w);
}

/* Two-operand ops with a register accumulator destination. Currently only
   SLJIT_MULADD is implemented: dst_reg += src1 * src2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_sw dstw = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif

	switch (GET_OPCODE(op)) {
	case SLJIT_MULADD:
		/* Multiply into TMP_REG1, then ADD it to the destination. */
		FAIL_IF(emit_mul(compiler, TMP_REG1, 0, src1, src1w, src2, src2w));
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst_reg, dstw);
		FAIL_IF(!inst);
		*inst = ADD_rm_r;
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}

/* Emits a funnel shift (SHLD/SHRD): shifts src1 while bits from src2 are
   shifted in, by src3 bits. Equal sources degenerate to a rotate. Most of
   the complexity is keeping ECX usable as the shift count while none of
   the visible operands are clobbered. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_rotate, is_left, move_src1;
	sljit_u8* inst;
	sljit_sw src1w = 0;
	sljit_sw dstw = 0;
	/* The whole register must be saved even for 32 bit operations. */
	sljit_u8 restore_ecx = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_sw src2w = 0;
	/* Register spilled to [SP + sizeof(sljit_s32)] to be restored at the end. */
	sljit_s32 restore_sp4 = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
	ADJUST_LOCAL_OFFSET(src3, src3w);

	CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
	CHECK_EXTRA_REGS(src3, src3w, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	if (src3 == SLJIT_IMM) {
		/* Mask the count to the operand width; zero is a no-op. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		src3w &= 0x1f;
#else /* !SLJIT_CONFIG_X86_32 */
		src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
#endif /* SLJIT_CONFIG_X86_32 */

		if (src3w == 0)
			return SLJIT_SUCCESS;
	}

	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);

	is_rotate = (src1_reg == src2_reg);
	CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
	CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);

	if (is_rotate)
		return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (src2_reg & SLJIT_MEM) {
		EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
		src2_reg = TMP_REG1;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
		/* The destination is ECX but the count must live in ECX too:
		   relocate src1 so ECX can hold the count. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
		src1_reg = TMP_REG1;
		src1w = 0;
#else /* !SLJIT_CONFIG_X86_64 */
		if (src2_reg != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
			src1_reg = TMP_REG1;
			src1w = 0;
		} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
			/* TMP_REG1 is taken: borrow R0 or R1 and spill it first. */
			restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
			EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
			src1_reg = restore_sp4;
			src1w = 0;
		} else {
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
			restore_sp4 = src1_reg;
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src3 != SLJIT_PREF_SHIFT_REG)
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
	} else {
		if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
			/* src2 occupies ECX: move it away (full width) so the count
			   can be loaded into ECX. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			compiler->mode32 = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
			src2_reg = TMP_REG1;
			restore_ecx = 1;
		}

		move_src1 = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (dst_reg != src1_reg) {
			if (dst_reg != src3) {
				EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
				src1_reg = dst_reg;
				src1w = 0;
			} else
				/* dst_reg still holds the count; copy src1 later. */
				move_src1 = 1;
		}
#else /* !SLJIT_CONFIG_X86_64 */
		if (dst_reg & SLJIT_MEM) {
			if (src2_reg != TMP_REG1) {
				EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
				src1_reg = TMP_REG1;
				src1w = 0;
			} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
				restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
				EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
				src1_reg = restore_sp4;
				src1w = 0;
			} else {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
				restore_sp4 = src1_reg;
			}
		} else if (dst_reg != src1_reg) {
			if (dst_reg != src3) {
				EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
				src1_reg = dst_reg;
				src1w = 0;
			} else
				move_src1 = 1;
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
			if (!restore_ecx) {
				/* Save ECX before loading the count into it. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				compiler->mode32 = 0;
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
				compiler->mode32 = op & SLJIT_32;
				restore_ecx = 1;
#else /* !SLJIT_CONFIG_X86_64 */
				if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
					EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
					restore_ecx = 1;
				} else {
					/* TMP_REG1 is busy: spill ECX to the stack instead. */
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
					restore_ecx = 2;
				}
#endif /* SLJIT_CONFIG_X86_64 */
			}
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
		}

		if (move_src1) {
			EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
			src1_reg = dst_reg;
			src1w = 0;
		}
	}

	inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;

	if (src3 == SLJIT_IMM) {
		/* The imm8 forms of SHLD/SHRD have an opcode one less than the
		   CL forms. */
		inst[1] = U8((is_left ? SHLD : SHRD) - 1);

		/* Immediate argument is added separately.
*/
		if (!cpu_has_shadow_stack ())
			return SLJIT_SUCCESS;
		return adjust_shadow_stack(compiler, src, srcw);
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		return emit_prefetch(compiler, op, src, srcw);
	}

	return SLJIT_SUCCESS;
}

/* Source-less ops with a destination operand: fast-call entry and reading
   the return address. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	switch (op) {
	case SLJIT_FAST_ENTER:
		return emit_fast_enter(compiler, dst, dstw);
	case SLJIT_GET_RETURN_ADDRESS:
		return sljit_emit_get_return_address(compiler, dst, dstw);
	}

	return SLJIT_SUCCESS;
}

/* Maps an abstract sljit register to its hardware encoding; returns -1 for
   registers that have no direct machine mapping. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	if (type == SLJIT_GP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* These virtual registers are kept in memory on 32 bit. */
		if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
			return -1;
#endif /* SLJIT_CONFIG_X86_32 */
		return reg_map[reg];
	}

	if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
		return -1;

	return freg_map[reg];
}

/* Copies raw, pre-encoded machine code bytes into the instruction stream. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Alignment(3) + 4 * 16 bytes. */
static sljit_u32 sse2_data[3 + (4 * 4)];
static sljit_u32 *sse2_buffer;

/* One-time initialization: detects CPU features and lays out the 16 byte
   aligned sign/abs bitmask constants used by SLJIT_NEG_F64 / SLJIT_ABS_F64. */
static void init_compiler(void)
{
	get_cpu_features();

	/* Align to 16 bytes. */
	sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);

	/* Single precision constants (each constant is 16 byte long). */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants (each constant is 16 byte long). */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

/* Emits a two-byte (0F-prefixed) instruction; the low byte of 'op' is the
   opcode, the remaining bits carry EX86_* operand-encoding flags. */
static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = op & 0xff;
	return SLJIT_SUCCESS;
}

/* Emits a three-byte (0F 38 / 0F 3A) extended-map instruction. */
static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));

	inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
	inst[2] = op & 0xff;
	return SLJIT_SUCCESS;
}

/* MOVSS/MOVSD load: the F3 prefix selects single, F2 double precision. */
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
}

/* MOVSS/MOVSD store counterpart of emit_sse2_load. */
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
}

/* Float -> integer conversion via CVTTSD2SI/CVTTSS2SI (truncating). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A full-width (REX.W) conversion is needed for 64 bit results. */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

/* Integer -> float conversion via CVTSI2SD/CVTSI2SS; immediates are
   materialized in TMP_REG1 first. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Only the low 32 bits are meaningful for a 32 bit source. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* Floating point comparison. Most flag types map to UCOMISD/UCOMISS;
   ordered-equal needs CMPSD (predicate EQ) because UCOMIS* alone cannot
   separate equality from unordered. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_ORDERED_EQUAL:
		/* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));

		/* EQ */
		FAIL_IF(emit_byte(compiler, 0));

		/* Compare the all-ones/all-zeros mask with itself below. */
		src1 = TMP_FREG;
		src2 = TMP_FREG;
		src2w = 0;
		break;

	case SLJIT_ORDERED_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
		/* Swapped-operand compare so the flag polarity matches. */
		if (!FAST_IS_REG(src2)) {
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
			src2 = TMP_FREG;
		}

		return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
	}

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}

	return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1,
src2, src2w);3232}32333234SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,3235sljit_s32 dst, sljit_sw dstw,3236sljit_s32 src, sljit_sw srcw)3237{3238sljit_s32 dst_r;3239sljit_u8 *inst;32403241#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3242compiler->mode32 = 1;3243#endif32443245CHECK_ERROR();3246SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);32473248if (GET_OPCODE(op) == SLJIT_MOV_F64) {3249if (FAST_IS_REG(dst))3250return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);3251if (FAST_IS_REG(src))3252return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);3253FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));3254return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3255}32563257if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {3258dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;3259if (FAST_IS_REG(src)) {3260/* We overwrite the high bits of source. From SLJIT point of view,3261this is not an issue.3262Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */3263FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));3264} else {3265FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));3266src = TMP_FREG;3267}32683269FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));3270if (dst_r == TMP_FREG)3271return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3272return SLJIT_SUCCESS;3273}32743275if (FAST_IS_REG(dst)) {3276dst_r = (dst == src) ? 
TMP_FREG : dst;32773278if (src & SLJIT_MEM)3279FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));32803281FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));32823283inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);3284inst[0] = GROUP_0F;3285/* Same as PSRLD_x / PSRLQ_x */3286inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;32873288if (GET_OPCODE(op) == SLJIT_ABS_F64) {3289inst[2] |= 2 << 3;3290FAIL_IF(emit_byte(compiler, 1));3291} else {3292inst[2] |= 6 << 3;3293FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));3294}32953296if (dst_r != TMP_FREG)3297dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;3298return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);3299}33003301FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));33023303switch (GET_OPCODE(op)) {3304case SLJIT_NEG_F64:3305FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));3306break;33073308case SLJIT_ABS_F64:3309FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer + 4 : sse2_buffer + 12)));3310break;3311}33123313return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3314}33153316SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,3317sljit_s32 dst, sljit_sw dstw,3318sljit_s32 src1, sljit_sw src1w,3319sljit_s32 src2, sljit_sw src2w)3320{3321sljit_s32 dst_r;33223323CHECK_ERROR();3324CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));3325ADJUST_LOCAL_OFFSET(dst, dstw);3326ADJUST_LOCAL_OFFSET(src1, src1w);3327ADJUST_LOCAL_OFFSET(src2, src2w);33283329#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3330compiler->mode32 = 1;3331#endif33323333if (FAST_IS_REG(dst)) {3334dst_r = dst;3335if (dst == src1)3336; /* Do nothing here. */3337else if (dst == src2 && (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64)) {3338/* Swap arguments. */3339src2 = src1;3340src2w = src1w;3341} else if (dst != src2)3342FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));3343else {3344dst_r = TMP_FREG;3345FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));3346}3347} else {3348dst_r = TMP_FREG;3349FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));3350}33513352switch (GET_OPCODE(op)) {3353case SLJIT_ADD_F64:3354FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3355break;33563357case SLJIT_SUB_F64:3358FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3359break;33603361case SLJIT_MUL_F64:3362FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3363break;33643365case SLJIT_DIV_F64:3366FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));3367break;3368}33693370if (dst_r != dst)3371return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);3372return 
SLJIT_SUCCESS;3373}33743375SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,3376sljit_s32 dst_freg,3377sljit_s32 src1, sljit_sw src1w,3378sljit_s32 src2, sljit_sw src2w)3379{3380sljit_uw pref;33813382CHECK_ERROR();3383CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));3384ADJUST_LOCAL_OFFSET(src1, src1w);3385ADJUST_LOCAL_OFFSET(src2, src2w);33863387#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3388compiler->mode32 = 1;3389#endif33903391if (dst_freg == src1) {3392FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));3393pref = EX86_SELECT_66(op) | EX86_SSE2;3394FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));3395FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));3396return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);3397}33983399if (src1 & SLJIT_MEM) {3400FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));3401src1 = TMP_FREG;3402src1w = 0;3403}34043405if (dst_freg != src2)3406FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));34073408pref = EX86_SELECT_66(op) | EX86_SSE2;3409FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));3410FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? 
sse2_buffer : sse2_buffer + 8)));3411return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);3412}34133414/* --------------------------------------------------------------------- */3415/* Conditional instructions */3416/* --------------------------------------------------------------------- */34173418SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)3419{3420sljit_u8 *inst;3421struct sljit_label *label;34223423CHECK_ERROR_PTR();3424CHECK_PTR(check_sljit_emit_label(compiler));34253426if (compiler->last_label && compiler->last_label->size == compiler->size)3427return compiler->last_label;34283429label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));3430PTR_FAIL_IF(!label);3431set_label(label, compiler);34323433inst = (sljit_u8*)ensure_buf(compiler, 1);3434PTR_FAIL_IF(!inst);3435inst[0] = SLJIT_INST_LABEL;34363437return label;3438}34393440SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)3441{3442sljit_u8 *inst;3443struct sljit_jump *jump;34443445CHECK_ERROR_PTR();3446CHECK_PTR(check_sljit_emit_jump(compiler, type));34473448jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));3449PTR_FAIL_IF_NULL(jump);3450set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));3451type &= 0xff;34523453jump->addr = compiler->size;3454/* Worst case size. */3455compiler->size += (type >= SLJIT_JUMP) ? 
JUMP_MAX_SIZE : CJUMP_MAX_SIZE;3456inst = (sljit_u8*)ensure_buf(compiler, 1);3457PTR_FAIL_IF_NULL(inst);34583459inst[0] = SLJIT_INST_JUMP;3460return jump;3461}34623463SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)3464{3465sljit_u8 *inst;3466struct sljit_jump *jump;34673468CHECK_ERROR();3469CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));3470ADJUST_LOCAL_OFFSET(src, srcw);34713472CHECK_EXTRA_REGS(src, srcw, (void)0);34733474if (src == SLJIT_IMM) {3475jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));3476FAIL_IF_NULL(jump);3477set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));3478jump->u.target = (sljit_uw)srcw;34793480jump->addr = compiler->size;3481/* Worst case size. */3482compiler->size += JUMP_MAX_SIZE;3483inst = (sljit_u8*)ensure_buf(compiler, 1);3484FAIL_IF_NULL(inst);34853486inst[0] = SLJIT_INST_JUMP;3487} else {3488#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3489/* REX_W is not necessary (src is not immediate). */3490compiler->mode32 = 1;3491#endif3492inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);3493FAIL_IF(!inst);3494inst[0] = GROUP_FF;3495inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));3496}3497return SLJIT_SUCCESS;3498}34993500SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,3501sljit_s32 dst, sljit_sw dstw,3502sljit_s32 type)3503{3504sljit_u8 *inst;3505sljit_u8 cond_set;3506#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3507sljit_s32 reg;3508sljit_uw size;3509#endif /* !SLJIT_CONFIG_X86_64 */3510/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. 
*/3511sljit_s32 dst_save = dst;3512sljit_sw dstw_save = dstw;35133514CHECK_ERROR();3515CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));35163517ADJUST_LOCAL_OFFSET(dst, dstw);3518CHECK_EXTRA_REGS(dst, dstw, (void)0);35193520/* setcc = jcc + 0x10. */3521cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);35223523#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3524if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {3525size = 3 + 2;3526if (reg_map[TMP_REG1] >= 4)3527size += 1 + 1;3528else if (reg_map[dst] >= 4)3529size++;35303531inst = (sljit_u8*)ensure_buf(compiler, 1 + size);3532FAIL_IF(!inst);3533INC_SIZE(size);3534/* Set low register to conditional flag. */3535if (reg_map[TMP_REG1] >= 4)3536*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;35373538inst[0] = GROUP_0F;3539inst[1] = cond_set;3540inst[2] = MOD_REG | reg_lmap[TMP_REG1];3541inst += 3;35423543if (reg_map[TMP_REG1] >= 4 || reg_map[dst] >= 4)3544*inst++ = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));35453546inst[0] = OR_rm8_r8;3547inst[1] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);3548return SLJIT_SUCCESS;3549}35503551reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;35523553size = 3 + (reg_map[reg] >= 4) + 4;3554inst = (sljit_u8*)ensure_buf(compiler, 1 + size);3555FAIL_IF(!inst);3556INC_SIZE(size);3557/* Set low register to conditional flag. */35583559if (reg_map[reg] >= 4)3560*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;35613562inst[0] = GROUP_0F;3563inst[1] = cond_set;3564inst[2] = MOD_REG | reg_lmap[reg];35653566inst[3] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));3567/* The movzx instruction does not affect flags. 
*/3568inst[4] = GROUP_0F;3569inst[5] = MOVZX_r_rm8;3570inst[6] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);35713572if (reg != TMP_REG1)3573return SLJIT_SUCCESS;35743575if (GET_OPCODE(op) < SLJIT_ADD) {3576compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;3577return emit_mov(compiler, dst, dstw, TMP_REG1, 0);3578}35793580SLJIT_SKIP_CHECKS(compiler);3581return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);35823583#else /* !SLJIT_CONFIG_X86_64 */3584SLJIT_ASSERT(reg_map[TMP_REG1] < 4);35853586/* The SLJIT_CONFIG_X86_32 code path starts here. */3587if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {3588/* Low byte is accessible. */3589inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);3590FAIL_IF(!inst);3591INC_SIZE(3 + 3);3592/* Set low byte to conditional flag. */3593inst[0] = GROUP_0F;3594inst[1] = cond_set;3595inst[2] = U8(MOD_REG | reg_map[dst]);35963597inst[3] = GROUP_0F;3598inst[4] = MOVZX_r_rm8;3599inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);3600return SLJIT_SUCCESS;3601}36023603if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {3604inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);3605FAIL_IF(!inst);3606INC_SIZE(3 + 2);36073608/* Set low byte to conditional flag. */3609inst[0] = GROUP_0F;3610inst[1] = cond_set;3611inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);36123613inst[3] = OR_rm8_r8;3614inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);3615return SLJIT_SUCCESS;3616}36173618inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);3619FAIL_IF(!inst);3620INC_SIZE(3 + 3);3621/* Set low byte to conditional flag. 
*/3622inst[0] = GROUP_0F;3623inst[1] = cond_set;3624inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);36253626inst[3] = GROUP_0F;3627inst[4] = MOVZX_r_rm8;3628inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);36293630if (GET_OPCODE(op) < SLJIT_ADD)3631return emit_mov(compiler, dst, dstw, TMP_REG1, 0);36323633SLJIT_SKIP_CHECKS(compiler);3634return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);3635#endif /* SLJIT_CONFIG_X86_64 */3636}36373638SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,3639sljit_s32 dst_freg,3640sljit_s32 src1, sljit_sw src1w,3641sljit_s32 src2_freg)3642{3643sljit_u8* inst;3644sljit_uw size;36453646CHECK_ERROR();3647CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));36483649ADJUST_LOCAL_OFFSET(src1, src1w);36503651#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3652compiler->mode32 = 1;3653#endif /* SLJIT_CONFIG_X86_64 */36543655if (dst_freg != src2_freg) {3656if (dst_freg == src1) {3657src1 = src2_freg;3658src1w = 0;3659type ^= 0x1;3660} else3661FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));3662}36633664inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);3665FAIL_IF(!inst);3666INC_SIZE(2);3667inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);36683669size = compiler->size;3670FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));36713672inst[1] = U8(compiler->size - size);3673return SLJIT_SUCCESS;3674}36753676SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,3677sljit_s32 vreg,3678sljit_s32 srcdst, sljit_sw srcdstw)3679{3680sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3681sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3682sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);3683sljit_uw op;36843685CHECK_ERROR();3686CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, 
srcdstw));36873688ADJUST_LOCAL_OFFSET(srcdst, srcdstw);36893690#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3691compiler->mode32 = 1;3692#endif /* SLJIT_CONFIG_X86_64 */36933694switch (reg_size) {3695case 4:3696op = EX86_SSE2;3697break;3698case 5:3699if (!(cpu_feature_list & CPU_FEATURE_AVX2))3700return SLJIT_ERR_UNSUPPORTED;3701op = EX86_SSE2 | VEX_256;3702break;3703default:3704return SLJIT_ERR_UNSUPPORTED;3705}37063707if (!(srcdst & SLJIT_MEM))3708alignment = reg_size;37093710if (type & SLJIT_SIMD_FLOAT) {3711if (elem_size == 2 || elem_size == 3) {3712op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;37133714if (elem_size == 3)3715op |= EX86_PREF_66;37163717if (type & SLJIT_SIMD_STORE)3718op += 1;3719} else3720return SLJIT_ERR_UNSUPPORTED;3721} else {3722op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)3723| (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);3724}37253726if (type & SLJIT_SIMD_TEST)3727return SLJIT_SUCCESS;37283729if ((op & VEX_256) || ((cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX)))3730return emit_vex_instruction(compiler, op, vreg, 0, srcdst, srcdstw);37313732return emit_groupf(compiler, op, vreg, srcdst, srcdstw);3733}37343735SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,3736sljit_s32 vreg,3737sljit_s32 src, sljit_sw srcw)3738{3739sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3740sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3741sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);3742sljit_u8 *inst;3743sljit_u8 opcode = 0;3744sljit_uw op;37453746CHECK_ERROR();3747CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));37483749ADJUST_LOCAL_OFFSET(src, srcw);37503751if (!(type & SLJIT_SIMD_FLOAT)) {3752CHECK_EXTRA_REGS(src, srcw, (void)0);3753}37543755#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)3756if ((type & SLJIT_SIMD_FLOAT) ? 
(elem_size < 2 || elem_size > 3) : (elem_size > 2))3757return SLJIT_ERR_UNSUPPORTED;3758#else /* !SLJIT_CONFIG_X86_32 */3759compiler->mode32 = 1;37603761if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))3762return SLJIT_ERR_UNSUPPORTED;3763#endif /* SLJIT_CONFIG_X86_32 */37643765if (reg_size != 4 && (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2)))3766return SLJIT_ERR_UNSUPPORTED;37673768if (type & SLJIT_SIMD_TEST)3769return SLJIT_SUCCESS;37703771if (reg_size == 5)3772use_vex = 1;37733774if (use_vex && src != SLJIT_IMM) {3775op = 0;37763777switch (elem_size) {3778case 0:3779if (cpu_feature_list & CPU_FEATURE_AVX2)3780op = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3781break;3782case 1:3783if (cpu_feature_list & CPU_FEATURE_AVX2)3784op = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3785break;3786case 2:3787if (type & SLJIT_SIMD_FLOAT) {3788if ((cpu_feature_list & CPU_FEATURE_AVX2) || ((cpu_feature_list & CPU_FEATURE_AVX) && (src & SLJIT_MEM)))3789op = VBROADCASTSS_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3790} else if (cpu_feature_list & CPU_FEATURE_AVX2)3791op = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3792break;3793default:3794#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3795if (!(type & SLJIT_SIMD_FLOAT)) {3796if (cpu_feature_list & CPU_FEATURE_AVX2)3797op = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3798break;3799}3800#endif /* SLJIT_CONFIG_X86_64 */38013802if (reg_size == 5)3803op = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;3804break;3805}38063807if (op != 0) {3808if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {3809#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3810if (elem_size >= 3)3811compiler->mode32 = 0;3812#endif /* SLJIT_CONFIG_X86_64 */3813FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));3814#if (defined SLJIT_CONFIG_X86_64 && 
SLJIT_CONFIG_X86_64)3815compiler->mode32 = 1;3816#endif /* SLJIT_CONFIG_X86_64 */3817src = vreg;3818srcw = 0;3819}38203821if (reg_size == 5)3822op |= VEX_256;38233824return emit_vex_instruction(compiler, op, vreg, 0, src, srcw);3825}3826}38273828if (type & SLJIT_SIMD_FLOAT) {3829if (src == SLJIT_IMM) {3830if (use_vex)3831return emit_vex_instruction(compiler, XORPD_x_xm | (reg_size == 5 ? VEX_256 : 0) | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);38323833return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, vreg, vreg, 0);3834}38353836SLJIT_ASSERT(reg_size == 4);38373838if (use_vex) {3839if (elem_size == 3)3840return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, srcw);38413842SLJIT_ASSERT(!(src & SLJIT_MEM));3843FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));3844return emit_byte(compiler, 0);3845}38463847if (elem_size == 2 && vreg != src) {3848FAIL_IF(emit_sse2_load(compiler, 1, vreg, src, srcw));3849src = vreg;3850srcw = 0;3851}38523853op = (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2;3854FAIL_IF(emit_groupf(compiler, op, vreg, src, srcw));38553856if (elem_size == 2)3857return emit_byte(compiler, 0);3858return SLJIT_SUCCESS;3859}38603861if (src == SLJIT_IMM) {3862if (elem_size == 0) {3863srcw = (sljit_u8)srcw;3864srcw |= srcw << 8;3865srcw |= srcw << 16;3866elem_size = 2;3867} else if (elem_size == 1) {3868srcw = (sljit_u16)srcw;3869srcw |= srcw << 16;3870elem_size = 2;3871}38723873#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3874if (elem_size == 2 && (sljit_s32)srcw == -1)3875srcw = -1;3876#endif /* SLJIT_CONFIG_X86_64 */38773878if (srcw == 0 || srcw == -1) {3879if (use_vex)3880return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | (reg_size == 5 ? 
VEX_256 : 0) | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0);38813882return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0);3883}38843885#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3886if (elem_size == 3)3887FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));3888else3889#endif /* SLJIT_CONFIG_X86_64 */3890EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);38913892src = TMP_REG1;3893srcw = 0;38943895}38963897op = 2;3898opcode = MOVD_x_rm;38993900switch (elem_size) {3901case 0:3902if (!FAST_IS_REG(src)) {3903opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;3904op = 3;3905}3906break;3907case 1:3908if (!FAST_IS_REG(src))3909opcode = PINSRW_x_rm_i8;3910break;3911case 2:3912break;3913#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3914case 3:3915/* MOVQ */3916compiler->mode32 = 0;3917break;3918#endif /* SLJIT_CONFIG_X86_64 */3919}39203921if (use_vex) {3922if (opcode != MOVD_x_rm) {3923op = (opcode == 0x3a) ? (PINSRB_x_rm_i8 | VEX_OP_0F3A) : opcode;3924FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, vreg, src, srcw));3925} else3926FAIL_IF(emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw));3927} else {3928inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, src, srcw);3929FAIL_IF(!inst);3930inst[0] = GROUP_0F;3931inst[1] = opcode;39323933if (op == 3) {3934SLJIT_ASSERT(opcode == 0x3a);3935inst[2] = PINSRB_x_rm_i8;3936}3937}39383939if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && elem_size >= 2) {3940#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)3941op = VPBROADCASTD_x_xm;3942#else /* !SLJIT_CONFIG_X86_32 */3943op = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;3944#endif /* SLJIT_CONFIG_X86_32 */3945return emit_vex_instruction(compiler, op | ((reg_size == 5) ? 
VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);3946}39473948SLJIT_ASSERT(reg_size == 4);39493950if (opcode != MOVD_x_rm)3951FAIL_IF(emit_byte(compiler, 0));39523953switch (elem_size) {3954case 0:3955if (use_vex) {3956FAIL_IF(emit_vex_instruction(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));3957return emit_vex_instruction(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, TMP_FREG, 0);3958}3959FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));3960return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);3961case 1:3962if (use_vex)3963FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, vreg, 0));3964else3965FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, vreg, 0));3966FAIL_IF(emit_byte(compiler, 0));3967/* fallthrough */3968default:3969if (use_vex)3970FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));3971else3972FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));3973return emit_byte(compiler, 0);3974#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)3975case 3:3976compiler->mode32 = 1;3977if (use_vex)3978FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, vreg, 0));3979else3980FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, vreg, 0));3981return emit_byte(compiler, 0x44);3982#endif /* SLJIT_CONFIG_X86_64 */3983}3984}39853986SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,3987sljit_s32 vreg, sljit_s32 lane_index,3988sljit_s32 srcdst, sljit_sw srcdstw)3989{3990sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3991sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3992sljit_s32 use_vex = 
(cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);3993sljit_u8 *inst;3994sljit_u8 opcode = 0;3995sljit_uw op;3996sljit_s32 vreg_orig = vreg;3997#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)3998sljit_s32 srcdst_is_ereg = 0;3999sljit_s32 srcdst_orig = 0;4000sljit_sw srcdstw_orig = 0;4001#endif /* SLJIT_CONFIG_X86_32 */40024003CHECK_ERROR();4004CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));40054006ADJUST_LOCAL_OFFSET(srcdst, srcdstw);40074008if (reg_size == 5) {4009if (!(cpu_feature_list & CPU_FEATURE_AVX2))4010return SLJIT_ERR_UNSUPPORTED;4011use_vex = 1;4012} else if (reg_size != 4)4013return SLJIT_ERR_UNSUPPORTED;40144015#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)4016if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)4017return SLJIT_ERR_UNSUPPORTED;4018#else /* SLJIT_CONFIG_X86_32 */4019if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))4020return SLJIT_ERR_UNSUPPORTED;4021#endif /* SLJIT_CONFIG_X86_32 */40224023if (type & SLJIT_SIMD_TEST)4024return SLJIT_SUCCESS;40254026#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)4027compiler->mode32 = 1;4028#else /* !SLJIT_CONFIG_X86_64 */4029if (!(type & SLJIT_SIMD_FLOAT)) {4030CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);40314032if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {4033srcdst_orig = srcdst;4034srcdstw_orig = srcdstw;4035srcdst = TMP_REG1;4036srcdstw = 0;4037}4038}4039#endif /* SLJIT_CONFIG_X86_64 */40404041if (type & SLJIT_SIMD_LANE_ZERO) {4042if (lane_index == 0) {4043if (!(type & SLJIT_SIMD_FLOAT)) {4044#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)4045if (elem_size == 3) {4046compiler->mode32 = 0;4047elem_size = 2;4048}4049#endif /* SLJIT_CONFIG_X86_64 */4050if (srcdst == SLJIT_IMM) {4051if (elem_size == 0)4052srcdstw 
= (sljit_u8)srcdstw;4053else if (elem_size == 1)4054srcdstw = (sljit_u16)srcdstw;40554056EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);4057srcdst = TMP_REG1;4058srcdstw = 0;4059elem_size = 2;4060}40614062if (elem_size == 2) {4063if (use_vex)4064return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);4065return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, vreg, srcdst, srcdstw);4066}4067} else if (srcdst & SLJIT_MEM) {4068SLJIT_ASSERT(elem_size == 2 || elem_size == 3);40694070if (use_vex)4071return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, 0, srcdst, srcdstw);4072return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, vreg, srcdst, srcdstw);4073} else if (elem_size == 3) {4074if (use_vex)4075return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, 0, srcdst, 0);4076return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, vreg, srcdst, 0);4077} else if (use_vex) {4078FAIL_IF(emit_vex_instruction(compiler, XORPD_x_xm | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, TMP_FREG, 0));4079return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F3 | EX86_SSE2 | VEX_SSE2_OPV, vreg, TMP_FREG, srcdst, 0);4080}4081}40824083if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {4084vreg = TMP_FREG;4085lane_index -= (1 << (4 - elem_size));4086} else if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {4087if (use_vex)4088FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, TMP_FREG, srcdst, srcdstw));4089else4090FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));4091srcdst = TMP_FREG;4092srcdstw = 0;4093}40944095op = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)4096| ((type & SLJIT_SIMD_FLOAT) ? 
XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;40974098if (use_vex)4099FAIL_IF(emit_vex_instruction(compiler, op | (reg_size == 5 ? VEX_256 : 0) | VEX_SSE2_OPV, vreg, vreg, vreg, 0));4100else4101FAIL_IF(emit_groupf(compiler, op, vreg, vreg, 0));4102} else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {4103FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));4104FAIL_IF(emit_byte(compiler, 1));41054106vreg = TMP_FREG;4107lane_index -= (1 << (4 - elem_size));4108}41094110if (type & SLJIT_SIMD_FLOAT) {4111if (elem_size == 3) {4112if (srcdst & SLJIT_MEM) {4113if (type & SLJIT_SIMD_STORE)4114op = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;4115else4116op = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;41174118/* VEX prefix clears upper bits of the target register. */4119if (use_vex && ((type & SLJIT_SIMD_STORE) || reg_size == 4 || vreg == TMP_FREG))4120FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | EX86_SSE24121| ((type & SLJIT_SIMD_STORE) ? 0 : VEX_SSE2_OPV), vreg, (type & SLJIT_SIMD_STORE) ? 0 : vreg, srcdst, srcdstw));4122else4123FAIL_IF(emit_groupf(compiler, op | EX86_PREF_66 | EX86_SSE2, vreg, srcdst, srcdstw));41244125/* In case of store, vreg is not TMP_FREG. 
*/4126} else if (type & SLJIT_SIMD_STORE) {4127if (lane_index == 1) {4128if (use_vex)4129return emit_vex_instruction(compiler, MOVHLPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);4130return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, vreg, 0);4131}4132if (use_vex)4133return emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, srcdst, srcdst, vreg, 0);4134return emit_sse2_load(compiler, 0, srcdst, vreg, 0);4135} else if (use_vex && (reg_size == 4 || vreg == TMP_FREG)) {4136if (lane_index == 1)4137FAIL_IF(emit_vex_instruction(compiler, MOVLHPS_x_x | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));4138else4139FAIL_IF(emit_vex_instruction(compiler, MOVSD_x_xm | EX86_PREF_F2 | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, srcdst, 0));4140} else {4141if (lane_index == 1)4142FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, vreg, srcdst, 0));4143else4144FAIL_IF(emit_sse2_load(compiler, 0, vreg, srcdst, 0));4145}4146} else if (type & SLJIT_SIMD_STORE) {4147if (lane_index == 0) {4148if (use_vex)4149return emit_vex_instruction(compiler, MOVSD_xm_x | EX86_PREF_F3 | EX86_SSE2 | ((srcdst & SLJIT_MEM) ? 0 : VEX_SSE2_OPV),4150vreg, ((srcdst & SLJIT_MEM) ? 
0 : srcdst), srcdst, srcdstw);4151return emit_sse2_store(compiler, 1, srcdst, srcdstw, vreg);4152}41534154if (srcdst & SLJIT_MEM) {4155if (use_vex)4156FAIL_IF(emit_vex_instruction(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, srcdst, srcdstw));4157else4158FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));4159return emit_byte(compiler, U8(lane_index));4160}41614162if (use_vex) {4163FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, vreg, vreg, 0));4164return emit_byte(compiler, U8(lane_index));4165}41664167if (srcdst == vreg)4168op = SHUFPS_x_xm | EX86_SSE2;4169else {4170switch (lane_index) {4171case 1:4172op = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;4173break;4174case 2:4175op = MOVHLPS_x_x | EX86_SSE2;4176break;4177default:4178SLJIT_ASSERT(lane_index == 3);4179op = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;4180break;4181}4182}41834184FAIL_IF(emit_groupf(compiler, op, srcdst, vreg, 0));41854186op &= 0xff;4187if (op == SHUFPS_x_xm || op == PSHUFD_x_xm)4188return emit_byte(compiler, U8(lane_index));41894190return SLJIT_SUCCESS;4191} else {4192if (lane_index != 0 || (srcdst & SLJIT_MEM)) {4193FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, srcdst, srcdstw));4194FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));4195} else4196FAIL_IF(emit_sse2_store(compiler, 1, vreg, 0, srcdst));4197}41984199if (vreg != TMP_FREG || (type & SLJIT_SIMD_STORE))4200return SLJIT_SUCCESS;42014202SLJIT_ASSERT(reg_size == 5);42034204if (type & SLJIT_SIMD_LANE_ZERO) {4205FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));4206return emit_byte(compiler, 0x4e);4207}42084209FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 
0));4210return emit_byte(compiler, 1);4211}42124213if (srcdst == SLJIT_IMM) {4214EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);4215srcdst = TMP_REG1;4216srcdstw = 0;4217}42184219op = 3;42204221switch (elem_size) {4222case 0:4223opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;4224break;4225case 1:4226if (!(type & SLJIT_SIMD_STORE)) {4227op = 2;4228opcode = PINSRW_x_rm_i8;4229} else4230opcode = PEXTRW_rm_x_i8;4231break;4232case 2:4233opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;4234break;4235#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)4236case 3:4237/* PINSRQ / PEXTRQ */4238opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;4239compiler->mode32 = 0;4240break;4241#endif /* SLJIT_CONFIG_X86_64 */4242}42434244if (use_vex && (type & SLJIT_SIMD_STORE)) {4245op = opcode | ((op == 3) ? VEX_OP_0F3A : 0);4246FAIL_IF(emit_vex_instruction(compiler, op | EX86_PREF_66 | VEX_AUTO_W | EX86_SSE2_OP1 | VEX_SSE2_OPV, vreg, 0, srcdst, srcdstw));4247} else {4248inst = emit_x86_instruction(compiler, op | EX86_PREF_66 | EX86_SSE2_OP1, vreg, 0, srcdst, srcdstw);4249FAIL_IF(!inst);4250inst[0] = GROUP_0F;42514252if (op == 3) {4253inst[1] = 0x3a;4254inst[2] = opcode;4255} else4256inst[1] = opcode;4257}42584259FAIL_IF(emit_byte(compiler, U8(lane_index)));42604261if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {4262if (vreg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {4263SLJIT_ASSERT(reg_size == 5);42644265if (type & SLJIT_SIMD_LANE_ZERO) {4266FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg_orig, 0, TMP_FREG, 0));4267return emit_byte(compiler, 0x4e);4268}42694270FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, vreg_orig, vreg_orig, TMP_FREG, 0));4271return emit_byte(compiler, 1);4272}42734274#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)4275if 
(srcdst_orig & SLJIT_MEM)
			return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_32 */
		return SLJIT_SUCCESS;
	}

	/* Signed lane read from a register: sign extend the extracted value
	   to full register width (MOVSX / MOVSXD). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (elem_size >= 3)
		return SLJIT_SUCCESS;

	compiler->mode32 = (type & SLJIT_32);

	op = 2;

	if (elem_size == 0)
		op |= EX86_REX;

	if (elem_size == 2) {
		if (type & SLJIT_32)
			return SLJIT_SUCCESS;

		SLJIT_ASSERT(!(compiler->mode32));
		op = 1;
	}

	inst = emit_x86_instruction(compiler, op, srcdst, 0, srcdst, 0);
	FAIL_IF(!inst);

	if (op != 1) {
		inst[0] = GROUP_0F;
		inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
	} else
		inst[0] = MOVSXD_r_rm;
#else /* !SLJIT_CONFIG_X86_64 */
	if (elem_size >= 2)
		return SLJIT_SUCCESS;

	FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
		(srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));

	if (srcdst_orig & SLJIT_MEM)
		return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
	return SLJIT_SUCCESS;
}

/* Replicates lane src_lane_index of vector register src into every lane of
   vreg using SSE/AVX shuffles and broadcasts. Returns SLJIT_ERR_UNSUPPORTED
   for register/element size combinations with no available encoding. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_uw pref;
	sljit_u8 byte;
	/* Any register whose hardware encoding is 3 (asserted below); PSRLDQ
	   stores its opcode extension (3) in the ModR/M reg field, so this
	   register index is passed where a register operand is expected. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 opcode3 = TMP_REG1;
#else /* !SLJIT_CONFIG_X86_32 */
	sljit_s32 opcode3 = SLJIT_S0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
	SLJIT_ASSERT(reg_map[opcode3] == 3);

	/* 256 bit vectors need AVX2; only 128/256 bit vectors are supported. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		use_vex = 1;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_FLOAT) {
		pref = 0;
		byte = U8(src_lane_index);

		if (elem_size == 3) {
			if (type & SLJIT_SIMD_TEST)
				return SLJIT_SUCCESS;

			if (reg_size == 5) {
				if (src_lane_index == 0)
					return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);

				FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));

				/* Duplicate the 2-bit lane selector into all four imm8 fields. */
				byte = U8(byte | (byte << 2));
				return emit_byte(compiler, U8(byte | (byte << 4)));
			}

			if (src_lane_index == 0) {
				if (use_vex)
					return emit_vex_instruction(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, 0, src, 0);
				return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, vreg, src, 0);
			}

			/* Changes it to SHUFPD_x_xm. */
			pref = EX86_PREF_66;
		} else if (elem_size != 2)
			return SLJIT_ERR_UNSUPPORTED;
		else if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		if (reg_size == 5) {
			SLJIT_ASSERT(elem_size == 2);

			if (src_lane_index == 0)
				return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);

			/* Move the 128 bit half containing the lane into both halves first. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));

			byte = 0x44;
			if (src_lane_index >= 4) {
				byte = 0xee;
				src_lane_index -= 4;
			}

			FAIL_IF(emit_byte(compiler, byte));
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, vreg, vreg, 0));
			byte = U8(src_lane_index);
		} else if (use_vex) {
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, vreg, src, src, 0));
		} else {
			if (vreg != src)
				FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, vreg, src, 0));

			FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, vreg, vreg, 0));
		}

		/* Build the shuffle immediate: replicate the selector into every
		   2-bit (f32) or 1-bit (f64) field. */
		if (elem_size == 2) {
			byte = U8(byte | (byte << 2));
			byte = U8(byte | (byte << 4));
		} else
			byte = U8(byte | (byte << 1));

		return emit_byte(compiler, U8(byte));
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (elem_size == 0) {
		if (reg_size == 5 && src_lane_index >= 16) {
			/* VPERMQ copies the qword containing the byte lane to the bottom,
			   then the lane index is reduced to its offset inside that qword. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
			FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
			src_lane_index &= 0x7;
			src = vreg;
		}

		if (src_lane_index != 0 || (vreg != src && (!(cpu_feature_list & CPU_FEATURE_AVX2) || !use_vex))) {
			pref = 0;

			/* Move the selected byte into the lowest byte: use a word shuffle
			   when the index is suitably aligned, a byte shift otherwise. */
			if ((src_lane_index & 0x3) == 0) {
				pref = EX86_PREF_66;
				byte = U8(src_lane_index >> 2);
			} else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
				pref = EX86_PREF_F2;
				byte = U8(src_lane_index >> 1);
			} else {
				if (!use_vex) {
					if (vreg != src)
						FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));

					FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, vreg, 0));
				} else
					FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, vreg, src, 0));

				FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
			}

			if (pref != 0) {
				if (use_vex)
					FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
				else
					FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
				FAIL_IF(emit_byte(compiler, byte));
			}

			src = vreg;
		}

		if (use_vex && (cpu_feature_list & CPU_FEATURE_AVX2))
			return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, 0);

		/* No VPBROADCASTB: shuffle with an all-zero selector instead. */
		SLJIT_ASSERT(reg_size == 4);
		FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
		return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, TMP_FREG, 0);
	}

	/* Lane 0 with AVX2: a single broadcast instruction suffices. */
	if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && src_lane_index == 0 && elem_size <= 3) {
		switch (elem_size) {
		case 1:
			pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		case 2:
			pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		default:
			pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		}

		if (reg_size == 5)
			pref |= VEX_256;

		return emit_vex_instruction(compiler, pref, vreg, 0, src, 0);
	}

	if (reg_size == 5) {
		switch (elem_size) {
		case 1:
			byte = U8(src_lane_index & 0x3);
			src_lane_index >>= 2;
			pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
			break;
		case 2:
			byte = U8(src_lane_index & 0x3);
			src_lane_index >>= 1;
			pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
			break;
		case 3:
			pref = 0;
			break;
		default:
			FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
			return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
		}

		if (pref != 0) {
			FAIL_IF(emit_vex_instruction(compiler, pref, vreg, 0, src, 0));
			byte = U8(byte | (byte << 2));
			FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));

			if (src_lane_index == 0)
				return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);

			src = vreg;
		}

		FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, vreg, 0, src, 0));
		byte = U8(src_lane_index);
		byte = U8(byte | (byte << 2));
		return emit_byte(compiler, U8(byte | (byte << 4)));
	}

	switch (elem_size) {
	case 1:
		/* Replicate the word within its dword first, then fall through to
		   broadcast that dword with PSHUFD. */
		byte = U8(src_lane_index & 0x3);
		src_lane_index >>= 1;
		pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;

		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, 0, src, 0));
		else
			FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, vreg, src, 0));
		byte = U8(byte | (byte << 2));
		FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));

		if ((cpu_feature_list & CPU_FEATURE_AVX2) && use_vex && pref == EX86_PREF_F2)
			return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, vreg, 0);

		src = vreg;
		/* fallthrough */
	case 2:
		byte = U8(src_lane_index);
		byte = U8(byte | (byte << 2));
		break;
	default:
		/* 64 bit lanes: select the two dwords of the chosen qword. */
		byte = U8(src_lane_index << 1);
		byte = U8(byte | (byte << 2) | 0x4);
		break;
	}

	if (use_vex)
		FAIL_IF(emit_vex_instruction(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, 0, src, 0));
	else
		FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, vreg, src, 0));
	return emit_byte(compiler, U8(byte | (byte << 4)));
}

/* Loads elem_size sized elements from src and widens them to elem2_size
   sized lanes in vreg (PMOVSX/PMOVZX family; CVTPS2PD for floats). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw
srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_u8 opcode;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	/* 256 bit vectors need AVX2; only 128/256 bit vectors are supported. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		use_vex = 1;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_FLOAT) {
		/* Only f32 -> f64 widening exists. */
		if (elem_size != 2 || elem2_size != 3)
			return SLJIT_ERR_UNSUPPORTED;

		if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		if (use_vex)
			return emit_vex_instruction(compiler, CVTPS2PD_x_xm | ((reg_size == 5) ? VEX_256 : 0) | EX86_SSE2, vreg, 0, src, srcw);
		return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, vreg, src, srcw);
	}

	/* Pick the PMOVSX/PMOVZX variant for the (elem_size, elem2_size) pair. */
	switch (elem_size) {
	case 0:
		if (elem2_size == 1)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
		else if (elem2_size == 2)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
		else if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	case 1:
		if (elem2_size == 2)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
		else if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	case 2:
		if (elem2_size == 3)
			opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
		else
			return SLJIT_ERR_UNSUPPORTED;
		break;
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (use_vex)
		return emit_vex_instruction(compiler, opcode | ((reg_size == 5) ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, 0, src, srcw);
	return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, vreg, src, srcw);
}

/* Collects the sign bit of every lane of vreg into the general purpose
   destination dst (PMOVMSKB / MOVMSKPS / MOVMSKPD style mask extract). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_s32 dst_r;
	sljit_uw op;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 4) {
		if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		op = EX86_PREF_66 | EX86_SSE2_OP2;

		switch (elem_size) {
		case 1:
			/* No 16 bit mask-extract: narrow word lanes to bytes first. */
			if (use_vex)
				FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, vreg, 0));
			else
				FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, vreg, 0));
			vreg = TMP_FREG;
			break;
		case 2:
			op = EX86_SSE2_OP2;
			break;
		}

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
		op |= (elem_size < 2) ?
PMOVMSKB_r_x : MOVMSKPS_r_x;

		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
		else
			FAIL_IF(emit_groupf(compiler, op, dst_r, vreg, 0));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

		/* Word lanes were packed to bytes: drop the duplicated upper mask. */
		if (elem_size == 1) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
			FAIL_IF(!inst);
			inst[1] |= SHR;
		}

		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

		return SLJIT_SUCCESS;
	}

	if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (elem_size == 1) {
		/* 256 bit word lanes: pack the upper half onto the lower half,
		   then extract the byte mask of the combined 128 bit vector. */
		FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, vreg, 0, TMP_FREG, 0));
		FAIL_IF(emit_byte(compiler, 1));
		FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, vreg, TMP_FREG, 0));
		FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
	} else {
		op = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;

		if (elem_size == 0)
			op = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
		else if (elem_size == 3)
			op |= EX86_PREF_66;

		FAIL_IF(emit_vex_instruction(compiler, op, dst_r, 0, vreg, 0));
	}

	if (dst_r == TMP_REG1) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

/* Copies one 128 bit vector register to another, choosing
   MOVAPS/MOVAPD/MOVDQA according to the element type of 'type'. */
static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src_vreg)
{
	sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;

	SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);

	if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
		op |= EX86_PREF_66;

	return emit_groupf(compiler, op, dst_vreg, src_vreg, 0);
}

/* Two-source SIMD operation: dst_vreg = src1_vreg OP src2 where OP is one
   of AND / OR / XOR / byte shuffle (PSHUFB). src2 may be memory. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 use_vex = (cpu_feature_list & CPU_FEATURE_AVX) && (compiler->options & SLJIT_ENTER_USE_VEX);
	sljit_uw op = 0;
	sljit_uw mov_op = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;
	case SLJIT_SIMD_OP2_OR:
		op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;
	case SLJIT_SIMD_OP2_XOR:
		op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;

		if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
			op |= EX86_PREF_66;
		break;

	case SLJIT_SIMD_OP2_SHUFFLE:
		if (reg_size != 4)
			return SLJIT_ERR_UNSUPPORTED;

		op = PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38;
		break;
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* A narrower-than-register memory source must be loaded separately
	   (unaligned load into the temporary vector register). */
	if ((src2 & SLJIT_MEM) && SLJIT_SIMD_GET_ELEM2_SIZE(type) < reg_size) {
		mov_op = ((type & SLJIT_SIMD_FLOAT) ? (MOVUPS_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0)) : (MOVDQU_x_xm | EX86_PREF_F3)) | EX86_SSE2;
		if (use_vex)
			FAIL_IF(emit_vex_instruction(compiler, mov_op, TMP_FREG, 0, src2, src2w));
		else
			FAIL_IF(emit_groupf(compiler, mov_op, TMP_FREG, src2, src2w));

		src2 = TMP_FREG;
		src2w = 0;
	}

	if (reg_size == 5 || use_vex) {
		if (reg_size == 5)
			op |= VEX_256;

		return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_vreg, src1_vreg, src2, src2w);
	}

	/* Non-VEX encoding is destructive: move src1 into dst first, taking
	   care not to clobber src2 when it aliases dst. */
	if (dst_vreg != src1_vreg) {
		if (dst_vreg == src2) {
			if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
				FAIL_IF(emit_simd_mov(compiler, type, TMP_FREG, src2));
				FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
				src2 = TMP_FREG;
				src2w = 0;
			} else
				src2 = src1_vreg;
		} else
			FAIL_IF(emit_simd_mov(compiler, type, dst_vreg, src1_vreg));
	}

	if (op & (VEX_OP_0F38 | VEX_OP_0F3A))
		return emit_groupf_ext(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
	return emit_groupf(compiler, op | EX86_SSE2, dst_vreg, src2, src2w);
}

/* Atomic load: emitted as a plain register load (sljit_emit_op1).
   Signed element loads and load/store-exclusive style are rejected. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (op &
SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
}

/* Atomic store via LOCK CMPXCHG [mem_reg], src_reg. CMPXCHG requires the
   expected value in EAX/RAX, so temp_reg is shuffled into R0 first and the
   clobbered registers are saved and restored around the instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_uw pref;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 saved_reg = TMP_REG1;
	sljit_s32 swap_tmp = 0;
	sljit_sw srcw = 0;
	sljit_sw tempw = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
	CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
	CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);

	SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
	SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));

	if ((op & SLJIT_ATOMIC_USE_LS) || GET_OPCODE(op) == SLJIT_MOV_S8 || GET_OPCODE(op) == SLJIT_MOV_S16 || GET_OPCODE(op) == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (temp_reg == SLJIT_TMP_DEST_REG) {
		/* Exchange EAX with the temporary so the expected value sits in EAX. */
		FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));

		if (src_reg == SLJIT_R0)
			src_reg = TMP_REG1;
		if (mem_reg == SLJIT_R0)
			mem_reg = TMP_REG1;

		temp_reg = SLJIT_R0;
		swap_tmp = 1;
	}

	/* Src is virtual register or its low byte is not accessible. */
	if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
		SLJIT_ASSERT(src_reg != SLJIT_R1 && temp_reg != SLJIT_TMP_DEST_REG);

		if (swap_tmp) {
			saved_reg = (mem_reg != SLJIT_R1) ? SLJIT_R1 : SLJIT_R2;

			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, saved_reg, 0);
			EMIT_MOV(compiler, saved_reg, 0, src_reg, srcw);
		} else
			EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);

		src_reg = saved_reg;

		if (mem_reg == src_reg)
			mem_reg = saved_reg;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* Move the expected value into R0 (EAX/RAX), relocating any operand
	   that currently occupies R0. */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;

		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_R0, 0);
		EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);

		if (src_reg == SLJIT_R0)
			src_reg = TMP_REG2;
		if (mem_reg == SLJIT_R0)
			mem_reg = TMP_REG2;
#else /* !SLJIT_CONFIG_X86_64 */
		SLJIT_ASSERT(!swap_tmp);

		if (src_reg == TMP_REG1) {
			if (mem_reg == SLJIT_R0) {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
				EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
				EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

				mem_reg = SLJIT_R1;
				saved_reg = SLJIT_R1;
			} else {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);
				saved_reg = SLJIT_R0;
			}
		} else {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R0, 0);
			EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

			if (src_reg == SLJIT_R0)
				src_reg = TMP_REG1;
			if (mem_reg == SLJIT_R0)
				mem_reg = TMP_REG1;
		}
#endif /* SLJIT_CONFIG_X86_64 */
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
#endif /* SLJIT_CONFIG_X86_64 */

	/* Lock prefix. */
	FAIL_IF(emit_byte(compiler, GROUP_LOCK));

	pref = 0;
	if (op == SLJIT_MOV_U16)
		pref = EX86_HALF_ARG | EX86_PREF_66;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Byte form needs a REX prefix to reach the low byte of any register. */
	if (op == SLJIT_MOV_U8)
		pref = EX86_REX;
#endif /* SLJIT_CONFIG_X86_64 */

	FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (swap_tmp) {
		SLJIT_ASSERT(temp_reg == SLJIT_R0);
		/* Undo the initial EAX swap and restore the saved register. */
		FAIL_IF(emit_byte(compiler, XCHG_EAX_r | reg_map[TMP_REG1]));

		if (saved_reg != TMP_REG1)
			return emit_mov(compiler, saved_reg, 0, SLJIT_MEM1(SLJIT_SP), 0);
		return SLJIT_SUCCESS;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* Restore the original value of R0. */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
		return emit_mov(compiler, SLJIT_R0, 0, TMP_REG2, 0);
#else /* !SLJIT_CONFIG_X86_64 */
		EMIT_MOV(compiler, SLJIT_R0, 0, (saved_reg == SLJIT_R0) ? SLJIT_MEM1(SLJIT_SP) : saved_reg, 0);
		if (saved_reg == SLJIT_R1)
			return emit_mov(compiler, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_SP), 0);
#endif /* SLJIT_CONFIG_X86_64 */
	}
	return SLJIT_SUCCESS;
}

/* Computes dst = SP + offset using LEA (or a plain MOV when offset is 0).
   On 64 bit targets an offset outside the signed 32 bit range is first
   materialized in TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP,
0);
}

/* Emits a patchable constant load (full-width immediate) and records it as
   a sljit_const so sljit_set_const can rewrite the value later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 64 bit: the immediate must go through a register (MOV r64, imm64). */
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Mark the previous instruction as a patchable constant. */
	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF(!inst);

	inst[0] = SLJIT_INST_CONST;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

/* Emits a patchable address load; the actual address is filled in during
   code generation and can be retargeted via sljit_set_jump_addr. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif /* SLJIT_CONFIG_X86_64 */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	PTR_FAIL_IF(emit_load_imm64(compiler, reg, 0));
	jump->addr = compiler->size;

	/* Registers r8..r15 need an extra REX prefix byte before the immediate. */
	if (reg_map[reg] >= 8)
		jump->flags |= MOV_ADDR_HI;
#else /* !SLJIT_CONFIG_X86_64 */
	PTR_FAIL_IF(emit_mov(compiler, dst, dstw, SLJIT_IMM, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	inst = (sljit_u8*)ensure_buf(compiler, 1);
	PTR_FAIL_IF(!inst);

	inst[0] = SLJIT_INST_MOV_ADDR;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_mov(compiler, dst, dstw, TMP_REG1, 0));
#endif /* SLJIT_CONFIG_X86_64 */

	return jump;
}

/* Rewrites the target stored at addr. On x86-32 the stored word is
   PC-relative (relative to the end of the 4 byte immediate); on x86-64 the
   absolute address is stored. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
#endif
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
}

/* Rewrites the immediate of an instruction created by sljit_emit_const. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
	sljit_unaligned_store_sw((void*)addr, new_constant);
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
}