/* Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c
 * (code-viewer residue: 9913 views) */
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)27{28return "LOONGARCH" SLJIT_CPUINFO;29}3031typedef sljit_u32 sljit_ins;3233#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)34#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)35#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)36#define TMP_ZERO 03738/* Flags are kept in volatile registers. 
*/39#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)40#define RETURN_ADDR_REG TMP_REG241#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)4243#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)44#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)4546static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {470, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 1548};4950static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {510, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 952};5354/* --------------------------------------------------------------------- */55/* Instrucion forms */56/* --------------------------------------------------------------------- */5758/*59LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):6061| Format name | Composition |62| 2R | Opcode + Rj + Rd |63| 3R | Opcode + Rk + Rj + Rd |64| 4R | Opcode + Ra + Rk + Rj + Rd |65| 2RI8 | Opcode + I8 + Rj + Rd |66| 2RI12 | Opcode + I12 + Rj + Rd |67| 2RI14 | Opcode + I14 + Rj + Rd |68| 2RI16 | Opcode + I16 + Rj + Rd |69| 1RI21 | Opcode + I21L + Rj + I21H |70| I26 | Opcode + I26L + I26H |7172Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.73I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and74lower parts in the instruction word, denoted by the “L” and “H” suffixes. 
*/7576#define RD(rd) ((sljit_ins)reg_map[rd])77#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)78#define RK(rk) ((sljit_ins)reg_map[rk] << 10)79#define RA(ra) ((sljit_ins)reg_map[ra] << 15)8081#define FD(fd) ((sljit_ins)reg_map[fd])82#define FRD(fd) ((sljit_ins)freg_map[fd])83#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)84#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)85#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)8687#define IMM_V(imm) ((sljit_ins)(imm) << 10)88#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)89#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)90#define IMM_I14(imm) (((sljit_ins)(imm)&0xfff3) << 10)91#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)92#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)93#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))94#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))9596#define OPC_I26(opc) ((sljit_ins)(opc) << 26)97#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)98#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)99#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)100#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)101#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)102#define OPC_4R(opc) ((sljit_ins)(opc) << 20)103#define OPC_3R(opc) ((sljit_ins)(opc) << 15)104#define OPC_2R(opc) ((sljit_ins)(opc) << 10)105#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)106107/* Arithmetic operation instructions */108#define ADD_W OPC_3R(0x20)109#define ADD_D OPC_3R(0x21)110#define SUB_W OPC_3R(0x22)111#define SUB_D OPC_3R(0x23)112#define ADDI_W OPC_2RI12(0xa)113#define ADDI_D OPC_2RI12(0xb)114#define ANDI OPC_2RI12(0xd)115#define ORI OPC_2RI12(0xe)116#define XORI OPC_2RI12(0xf)117#define ADDU16I_D OPC_2RI16(0x4)118#define LU12I_W OPC_1RI20(0xa)119#define LU32I_D OPC_1RI20(0xb)120#define LU52I_D OPC_2RI12(0xc)121#define SLT OPC_3R(0x24)122#define SLTU OPC_3R(0x25)123#define SLTI OPC_2RI12(0x8)124#define SLTUI OPC_2RI12(0x9)125#define 
PCADDI OPC_1RI20(0xc)126#define PCALAU12I OPC_1RI20(0xd)127#define PCADDU12I OPC_1RI20(0xe)128#define PCADDU18I OPC_1RI20(0xf)129#define NOR OPC_3R(0x28)130#define AND OPC_3R(0x29)131#define OR OPC_3R(0x2a)132#define XOR OPC_3R(0x2b)133#define ORN OPC_3R(0x2c)134#define ANDN OPC_3R(0x2d)135#define MUL_W OPC_3R(0x38)136#define MULH_W OPC_3R(0x39)137#define MULH_WU OPC_3R(0x3a)138#define MUL_D OPC_3R(0x3b)139#define MULH_D OPC_3R(0x3c)140#define MULH_DU OPC_3R(0x3d)141#define MULW_D_W OPC_3R(0x3e)142#define MULW_D_WU OPC_3R(0x3f)143#define DIV_W OPC_3R(0x40)144#define MOD_W OPC_3R(0x41)145#define DIV_WU OPC_3R(0x42)146#define MOD_WU OPC_3R(0x43)147#define DIV_D OPC_3R(0x44)148#define MOD_D OPC_3R(0x45)149#define DIV_DU OPC_3R(0x46)150#define MOD_DU OPC_3R(0x47)151152/* Bit-shift instructions */153#define SLL_W OPC_3R(0x2e)154#define SRL_W OPC_3R(0x2f)155#define SRA_W OPC_3R(0x30)156#define SLL_D OPC_3R(0x31)157#define SRL_D OPC_3R(0x32)158#define SRA_D OPC_3R(0x33)159#define ROTR_W OPC_3R(0x36)160#define ROTR_D OPC_3R(0x37)161#define SLLI_W OPC_3R(0x81)162#define SLLI_D ((sljit_ins)(0x41) << 16)163#define SRLI_W OPC_3R(0x89)164#define SRLI_D ((sljit_ins)(0x45) << 16)165#define SRAI_W OPC_3R(0x91)166#define SRAI_D ((sljit_ins)(0x49) << 16)167#define ROTRI_W OPC_3R(0x99)168#define ROTRI_D ((sljit_ins)(0x4d) << 16)169170/* Bit-manipulation instructions */171#define CLO_W OPC_2R(0x4)172#define CLZ_W OPC_2R(0x5)173#define CTO_W OPC_2R(0x6)174#define CTZ_W OPC_2R(0x7)175#define CLO_D OPC_2R(0x8)176#define CLZ_D OPC_2R(0x9)177#define CTO_D OPC_2R(0xa)178#define CTZ_D OPC_2R(0xb)179#define REVB_2H OPC_2R(0xc)180#define REVB_4H OPC_2R(0xd)181#define REVB_2W OPC_2R(0xe)182#define REVB_D OPC_2R(0xf)183#define REVH_2W OPC_2R(0x10)184#define REVH_D OPC_2R(0x11)185#define BITREV_4B OPC_2R(0x12)186#define BITREV_8B OPC_2R(0x13)187#define BITREV_W OPC_2R(0x14)188#define BITREV_D OPC_2R(0x15)189#define EXT_W_H OPC_2R(0x16)190#define EXT_W_B OPC_2R(0x17)191#define BSTRINS_W (0x1 << 22 
| 1 << 21)192#define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)193#define BSTRINS_D (0x2 << 22)194#define BSTRPICK_D (0x3 << 22)195196/* Branch instructions */197#define BEQZ OPC_1RI21(0x10)198#define BNEZ OPC_1RI21(0x11)199#define JIRL OPC_2RI16(0x13)200#define B OPC_I26(0x14)201#define BL OPC_I26(0x15)202#define BEQ OPC_2RI16(0x16)203#define BNE OPC_2RI16(0x17)204#define BLT OPC_2RI16(0x18)205#define BGE OPC_2RI16(0x19)206#define BLTU OPC_2RI16(0x1a)207#define BGEU OPC_2RI16(0x1b)208209/* Memory access instructions */210#define LD_B OPC_2RI12(0xa0)211#define LD_H OPC_2RI12(0xa1)212#define LD_W OPC_2RI12(0xa2)213#define LD_D OPC_2RI12(0xa3)214215#define ST_B OPC_2RI12(0xa4)216#define ST_H OPC_2RI12(0xa5)217#define ST_W OPC_2RI12(0xa6)218#define ST_D OPC_2RI12(0xa7)219220#define LD_BU OPC_2RI12(0xa8)221#define LD_HU OPC_2RI12(0xa9)222#define LD_WU OPC_2RI12(0xaa)223224#define LDX_B OPC_3R(0x7000)225#define LDX_H OPC_3R(0x7008)226#define LDX_W OPC_3R(0x7010)227#define LDX_D OPC_3R(0x7018)228229#define STX_B OPC_3R(0x7020)230#define STX_H OPC_3R(0x7028)231#define STX_W OPC_3R(0x7030)232#define STX_D OPC_3R(0x7038)233234#define LDX_BU OPC_3R(0x7040)235#define LDX_HU OPC_3R(0x7048)236#define LDX_WU OPC_3R(0x7050)237238#define PRELD OPC_2RI12(0xab)239240/* Atomic memory access instructions */241#define LL_W OPC_2RI14(0x20)242#define SC_W OPC_2RI14(0x21)243#define LL_D OPC_2RI14(0x22)244#define SC_D OPC_2RI14(0x23)245246/* LoongArch V1.10 Instructions */247#define AMCAS_B OPC_3R(0x70B0)248#define AMCAS_H OPC_3R(0x70B1)249#define AMCAS_W OPC_3R(0x70B2)250#define AMCAS_D OPC_3R(0x70B3)251252/* Memory barrier instructions */253#define DBAR OPC_3R(0x70e4)254255/* Other instructions */256#define BREAK OPC_3R(0x54)257#define DBGCALL OPC_3R(0x55)258#define SYSCALL OPC_3R(0x56)259260/* Basic Floating-Point Instructions */261/* Floating-Point Arithmetic Operation Instructions */262#define FADD_S OPC_3R(0x201)263#define FADD_D OPC_3R(0x202)264#define FSUB_S OPC_3R(0x205)265#define 
FSUB_D OPC_3R(0x206)266#define FMUL_S OPC_3R(0x209)267#define FMUL_D OPC_3R(0x20a)268#define FDIV_S OPC_3R(0x20d)269#define FDIV_D OPC_3R(0x20e)270#define FCMP_COND_S OPC_4R(0xc1)271#define FCMP_COND_D OPC_4R(0xc2)272#define FCOPYSIGN_S OPC_3R(0x225)273#define FCOPYSIGN_D OPC_3R(0x226)274#define FSEL OPC_4R(0xd0)275#define FABS_S OPC_2R(0x4501)276#define FABS_D OPC_2R(0x4502)277#define FNEG_S OPC_2R(0x4505)278#define FNEG_D OPC_2R(0x4506)279#define FMOV_S OPC_2R(0x4525)280#define FMOV_D OPC_2R(0x4526)281282/* Floating-Point Conversion Instructions */283#define FCVT_S_D OPC_2R(0x4646)284#define FCVT_D_S OPC_2R(0x4649)285#define FTINTRZ_W_S OPC_2R(0x46a1)286#define FTINTRZ_W_D OPC_2R(0x46a2)287#define FTINTRZ_L_S OPC_2R(0x46a9)288#define FTINTRZ_L_D OPC_2R(0x46aa)289#define FFINT_S_W OPC_2R(0x4744)290#define FFINT_S_L OPC_2R(0x4746)291#define FFINT_D_W OPC_2R(0x4748)292#define FFINT_D_L OPC_2R(0x474a)293294/* Floating-Point Move Instructions */295#define FMOV_S OPC_2R(0x4525)296#define FMOV_D OPC_2R(0x4526)297#define MOVGR2FR_W OPC_2R(0x4529)298#define MOVGR2FR_D OPC_2R(0x452a)299#define MOVGR2FRH_W OPC_2R(0x452b)300#define MOVFR2GR_S OPC_2R(0x452d)301#define MOVFR2GR_D OPC_2R(0x452e)302#define MOVFRH2GR_S OPC_2R(0x452f)303#define MOVGR2FCSR OPC_2R(0x4530)304#define MOVFCSR2GR OPC_2R(0x4532)305#define MOVFR2CF OPC_2R(0x4534)306#define MOVCF2FR OPC_2R(0x4535)307#define MOVGR2CF OPC_2R(0x4536)308#define MOVCF2GR OPC_2R(0x4537)309310/* Floating-Point Branch Instructions */311#define BCEQZ OPC_I26(0x12)312#define BCNEZ OPC_I26(0x12)313314/* Floating-Point Common Memory Access Instructions */315#define FLD_S OPC_2RI12(0xac)316#define FLD_D OPC_2RI12(0xae)317#define FST_S OPC_2RI12(0xad)318#define FST_D OPC_2RI12(0xaf)319320#define FLDX_S OPC_3R(0x7060)321#define FLDX_D OPC_3R(0x7068)322#define FSTX_S OPC_3R(0x7070)323#define FSTX_D OPC_3R(0x7078)324325/* Vector Instructions */326327/* Vector Arithmetic Instructions */328#define VOR_V OPC_3R(0xe24d)329#define VXOR_V 
OPC_3R(0xe24e)330#define VAND_V OPC_3R(0xe24c)331#define VMSKLTZ OPC_2R(0x1ca710)332333/* Vector Memory Access Instructions */334#define VLD OPC_2RI12(0xb0)335#define VST OPC_2RI12(0xb1)336#define XVLD OPC_2RI12(0xb2)337#define XVST OPC_2RI12(0xb3)338#define VSTELM OPC_2RI8(0xc40)339340/* Vector Float Conversion Instructions */341#define VFCVTL_D_S OPC_2R(0x1ca77c)342343/* Vector Bit Manipulate Instructions */344#define VSLLWIL OPC_2R(0x1cc200)345346/* Vector Move And Shuffle Instructions */347#define VLDREPL OPC_2R(0xc0000)348#define VINSGR2VR OPC_2R(0x1cbac0)349#define VPICKVE2GR_U OPC_2R(0x1cbce0)350#define VREPLGR2VR OPC_2R(0x1ca7c0)351#define VREPLVE OPC_3R(0xe244)352#define VREPLVEI OPC_2R(0x1cbde0)353#define VSHUF_B OPC_4R(0xd5)354#define XVPERMI OPC_2RI8(0x1dfa)355356#define I12_MAX (0x7ff)357#define I12_MIN (-0x800)358#define BRANCH16_MAX (0x7fff << 2)359#define BRANCH16_MIN (-(0x8000 << 2))360#define BRANCH21_MAX (0xfffff << 2)361#define BRANCH21_MIN (-(0x100000 << 2))362#define JUMP_MAX (0x1ffffff << 2)363#define JUMP_MIN (-(0x2000000 << 2))364#define JIRL_MAX (0x7fff << 2)365#define JIRL_MIN (-(0x8000 << 2))366367#define S32_MAX (0x7fffffffl)368#define S32_MIN (-0x80000000l)369#define S52_MAX (0x7ffffffffffffl)370371#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? 
inst##_W : inst##_D))372373/* LoongArch CPUCFG register for feature detection */374#define LOONGARCH_CFG2 0x02375#define LOONGARCH_CFG2_LAMCAS (1 << 28)376377static sljit_u32 cfg2_feature_list = 0;378379/* According to Software Development and Build Convention for LoongArch Architectures,380+ the status of LSX and LASX extension must be checked through HWCAP */381#include <sys/auxv.h>382383#define LOONGARCH_HWCAP_LSX (1 << 4)384#define LOONGARCH_HWCAP_LASX (1 << 5)385386static sljit_u32 hwcap_feature_list = 0;387388/* Feature type */389#define GET_CFG2 0390#define GET_HWCAP 1391392#define LOONGARCH_SUPPORT_AMCAS (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2))393394static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)395{396if (cfg2_feature_list == 0)397__asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));398if (hwcap_feature_list == 0)399hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);400401return feature_type ? hwcap_feature_list : cfg2_feature_list;402}403404static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)405{406sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));407FAIL_IF(!ptr);408*ptr = ins;409compiler->size++;410return SLJIT_SUCCESS;411}412413static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)414{415sljit_sw diff;416sljit_uw target_addr;417sljit_uw jump_addr = (sljit_uw)code_ptr;418sljit_uw orig_addr = jump->addr;419SLJIT_UNUSED_ARG(executable_offset);420421jump->addr = jump_addr;422if (jump->flags & SLJIT_REWRITABLE_JUMP)423goto exit;424425if (jump->flags & JUMP_ADDR)426target_addr = jump->u.target;427else {428SLJIT_ASSERT(jump->u.label != NULL);429target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);430431if (jump->u.label->size > orig_addr)432jump_addr = (sljit_uw)(code + orig_addr);433}434435diff = (sljit_sw)target_addr - 
(sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);436437if (jump->flags & IS_COND) {438diff += SSIZE_OF(ins);439440if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {441code_ptr--;442code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000;443jump->flags |= PATCH_B;444jump->addr = (sljit_uw)code_ptr;445return code_ptr;446}447448diff -= SSIZE_OF(ins);449}450451if (diff >= JUMP_MIN && diff <= JUMP_MAX) {452if (jump->flags & IS_COND) {453code_ptr[-1] |= (sljit_ins)IMM_I16(2);454}455456jump->flags |= PATCH_J;457return code_ptr;458}459460if (diff >= S32_MIN && diff <= S32_MAX) {461if (jump->flags & IS_COND)462code_ptr[-1] |= (sljit_ins)IMM_I16(3);463464jump->flags |= PATCH_REL32;465code_ptr[1] = code_ptr[0];466return code_ptr + 1;467}468469if (target_addr <= (sljit_uw)S32_MAX) {470if (jump->flags & IS_COND)471code_ptr[-1] |= (sljit_ins)IMM_I16(3);472473jump->flags |= PATCH_ABS32;474code_ptr[1] = code_ptr[0];475return code_ptr + 1;476}477478if (target_addr <= S52_MAX) {479if (jump->flags & IS_COND)480code_ptr[-1] |= (sljit_ins)IMM_I16(4);481482jump->flags |= PATCH_ABS52;483code_ptr[2] = code_ptr[0];484return code_ptr + 2;485}486487exit:488if (jump->flags & IS_COND)489code_ptr[-1] |= (sljit_ins)IMM_I16(5);490code_ptr[3] = code_ptr[0];491return code_ptr + 3;492}493494static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)495{496sljit_uw addr;497sljit_uw jump_addr = (sljit_uw)code_ptr;498sljit_sw diff;499SLJIT_UNUSED_ARG(executable_offset);500501SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));502if (jump->flags & JUMP_ADDR)503addr = jump->u.target;504else {505addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);506507if (jump->u.label->size > jump->addr)508jump_addr = (sljit_uw)(code + jump->addr);509}510511diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);512513if (diff >= S32_MIN && diff <= S32_MAX) 
{514SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));515jump->flags |= PATCH_REL32;516return 1;517}518519if (addr <= S32_MAX) {520SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));521jump->flags |= PATCH_ABS32;522return 1;523}524525if (addr <= S52_MAX) {526SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));527jump->flags |= PATCH_ABS52;528return 2;529}530531SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));532return 3;533}534535static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)536{537sljit_uw flags = jump->flags;538sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;539sljit_ins *ins = (sljit_ins*)jump->addr;540sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;541SLJIT_UNUSED_ARG(executable_offset);542543if (flags & PATCH_REL32) {544addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);545546SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);547548if ((addr & 0x800) != 0)549addr += 0x1000;550551ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);552553if (!(flags & JUMP_MOV_ADDR)) {554SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);555ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);556} else557ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);558return;559}560561if (flags & PATCH_ABS32) {562SLJIT_ASSERT(addr <= S32_MAX);563ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);564} else if (flags & PATCH_ABS52) {565ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);566ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);567ins += 1;568} else {569ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);570ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);571ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);572ins += 2;573}574575if (!(flags & JUMP_MOV_ADDR)) {576SLJIT_ASSERT((ins[1] 
& OPC_2RI16(0x3f)) == JIRL);577ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);578} else579ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);580}581582static void reduce_code_size(struct sljit_compiler *compiler)583{584struct sljit_label *label;585struct sljit_jump *jump;586struct sljit_const *const_;587SLJIT_NEXT_DEFINE_TYPES;588sljit_uw total_size;589sljit_uw size_reduce = 0;590sljit_sw diff;591592label = compiler->labels;593jump = compiler->jumps;594const_ = compiler->consts;595596SLJIT_NEXT_INIT_TYPES();597598while (1) {599SLJIT_GET_NEXT_MIN();600601if (next_min_addr == SLJIT_MAX_ADDRESS)602break;603604if (next_min_addr == next_label_size) {605label->size -= size_reduce;606607label = label->next;608next_label_size = SLJIT_GET_NEXT_SIZE(label);609}610611if (next_min_addr == next_const_addr) {612const_->addr -= size_reduce;613const_ = const_->next;614next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);615continue;616}617618if (next_min_addr != next_jump_addr)619continue;620621jump->addr -= size_reduce;622if (!(jump->flags & JUMP_MOV_ADDR)) {623total_size = JUMP_MAX_SIZE;624625if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {626if (jump->flags & JUMP_ADDR) {627if (jump->u.target <= S32_MAX)628total_size = 2;629else if (jump->u.target <= S52_MAX)630total_size = 3;631} else {632/* Unit size: instruction. 
*/633diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;634if (jump->u.label->size > jump->addr) {635SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);636diff -= (sljit_sw)size_reduce;637}638639if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))640total_size = 0;641else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))642total_size = 1;643else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))644total_size = 2;645}646}647648size_reduce += JUMP_MAX_SIZE - total_size;649jump->flags |= total_size << JUMP_SIZE_SHIFT;650} else {651total_size = 3;652653if (!(jump->flags & JUMP_ADDR)) {654/* Real size minus 1. Unit size: instruction. */655diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;656if (jump->u.label->size > jump->addr) {657SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);658diff -= (sljit_sw)size_reduce;659}660661if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))662total_size = 1;663} else if (jump->u.target < S32_MAX)664total_size = 1;665else if (jump->u.target <= S52_MAX)666total_size = 2;667668size_reduce += 3 - total_size;669jump->flags |= total_size << JUMP_SIZE_SHIFT;670}671672jump = jump->next;673next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);674}675676compiler->size -= size_reduce;677}678679SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)680{681struct sljit_memory_fragment *buf;682sljit_ins *code;683sljit_ins *code_ptr;684sljit_ins *buf_ptr;685sljit_ins *buf_end;686sljit_uw word_count;687SLJIT_NEXT_DEFINE_TYPES;688sljit_sw executable_offset;689sljit_uw addr;690691struct sljit_label *label;692struct sljit_jump *jump;693struct sljit_const *const_;694695CHECK_ERROR_PTR();696CHECK_PTR(check_sljit_generate_code(compiler));697698reduce_code_size(compiler);699700code = 
(sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);701PTR_FAIL_WITH_EXEC_IF(code);702703reverse_buf(compiler);704buf = compiler->buf;705706code_ptr = code;707word_count = 0;708label = compiler->labels;709jump = compiler->jumps;710const_ = compiler->consts;711SLJIT_NEXT_INIT_TYPES();712SLJIT_GET_NEXT_MIN();713714do {715buf_ptr = (sljit_ins*)buf->memory;716buf_end = buf_ptr + (buf->used_size >> 2);717do {718*code_ptr = *buf_ptr++;719if (next_min_addr == word_count) {720SLJIT_ASSERT(!label || label->size >= word_count);721SLJIT_ASSERT(!jump || jump->addr >= word_count);722SLJIT_ASSERT(!const_ || const_->addr >= word_count);723724/* These structures are ordered by their address. */725if (next_min_addr == next_label_size) {726label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);727label->size = (sljit_uw)(code_ptr - code);728label = label->next;729next_label_size = SLJIT_GET_NEXT_SIZE(label);730}731732if (next_min_addr == next_jump_addr) {733if (!(jump->flags & JUMP_MOV_ADDR)) {734word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);735code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);736SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));737} else {738word_count += jump->flags >> JUMP_SIZE_SHIFT;739addr = (sljit_uw)code_ptr;740code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);741jump->addr = addr;742}743jump = jump->next;744next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);745} else if (next_min_addr == next_const_addr) {746const_->addr = (sljit_uw)code_ptr;747const_ = const_->next;748next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);749}750751SLJIT_GET_NEXT_MIN();752}753code_ptr++;754word_count++;755} while (buf_ptr < buf_end);756757buf = buf->next;758} while (buf);759760if (label && label->size == word_count) {761label->u.addr = 
(sljit_uw)code_ptr;762label->size = (sljit_uw)(code_ptr - code);763label = label->next;764}765766SLJIT_ASSERT(!label);767SLJIT_ASSERT(!jump);768SLJIT_ASSERT(!const_);769SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);770771jump = compiler->jumps;772while (jump) {773do {774if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {775load_addr_to_reg(jump, executable_offset);776break;777}778779addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;780buf_ptr = (sljit_ins *)jump->addr;781addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);782783if (jump->flags & PATCH_B) {784SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);785buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);786break;787}788789SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);790if (jump->flags & IS_CALL)791buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);792else793buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);794} while (0);795jump = jump->next;796}797798compiler->error = SLJIT_ERR_COMPILED;799compiler->executable_offset = executable_offset;800compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);801802code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);803code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);804805SLJIT_CACHE_FLUSH(code, code_ptr);806SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);807return code;808}809810SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)811{812switch (feature_type)813{814case SLJIT_HAS_FPU:815#ifdef SLJIT_IS_FPU_AVAILABLE816return (SLJIT_IS_FPU_AVAILABLE) != 0;817#else818/* Available by default. 
*/819return 1;820#endif821822case SLJIT_HAS_LASX:823return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));824825case SLJIT_HAS_SIMD:826return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));827828case SLJIT_HAS_CLZ:829case SLJIT_HAS_CTZ:830case SLJIT_HAS_REV:831case SLJIT_HAS_ROT:832case SLJIT_HAS_PREFETCH:833case SLJIT_HAS_COPY_F32:834case SLJIT_HAS_COPY_F64:835case SLJIT_HAS_ATOMIC:836case SLJIT_HAS_MEMORY_BARRIER:837return 1;838839default:840return 0;841}842}843844SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)845{846SLJIT_UNUSED_ARG(type);847848return 0;849}850851/* --------------------------------------------------------------------- */852/* Entry, exit */853/* --------------------------------------------------------------------- */854855/* Creates an index in data_transfer_insts array. */856#define LOAD_DATA 0x01857#define WORD_DATA 0x00858#define BYTE_DATA 0x02859#define HALF_DATA 0x04860#define INT_DATA 0x06861#define SIGNED_DATA 0x08862/* Separates integer and floating point registers */863#define GPR_REG 0x0f864#define DOUBLE_DATA 0x10865#define SINGLE_DATA 0x12866867#define MEM_MASK 0x1f868869#define ARG_TEST 0x00020870#define ALT_KEEP_CACHE 0x00040871#define CUMULATIVE_OP 0x00080872#define IMM_OP 0x00100873#define MOVE_OP 0x00200874#define SRC2_IMM 0x00400875876#define UNUSED_DEST 0x00800877#define REG_DEST 0x01000878#define REG1_SOURCE 0x02000879#define REG2_SOURCE 0x04000880#define SLOW_SRC1 0x08000881#define SLOW_SRC2 0x10000882#define SLOW_DEST 0x20000883#define MEM_USE_TMP2 0x40000884885#define STACK_STORE ST_D886#define STACK_LOAD LD_D887888static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)889{890if (imm <= I12_MAX && imm >= I12_MIN)891return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));892893if (imm <= 0x7fffffffl && imm >= -0x80000000l) {894FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));895return 
push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));896} else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {897FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));898FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));899return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));900}901FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));902FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));903FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));904return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));905}906907#define STACK_MAX_DISTANCE (-I12_MIN)908909static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);910911SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,912sljit_s32 options, sljit_s32 arg_types,913sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)914{915sljit_s32 fscratches;916sljit_s32 fsaveds;917sljit_s32 i, tmp, offset;918sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);919920CHECK_ERROR();921CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));922set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);923924scratches = ENTER_GET_REGS(scratches);925saveds = ENTER_GET_REGS(saveds);926fscratches = compiler->fscratches;927fsaveds = compiler->fsaveds;928local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);929local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);930931local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;932compiler->local_size = local_size;933934if (local_size <= STACK_MAX_DISTANCE) {935/* Frequent case. 
*/936FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));937offset = local_size - SSIZE_OF(sw);938local_size = 0;939} else {940FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));941local_size -= STACK_MAX_DISTANCE;942943if (local_size > STACK_MAX_DISTANCE)944FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));945offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);946}947948FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));949950tmp = SLJIT_S0 - saveds;951for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {952offset -= SSIZE_OF(sw);953FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));954}955956for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {957offset -= SSIZE_OF(sw);958FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));959}960961tmp = SLJIT_FS0 - fsaveds;962for (i = SLJIT_FS0; i > tmp; i--) {963offset -= SSIZE_OF(f64);964FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));965}966967for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {968offset -= SSIZE_OF(f64);969FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));970}971972if (local_size > STACK_MAX_DISTANCE)973FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));974else if (local_size > 0)975FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));976977if (options & SLJIT_ENTER_REG_ARG)978return SLJIT_SUCCESS;979980arg_types >>= SLJIT_ARG_SHIFT;981saved_arg_count = 0;982tmp = SLJIT_R0;983984while (arg_types > 0) {985if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {986if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {987FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));988saved_arg_count++;989}990tmp++;991}992993arg_types >>= 
SLJIT_ARG_SHIFT;994}995996return SLJIT_SUCCESS;997}998999#undef STACK_MAX_DISTANCE10001001SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,1002sljit_s32 options, sljit_s32 arg_types,1003sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)1004{1005sljit_s32 fscratches;1006sljit_s32 fsaveds;10071008CHECK_ERROR();1009CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));1010set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);10111012scratches = ENTER_GET_REGS(scratches);1013saveds = ENTER_GET_REGS(saveds);1014fscratches = compiler->fscratches;1015fsaveds = compiler->fsaveds;1016local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);1017local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);10181019compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;10201021return SLJIT_SUCCESS;1022}10231024#define STACK_MAX_DISTANCE (-I12_MIN - 16)10251026static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)1027{1028sljit_s32 i, tmp, offset;1029sljit_s32 local_size = compiler->local_size;10301031if (local_size > STACK_MAX_DISTANCE) {1032local_size -= STACK_MAX_DISTANCE;10331034if (local_size > STACK_MAX_DISTANCE) {1035FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));1036FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));1037} else1038FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));10391040local_size = STACK_MAX_DISTANCE;1041}10421043SLJIT_ASSERT(local_size > 0);10441045offset = local_size - SSIZE_OF(sw);1046if (!is_return_to)1047FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));10481049tmp = SLJIT_S0 - compiler->saveds;1050for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {1051offset -= 
SSIZE_OF(sw);1052FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));1053}10541055for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {1056offset -= SSIZE_OF(sw);1057FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));1058}10591060tmp = SLJIT_FS0 - compiler->fsaveds;1061for (i = SLJIT_FS0; i > tmp; i--) {1062offset -= SSIZE_OF(f64);1063FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));1064}10651066for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {1067offset -= SSIZE_OF(f64);1068FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));1069}10701071return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));1072}10731074#undef STACK_MAX_DISTANCE10751076SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)1077{1078CHECK_ERROR();1079CHECK(check_sljit_emit_return_void(compiler));10801081FAIL_IF(emit_stack_frame_release(compiler, 0));1082return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));1083}10841085SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,1086sljit_s32 src, sljit_sw srcw)1087{1088CHECK_ERROR();1089CHECK(check_sljit_emit_return_to(compiler, src, srcw));10901091if (src & SLJIT_MEM) {1092ADJUST_LOCAL_OFFSET(src, srcw);1093FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));1094src = TMP_REG1;1095srcw = 0;1096} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {1097FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));1098src = TMP_REG1;1099srcw = 0;1100}11011102FAIL_IF(emit_stack_frame_release(compiler, 1));11031104SLJIT_SKIP_CHECKS(compiler);1105return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);1106}11071108/* --------------------------------------------------------------------- */1109/* 
Operators */1110/* --------------------------------------------------------------------- */11111112static const sljit_ins data_transfer_insts[16 + 4] = {1113/* u w s */ ST_D /* st.d */,1114/* u w l */ LD_D /* ld.d */,1115/* u b s */ ST_B /* st.b */,1116/* u b l */ LD_BU /* ld.bu */,1117/* u h s */ ST_H /* st.h */,1118/* u h l */ LD_HU /* ld.hu */,1119/* u i s */ ST_W /* st.w */,1120/* u i l */ LD_WU /* ld.wu */,11211122/* s w s */ ST_D /* st.d */,1123/* s w l */ LD_D /* ld.d */,1124/* s b s */ ST_B /* st.b */,1125/* s b l */ LD_B /* ld.b */,1126/* s h s */ ST_H /* st.h */,1127/* s h l */ LD_H /* ld.h */,1128/* s i s */ ST_W /* st.w */,1129/* s i l */ LD_W /* ld.w */,11301131/* d s */ FST_D /* fst.d */,1132/* d l */ FLD_D /* fld.d */,1133/* s s */ FST_S /* fst.s */,1134/* s l */ FLD_S /* fld.s */,1135};11361137static const sljit_ins data_transfer_insts_x[16 + 4] = {1138/* u w s */ STX_D /* stx.d */,1139/* u w l */ LDX_D /* ldx.d */,1140/* u b s */ STX_B /* stx.b */,1141/* u b l */ LDX_BU /* ldx.bu */,1142/* u h s */ STX_H /* stx.h */,1143/* u h l */ LDX_HU /* ldx.hu */,1144/* u i s */ STX_W /* stx.w */,1145/* u i l */ LDX_WU /* ldx.wu */,11461147/* s w s */ STX_D /* stx.d */,1148/* s w l */ LDX_D /* ldx.d */,1149/* s b s */ STX_B /* stx.b */,1150/* s b l */ LDX_B /* ldx.b */,1151/* s h s */ STX_H /* stx.h */,1152/* s h l */ LDX_H /* ldx.h */,1153/* s i s */ STX_W /* stx.w */,1154/* s i l */ LDX_W /* ldx.w */,11551156/* d s */ FSTX_D /* fstx.d */,1157/* d l */ FLDX_D /* fldx.d */,1158/* s s */ FSTX_S /* fstx.s */,1159/* s l */ FLDX_S /* fldx.s */,1160};11611162static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)1163{1164sljit_ins ins;1165sljit_s32 base = arg & REG_MASK;11661167SLJIT_ASSERT(arg & SLJIT_MEM);11681169if (arg & OFFS_REG_MASK) {1170sljit_s32 offs = OFFS_REG(arg);11711172SLJIT_ASSERT(!argw);1173ins = data_transfer_insts_x[flags & MEM_MASK] |1174((flags & MEM_MASK) <= GPR_REG ? 
RD(reg) : FRD(reg)) |1175RJ(base) | RK(offs);1176} else {1177SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);11781179ins = data_transfer_insts[flags & MEM_MASK] |1180((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |1181RJ(base) | IMM_I12(argw);1182}1183return push_inst(compiler, ins);1184}11851186/* Can perform an operation using at most 1 instruction. */1187static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)1188{1189SLJIT_ASSERT(arg & SLJIT_MEM);11901191/* argw == 0 (ldx/stx rd, rj, rk) can be used.1192* argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */1193if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {1194/* Works for both absolute and relative addresses. */1195if (SLJIT_UNLIKELY(flags & ARG_TEST))1196return 1;11971198FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));1199return -1;1200}1201return 0;1202}12031204#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))12051206/* See getput_arg below.1207Note: can_cache is called only for binary operators. */1208static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)1209{1210SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));12111212if (arg & OFFS_REG_MASK)1213return 0;12141215if (arg == next_arg) {1216if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)1217|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))1218return 1;1219return 0;1220}12211222return 0;1223}12241225/* Emit the necessary instructions. See can_cache above. */1226static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)1227{1228sljit_s32 base = arg & REG_MASK;1229sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? 
TMP_REG2 : TMP_REG1;1230sljit_sw offset;12311232SLJIT_ASSERT(arg & SLJIT_MEM);1233if (!(next_arg & SLJIT_MEM)) {1234next_arg = 0;1235next_argw = 0;1236}12371238if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {1239argw &= 0x3;12401241if (SLJIT_UNLIKELY(argw))1242FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));1243return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);1244}12451246if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)1247return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);12481249if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {1250offset = argw - compiler->cache_argw;1251} else {1252sljit_sw argw_hi=TO_ARGW_HI(argw);1253compiler->cache_arg = SLJIT_MEM;12541255if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {1256FAIL_IF(load_immediate(compiler, TMP_REG3, argw));1257compiler->cache_argw = argw;1258offset = 0;1259} else {1260FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));1261compiler->cache_argw = argw_hi;1262offset = argw & 0xfff;1263argw = argw_hi;1264}1265}12661267if (!base)1268return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);12691270if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {1271compiler->cache_arg = arg;1272FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));1273return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);1274}12751276if (!offset)1277return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);12781279FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));1280return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);1281}12821283static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, 
sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)1284{1285sljit_s32 base = arg & REG_MASK;1286sljit_s32 tmp_r = TMP_REG1;12871288if (getput_arg_fast(compiler, flags, reg, arg, argw))1289return compiler->error;12901291if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))1292tmp_r = reg;12931294if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {1295argw &= 0x3;12961297if (SLJIT_UNLIKELY(argw))1298FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));1299return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);1300} else {1301FAIL_IF(load_immediate(compiler, tmp_r, argw));13021303if (base != 0)1304return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);1305return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);1306}1307}13081309static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)1310{1311if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))1312return compiler->error;1313return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);1314}13151316#define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? 
(v) : (32 + (v))))13171318/* andi/ori/xori are zero-extended */1319#define EMIT_LOGICAL(op_imm, op_reg) \1320if (flags & SRC2_IMM) { \1321if (op & SLJIT_SET_Z) {\1322FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \1323FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \1324} \1325if (!(flags & UNUSED_DEST)) { \1326if (dst == src1) { \1327FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \1328FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \1329} else { \1330FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \1331FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \1332} \1333} \1334} else { \1335if (op & SLJIT_SET_Z) \1336FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \1337if (!(flags & UNUSED_DEST)) \1338FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \1339} \1340while (0)13411342#define EMIT_SHIFT(imm, reg) \1343op_imm = (imm); \1344op_reg = (reg)13451346static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,1347sljit_s32 dst, sljit_s32 src1, sljit_sw src2)1348{1349sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;1350sljit_ins op_imm, op_reg;1351sljit_ins word_size = ((op & SLJIT_32) ? 
32 : 64);13521353switch (GET_OPCODE(op)) {1354case SLJIT_MOV:1355SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1356if (dst != src2)1357return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));1358return SLJIT_SUCCESS;13591360case SLJIT_MOV_U8:1361SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1362if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1363return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));1364SLJIT_ASSERT(dst == src2);1365return SLJIT_SUCCESS;13661367case SLJIT_MOV_S8:1368SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1369if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1370return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));1371SLJIT_ASSERT(dst == src2);1372return SLJIT_SUCCESS;13731374case SLJIT_MOV_U16:1375SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1376if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1377return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));1378SLJIT_ASSERT(dst == src2);1379return SLJIT_SUCCESS;13801381case SLJIT_MOV_S16:1382SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1383if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1384return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));1385SLJIT_ASSERT(dst == src2);1386return SLJIT_SUCCESS;13871388case SLJIT_MOV_U32:1389SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1390if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1391return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));1392SLJIT_ASSERT(dst == src2);1393return SLJIT_SUCCESS;13941395case SLJIT_MOV_S32:1396SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1397if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))1398return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));1399SLJIT_ASSERT(dst == src2);1400return SLJIT_SUCCESS;14011402case SLJIT_CLZ:1403SLJIT_ASSERT(src1 == TMP_ZERO && 
!(flags & SRC2_IMM));1404return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));14051406case SLJIT_CTZ:1407SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1408return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));14091410case SLJIT_REV:1411SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1412return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));14131414case SLJIT_REV_S16:1415SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1416FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));1417return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));14181419case SLJIT_REV_U16:1420SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));1421FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));1422return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));14231424case SLJIT_REV_S32:1425SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);1426FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));1427return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));14281429case SLJIT_REV_U32:1430SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);1431FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));1432return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));14331434case SLJIT_ADD:1435/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */1436is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;1437carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;14381439if (flags & SRC2_IMM) {1440if (is_overflow) {1441if (src2 >= 0)1442FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));1443else {1444FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));1445FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));1446}1447} else if (op & SLJIT_SET_Z)1448FAIL_IF(push_inst(compiler, INST(ADDI, op) 
| RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));14491450/* Only the zero flag is needed. */1451if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))1452FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));1453} else {1454if (is_overflow)1455FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));1456else if (op & SLJIT_SET_Z)1457FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));14581459if (is_overflow || carry_src_r != 0) {1460if (src1 != dst)1461carry_src_r = (sljit_s32)src1;1462else if (src2 != dst)1463carry_src_r = (sljit_s32)src2;1464else {1465FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));1466carry_src_r = OTHER_FLAG;1467}1468}14691470/* Only the zero flag is needed. */1471if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))1472FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));1473}14741475/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. 
*/1476if (is_overflow || carry_src_r != 0) {1477if (flags & SRC2_IMM)1478FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));1479else1480FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));1481}14821483if (!is_overflow)1484return SLJIT_SUCCESS;14851486FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));1487if (op & SLJIT_SET_Z)1488FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));1489FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));1490return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));14911492case SLJIT_ADDC:1493carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;14941495if (flags & SRC2_IMM) {1496FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));1497} else {1498if (carry_src_r != 0) {1499if (src1 != dst)1500carry_src_r = (sljit_s32)src1;1501else if (src2 != dst)1502carry_src_r = (sljit_s32)src2;1503else {1504FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));1505carry_src_r = EQUAL_FLAG;1506}1507}15081509FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));1510}15111512/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */1513if (carry_src_r != 0) {1514if (flags & SRC2_IMM)1515FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));1516else1517FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));1518}15191520FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));15211522if (carry_src_r == 0)1523return SLJIT_SUCCESS;15241525/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */1526FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));1527/* Set carry flag. 
*/1528return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));15291530case SLJIT_SUB:1531if ((flags & SRC2_IMM) && src2 == I12_MIN) {1532FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));1533src2 = TMP_REG2;1534flags &= ~SRC2_IMM;1535}15361537is_handled = 0;15381539if (flags & SRC2_IMM) {1540if (GET_FLAG_TYPE(op) == SLJIT_LESS) {1541FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));1542is_handled = 1;1543} else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {1544FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));1545is_handled = 1;1546}1547}15481549if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {1550is_handled = 1;15511552if (flags & SRC2_IMM) {1553reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;1554FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));1555src2 = reg;1556flags &= ~SRC2_IMM;1557}15581559switch (GET_FLAG_TYPE(op)) {1560case SLJIT_LESS:1561FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));1562break;1563case SLJIT_GREATER:1564FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));1565break;1566case SLJIT_SIG_LESS:1567FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));1568break;1569case SLJIT_SIG_GREATER:1570FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));1571break;1572}1573}15741575if (is_handled) {1576if (flags & SRC2_IMM) {1577if (op & SLJIT_SET_Z)1578FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));1579if (!(flags & UNUSED_DEST))1580return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));1581} else {1582if (op & SLJIT_SET_Z)1583FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));1584if (!(flags & UNUSED_DEST))1585return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | 
RK(src2));1586}1587return SLJIT_SUCCESS;1588}15891590is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;1591is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;15921593if (flags & SRC2_IMM) {1594if (is_overflow) {1595if (src2 >= 0)1596FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));1597else {1598FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));1599FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));1600}1601} else if (op & SLJIT_SET_Z)1602FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));16031604if (is_overflow || is_carry)1605FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));16061607/* Only the zero flag is needed. */1608if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))1609FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));1610} else {1611if (is_overflow)1612FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));1613else if (op & SLJIT_SET_Z)1614FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));16151616if (is_overflow || is_carry)1617FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));16181619/* Only the zero flag is needed. 
*/1620if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))1621FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));1622}16231624if (!is_overflow)1625return SLJIT_SUCCESS;16261627FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));1628if (op & SLJIT_SET_Z)1629FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));1630FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));1631return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));16321633case SLJIT_SUBC:1634if ((flags & SRC2_IMM) && src2 == I12_MIN) {1635FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));1636src2 = TMP_REG2;1637flags &= ~SRC2_IMM;1638}16391640is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;16411642if (flags & SRC2_IMM) {1643if (is_carry)1644FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));16451646FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));1647} else {1648if (is_carry)1649FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));16501651FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));1652}16531654if (is_carry)1655FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));16561657FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));16581659if (!is_carry)1660return SLJIT_SUCCESS;16611662return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));16631664case SLJIT_MUL:1665SLJIT_ASSERT(!(flags & SRC2_IMM));16661667if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)1668return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));16691670if (op & SLJIT_32) {1671FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));1672FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));1673return push_inst(compiler, SUB_D | 
RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));1674}16751676FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));1677FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));1678FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12((63))));1679return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));16801681case SLJIT_AND:1682EMIT_LOGICAL(ANDI, AND);1683return SLJIT_SUCCESS;16841685case SLJIT_OR:1686EMIT_LOGICAL(ORI, OR);1687return SLJIT_SUCCESS;16881689case SLJIT_XOR:1690EMIT_LOGICAL(XORI, XOR);1691return SLJIT_SUCCESS;16921693case SLJIT_SHL:1694case SLJIT_MSHL:1695if (op & SLJIT_32) {1696EMIT_SHIFT(SLLI_W, SLL_W);1697} else {1698EMIT_SHIFT(SLLI_D, SLL_D);1699}1700break;17011702case SLJIT_LSHR:1703case SLJIT_MLSHR:1704if (op & SLJIT_32) {1705EMIT_SHIFT(SRLI_W, SRL_W);1706} else {1707EMIT_SHIFT(SRLI_D, SRL_D);1708}1709break;17101711case SLJIT_ASHR:1712case SLJIT_MASHR:1713if (op & SLJIT_32) {1714EMIT_SHIFT(SRAI_W, SRA_W);1715} else {1716EMIT_SHIFT(SRAI_D, SRA_D);1717}1718break;17191720case SLJIT_ROTL:1721case SLJIT_ROTR:1722if (flags & SRC2_IMM) {1723SLJIT_ASSERT(src2 != 0);17241725if (GET_OPCODE(op) == SLJIT_ROTL)1726src2 = word_size - src2;1727return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));1728}17291730if (src2 == TMP_ZERO) {1731if (dst != src1)1732return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));1733return SLJIT_SUCCESS;1734}17351736if (GET_OPCODE(op) == SLJIT_ROTL) {1737FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));1738src2 = OTHER_FLAG;1739}1740return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));17411742default:1743SLJIT_UNREACHABLE();1744return SLJIT_SUCCESS;1745}17461747if (flags & SRC2_IMM) {1748if (op & SLJIT_SET_Z)1749FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));17501751if (flags & UNUSED_DEST)1752return 
SLJIT_SUCCESS;1753return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));1754}17551756if (op & SLJIT_SET_Z)1757FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));17581759if (flags & UNUSED_DEST)1760return SLJIT_SUCCESS;1761return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));1762}17631764#undef IMM_EXTEND17651766static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,1767sljit_s32 dst, sljit_sw dstw,1768sljit_s32 src1, sljit_sw src1w,1769sljit_s32 src2, sljit_sw src2w)1770{1771/* arg1 goes to TMP_REG1 or src reg1772arg2 goes to TMP_REG2, imm or src reg1773TMP_REG3 can be used for caching1774result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */1775sljit_s32 dst_r = TMP_REG2;1776sljit_s32 src1_r;1777sljit_sw src2_r = 0;1778sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;17791780if (!(flags & ALT_KEEP_CACHE)) {1781compiler->cache_arg = 0;1782compiler->cache_argw = 0;1783}17841785if (dst == 0) {1786SLJIT_ASSERT(HAS_FLAGS(op));1787flags |= UNUSED_DEST;1788dst = TMP_REG2;1789} else if (FAST_IS_REG(dst)) {1790dst_r = dst;1791flags |= REG_DEST;1792if (flags & MOVE_OP)1793src2_tmp_reg = dst_r;1794} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))1795flags |= SLOW_DEST;17961797if (flags & IMM_OP) {1798if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {1799flags |= SRC2_IMM;1800src2_r = src2w;1801} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {1802flags |= SRC2_IMM;1803src2_r = src1w;18041805/* And swap arguments. */1806src1 = src2;1807src1w = src2w;1808src2 = SLJIT_IMM;1809/* src2w = src2_r unneeded. */1810}1811}18121813/* Source 1. 
*/1814if (FAST_IS_REG(src1)) {1815src1_r = src1;1816flags |= REG1_SOURCE;1817} else if (src1 == SLJIT_IMM) {1818if (src1w) {1819FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));1820src1_r = TMP_REG1;1821}1822else1823src1_r = TMP_ZERO;1824} else {1825if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))1826FAIL_IF(compiler->error);1827else1828flags |= SLOW_SRC1;1829src1_r = TMP_REG1;1830}18311832/* Source 2. */1833if (FAST_IS_REG(src2)) {1834src2_r = src2;1835flags |= REG2_SOURCE;1836if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)1837dst_r = (sljit_s32)src2_r;1838} else if (src2 == SLJIT_IMM) {1839if (!(flags & SRC2_IMM)) {1840if (src2w) {1841FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));1842src2_r = src2_tmp_reg;1843} else {1844src2_r = TMP_ZERO;1845if (flags & MOVE_OP) {1846if (dst & SLJIT_MEM)1847dst_r = 0;1848else1849op = SLJIT_MOV;1850}1851}1852}1853} else {1854if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))1855FAIL_IF(compiler->error);1856else1857flags |= SLOW_SRC2;18581859src2_r = src2_tmp_reg;1860}18611862if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {1863SLJIT_ASSERT(src2_r == TMP_REG2);1864if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {1865FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));1866FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));1867} else {1868FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));1869FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));1870}1871}1872else if (flags & SLOW_SRC1)1873FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));1874else if (flags & SLOW_SRC2)1875FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? 
MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));18761877FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));18781879if (dst & SLJIT_MEM) {1880if (!(flags & SLOW_DEST)) {1881getput_arg_fast(compiler, flags, dst_r, dst, dstw);1882return compiler->error;1883}1884return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);1885}18861887return SLJIT_SUCCESS;1888}18891890SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)1891{1892CHECK_ERROR();1893CHECK(check_sljit_emit_op0(compiler, op));18941895switch (GET_OPCODE(op)) {1896case SLJIT_BREAKPOINT:1897return push_inst(compiler, BREAK);1898case SLJIT_NOP:1899return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));1900case SLJIT_LMUL_UW:1901FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));1902FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));1903return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));1904case SLJIT_LMUL_SW:1905FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));1906FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));1907return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));1908case SLJIT_DIVMOD_UW:1909FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));1910FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));1911return push_inst(compiler, ((op & SLJIT_32)? 
MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));1912case SLJIT_DIVMOD_SW:1913FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));1914FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));1915return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));1916case SLJIT_DIV_UW:1917return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));1918case SLJIT_DIV_SW:1919return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));1920case SLJIT_MEMORY_BARRIER:1921return push_inst(compiler, DBAR);1922case SLJIT_ENDBR:1923case SLJIT_SKIP_FRAMES_BEFORE_RETURN:1924return SLJIT_SUCCESS;1925}19261927SLJIT_UNREACHABLE();1928return SLJIT_ERR_UNSUPPORTED;1929}19301931SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,1932sljit_s32 dst, sljit_sw dstw,1933sljit_s32 src, sljit_sw srcw)1934{1935sljit_s32 flags = 0;19361937CHECK_ERROR();1938CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));1939ADJUST_LOCAL_OFFSET(dst, dstw);1940ADJUST_LOCAL_OFFSET(src, srcw);19411942if (op & SLJIT_32)1943flags = INT_DATA | SIGNED_DATA;19441945switch (GET_OPCODE(op)) {1946case SLJIT_MOV:1947case SLJIT_MOV_P:1948return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);19491950case SLJIT_MOV_U32:1951return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);19521953case SLJIT_MOV_S32:1954/* Logical operators have no W variant, so sign extended input is necessary for them. */1955case SLJIT_MOV32:1956return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? 
(sljit_s32)srcw : srcw);19571958case SLJIT_MOV_U8:1959return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);19601961case SLJIT_MOV_S8:1962return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);19631964case SLJIT_MOV_U16:1965return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);19661967case SLJIT_MOV_S16:1968return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);19691970case SLJIT_CLZ:1971case SLJIT_CTZ:1972case SLJIT_REV:1973return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);19741975case SLJIT_REV_U16:1976case SLJIT_REV_S16:1977return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);19781979case SLJIT_REV_U32:1980case SLJIT_REV_S32:1981return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);1982}19831984SLJIT_UNREACHABLE();1985return SLJIT_SUCCESS;1986}19871988SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,1989sljit_s32 dst, sljit_sw dstw,1990sljit_s32 src1, sljit_sw src1w,1991sljit_s32 src2, sljit_sw src2w)1992{1993sljit_s32 flags = 0;19941995CHECK_ERROR();1996CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));1997ADJUST_LOCAL_OFFSET(dst, dstw);1998ADJUST_LOCAL_OFFSET(src1, src1w);1999ADJUST_LOCAL_OFFSET(src2, src2w);20002001if (op & SLJIT_32) {2002flags |= INT_DATA | SIGNED_DATA;2003if (src1 == SLJIT_IMM)2004src1w = (sljit_s32)src1w;2005if (src2 == SLJIT_IMM)2006src2w = (sljit_s32)src2w;2007}200820092010switch (GET_OPCODE(op)) {2011case SLJIT_ADD:2012case SLJIT_ADDC:2013compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;2014return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, 
src2w);20152016case SLJIT_SUB:2017case SLJIT_SUBC:2018compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;2019return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);20202021case SLJIT_MUL:2022compiler->status_flags_state = 0;2023return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);20242025case SLJIT_AND:2026case SLJIT_OR:2027case SLJIT_XOR:2028return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);20292030case SLJIT_SHL:2031case SLJIT_MSHL:2032case SLJIT_LSHR:2033case SLJIT_MLSHR:2034case SLJIT_ASHR:2035case SLJIT_MASHR:2036case SLJIT_ROTL:2037case SLJIT_ROTR:2038if (src2 == SLJIT_IMM) {2039if (op & SLJIT_32)2040src2w &= 0x1f;2041else2042src2w &= 0x3f;2043}20442045return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);2046}20472048SLJIT_UNREACHABLE();2049return SLJIT_SUCCESS;2050}20512052SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,2053sljit_s32 src1, sljit_sw src1w,2054sljit_s32 src2, sljit_sw src2w)2055{2056CHECK_ERROR();2057CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));20582059SLJIT_SKIP_CHECKS(compiler);2060return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);2061}20622063SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,2064sljit_s32 dst_reg,2065sljit_s32 src1, sljit_sw src1w,2066sljit_s32 src2, sljit_sw src2w)2067{2068CHECK_ERROR();2069CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));20702071switch (GET_OPCODE(op)) {2072case SLJIT_MULADD:2073SLJIT_SKIP_CHECKS(compiler);2074FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));2075return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));2076}20772078return SLJIT_SUCCESS;2079}20802081SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,2082sljit_s32 dst_reg,2083sljit_s32 src1_reg,2084sljit_s32 src2_reg,2085sljit_s32 src3, sljit_sw src3w)2086{2087sljit_s32 is_left;2088sljit_ins ins1, ins2, ins3;2089sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;2090sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;209120922093CHECK_ERROR();2094CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));20952096is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);20972098if (src1_reg == src2_reg) {2099SLJIT_SKIP_CHECKS(compiler);2100return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);2101}21022103ADJUST_LOCAL_OFFSET(src3, src3w);21042105if (src3 == SLJIT_IMM) {2106src3w &= bit_length - 1;21072108if (src3w == 0)2109return SLJIT_SUCCESS;21102111if (is_left) {2112ins1 = INST(SLLI, op) | IMM_I12(src3w);2113src3w = bit_length - src3w;2114ins2 = INST(SRLI, op) | IMM_I12(src3w);2115} else {2116ins1 = INST(SRLI, op) | IMM_I12(src3w);2117src3w = bit_length - src3w;2118ins2 = INST(SLLI, op) | IMM_I12(src3w);2119}21202121FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));2122FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));2123return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));2124}21252126if (src3 & SLJIT_MEM) {2127FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));2128src3 = TMP_REG2;2129} else if (dst_reg == src3) {2130push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0));2131src3 = TMP_REG2;2132}21332134if (is_left) {2135ins1 = INST(SLL, op);2136ins2 = INST(SRLI, op);2137ins3 = INST(SRL, op);2138} else {2139ins1 = INST(SRL, op);2140ins2 = INST(SLLI, op);2141ins3 = INST(SLL, op);2142}21432144FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));21452146if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) 
{2147FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));2148FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));2149src2_reg = TMP_REG1;2150} else2151FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));21522153FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));2154return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));2155}21562157SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,2158sljit_s32 src, sljit_sw srcw)2159{2160sljit_s32 base = src & REG_MASK;21612162CHECK_ERROR();2163CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));2164ADJUST_LOCAL_OFFSET(src, srcw);21652166switch (op) {2167case SLJIT_FAST_RETURN:2168if (FAST_IS_REG(src))2169FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));2170else2171FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));21722173return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));2174case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:2175return SLJIT_SUCCESS;2176case SLJIT_PREFETCH_L1:2177case SLJIT_PREFETCH_L2:2178case SLJIT_PREFETCH_L3:2179case SLJIT_PREFETCH_ONCE:2180if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {2181srcw &= 0x3;2182if (SLJIT_UNLIKELY(srcw))2183FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));2184FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));2185} else {2186if (base && srcw <= I12_MAX && srcw >= I12_MIN)2187return push_inst(compiler,PRELD | RJ(base) | IMM_I12(srcw));21882189FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));2190if (base != 0)2191FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));2192}2193return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));2194}2195return SLJIT_SUCCESS;2196}21972198SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,2199sljit_s32 dst, sljit_sw dstw)2200{2201sljit_s32 dst_r;22022203CHECK_ERROR();2204CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));2205ADJUST_LOCAL_OFFSET(dst, dstw);22062207switch (op) {2208case SLJIT_FAST_ENTER:2209if (FAST_IS_REG(dst))2210return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));22112212SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);2213break;2214case SLJIT_GET_RETURN_ADDRESS:2215dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;2216FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));2217break;2218}22192220if (dst & SLJIT_MEM)2221return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);22222223return SLJIT_SUCCESS;2224}22252226SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)2227{2228CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));22292230if (type == SLJIT_GP_REGISTER)2231return reg_map[reg];22322233if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)2234return -1;22352236return freg_map[reg];2237}22382239SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,2240void *instruction, sljit_u32 size)2241{2242SLJIT_UNUSED_ARG(size);2243CHECK_ERROR();2244CHECK(check_sljit_emit_op_custom(compiler, instruction, size));22452246return push_inst(compiler, *(sljit_ins*)instruction);2247}22482249/* --------------------------------------------------------------------- */2250/* Floating point operators */2251/* --------------------------------------------------------------------- */2252#define SET_COND(cond) (sljit_ins)(cond << 15)22532254#define COND_CUN SET_COND(0x8) /* UN */2255#define COND_CEQ SET_COND(0x4) /* EQ */2256#define COND_CUEQ SET_COND(0xc) /* UN EQ */2257#define COND_CLT SET_COND(0x2) /* LT */2258#define COND_CULT SET_COND(0xa) /* UN LT */2259#define COND_CLE 
SET_COND(0x6) /* LT EQ */2260#define COND_CULE SET_COND(0xe) /* UN LT EQ */2261#define COND_CNE SET_COND(0x10) /* GT LT */2262#define COND_CUNE SET_COND(0x18) /* UN GT LT */2263#define COND_COR SET_COND(0x14) /* GT LT EQ */22642265#define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)2266#define FCD(cd) (sljit_ins)(cd & 0x7)2267#define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)2268#define FCA(ca) (sljit_ins)((ca & 0x7) << 15)2269#define F_OTHER_FLAG 122702271#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))22722273/* convert to inter exact toward zero */2274static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,2275sljit_s32 dst, sljit_sw dstw,2276sljit_s32 src, sljit_sw srcw)2277{2278sljit_ins inst;2279sljit_u32 word_data = 0;2280sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;22812282switch (GET_OPCODE(op))2283{2284case SLJIT_CONV_SW_FROM_F64:2285word_data = 1;2286inst = FINST(FTINTRZ_L, op);2287break;2288case SLJIT_CONV_S32_FROM_F64:2289inst = FINST(FTINTRZ_W, op);2290break;2291default:2292inst = BREAK;2293SLJIT_UNREACHABLE();2294}22952296if (src & SLJIT_MEM) {2297FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));2298src = TMP_FREG1;2299}23002301FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));2302FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));23032304if (dst & SLJIT_MEM)2305return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);2306return SLJIT_SUCCESS;2307}23082309static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,2310sljit_s32 dst, sljit_sw dstw,2311sljit_s32 src, sljit_sw srcw)2312{2313sljit_ins inst;2314sljit_u32 word_data = 0;2315sljit_s32 dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1;23162317switch (GET_OPCODE(op))2318{2319case SLJIT_CONV_F64_FROM_SW:2320word_data = 1;2321inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);2322break;2323case SLJIT_CONV_F64_FROM_S32:2324inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);2325break;2326default:2327inst = BREAK;2328SLJIT_UNREACHABLE();2329}23302331if (src & SLJIT_MEM) {2332FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));2333src = TMP_REG1;2334} else if (src == SLJIT_IMM) {2335if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)2336srcw = (sljit_s32)srcw;23372338FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));2339src = TMP_REG1;2340}2341FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));2342FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));23432344if (dst & SLJIT_MEM)2345return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);2346return SLJIT_SUCCESS;2347}23482349static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,2350sljit_s32 dst, sljit_sw dstw,2351sljit_s32 src, sljit_sw srcw)2352{2353return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);2354}23552356static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,2357sljit_s32 dst, sljit_sw dstw,2358sljit_s32 src, sljit_sw srcw)2359{2360sljit_ins inst;2361sljit_u32 word_data = 0;2362sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;23632364switch (GET_OPCODE(op))2365{2366case SLJIT_CONV_F64_FROM_UW:2367word_data = 1;2368inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);2369break;2370case SLJIT_CONV_F64_FROM_U32:2371inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);2372break;2373default:2374inst = BREAK;2375SLJIT_UNREACHABLE();2376}23772378if (src & SLJIT_MEM) {2379FAIL_IF(emit_op_mem2(compiler, (word_data ? 
WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));2380src = TMP_REG1;2381} else if (src == SLJIT_IMM) {2382if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)2383srcw = (sljit_u32)srcw;23842385FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));2386src = TMP_REG1;2387}23882389if (!word_data)2390FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));23912392FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));23932394FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));2395FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));2396FAIL_IF(push_inst(compiler, B | IMM_I26(7)));23972398FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));2399FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));2400FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));2401FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));2402FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));2403FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));24042405if (dst & SLJIT_MEM)2406return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);2407return SLJIT_SUCCESS;2408}24092410static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,2411sljit_s32 src1, sljit_sw src1w,2412sljit_s32 src2, sljit_sw src2w)2413{2414if (src1 & SLJIT_MEM) {2415FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));2416src1 = TMP_FREG1;2417}24182419if (src2 & SLJIT_MEM) {2420FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));2421src2 = TMP_FREG2;2422}24232424FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));24252426switch (GET_FLAG_TYPE(op)) {2427case SLJIT_F_EQUAL:2428case 
SLJIT_ORDERED_EQUAL:2429FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));2430break;2431case SLJIT_F_LESS:2432case SLJIT_ORDERED_LESS:2433FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));2434break;2435case SLJIT_F_GREATER:2436case SLJIT_ORDERED_GREATER:2437FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));2438break;2439case SLJIT_UNORDERED_OR_GREATER:2440FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));2441break;2442case SLJIT_UNORDERED_OR_LESS:2443FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));2444break;2445case SLJIT_UNORDERED_OR_EQUAL:2446FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));2447break;2448default: /* SLJIT_UNORDERED */2449FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));2450}2451return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));2452}24532454SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,2455sljit_s32 dst, sljit_sw dstw,2456sljit_s32 src, sljit_sw srcw)2457{2458sljit_s32 dst_r;24592460CHECK_ERROR();2461compiler->cache_arg = 0;2462compiler->cache_argw = 0;24632464SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);2465SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);24662467if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)2468op ^= SLJIT_32;24692470dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG1;24712472if (src & SLJIT_MEM) {2473FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));2474src = dst_r;2475}24762477switch (GET_OPCODE(op)) {2478case SLJIT_MOV_F64:2479if (src != dst_r) {2480if (!(dst & SLJIT_MEM))2481FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));2482else2483dst_r = src;2484}2485break;2486case SLJIT_NEG_F64:2487FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));2488break;2489case SLJIT_ABS_F64:2490FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));2491break;2492case SLJIT_CONV_F64_FROM_F32:2493/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */2494FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));2495op ^= SLJIT_32;2496break;2497}24982499if (dst & SLJIT_MEM)2500return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);2501return SLJIT_SUCCESS;2502}25032504SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,2505sljit_s32 dst, sljit_sw dstw,2506sljit_s32 src1, sljit_sw src1w,2507sljit_s32 src2, sljit_sw src2w)2508{2509sljit_s32 dst_r, flags = 0;25102511CHECK_ERROR();2512CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));2513ADJUST_LOCAL_OFFSET(dst, dstw);2514ADJUST_LOCAL_OFFSET(src1, src1w);2515ADJUST_LOCAL_OFFSET(src2, src2w);25162517compiler->cache_arg = 0;2518compiler->cache_argw = 0;25192520dst_r = FAST_IS_REG(dst) ? 
dst : TMP_FREG2;25212522if (src1 & SLJIT_MEM) {2523if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {2524FAIL_IF(compiler->error);2525src1 = TMP_FREG1;2526} else2527flags |= SLOW_SRC1;2528}25292530if (src2 & SLJIT_MEM) {2531if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {2532FAIL_IF(compiler->error);2533src2 = TMP_FREG2;2534} else2535flags |= SLOW_SRC2;2536}25372538if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {2539if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {2540FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));2541FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));2542} else {2543FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));2544FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));2545}2546}2547else if (flags & SLOW_SRC1)2548FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));2549else if (flags & SLOW_SRC2)2550FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));25512552if (flags & SLOW_SRC1)2553src1 = TMP_FREG1;2554if (flags & SLOW_SRC2)2555src2 = TMP_FREG2;25562557switch (GET_OPCODE(op)) {2558case SLJIT_ADD_F64:2559FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));2560break;2561case SLJIT_SUB_F64:2562FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));2563break;2564case SLJIT_MUL_F64:2565FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));2566break;2567case SLJIT_DIV_F64:2568FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));2569break;2570}25712572if (dst_r != dst)2573FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 
0));2574return SLJIT_SUCCESS;2575}25762577SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,2578sljit_s32 dst_freg,2579sljit_s32 src1, sljit_sw src1w,2580sljit_s32 src2, sljit_sw src2w)2581{2582sljit_s32 reg;25832584CHECK_ERROR();2585CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));2586ADJUST_LOCAL_OFFSET(src1, src1w);2587ADJUST_LOCAL_OFFSET(src2, src2w);25882589if (src2 & SLJIT_MEM) {2590FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));2591src2 = TMP_FREG1;2592}25932594if (src1 & SLJIT_MEM) {2595reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;2596FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));2597src1 = reg;2598}25992600return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));2601}26022603SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,2604sljit_s32 freg, sljit_f32 value)2605{2606union {2607sljit_s32 imm;2608sljit_f32 value;2609} u;26102611CHECK_ERROR();2612CHECK(check_sljit_emit_fset32(compiler, freg, value));26132614u.value = value;26152616if (u.imm == 0)2617return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));26182619FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));2620return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));2621}26222623SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,2624sljit_s32 freg, sljit_f64 value)2625{2626union {2627sljit_sw imm;2628sljit_f64 value;2629} u;26302631CHECK_ERROR();2632CHECK(check_sljit_emit_fset64(compiler, freg, value));26332634u.value = value;26352636if (u.imm == 0)2637return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));26382639FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));2640return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));2641}26422643SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct 
sljit_compiler *compiler, sljit_s32 op,2644sljit_s32 freg, sljit_s32 reg)2645{2646sljit_ins inst;26472648CHECK_ERROR();2649CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));26502651if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)2652inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);2653else2654inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);2655return push_inst(compiler, inst);2656}26572658/* --------------------------------------------------------------------- */2659/* Conditional instructions */2660/* --------------------------------------------------------------------- */26612662SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)2663{2664struct sljit_label *label;26652666CHECK_ERROR_PTR();2667CHECK_PTR(check_sljit_emit_label(compiler));26682669if (compiler->last_label && compiler->last_label->size == compiler->size)2670return compiler->last_label;26712672label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));2673PTR_FAIL_IF(!label);2674set_label(label, compiler);2675return label;2676}26772678static sljit_ins get_jump_instruction(sljit_s32 type)2679{2680switch (type) {2681case SLJIT_EQUAL:2682return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);2683case SLJIT_NOT_EQUAL:2684return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);2685case SLJIT_LESS:2686case SLJIT_GREATER:2687case SLJIT_SIG_LESS:2688case SLJIT_SIG_GREATER:2689case SLJIT_OVERFLOW:2690case SLJIT_CARRY:2691case SLJIT_ATOMIC_STORED:2692return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);2693case SLJIT_GREATER_EQUAL:2694case SLJIT_LESS_EQUAL:2695case SLJIT_SIG_GREATER_EQUAL:2696case SLJIT_SIG_LESS_EQUAL:2697case SLJIT_NOT_OVERFLOW:2698case SLJIT_NOT_CARRY:2699case SLJIT_ATOMIC_NOT_STORED:2700return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);2701case SLJIT_F_EQUAL:2702case SLJIT_ORDERED_EQUAL:2703case SLJIT_F_LESS:2704case SLJIT_ORDERED_LESS:2705case SLJIT_ORDERED_GREATER:2706case SLJIT_UNORDERED_OR_GREATER:2707case 
SLJIT_F_GREATER:2708case SLJIT_UNORDERED_OR_LESS:2709case SLJIT_UNORDERED_OR_EQUAL:2710case SLJIT_UNORDERED:2711return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);2712case SLJIT_ORDERED_NOT_EQUAL:2713case SLJIT_ORDERED_LESS_EQUAL:2714case SLJIT_ORDERED_GREATER_EQUAL:2715case SLJIT_F_NOT_EQUAL:2716case SLJIT_UNORDERED_OR_NOT_EQUAL:2717case SLJIT_UNORDERED_OR_GREATER_EQUAL:2718case SLJIT_UNORDERED_OR_LESS_EQUAL:2719case SLJIT_F_LESS_EQUAL:2720case SLJIT_F_GREATER_EQUAL:2721case SLJIT_ORDERED:2722return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);2723default:2724/* Not conditional branch. */2725return 0;2726}2727}27282729SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)2730{2731struct sljit_jump *jump;2732sljit_ins inst;27332734CHECK_ERROR_PTR();2735CHECK_PTR(check_sljit_emit_jump(compiler, type));27362737jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));2738PTR_FAIL_IF(!jump);2739set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);2740type &= 0xff;27412742inst = get_jump_instruction(type);27432744if (inst != 0) {2745PTR_FAIL_IF(push_inst(compiler, inst));2746jump->flags |= IS_COND;2747}27482749jump->addr = compiler->size;2750inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);27512752if (type >= SLJIT_FAST_CALL) {2753jump->flags |= IS_CALL;2754inst |= RD(RETURN_ADDR_REG);2755}27562757PTR_FAIL_IF(push_inst(compiler, inst));27582759/* Maximum number of instructions required for generating a constant. 
*/2760compiler->size += JUMP_MAX_SIZE - 1;2761return jump;2762}27632764SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,2765sljit_s32 arg_types)2766{2767SLJIT_UNUSED_ARG(arg_types);2768CHECK_ERROR_PTR();2769CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));27702771if (type & SLJIT_CALL_RETURN) {2772PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));2773type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);2774}27752776SLJIT_SKIP_CHECKS(compiler);2777return sljit_emit_jump(compiler, type);2778}27792780SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,2781sljit_s32 src1, sljit_sw src1w,2782sljit_s32 src2, sljit_sw src2w)2783{2784struct sljit_jump *jump;2785sljit_s32 flags;2786sljit_ins inst;2787sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;27882789CHECK_ERROR_PTR();2790CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));2791ADJUST_LOCAL_OFFSET(src1, src1w);2792ADJUST_LOCAL_OFFSET(src2, src2w);27932794compiler->cache_arg = 0;2795compiler->cache_argw = 0;27962797flags = ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA) | LOAD_DATA;27982799if (src1 & SLJIT_MEM) {2800PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));2801src1 = TMP_REG1;2802}28032804if (src2 & SLJIT_MEM) {2805PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));2806src2 = src2_tmp_reg;2807}28082809if (src1 == SLJIT_IMM) {2810if (src1w != 0) {2811PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));2812src1 = TMP_REG1;2813}2814else2815src1 = TMP_ZERO;2816}28172818if (src2 == SLJIT_IMM) {2819if (src2w != 0) {2820PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));2821src2 = src2_tmp_reg;2822}2823else2824src2 = TMP_ZERO;2825}28262827jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));2828PTR_FAIL_IF(!jump);2829set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));2830type &= 0xff;28312832switch (type) {2833case SLJIT_EQUAL:2834inst = BNE | RJ(src1) | RD(src2);2835break;2836case SLJIT_NOT_EQUAL:2837inst = BEQ | RJ(src1) | RD(src2);2838break;2839case SLJIT_LESS:2840inst = BGEU | RJ(src1) | RD(src2);2841break;2842case SLJIT_GREATER_EQUAL:2843inst = BLTU | RJ(src1) | RD(src2);2844break;2845case SLJIT_GREATER:2846inst = BGEU | RJ(src2) | RD(src1);2847break;2848case SLJIT_LESS_EQUAL:2849inst = BLTU | RJ(src2) | RD(src1);2850break;2851case SLJIT_SIG_LESS:2852inst = BGE | RJ(src1) | RD(src2);2853break;2854case SLJIT_SIG_GREATER_EQUAL:2855inst = BLT | RJ(src1) | RD(src2);2856break;2857case SLJIT_SIG_GREATER:2858inst = BGE | RJ(src2) | RD(src1);2859break;2860case SLJIT_SIG_LESS_EQUAL:2861inst = BLT | RJ(src2) | RD(src1);2862break;2863default:2864inst = BREAK;2865SLJIT_UNREACHABLE();2866}28672868PTR_FAIL_IF(push_inst(compiler, inst));28692870jump->addr = compiler->size;2871PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));28722873/* Maximum number of instructions required for generating a constant. 
*/2874compiler->size += JUMP_MAX_SIZE - 1;28752876return jump;2877}28782879SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)2880{2881struct sljit_jump *jump;28822883CHECK_ERROR();2884CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));28852886if (src != SLJIT_IMM) {2887if (src & SLJIT_MEM) {2888ADJUST_LOCAL_OFFSET(src, srcw);2889FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));2890src = TMP_REG1;2891}2892return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));2893}28942895/* These jumps are converted to jump/call instructions when possible. */2896jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));2897FAIL_IF(!jump);2898set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));2899jump->u.target = (sljit_uw)srcw;29002901jump->addr = compiler->size;2902FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));29032904/* Maximum number of instructions required for generating a constant. 
*/2905compiler->size += JUMP_MAX_SIZE - 1;29062907return SLJIT_SUCCESS;2908}29092910SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,2911sljit_s32 arg_types,2912sljit_s32 src, sljit_sw srcw)2913{2914SLJIT_UNUSED_ARG(arg_types);2915CHECK_ERROR();2916CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));29172918if (src & SLJIT_MEM) {2919ADJUST_LOCAL_OFFSET(src, srcw);2920FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));2921src = TMP_REG1;2922}29232924if (type & SLJIT_CALL_RETURN) {2925if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {2926FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));2927src = TMP_REG1;2928}29292930FAIL_IF(emit_stack_frame_release(compiler, 0));2931type = SLJIT_JUMP;2932}29332934SLJIT_SKIP_CHECKS(compiler);2935return sljit_emit_ijump(compiler, type, src, srcw);2936}29372938SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,2939sljit_s32 dst, sljit_sw dstw,2940sljit_s32 type)2941{2942sljit_s32 src_r, dst_r, invert;2943sljit_s32 saved_op = op;2944sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;29452946CHECK_ERROR();2947CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));2948ADJUST_LOCAL_OFFSET(dst, dstw);29492950op = GET_OPCODE(op);2951dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? 
dst : TMP_REG2;29522953compiler->cache_arg = 0;2954compiler->cache_argw = 0;29552956if (op >= SLJIT_ADD && (dst & SLJIT_MEM))2957FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));29582959if (type < SLJIT_F_EQUAL) {2960src_r = OTHER_FLAG;2961invert = type & 0x1;29622963switch (type) {2964case SLJIT_EQUAL:2965case SLJIT_NOT_EQUAL:2966FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));2967src_r = dst_r;2968break;2969case SLJIT_ATOMIC_STORED:2970case SLJIT_ATOMIC_NOT_STORED:2971FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));2972src_r = dst_r;2973invert ^= 0x1;2974break;2975case SLJIT_OVERFLOW:2976case SLJIT_NOT_OVERFLOW:2977if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {2978src_r = OTHER_FLAG;2979break;2980}2981FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));2982src_r = dst_r;2983invert ^= 0x1;2984break;2985}2986} else {2987invert = 0;2988src_r = OTHER_FLAG;29892990switch (type) {2991case SLJIT_ORDERED_NOT_EQUAL:2992case SLJIT_ORDERED_LESS_EQUAL:2993case SLJIT_ORDERED_GREATER_EQUAL:2994case SLJIT_F_NOT_EQUAL:2995case SLJIT_UNORDERED_OR_NOT_EQUAL:2996case SLJIT_UNORDERED_OR_GREATER_EQUAL:2997case SLJIT_UNORDERED_OR_LESS_EQUAL:2998case SLJIT_F_LESS_EQUAL:2999case SLJIT_F_GREATER_EQUAL:3000case SLJIT_ORDERED:3001invert = 1;3002break;3003}3004}30053006if (invert) {3007FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));3008src_r = dst_r;3009}30103011if (op < SLJIT_ADD) {3012if (dst & SLJIT_MEM)3013return emit_op_mem(compiler, mem_type, src_r, dst, dstw);30143015if (src_r != dst_r)3016return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));3017return SLJIT_SUCCESS;3018}30193020mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;30213022if (dst & SLJIT_MEM)3023return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);3024return emit_op(compiler, saved_op, 
mem_type, dst, dstw, dst, dstw, src_r, 0);3025}30263027SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,3028sljit_s32 dst_reg,3029sljit_s32 src1, sljit_sw src1w,3030sljit_s32 src2_reg)3031{3032sljit_ins *ptr;3033sljit_uw size;3034sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;30353036CHECK_ERROR();3037CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));3038ADJUST_LOCAL_OFFSET(src1, src1w);30393040if (dst_reg != src2_reg) {3041if (dst_reg == src1) {3042src1 = src2_reg;3043src1w = 0;3044type ^= 0x1;3045} else {3046if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {3047FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));30483049if ((src1 & REG_MASK) == dst_reg)3050src1 = (src1 & ~REG_MASK) | TMP_REG1;30513052if (OFFS_REG(src1) == dst_reg)3053src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);3054}30553056FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));3057}3058}30593060size = compiler->size;30613062ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));3063FAIL_IF(!ptr);3064compiler->size++;30653066if (src1 & SLJIT_MEM) {3067FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));3068} else if (src1 == SLJIT_IMM) {3069if (type & SLJIT_32)3070src1w = (sljit_s32)src1w;3071FAIL_IF(load_immediate(compiler, dst_reg, src1w));3072} else3073FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));30743075*ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);3076return SLJIT_SUCCESS;3077}30783079SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,3080sljit_s32 dst_freg,3081sljit_s32 src1, sljit_sw src1w,3082sljit_s32 src2_freg)3083{3084sljit_s32 invert = 0;30853086CHECK_ERROR();3087CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));30883089ADJUST_LOCAL_OFFSET(src1, src1w);30903091if 
((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {3092if ((type & ~SLJIT_32) == SLJIT_EQUAL)3093invert = 1;3094FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));3095} else {3096if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))3097invert = 1;3098FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));3099}31003101if (src1 & SLJIT_MEM) {3102FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));3103if (invert)3104return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));3105return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));3106} else {3107if (invert)3108return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));3109return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));3110}3111}31123113#undef FLOAT_DATA31143115SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,3116sljit_s32 reg,3117sljit_s32 mem, sljit_sw memw)3118{3119sljit_s32 flags;31203121CHECK_ERROR();3122CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));31233124if (!(reg & REG_PAIR_MASK))3125return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);31263127if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {3128memw &= 0x3;31293130if (SLJIT_UNLIKELY(memw != 0)) {3131FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));3132FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));3133} else3134FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));31353136mem = TMP_REG1;3137memw = 0;3138} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {3139if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {3140FAIL_IF(load_immediate(compiler, 
TMP_REG1, TO_ARGW_HI(memw)));3141memw &= 0xfff;3142} else {3143FAIL_IF(load_immediate(compiler, TMP_REG1, memw));3144memw = 0;3145}31463147if (mem & REG_MASK)3148FAIL_IF(push_inst(compiler, ADD_D| RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));31493150mem = TMP_REG1;3151} else {3152mem &= REG_MASK;3153memw &= 0xfff;3154}31553156SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));31573158if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {3159FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));3160return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);3161}31623163flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);31643165FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));3166return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);3167}31683169#undef TO_ARGW_HI31703171static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)3172{3173sljit_s32 mem = *mem_ptr;31743175if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {3176*mem_ptr = TMP_REG3;3177FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3)));3178return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK));3179}31803181if (!(mem & REG_MASK)) {3182*mem_ptr = TMP_REG3;3183return load_immediate(compiler, TMP_REG3, memw);3184}31853186mem &= REG_MASK;31873188if (memw == 0) {3189*mem_ptr = mem;3190return SLJIT_SUCCESS;3191}31923193*mem_ptr = TMP_REG3;31943195FAIL_IF(load_immediate(compiler, TMP_REG3, memw));3196return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem));3197}31983199SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,3200sljit_s32 vreg,3201sljit_s32 srcdst, 
sljit_sw srcdstw)3202{3203sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3204sljit_ins ins = 0;32053206CHECK_ERROR();3207CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));32083209ADJUST_LOCAL_OFFSET(srcdst, srcdstw);32103211if (reg_size != 5 && reg_size != 4)3212return SLJIT_ERR_UNSUPPORTED;32133214if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3215return SLJIT_ERR_UNSUPPORTED;32163217if (type & SLJIT_SIMD_TEST)3218return SLJIT_SUCCESS;32193220if (!(srcdst & SLJIT_MEM)) {3221if (type & SLJIT_SIMD_STORE)3222ins = FRD(srcdst) | FRJ(vreg) | FRK(vreg);3223else3224ins = FRD(vreg) | FRJ(srcdst) | FRK(srcdst);32253226if (reg_size == 5)3227ins |= VOR_V | (sljit_ins)1 << 26;3228else3229ins |= VOR_V;32303231return push_inst(compiler, ins);3232}32333234ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;32353236if (reg_size == 5)3237ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;32383239if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))3240return push_inst(compiler, ins | FRD(vreg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));3241else {3242FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));3243return push_inst(compiler, ins | FRD(vreg) | RJ(srcdst) | IMM_I12(0));3244}3245}32463247SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,3248sljit_s32 vreg,3249sljit_s32 src, sljit_sw srcw)3250{3251sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3252sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3253sljit_ins ins = 0;32543255CHECK_ERROR();3256CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));32573258ADJUST_LOCAL_OFFSET(src, srcw);32593260if (reg_size != 5 && reg_size != 4)3261return SLJIT_ERR_UNSUPPORTED;32623263if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3264return SLJIT_ERR_UNSUPPORTED;32653266if (type & SLJIT_SIMD_TEST)3267return SLJIT_SUCCESS;32683269if (src & SLJIT_MEM) 
{3270FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));32713272if (reg_size == 5)3273ins = (sljit_ins)1 << 25;32743275return push_inst(compiler, VLDREPL | ins | FRD(vreg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));3276}32773278if (reg_size == 5)3279ins = (sljit_ins)1 << 26;32803281if (type & SLJIT_SIMD_FLOAT) {3282if (src == SLJIT_IMM)3283return push_inst(compiler, VREPLGR2VR | ins | FRD(vreg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);32843285FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(vreg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));32863287if (reg_size == 5) {3288ins = (sljit_ins)(0x44 << 10);3289return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));3290}32913292return SLJIT_SUCCESS;3293}32943295ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;32963297if (src == SLJIT_IMM) {3298FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));3299src = TMP_REG2;3300}33013302return push_inst(compiler, ins | FRD(vreg) | RJ(src));3303}33043305SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,3306sljit_s32 vreg, sljit_s32 lane_index,3307sljit_s32 srcdst, sljit_sw srcdstw)3308{3309sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3310sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3311sljit_ins ins = 0;33123313CHECK_ERROR();3314CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));33153316ADJUST_LOCAL_OFFSET(srcdst, srcdstw);33173318if (reg_size != 5 && reg_size != 4)3319return SLJIT_ERR_UNSUPPORTED;33203321if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3322return SLJIT_ERR_UNSUPPORTED;33233324if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))3325return SLJIT_ERR_UNSUPPORTED;33263327if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))3328return SLJIT_ERR_UNSUPPORTED;33293330if (type & SLJIT_SIMD_TEST)3331return SLJIT_SUCCESS;33323333if (type & SLJIT_SIMD_LANE_ZERO) {3334ins = (reg_size == 
5) ? ((sljit_ins)1 << 26) : 0;33353336if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {3337FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));3338srcdst = TMP_FREG1;3339srcdstw = 0;3340}33413342FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(vreg) | FRJ(vreg) | FRK(vreg)));3343}33443345if (srcdst & SLJIT_MEM) {3346FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));33473348if (reg_size == 5)3349ins = (sljit_ins)1 << 25;33503351if (type & SLJIT_SIMD_STORE) {3352ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));3353return push_inst(compiler, VSTELM | ins | FRD(vreg) | RJ(srcdst));3354} else {3355emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0);3356srcdst = TMP_REG1;3357ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;33583359if (reg_size == 5) {3360if (elem_size < 2) {3361FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));3362if (lane_index >= (2 << (3 - elem_size))) {3363FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));3364FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));3365return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));3366} else {3367FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));3368return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));3369}3370} else3371ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;3372}33733374return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));3375}3376}33773378if (type & SLJIT_SIMD_FLOAT) {3379ins = (reg_size == 5) ? 
(sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;33803381if (type & SLJIT_SIMD_STORE) {3382FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(vreg) | IMM_V(lane_index)));3383return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));3384} else {3385FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));3386return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(TMP_REG1) | IMM_V(lane_index));3387}3388}33893390if (srcdst == SLJIT_IMM) {3391FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));3392srcdst = TMP_REG1;3393}33943395if (type & SLJIT_SIMD_STORE) {3396ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;33973398if (type & SLJIT_SIMD_LANE_SIGNED)3399ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));3400else3401ins |= VPICKVE2GR_U;34023403if (reg_size == 5) {3404if (elem_size < 2) {3405if (lane_index >= (2 << (3 - elem_size))) {3406if (type & SLJIT_SIMD_LANE_SIGNED)3407ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));3408else3409ins |= VPICKVE2GR_U;34103411FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));3412FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));3413return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));3414}3415} else {3416ins ^= (sljit_ins)1 << (15 - elem_size);3417ins |= (sljit_ins)1 << 26;3418}3419}34203421return push_inst(compiler, ins | RD(srcdst) | FRJ(vreg) | IMM_V(lane_index));3422} else {3423ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;34243425if (reg_size == 5) {3426if (elem_size < 2) {3427FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));3428if (lane_index >= (2 << (3 - elem_size))) {3429FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | 
IMM_I8(1)));3430FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));3431return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));3432} else {3433FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));3434return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));3435}3436} else3437ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;3438}34393440return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));3441}34423443return SLJIT_ERR_UNSUPPORTED;3444}34453446SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,3447sljit_s32 vreg,3448sljit_s32 src, sljit_s32 src_lane_index)3449{3450sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3451sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3452sljit_ins ins = 0;34533454CHECK_ERROR();3455CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));34563457if (reg_size != 5 && reg_size != 4)3458return SLJIT_ERR_UNSUPPORTED;34593460if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3461return SLJIT_ERR_UNSUPPORTED;34623463if (type & SLJIT_SIMD_TEST)3464return SLJIT_SUCCESS;34653466ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;34673468if (reg_size == 5) {3469FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));34703471ins = (src_lane_index < (2 << (3 - elem_size))) ? 
(sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);34723473return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));3474}34753476return push_inst(compiler, VREPLVEI | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index));3477}34783479SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,3480sljit_s32 vreg,3481sljit_s32 src, sljit_sw srcw)3482{3483sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3484sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3485sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);3486sljit_ins ins = 0;34873488CHECK_ERROR();3489CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));34903491ADJUST_LOCAL_OFFSET(src, srcw);34923493if (reg_size != 5 && reg_size != 4)3494return SLJIT_ERR_UNSUPPORTED;34953496if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3497return SLJIT_ERR_UNSUPPORTED;34983499if (type & SLJIT_SIMD_TEST)3500return SLJIT_SUCCESS;35013502if (src & SLJIT_MEM) {3503ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;35043505if (reg_size == 5)3506ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;35073508if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))3509FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(srcw)));3510else {3511FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));3512FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(0)));3513}3514src = vreg;3515}35163517if (type & SLJIT_SIMD_FLOAT) {3518if (elem_size != 2 || elem2_size != 3)3519return SLJIT_ERR_UNSUPPORTED;35203521ins = 0;3522if (reg_size == 5) {3523ins = (sljit_ins)1 << 26;3524FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));3525}35263527return push_inst(compiler, VFCVTL_D_S | ins | FRD(vreg) | FRJ(src));3528}35293530ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? 
VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);35313532if (reg_size == 5)3533ins |= (sljit_ins)1 << 26;35343535do {3536if (reg_size == 5)3537FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));35383539FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(vreg) | FRJ(src)));3540src = vreg;3541} while (++elem_size < elem2_size);35423543return SLJIT_SUCCESS;3544}35453546SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,3547sljit_s32 vreg,3548sljit_s32 dst, sljit_sw dstw)3549{3550sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3551sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3552sljit_ins ins = 0;3553sljit_s32 dst_r;35543555CHECK_ERROR();3556CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));35573558ADJUST_LOCAL_OFFSET(dst, dstw);35593560if (reg_size != 5 && reg_size != 4)3561return SLJIT_ERR_UNSUPPORTED;35623563if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3564return SLJIT_ERR_UNSUPPORTED;35653566if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))3567return SLJIT_ERR_UNSUPPORTED;35683569if (type & SLJIT_SIMD_TEST)3570return SLJIT_SUCCESS;35713572dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;35733574if (reg_size == 5)3575ins = (sljit_ins)1 << 26;35763577FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(vreg)));35783579FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));35803581if (reg_size == 5) {3582FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));3583FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));3584FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));3585}35863587if (dst_r == TMP_REG2)3588return emit_op_mem(compiler, ((type & SLJIT_32) ? 
INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);35893590return SLJIT_SUCCESS;3591}35923593SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,3594sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)3595{3596sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3597sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3598sljit_ins ins = 0;35993600CHECK_ERROR();3601CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));3602ADJUST_LOCAL_OFFSET(src2, src2w);36033604if (reg_size != 5 && reg_size != 4)3605return SLJIT_ERR_UNSUPPORTED;36063607if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))3608return SLJIT_ERR_UNSUPPORTED;36093610if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))3611return SLJIT_ERR_UNSUPPORTED;36123613if (type & SLJIT_SIMD_TEST)3614return SLJIT_SUCCESS;36153616if (src2 & SLJIT_MEM) {3617FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));3618FAIL_IF(push_inst(compiler, (reg_size == 4 ? 
VLD : XVLD) | FRD(TMP_FREG1) | RJ(src2) | IMM_I12(0)));3619src2 = TMP_FREG1;3620}36213622switch (SLJIT_SIMD_GET_OPCODE(type)) {3623case SLJIT_SIMD_OP2_AND:3624ins = VAND_V;3625break;3626case SLJIT_SIMD_OP2_OR:3627ins = VOR_V;3628break;3629case SLJIT_SIMD_OP2_XOR:3630ins = VXOR_V;3631break;3632case SLJIT_SIMD_OP2_SHUFFLE:3633if (reg_size != 4)3634return SLJIT_ERR_UNSUPPORTED;36353636return push_inst(compiler, VSHUF_B | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src1_vreg) | FRA(src2));3637}36383639if (reg_size == 5)3640ins |= (sljit_ins)1 << 26;36413642return push_inst(compiler, ins | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src2));3643}36443645SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,3646sljit_s32 op,3647sljit_s32 dst_reg,3648sljit_s32 mem_reg)3649{3650sljit_ins ins;36513652CHECK_ERROR();3653CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));36543655if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {3656if (op & SLJIT_ATOMIC_USE_CAS)3657return SLJIT_ERR_UNSUPPORTED;36583659switch (GET_OPCODE(op)) {3660case SLJIT_MOV:3661case SLJIT_MOV_P:3662ins = LL_D;3663break;3664case SLJIT_MOV_S32:3665case SLJIT_MOV32:3666ins = LL_W;3667break;36683669default:3670return SLJIT_ERR_UNSUPPORTED;3671}36723673if (op & SLJIT_ATOMIC_TEST)3674return SLJIT_SUCCESS;36753676return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg));3677}36783679switch(GET_OPCODE(op)) {3680case SLJIT_MOV_S8:3681ins = LD_B;3682break;3683case SLJIT_MOV_U8:3684ins = LD_BU;3685break;3686case SLJIT_MOV_S16:3687ins = LD_H;3688break;3689case SLJIT_MOV_U16:3690ins = LD_HU;3691break;3692case SLJIT_MOV32:3693case SLJIT_MOV_S32:3694ins = LD_W;3695break;3696case SLJIT_MOV_U32:3697ins = LD_WU;3698break;3699default:3700ins = LD_D;3701break;3702}37033704if (op & SLJIT_ATOMIC_TEST)3705return SLJIT_SUCCESS;37063707return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));3708}37093710SLJIT_API_FUNC_ATTRIBUTE sljit_s32 
sljit_emit_atomic_store(struct sljit_compiler *compiler,3711sljit_s32 op,3712sljit_s32 src_reg,3713sljit_s32 mem_reg,3714sljit_s32 temp_reg)3715{3716sljit_ins ins = 0;3717sljit_ins unsign = 0;3718sljit_s32 tmp = temp_reg;37193720CHECK_ERROR();3721CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));37223723if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {3724if (op & SLJIT_ATOMIC_USE_CAS)3725return SLJIT_ERR_UNSUPPORTED;37263727switch (GET_OPCODE(op)) {3728case SLJIT_MOV:3729case SLJIT_MOV_P:3730ins = SC_D;3731break;3732case SLJIT_MOV_S32:3733case SLJIT_MOV32:3734ins = SC_W;3735break;37363737default:3738return SLJIT_ERR_UNSUPPORTED;3739}37403741if (op & SLJIT_ATOMIC_TEST)3742return SLJIT_SUCCESS;37433744FAIL_IF(push_inst(compiler, ADD_D | RD(OTHER_FLAG) | RJ(src_reg) | RK(TMP_ZERO)));3745return push_inst(compiler, ins | RD(OTHER_FLAG) | RJ(mem_reg));3746}37473748switch (GET_OPCODE(op)) {3749case SLJIT_MOV_S8:3750ins = AMCAS_B;3751break;3752case SLJIT_MOV_U8:3753ins = AMCAS_B;3754unsign = BSTRPICK_D | (7 << 16);3755break;3756case SLJIT_MOV_S16:3757ins = AMCAS_H;3758break;3759case SLJIT_MOV_U16:3760ins = AMCAS_H;3761unsign = BSTRPICK_D | (15 << 16);3762break;3763case SLJIT_MOV32:3764case SLJIT_MOV_S32:3765ins = AMCAS_W;3766break;3767case SLJIT_MOV_U32:3768ins = AMCAS_W;3769unsign = BSTRPICK_D | (31 << 16);3770break;3771default:3772ins = AMCAS_D;3773break;3774}37753776if (op & SLJIT_ATOMIC_TEST)3777return SLJIT_SUCCESS;37783779if (op & SLJIT_SET_ATOMIC_STORED) {3780FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG3) | RJ(temp_reg) | RK(TMP_ZERO)));3781tmp = TMP_REG3;3782}3783FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));3784if (!(op & SLJIT_SET_ATOMIC_STORED))3785return SLJIT_SUCCESS;37863787if (unsign)3788FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));37893790FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(tmp) | RK(temp_reg)));3791return push_inst(compiler, SLTUI | RD(OTHER_FLAG) | 
RJ(OTHER_FLAG) | IMM_I12(1));3792}37933794static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)3795{3796SLJIT_UNUSED_ARG(last_ins);37973798FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));3799FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));3800FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));3801return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));3802}38033804SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)3805{3806sljit_ins *inst = (sljit_ins*)addr;3807SLJIT_UNUSED_ARG(executable_offset);38083809SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);38103811SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);3812inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);38133814SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);3815inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(sljit_ins)(((new_target >> 32) & 0xfffff) << 5);38163817SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);3818inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);38193820SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);3821if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)3822inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);3823else3824inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);38253826SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);38273828inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);3829SLJIT_CACHE_FLUSH(inst, inst + 4);3830}38313832SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)3833{3834struct 
sljit_const *const_;3835sljit_s32 dst_r;38363837CHECK_ERROR_PTR();3838CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));3839ADJUST_LOCAL_OFFSET(dst, dstw);38403841const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));3842PTR_FAIL_IF(!const_);3843set_const(const_, compiler);38443845dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;3846PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));38473848if (dst & SLJIT_MEM)3849PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));38503851return const_;3852}38533854SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)3855{3856struct sljit_jump *jump;3857sljit_s32 dst_r;38583859CHECK_ERROR_PTR();3860CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));3861ADJUST_LOCAL_OFFSET(dst, dstw);38623863jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));3864PTR_FAIL_IF(!jump);3865set_mov_addr(jump, compiler, 0);38663867dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;3868PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));38693870compiler->size += JUMP_MAX_SIZE - 1;38713872if (dst & SLJIT_MEM)3873PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));38743875return jump;3876}38773878SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)3879{3880sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);3881}388238833884