Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c
9913 views
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526#include <sys/auxv.h>2728#ifdef __ARCH__29#define ENABLE_STATIC_FACILITY_DETECTION 130#else31#define ENABLE_STATIC_FACILITY_DETECTION 032#endif33#define ENABLE_DYNAMIC_FACILITY_DETECTION 13435SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)36{37return "s390x" SLJIT_CPUINFO;38}3940/* Instructions are stored as 64 bit values regardless their size. */41typedef sljit_uw sljit_ins;4243#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)44#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)4546static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {470, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 1448};4950/* there are also a[2-15] available, but they are slower to access and51* their use is limited as mundaym explained:52* https://github.com/zherczeg/sljit/pull/91#discussion_r48689568953*/5455/* General Purpose Registers [0-15]. */56typedef sljit_uw sljit_gpr;5758/*59* WARNING60* the following code is non standard and should be improved for61* consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based62* registers because r0 and r1 are the ABI recommended volatiles.63* there is a gpr() function that maps sljit to physical register numbers64* that should be used instead of the usual index into reg_map[] and65* will be retired ASAP (TODO: carenas)66*/6768static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */69static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */70static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */71static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */72static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */73static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */74static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */75static const sljit_gpr r7 = 7; /* reg_map[6] */76static const sljit_gpr r8 = 8; /* reg_map[7] */77static const sljit_gpr r9 = 9; /* reg_map[8] */78static const sljit_gpr r10 = 10; /* reg_map[9] */79static const sljit_gpr r11 = 11; /* reg_map[10] */80static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */81static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */82static const sljit_gpr r14 = 14; /* reg_map[0]: return address */83static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */8485/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */86/* TODO(carenas): r12 might conflict in PIC code, reserve? */87/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp88* like we do know might be faster though, reserve?89*/9091/* TODO(carenas): should be named TMP_REG[1-2] for consistency */92#define tmp0 r093#define tmp1 r19495/* When reg cannot be unused. */96#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)9798/* Link register. */99static const sljit_gpr link_r = 14; /* r14 */100101#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)102103static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {1040, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1105};106107#define R0A(r) (r)108#define R4A(r) ((r) << 4)109#define R8A(r) ((r) << 8)110#define R12A(r) ((r) << 12)111#define R16A(r) ((r) << 16)112#define R20A(r) ((r) << 20)113#define R28A(r) ((r) << 28)114#define R32A(r) ((r) << 32)115#define R36A(r) ((r) << 36)116117#define R0(r) ((sljit_ins)reg_map[r])118119#define F0(r) ((sljit_ins)freg_map[r])120#define F4(r) (R4A((sljit_ins)freg_map[r]))121#define F12(r) (R12A((sljit_ins)freg_map[r]))122#define F20(r) (R20A((sljit_ins)freg_map[r]))123#define F28(r) (R28A((sljit_ins)freg_map[r]))124#define F32(r) (R32A((sljit_ins)freg_map[r]))125#define F36(r) (R36A((sljit_ins)freg_map[r]))126127struct sljit_s390x_const {128struct sljit_const const_; /* must be first */129sljit_sw init_value; /* required to build literal pool */130};131132/* Convert SLJIT register to hardware register. */133static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)134{135SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));136return reg_map[r];137}138139static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)140{141sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));142FAIL_IF(!ibuf);143*ibuf = ins;144145SLJIT_ASSERT(ins <= 0xffffffffffffL);146147compiler->size++;148if (ins & 0xffff00000000L)149compiler->size++;150151if (ins & 0xffffffff0000L)152compiler->size++;153154return SLJIT_SUCCESS;155}156157#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \158(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \159&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))160161/* Map the given type to a 4-bit condition code mask. */162static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {163const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */164const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */165const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */166const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */167168switch (type) {169case SLJIT_EQUAL:170if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {171sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);172if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)173return cc0;174if (type == SLJIT_OVERFLOW)175return (cc0 | cc3);176return (cc0 | cc2);177}178/* fallthrough */179180case SLJIT_ATOMIC_STORED:181case SLJIT_F_EQUAL:182case SLJIT_ORDERED_EQUAL:183return cc0;184185case SLJIT_NOT_EQUAL:186if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {187sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);188if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)189return (cc1 | cc2 | cc3);190if (type == SLJIT_OVERFLOW)191return (cc1 | cc2);192return (cc1 | cc3);193}194/* fallthrough */195196case SLJIT_UNORDERED_OR_NOT_EQUAL:197return (cc1 | cc2 | cc3);198199case SLJIT_LESS:200case SLJIT_ATOMIC_NOT_STORED:201return cc1;202203case SLJIT_GREATER_EQUAL:204case SLJIT_UNORDERED_OR_GREATER_EQUAL:205return (cc0 | cc2 | cc3);206207case SLJIT_GREATER:208if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)209return cc2;210return cc3;211212case SLJIT_LESS_EQUAL:213if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)214return (cc0 | cc1);215return (cc0 | cc1 | cc2);216217case SLJIT_SIG_LESS:218case SLJIT_F_LESS:219case SLJIT_ORDERED_LESS:220return cc1;221222case SLJIT_NOT_CARRY:223if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)224return (cc2 | cc3);225/* fallthrough */226227case SLJIT_SIG_LESS_EQUAL:228case SLJIT_F_LESS_EQUAL:229case SLJIT_ORDERED_LESS_EQUAL:230return (cc0 | cc1);231232case SLJIT_CARRY:233if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)234return (cc0 | cc1);235/* fallthrough */236237case SLJIT_SIG_GREATER:238case SLJIT_UNORDERED_OR_GREATER:239/* Overflow is considered greater, see SLJIT_SUB. */240return cc2 | cc3;241242case SLJIT_SIG_GREATER_EQUAL:243return (cc0 | cc2 | cc3);244245case SLJIT_OVERFLOW:246if (compiler->status_flags_state & SLJIT_SET_Z)247return (cc2 | cc3);248/* fallthrough */249250case SLJIT_UNORDERED:251return cc3;252253case SLJIT_NOT_OVERFLOW:254if (compiler->status_flags_state & SLJIT_SET_Z)255return (cc0 | cc1);256/* fallthrough */257258case SLJIT_ORDERED:259return (cc0 | cc1 | cc2);260261case SLJIT_F_NOT_EQUAL:262case SLJIT_ORDERED_NOT_EQUAL:263return (cc1 | cc2);264265case SLJIT_F_GREATER:266case SLJIT_ORDERED_GREATER:267return cc2;268269case SLJIT_F_GREATER_EQUAL:270case SLJIT_ORDERED_GREATER_EQUAL:271return (cc0 | cc2);272273case SLJIT_UNORDERED_OR_LESS_EQUAL:274return (cc0 | cc1 | cc3);275276case SLJIT_UNORDERED_OR_EQUAL:277return (cc0 | cc3);278279case SLJIT_UNORDERED_OR_LESS:280return (cc1 | cc3);281}282283SLJIT_UNREACHABLE();284return (sljit_u8)-1;285}286287/* Facility to bit index mappings.288Note: some facilities share the same bit index. */289typedef sljit_uw facility_bit;290#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7291#define FAST_LONG_DISPLACEMENT_FACILITY 19292#define EXTENDED_IMMEDIATE_FACILITY 21293#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34294#define DISTINCT_OPERAND_FACILITY 45295#define HIGH_WORD_FACILITY 45296#define POPULATION_COUNT_FACILITY 45297#define LOAD_STORE_ON_CONDITION_1_FACILITY 45298#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49299#define LOAD_STORE_ON_CONDITION_2_FACILITY 53300#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58301#define VECTOR_FACILITY 129302#define VECTOR_ENHANCEMENTS_1_FACILITY 135303304/* Report whether a facility is known to be present due to the compiler305settings. This function should always be compiled to a constant306value given a constant argument. */307static SLJIT_INLINE int have_facility_static(facility_bit x)308{309#if ENABLE_STATIC_FACILITY_DETECTION310switch (x) {311case FAST_LONG_DISPLACEMENT_FACILITY:312return (__ARCH__ >= 6 /* z990 */);313case EXTENDED_IMMEDIATE_FACILITY:314case STORE_FACILITY_LIST_EXTENDED_FACILITY:315return (__ARCH__ >= 7 /* z9-109 */);316case GENERAL_INSTRUCTION_EXTENSION_FACILITY:317return (__ARCH__ >= 8 /* z10 */);318case DISTINCT_OPERAND_FACILITY:319return (__ARCH__ >= 9 /* z196 */);320case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:321return (__ARCH__ >= 10 /* zEC12 */);322case LOAD_STORE_ON_CONDITION_2_FACILITY:323case VECTOR_FACILITY:324return (__ARCH__ >= 11 /* z13 */);325case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:326case VECTOR_ENHANCEMENTS_1_FACILITY:327return (__ARCH__ >= 12 /* z14 */);328default:329SLJIT_UNREACHABLE();330}331#endif332return 0;333}334335static SLJIT_INLINE unsigned long get_hwcap()336{337static unsigned long hwcap = 0;338if (SLJIT_UNLIKELY(!hwcap)) {339hwcap = getauxval(AT_HWCAP);340SLJIT_ASSERT(hwcap != 0);341}342return hwcap;343}344345static SLJIT_INLINE int have_stfle()346{347if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))348return 1;349350return (get_hwcap() & HWCAP_S390_STFLE);351}352353/* Report whether the given facility is available. This function always354performs a runtime check. */355static int have_facility_dynamic(facility_bit x)356{357#if ENABLE_DYNAMIC_FACILITY_DETECTION358static struct {359sljit_uw bits[4];360} cpu_features;361size_t size = sizeof(cpu_features);362const sljit_uw word_index = x >> 6;363const sljit_uw bit_index = ((1UL << 63) >> (x & 63));364365SLJIT_ASSERT(x < size * 8);366if (SLJIT_UNLIKELY(!have_stfle()))367return 0;368369if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {370__asm__ __volatile__ (371"lgr %%r0, %0;"372"stfle 0(%1);"373/* outputs */:374/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)375/* clobbers */: "r0", "cc", "memory"376);377SLJIT_ASSERT(cpu_features.bits[0] != 0);378}379return (cpu_features.bits[word_index] & bit_index) != 0;380#else381return 0;382#endif383}384385#define HAVE_FACILITY(name, bit) \386static SLJIT_INLINE int name() \387{ \388static int have = -1; \389/* Static check first. May allow the function to be optimized away. */ \390if (have_facility_static(bit)) \391have = 1; \392else if (SLJIT_UNLIKELY(have < 0)) \393have = have_facility_dynamic(bit) ? 1 : 0; \394\395return have; \396}397398HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)399HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)400HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)401HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)402HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)403HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)404#undef HAVE_FACILITY405406#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)407#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)408409#define CHECK_SIGNED(v, bitlen) \410((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))411412#define is_s8(d) CHECK_SIGNED((d), 8)413#define is_s16(d) CHECK_SIGNED((d), 16)414#define is_s20(d) CHECK_SIGNED((d), 20)415#define is_s32(d) ((d) == (sljit_s32)(d))416417static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)418{419sljit_uw dh, dl;420421SLJIT_ASSERT(is_s20(d));422423dh = (d >> 12) & 0xff;424dl = ((sljit_uw)d << 8) & 0xfff00;425return (dh | dl) << 8;426}427428/* TODO(carenas): variadic macro is not strictly needed */429#define SLJIT_S390X_INSTRUCTION(op, ...) \430static SLJIT_INLINE sljit_ins op(__VA_ARGS__)431432/* RR form instructions. */433#define SLJIT_S390X_RR(name, pattern) \434SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \435{ \436return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \437}438439/* AND */440SLJIT_S390X_RR(nr, 0x1400)441442/* BRANCH AND SAVE */443SLJIT_S390X_RR(basr, 0x0d00)444445/* BRANCH ON CONDITION */446SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */447448/* DIVIDE */449SLJIT_S390X_RR(dr, 0x1d00)450451/* EXCLUSIVE OR */452SLJIT_S390X_RR(xr, 0x1700)453454/* LOAD */455SLJIT_S390X_RR(lr, 0x1800)456457/* LOAD COMPLEMENT */458SLJIT_S390X_RR(lcr, 0x1300)459460/* OR */461SLJIT_S390X_RR(or, 0x1600)462463#undef SLJIT_S390X_RR464465/* RRE form instructions */466#define SLJIT_S390X_RRE(name, pattern) \467SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \468{ \469return (pattern) | R4A(dst) | R0A(src); \470}471472/* AND */473SLJIT_S390X_RRE(ngr, 0xb9800000)474475/* DIVIDE LOGICAL */476SLJIT_S390X_RRE(dlr, 0xb9970000)477SLJIT_S390X_RRE(dlgr, 0xb9870000)478479/* DIVIDE SINGLE */480SLJIT_S390X_RRE(dsgr, 0xb90d0000)481482/* EXCLUSIVE OR */483SLJIT_S390X_RRE(xgr, 0xb9820000)484485/* LOAD */486SLJIT_S390X_RRE(lgr, 0xb9040000)487SLJIT_S390X_RRE(lgfr, 0xb9140000)488489/* LOAD BYTE */490SLJIT_S390X_RRE(lbr, 0xb9260000)491SLJIT_S390X_RRE(lgbr, 0xb9060000)492493/* LOAD COMPLEMENT */494SLJIT_S390X_RRE(lcgr, 0xb9030000)495496/* LOAD HALFWORD */497SLJIT_S390X_RRE(lhr, 0xb9270000)498SLJIT_S390X_RRE(lghr, 0xb9070000)499500/* LOAD LOGICAL */501SLJIT_S390X_RRE(llgfr, 0xb9160000)502503/* LOAD LOGICAL CHARACTER */504SLJIT_S390X_RRE(llcr, 0xb9940000)505SLJIT_S390X_RRE(llgcr, 0xb9840000)506507/* LOAD LOGICAL HALFWORD */508SLJIT_S390X_RRE(llhr, 0xb9950000)509SLJIT_S390X_RRE(llghr, 0xb9850000)510511/* MULTIPLY LOGICAL */512SLJIT_S390X_RRE(mlgr, 0xb9860000)513514/* MULTIPLY SINGLE */515SLJIT_S390X_RRE(msgfr, 0xb91c0000)516517/* OR */518SLJIT_S390X_RRE(ogr, 0xb9810000)519520/* SUBTRACT */521SLJIT_S390X_RRE(sgr, 0xb9090000)522523#undef SLJIT_S390X_RRE524525/* RI-a form instructions */526#define SLJIT_S390X_RIA(name, pattern, imm_type) \527SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \528{ \529return (pattern) | R20A(reg) | (imm & 0xffff); \530}531532/* ADD HALFWORD IMMEDIATE */533SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)534535/* LOAD HALFWORD IMMEDIATE */536SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)537SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)538539/* LOAD LOGICAL IMMEDIATE */540SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)541SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)542SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)543SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)544545/* MULTIPLY HALFWORD IMMEDIATE */546SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)547SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)548549/* OR IMMEDIATE */550SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)551552#undef SLJIT_S390X_RIA553554/* RIL-a form instructions (requires extended immediate facility) */555#define SLJIT_S390X_RILA(name, pattern, imm_type) \556SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \557{ \558SLJIT_ASSERT(have_eimm()); \559return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \560}561562/* ADD IMMEDIATE */563SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)564565/* ADD IMMEDIATE HIGH */566SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */567568/* AND IMMEDIATE */569SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)570571/* EXCLUSIVE OR IMMEDIATE */572SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)573574/* INSERT IMMEDIATE */575SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)576SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)577578/* LOAD IMMEDIATE */579SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)580581/* LOAD LOGICAL IMMEDIATE */582SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)583SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)584585/* SUBTRACT LOGICAL IMMEDIATE */586SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)587588#undef SLJIT_S390X_RILA589590/* RX-a form instructions */591#define SLJIT_S390X_RXA(name, pattern) \592SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \593{ \594SLJIT_ASSERT((d & 0xfff) == d); \595\596return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \597}598599/* LOAD */600SLJIT_S390X_RXA(l, 0x58000000)601602/* LOAD ADDRESS */603SLJIT_S390X_RXA(la, 0x41000000)604605/* LOAD HALFWORD */606SLJIT_S390X_RXA(lh, 0x48000000)607608/* MULTIPLY SINGLE */609SLJIT_S390X_RXA(ms, 0x71000000)610611/* STORE */612SLJIT_S390X_RXA(st, 0x50000000)613614/* STORE CHARACTER */615SLJIT_S390X_RXA(stc, 0x42000000)616617/* STORE HALFWORD */618SLJIT_S390X_RXA(sth, 0x40000000)619620#undef SLJIT_S390X_RXA621622/* RXY-a instructions */623#define SLJIT_S390X_RXYA(name, pattern, cond) \624SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \625{ \626SLJIT_ASSERT(cond); \627\628return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \629}630631/* LOAD */632SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())633SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)634SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)635636/* LOAD BYTE */637SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())638SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())639640/* LOAD HALFWORD */641SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())642SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)643644/* LOAD LOGICAL */645SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)646647/* LOAD LOGICAL CHARACTER */648SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())649SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)650651/* LOAD LOGICAL HALFWORD */652SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())653SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)654655/* MULTIPLY SINGLE */656SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())657SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)658659/* STORE */660SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())661SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)662663/* STORE CHARACTER */664SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())665666/* STORE HALFWORD */667SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())668669#undef SLJIT_S390X_RXYA670671/* RSY-a instructions */672#define SLJIT_S390X_RSYA(name, pattern, cond) \673SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \674{ \675SLJIT_ASSERT(cond); \676\677return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \678}679680/* LOAD MULTIPLE */681SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)682683/* SHIFT LEFT LOGICAL */684SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)685686/* SHIFT RIGHT SINGLE */687SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)688689/* STORE MULTIPLE */690SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)691692#undef SLJIT_S390X_RSYA693694/* RIE-f instructions (require general-instructions-extension facility) */695#define SLJIT_S390X_RIEF(name, pattern) \696SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \697{ \698sljit_ins i3, i4, i5; \699\700SLJIT_ASSERT(have_genext()); \701i3 = (sljit_ins)start << 24; \702i4 = (sljit_ins)end << 16; \703i5 = (sljit_ins)rot << 8; \704\705return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \706}707708/* ROTATE THEN AND SELECTED BITS */709/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */710711/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */712/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */713714/* ROTATE THEN OR SELECTED BITS */715SLJIT_S390X_RIEF(rosbg, 0xec0000000056)716717/* ROTATE THEN INSERT SELECTED BITS */718/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */719/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */720721/* ROTATE THEN INSERT SELECTED BITS HIGH */722SLJIT_S390X_RIEF(risbhg, 0xec000000005d)723724/* ROTATE THEN INSERT SELECTED BITS LOW */725/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */726727#undef SLJIT_S390X_RIEF728729/* RRF-c instructions (require load/store-on-condition 1 facility) */730#define SLJIT_S390X_RRFC(name, pattern) \731SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \732{ \733sljit_ins m3; \734\735SLJIT_ASSERT(have_lscond1()); \736m3 = (sljit_ins)(mask & 0xf) << 12; \737\738return (pattern) | m3 | R4A(dst) | R0A(src); \739}740741/* LOAD HALFWORD IMMEDIATE ON CONDITION */742SLJIT_S390X_RRFC(locr, 0xb9f20000)743SLJIT_S390X_RRFC(locgr, 0xb9e20000)744745#undef SLJIT_S390X_RRFC746747/* RIE-g instructions (require load/store-on-condition 2 facility) */748#define SLJIT_S390X_RIEG(name, pattern) \749SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \750{ \751sljit_ins m3, i2; \752\753SLJIT_ASSERT(have_lscond2()); \754m3 = (sljit_ins)(mask & 0xf) << 32; \755i2 = (sljit_ins)(imm & 0xffffL) << 16; \756\757return (pattern) | R36A(reg) | m3 | i2; \758}759760/* LOAD HALFWORD IMMEDIATE ON CONDITION */761SLJIT_S390X_RIEG(lochi, 0xec0000000042)762SLJIT_S390X_RIEG(locghi, 0xec0000000046)763764#undef SLJIT_S390X_RIEG765766#define SLJIT_S390X_RILB(name, pattern, cond) \767SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \768{ \769SLJIT_ASSERT(cond); \770\771return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \772}773774/* BRANCH RELATIVE AND SAVE LONG */775SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)776777/* LOAD ADDRESS RELATIVE LONG */778SLJIT_S390X_RILB(larl, 0xc00000000000, 1)779780/* LOAD RELATIVE LONG */781SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())782783#undef SLJIT_S390X_RILB784785SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)786{787return 0x07f0 | target;788}789790SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)791{792sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;793sljit_ins ri2 = (sljit_ins)target & 0xffff;794return 0xa7040000L | m1 | ri2;795}796797SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)798{799sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;800sljit_ins ri2 = (sljit_ins)target & 0xffffffff;801return 0xc00400000000L | m1 | ri2;802}803804SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)805{806SLJIT_ASSERT(have_eimm());807return 0xb9830000 | R8A(dst) | R0A(src);808}809810/* INSERT PROGRAM MASK */811SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)812{813return 0xb2220000 | R4A(dst);814}815816/* SET PROGRAM MASK */817SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)818{819return 0x0400 | R4A(dst);820}821822/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */823SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)824{825return risbhg(dst, src, start, 0x8 | end, rot);826}827828#undef SLJIT_S390X_INSTRUCTION829830static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)831{832/* Condition codes: bits 18 and 19.833Transformation:8340 (zero and no overflow) : unchanged8351 (non-zero and no overflow) : unchanged8362 (zero and overflow) : decreased by 18373 (non-zero and overflow) : decreased by 1 if non-zero */838FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));839FAIL_IF(push_inst(compiler, ipm(tmp1)));840FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));841FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));842FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));843FAIL_IF(push_inst(compiler, spm(tmp1)));844return SLJIT_SUCCESS;845}846847/* load 64-bit immediate into register without clobbering flags */848static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)849{850/* 4 byte instructions */851if (is_s16(v))852return push_inst(compiler, lghi(target, (sljit_s16)v));853854if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)855return push_inst(compiler, llill(target, (sljit_u16)v));856857if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)858return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));859860if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)861return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));862863if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)864return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));865866if (is_s32(v))867return push_inst(compiler, lgfi(target, (sljit_s32)v));868869if (((sljit_uw)v >> 32) == 0)870return push_inst(compiler, llilf(target, (sljit_u32)v));871872if (((sljit_uw)v << 32) == 0)873return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));874875FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));876return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));877}878879struct addr {880sljit_gpr base;881sljit_gpr index;882sljit_s32 offset;883};884885/* transform memory operand into D(X,B) form with a signed 20-bit offset */886static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,887struct addr *addr, sljit_s32 mem, sljit_sw off,888sljit_gpr tmp /* clobbered, must not be r0 */)889{890sljit_gpr base = r0;891sljit_gpr index = r0;892893SLJIT_ASSERT(tmp != r0);894if (mem & REG_MASK)895base = gpr(mem & REG_MASK);896897if (mem & OFFS_REG_MASK) {898index = gpr(OFFS_REG(mem));899if (off != 0) {900/* shift and put the result into tmp */901SLJIT_ASSERT(0 <= off && off < 64);902FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));903index = tmp;904off = 0; /* clear offset */905}906}907else if (!is_s20(off)) {908FAIL_IF(push_load_imm_inst(compiler, tmp, off));909index = tmp;910off = 0; /* clear offset */911}912addr->base = base;913addr->index = index;914addr->offset = (sljit_s32)off;915return SLJIT_SUCCESS;916}917918/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */919static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,920struct addr *addr, sljit_s32 mem, sljit_sw off,921sljit_gpr tmp /* clobbered, must not be r0 */)922{923sljit_gpr base = r0;924sljit_gpr index = r0;925926SLJIT_ASSERT(tmp != r0);927if (mem & REG_MASK)928base = gpr(mem & REG_MASK);929930if (mem & OFFS_REG_MASK) {931index = gpr(OFFS_REG(mem));932if (off != 0) {933/* shift and put the result into tmp */934SLJIT_ASSERT(0 <= off && off < 64);935FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));936index = tmp;937off = 0; /* clear offset */938}939}940else if (!is_u12(off)) {941FAIL_IF(push_load_imm_inst(compiler, tmp, off));942index = tmp;943off = 0; /* clear offset */944}945addr->base = base;946addr->index = index;947addr->offset = (sljit_s32)off;948return SLJIT_SUCCESS;949}950951#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)952#define WHEN(cond, r, i1, i2, addr) \953(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)954955/* May clobber tmp1. */956static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,957sljit_s32 mem, sljit_sw memw,958sljit_s32 is_32bit, const sljit_ins* forms)959{960struct addr addr;961962SLJIT_ASSERT(mem & SLJIT_MEM);963964if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {965FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));966return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);967}968969FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));970return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));971}972973static const sljit_ins load_forms[3] = {9740x58000000 /* l */,9750xe30000000058 /* ly */,9760xe30000000004 /* lg */977};978979static const sljit_ins store_forms[3] = {9800x50000000 /* st */,9810xe30000000050 /* sty */,9820xe30000000024 /* stg */983};984985static const sljit_ins load_halfword_forms[3] = {9860x48000000 /* lh */,9870xe30000000078 /* lhy */,9880xe30000000015 /* lgh */989};990991/* May clobber tmp1. */992static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,993sljit_s32 src, sljit_sw srcw,994sljit_s32 is_32bit)995{996return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);997}998999/* May clobber tmp1. */1000static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,1001sljit_s32 src, sljit_sw srcw,1002sljit_s32 is_32bit)1003{1004struct addr addr;1005sljit_ins ins;10061007SLJIT_ASSERT(src & SLJIT_MEM);10081009FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));10101011ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;1012return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));1013}10141015/* May clobber tmp1. */1016static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,1017sljit_s32 dst, sljit_sw dstw,1018sljit_s32 is_32bit)1019{1020return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);1021}10221023#undef WHEN10241025static sljit_s32 emit_move(struct sljit_compiler *compiler,1026sljit_gpr dst_r,1027sljit_s32 src, sljit_sw srcw)1028{1029sljit_gpr src_r;10301031SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));10321033if (src == SLJIT_IMM)1034return push_load_imm_inst(compiler, dst_r, srcw);10351036if (src & SLJIT_MEM)1037return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);10381039src_r = gpr(src & REG_MASK);1040return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));1041}10421043static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,1044sljit_s32 dst,1045sljit_s32 src1, sljit_sw src1w,1046sljit_s32 src2, sljit_sw src2w)1047{1048sljit_gpr dst_r = tmp0;1049sljit_gpr src_r = tmp1;1050sljit_s32 needs_move = 1;10511052if (FAST_IS_REG(dst)) {1053dst_r = gpr(dst);10541055if (dst == src1)1056needs_move = 0;1057else if (dst == src2) {1058dst_r = tmp0;1059needs_move = 2;1060}1061}10621063if (needs_move)1064FAIL_IF(emit_move(compiler, dst_r, src1, src1w));10651066if (FAST_IS_REG(src2))1067src_r = gpr(src2);1068else1069FAIL_IF(emit_move(compiler, tmp1, src2, src2w));10701071FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));10721073if (needs_move != 2)1074return SLJIT_SUCCESS;10751076dst_r = gpr(dst & REG_MASK);1077return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));1078}10791080static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,1081sljit_s32 dst,1082sljit_s32 src1, sljit_sw src1w)1083{1084sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;1085sljit_gpr src_r = tmp1;10861087if (FAST_IS_REG(src1))1088src_r = gpr(src1);1089else1090FAIL_IF(emit_move(compiler, tmp1, src1, src1w));10911092return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));1093}10941095static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,1096sljit_s32 dst,1097sljit_s32 src1, sljit_sw src1w,1098sljit_s32 src2, sljit_sw src2w)1099{1100sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;1101sljit_gpr src1_r = tmp0;1102sljit_gpr src2_r = tmp1;11031104if (FAST_IS_REG(src1))1105src1_r = gpr(src1);1106else1107FAIL_IF(emit_move(compiler, tmp0, src1, src1w));11081109if (FAST_IS_REG(src2))1110src2_r = gpr(src2);1111else1112FAIL_IF(emit_move(compiler, tmp1, src2, src2w));11131114return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));1115}11161117typedef enum {1118RI_A,1119RIL_A,1120} emit_ril_type;11211122static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,1123sljit_s32 dst,1124sljit_s32 src1, sljit_sw src1w,1125sljit_sw src2w,1126emit_ril_type type)1127{1128sljit_gpr dst_r = tmp0;1129sljit_s32 needs_move = 1;11301131if (FAST_IS_REG(dst)) {1132dst_r = gpr(dst);11331134if (dst == src1)1135needs_move = 0;1136}11371138if (needs_move)1139FAIL_IF(emit_move(compiler, dst_r, src1, src1w));11401141if (type == RIL_A)1142return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));1143return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));1144}11451146static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,1147sljit_s32 dst,1148sljit_s32 src1, sljit_sw src1w,1149sljit_sw src2w)1150{1151sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;1152sljit_gpr src_r = tmp0;11531154if (!FAST_IS_REG(src1))1155FAIL_IF(emit_move(compiler, tmp0, src1, src1w));1156else1157src_r = gpr(src1 & REG_MASK);11581159return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);1160}11611162typedef enum {1163RX_A,1164RXY_A,1165} emit_rx_type;11661167static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,1168sljit_s32 dst,1169sljit_s32 src1, sljit_sw src1w,1170sljit_s32 src2, sljit_sw src2w,1171emit_rx_type type)1172{1173sljit_gpr dst_r = tmp0;1174sljit_s32 needs_move = 1;1175sljit_gpr base, index;11761177SLJIT_ASSERT(src2 & SLJIT_MEM);11781179if (FAST_IS_REG(dst)) {1180dst_r = gpr(dst);11811182if (dst == src1)1183needs_move = 0;1184else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {1185dst_r = tmp0;1186needs_move = 2;1187}1188}11891190if (needs_move)1191FAIL_IF(emit_move(compiler, dst_r, src1, src1w));11921193base = gpr(src2 & REG_MASK);1194index = tmp0;11951196if (src2 & OFFS_REG_MASK) {1197index = gpr(OFFS_REG(src2));11981199if (src2w != 0) {1200FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));1201src2w = 0;1202index = tmp1;1203}1204} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {1205FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));12061207if (src2 & REG_MASK)1208index = tmp1;1209else1210base = tmp1;1211src2w = 0;1212}12131214if (type == RX_A)1215ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;1216else1217ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);12181219FAIL_IF(push_inst(compiler, ins));12201221if (needs_move != 2)1222return SLJIT_SUCCESS;12231224dst_r = gpr(dst);1225return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));1226}12271228static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,1229sljit_s32 dst, sljit_sw dstw,1230sljit_sw srcw)1231{1232sljit_gpr dst_r = tmp1;12331234SLJIT_ASSERT(dst & SLJIT_MEM);12351236if (dst & OFFS_REG_MASK) {1237sljit_gpr index = tmp1;12381239if ((dstw & 0x3) == 0)1240index = gpr(OFFS_REG(dst));1241else1242FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));12431244FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));1245dstw = 0;1246}1247else if (!is_s20(dstw)) {1248FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));12491250if (dst & REG_MASK)1251FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));12521253dstw = 0;1254}1255else1256dst_r = gpr(dst & REG_MASK);12571258return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));1259}12601261struct ins_forms {1262sljit_ins op_r;1263sljit_ins op_gr;1264sljit_ins op_rk;1265sljit_ins op_grk;1266sljit_ins op;1267sljit_ins op_y;1268sljit_ins op_g;1269};12701271static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,1272sljit_s32 dst,1273sljit_s32 src1, sljit_sw src1w,1274sljit_s32 src2, sljit_sw src2w)1275{1276sljit_s32 mode = compiler->mode;1277sljit_ins ins, ins_k;12781279if ((src1 | src2) & SLJIT_MEM) {1280sljit_ins ins12, ins20;12811282if (mode & SLJIT_32) {1283ins12 = forms->op;1284ins20 = forms->op_y;1285}1286else {1287ins12 = 0;1288ins20 = forms->op_g;1289}12901291if (ins12 && ins20) {1292/* Extra instructions needed for address computation can be executed independently. */1293if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)1294|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {1295if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))1296return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);12971298return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1299}13001301if (src1 & SLJIT_MEM) {1302if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))1303return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);13041305return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);1306}1307}1308else if (ins12 || ins20) {1309emit_rx_type rx_type;13101311if (ins12) {1312rx_type = RX_A;1313ins = ins12;1314}1315else {1316rx_type = RXY_A;1317ins = ins20;1318}13191320if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)1321|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))1322return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);13231324if (src1 & SLJIT_MEM)1325return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);1326}1327}13281329if (mode & SLJIT_32) {1330ins = forms->op_r;1331ins_k = forms->op_rk;1332}1333else {1334ins = forms->op_gr;1335ins_k = forms->op_grk;1336}13371338SLJIT_ASSERT(ins != 0 || ins_k != 0);13391340if (ins && FAST_IS_REG(dst)) {1341if (dst == src1)1342return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);13431344if (dst == src2)1345return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);1346}13471348if (ins_k == 0)1349return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);13501351return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);1352}13531354static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,1355sljit_s32 dst,1356sljit_s32 src1, sljit_sw src1w,1357sljit_s32 src2, sljit_sw src2w)1358{1359sljit_s32 mode = compiler->mode;1360sljit_ins ins;13611362if (src2 & SLJIT_MEM) {1363sljit_ins ins12, ins20;13641365if (mode & SLJIT_32) {1366ins12 = forms->op;1367ins20 = forms->op_y;1368}1369else {1370ins12 = 0;1371ins20 = forms->op_g;1372}13731374if (ins12 && ins20) {1375if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))1376return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);13771378return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1379}1380else if (ins12)1381return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);1382else if (ins20)1383return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1384}13851386ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;13871388if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))1389return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);13901391return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);1392}13931394SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)1395{1396struct sljit_label *label;1397struct sljit_jump *jump;1398struct sljit_const *const_;1399sljit_sw executable_offset;1400sljit_uw ins_size = compiler->size << 1;1401sljit_uw pool_size = 0; /* literal pool */1402sljit_uw pad_size;1403sljit_uw half_count;1404SLJIT_NEXT_DEFINE_TYPES;1405struct sljit_memory_fragment *buf;1406sljit_ins *buf_ptr;1407sljit_ins *buf_end;1408sljit_u16 *code;1409sljit_u16 *code_ptr;1410sljit_uw *pool, *pool_ptr;1411sljit_ins ins;1412sljit_sw source, offset;14131414CHECK_ERROR_PTR();1415CHECK_PTR(check_sljit_generate_code(compiler));1416reverse_buf(compiler);14171418jump = compiler->jumps;1419while (jump != NULL) {1420if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {1421/* encoded: */1422/* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */1423/* replace with: */1424/* lgrl %r1, <pool_addr> */1425/* bras %r14, %r1 (or bcr <mask>, %r1) */1426pool_size += sizeof(*pool);1427if (!(jump->flags & JUMP_MOV_ADDR))1428ins_size += 2;1429}1430jump = jump->next;1431}14321433const_ = compiler->consts;1434while (const_) {1435pool_size += sizeof(*pool);1436const_ = const_->next;1437}14381439/* pad code size to 8 bytes so is accessible with half word offsets */1440/* the literal pool needs to be doubleword aligned */1441pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;1442SLJIT_ASSERT(pad_size < 8UL);14431444/* allocate target buffer */1445code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);1446PTR_FAIL_WITH_EXEC_IF(code);1447code_ptr = code;14481449/* TODO(carenas): pool is optional, and the ABI recommends it to1450* be created before the function code, instead of1451* globally; if generated code is too big could1452* need offsets bigger than 32bit words and asser()1453*/1454pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);1455pool_ptr = pool;1456buf = compiler->buf;1457half_count = 0;14581459label = compiler->labels;1460jump = compiler->jumps;1461const_ = compiler->consts;1462SLJIT_NEXT_INIT_TYPES();1463SLJIT_GET_NEXT_MIN();14641465do {1466buf_ptr = (sljit_ins*)buf->memory;1467buf_end = buf_ptr + (buf->used_size >> 3);1468do {1469ins = *buf_ptr++;14701471if (next_min_addr == half_count) {1472SLJIT_ASSERT(!label || label->size >= half_count);1473SLJIT_ASSERT(!jump || jump->addr >= half_count);1474SLJIT_ASSERT(!const_ || const_->addr >= half_count);14751476if (next_min_addr == next_label_size) {1477label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1478label = label->next;1479next_label_size = SLJIT_GET_NEXT_SIZE(label);1480}14811482if (next_min_addr == next_jump_addr) {1483if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {1484source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);14851486jump->addr = (sljit_uw)pool_ptr;14871488/* store target into pool */1489offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;1490pool_ptr++;14911492SLJIT_ASSERT(!(offset & 1));1493offset >>= 1;1494SLJIT_ASSERT(is_s32(offset));1495ins |= (sljit_ins)offset & 0xffffffff;1496} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {1497sljit_ins arg;14981499jump->addr = (sljit_uw)pool_ptr;15001501/* load address into tmp1 */1502source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1503offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;15041505SLJIT_ASSERT(!(offset & 1));1506offset >>= 1;1507SLJIT_ASSERT(is_s32(offset));15081509code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);1510code_ptr[1] = (sljit_u16)(offset >> 16);1511code_ptr[2] = (sljit_u16)offset;1512code_ptr += 3;1513pool_ptr++;15141515/* branch to tmp1 */1516arg = (ins >> 36) & 0xf;1517if (((ins >> 32) & 0xf) == 4) {1518/* brcl -> bcr */1519ins = bcr(arg, tmp1);1520} else {1521SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);1522/* brasl -> basr */1523ins = basr(arg, tmp1);1524}15251526/* Adjust half_count. */1527half_count += 2;1528} else1529jump->addr = (sljit_uw)code_ptr;15301531jump = jump->next;1532next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);1533} else if (next_min_addr == next_const_addr) {1534/* update instruction with relative address of constant */1535source = (sljit_sw)code_ptr;1536offset = (sljit_sw)pool_ptr - source;15371538SLJIT_ASSERT(!(offset & 0x1));1539offset >>= 1; /* halfword (not byte) offset */1540SLJIT_ASSERT(is_s32(offset));15411542ins |= (sljit_ins)offset & 0xffffffff;15431544/* update address */1545const_->addr = (sljit_uw)pool_ptr;15461547/* store initial value into pool and update pool address */1548*(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);15491550/* move to next constant */1551const_ = const_->next;1552next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);1553}15541555SLJIT_GET_NEXT_MIN();1556}15571558if (ins & 0xffff00000000L) {1559*code_ptr++ = (sljit_u16)(ins >> 32);1560half_count++;1561}15621563if (ins & 0xffffffff0000L) {1564*code_ptr++ = (sljit_u16)(ins >> 16);1565half_count++;1566}15671568*code_ptr++ = (sljit_u16)ins;1569half_count++;1570} while (buf_ptr < buf_end);15711572buf = buf->next;1573} while (buf);15741575if (next_label_size == half_count) {1576label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1577label = label->next;1578}15791580SLJIT_ASSERT(!label);1581SLJIT_ASSERT(!jump);1582SLJIT_ASSERT(!const_);1583SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);1584SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);15851586jump = compiler->jumps;1587while (jump != NULL) {1588offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);15891590if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {1591/* Store jump target into pool. */1592*(sljit_uw*)(jump->addr) = (sljit_uw)offset;1593} else {1594code_ptr = (sljit_u16*)jump->addr;1595offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);15961597/* offset must be halfword aligned */1598SLJIT_ASSERT(!(offset & 1));1599offset >>= 1;1600SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */16011602code_ptr[1] = (sljit_u16)(offset >> 16);1603code_ptr[2] = (sljit_u16)offset;1604}1605jump = jump->next;1606}16071608compiler->error = SLJIT_ERR_COMPILED;1609compiler->executable_offset = executable_offset;1610compiler->executable_size = ins_size;1611if (pool_size)1612compiler->executable_size += (pad_size + pool_size);16131614code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);1615code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1616SLJIT_CACHE_FLUSH(code, code_ptr);1617SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);1618return code;1619}16201621SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)1622{1623/* TODO(mundaym): implement all */1624switch (feature_type) {1625case SLJIT_HAS_FPU:1626#ifdef SLJIT_IS_FPU_AVAILABLE1627return (SLJIT_IS_FPU_AVAILABLE) != 0;1628#else1629return 1;1630#endif /* SLJIT_IS_FPU_AVAILABLE */16311632case SLJIT_HAS_CLZ:1633case SLJIT_HAS_REV:1634case SLJIT_HAS_ROT:1635case SLJIT_HAS_PREFETCH:1636case SLJIT_HAS_COPY_F32:1637case SLJIT_HAS_COPY_F64:1638case SLJIT_HAS_SIMD:1639case SLJIT_HAS_ATOMIC:1640case SLJIT_HAS_MEMORY_BARRIER:1641return 1;16421643case SLJIT_HAS_CTZ:1644return 2;16451646case SLJIT_HAS_CMOV:1647return have_lscond1() ? 1 : 0;1648}1649return 0;1650}16511652SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)1653{1654SLJIT_UNUSED_ARG(type);1655return 0;1656}16571658/* --------------------------------------------------------------------- */1659/* Entry, exit */1660/* --------------------------------------------------------------------- */16611662SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,1663sljit_s32 options, sljit_s32 arg_types,1664sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)1665{1666sljit_s32 fscratches;1667sljit_s32 fsaveds;1668sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);1669sljit_s32 offset, i, tmp;16701671CHECK_ERROR();1672CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));1673set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);16741675/* Saved registers are stored in callee allocated save area. */1676SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);16771678scratches = ENTER_GET_REGS(scratches);1679saveds = ENTER_GET_REGS(saveds);1680fscratches = compiler->fscratches;1681fsaveds = compiler->fsaveds;16821683offset = 2 * SSIZE_OF(sw);1684if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {1685if (saved_arg_count == 0) {1686FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));1687offset += 9 * SSIZE_OF(sw);1688} else {1689FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));1690offset += (8 - saved_arg_count) * SSIZE_OF(sw);1691}1692} else {1693if (scratches == SLJIT_FIRST_SAVED_REG) {1694FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));1695offset += SSIZE_OF(sw);1696} else if (scratches > SLJIT_FIRST_SAVED_REG) {1697FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));1698offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);1699}17001701if (saved_arg_count == 0) {1702if (saveds == 0) {1703FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));1704offset += SSIZE_OF(sw);1705} else {1706FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));1707offset += (saveds + 1) * SSIZE_OF(sw);1708}1709} else if (saveds > saved_arg_count) {1710if (saveds == saved_arg_count + 1) {1711FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));1712offset += SSIZE_OF(sw);1713} else {1714FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));1715offset += (saveds - saved_arg_count) * SSIZE_OF(sw);1716}1717}1718}17191720if (saved_arg_count > 0) {1721FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));1722offset += SSIZE_OF(sw);1723}17241725tmp = SLJIT_FS0 - fsaveds;1726for (i = SLJIT_FS0; i > tmp; i--) {1727FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));1728offset += SSIZE_OF(sw);1729}17301731for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {1732FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));1733offset += SSIZE_OF(sw);1734}17351736local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;1737compiler->local_size = local_size;17381739if (is_s20(-local_size))1740FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));1741else1742FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));17431744if (options & SLJIT_ENTER_REG_ARG)1745return SLJIT_SUCCESS;17461747arg_types >>= SLJIT_ARG_SHIFT;1748saved_arg_count = 0;1749tmp = 0;1750while (arg_types > 0) {1751if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {1752if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {1753FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));1754saved_arg_count++;1755}1756tmp++;1757}17581759arg_types >>= SLJIT_ARG_SHIFT;1760}17611762return SLJIT_SUCCESS;1763}17641765SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,1766sljit_s32 options, sljit_s32 arg_types,1767sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)1768{1769CHECK_ERROR();1770CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));1771set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);17721773compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;1774return SLJIT_SUCCESS;1775}17761777static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)1778{1779sljit_s32 offset, i, tmp;1780sljit_s32 local_size = compiler->local_size;1781sljit_s32 saveds = compiler->saveds;1782sljit_s32 scratches = compiler->scratches;1783sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);17841785if (is_u12(local_size))1786FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));1787else if (is_s20(local_size))1788FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));1789else1790FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));17911792offset = 2 * SSIZE_OF(sw);1793if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {1794if (kept_saveds_count == 0) {1795FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));1796offset += 9 * SSIZE_OF(sw);1797} else {1798FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));1799offset += (8 - kept_saveds_count) * SSIZE_OF(sw);1800}1801} else {1802if (scratches == SLJIT_FIRST_SAVED_REG) {1803FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));1804offset += SSIZE_OF(sw);1805} else if (scratches > SLJIT_FIRST_SAVED_REG) {1806FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));1807offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);1808}18091810if (kept_saveds_count == 0) {1811if (saveds == 0) {1812if (last_reg == r14)1813FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));1814offset += SSIZE_OF(sw);1815} else if (saveds == 1 && last_reg == r13) {1816FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));1817offset += 2 * SSIZE_OF(sw);1818} else {1819FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));1820offset += (saveds + 1) * SSIZE_OF(sw);1821}1822} else if (saveds > kept_saveds_count) {1823if (saveds == kept_saveds_count + 1) {1824FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));1825offset += SSIZE_OF(sw);1826} else {1827FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));1828offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);1829}1830}1831}18321833if (kept_saveds_count > 0) {1834if (last_reg == r14)1835FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));1836offset += SSIZE_OF(sw);1837}18381839tmp = SLJIT_FS0 - compiler->fsaveds;1840for (i = SLJIT_FS0; i > tmp; i--) {1841FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));1842offset += SSIZE_OF(sw);1843}18441845for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {1846FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));1847offset += SSIZE_OF(sw);1848}18491850return SLJIT_SUCCESS;1851}18521853SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)1854{1855CHECK_ERROR();1856CHECK(check_sljit_emit_return_void(compiler));18571858FAIL_IF(emit_stack_frame_release(compiler, r14));1859return push_inst(compiler, br(r14)); /* return */1860}18611862SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,1863sljit_s32 src, sljit_sw srcw)1864{1865CHECK_ERROR();1866CHECK(check_sljit_emit_return_to(compiler, src, srcw));18671868if (src & SLJIT_MEM) {1869ADJUST_LOCAL_OFFSET(src, srcw);1870FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));1871src = TMP_REG2;1872srcw = 0;1873} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {1874FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));1875src = TMP_REG2;1876srcw = 0;1877}18781879FAIL_IF(emit_stack_frame_release(compiler, r13));18801881SLJIT_SKIP_CHECKS(compiler);1882return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);1883}18841885/* --------------------------------------------------------------------- */1886/* Operators */1887/* --------------------------------------------------------------------- */18881889SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)1890{1891sljit_gpr arg0 = gpr(SLJIT_R0);1892sljit_gpr arg1 = gpr(SLJIT_R1);18931894CHECK_ERROR();1895CHECK(check_sljit_emit_op0(compiler, op));18961897op = GET_OPCODE(op) | (op & SLJIT_32);1898switch (op) {1899case SLJIT_BREAKPOINT:1900/* The following invalid instruction is emitted by gdb. */1901return push_inst(compiler, 0x0001 /* 2-byte trap */);1902case SLJIT_NOP:1903return push_inst(compiler, 0x0700 /* 2-byte nop */);1904case SLJIT_LMUL_UW:1905FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));1906break;1907case SLJIT_LMUL_SW:1908/* signed multiplication from: */1909/* Hacker's Delight, Second Edition: Chapter 8-3. */1910FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));1911FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));1912FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));1913FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));19141915/* unsigned multiplication */1916FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));19171918FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));1919FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));1920break;1921case SLJIT_DIV_U32:1922case SLJIT_DIVMOD_U32:1923FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));1924FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));1925FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));1926FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */1927if (op == SLJIT_DIVMOD_U32)1928return push_inst(compiler, lr(arg1, tmp0)); /* remainder */19291930return SLJIT_SUCCESS;1931case SLJIT_DIV_S32:1932case SLJIT_DIVMOD_S32:1933FAIL_IF(push_inst(compiler, 0xeb00000000dc /* srak */ | R36A(tmp0) | R32A(arg0) | (31 << 16)));1934FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));1935FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));1936FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */1937if (op == SLJIT_DIVMOD_S32)1938return push_inst(compiler, lr(arg1, tmp0)); /* remainder */19391940return SLJIT_SUCCESS;1941case SLJIT_DIV_UW:1942case SLJIT_DIVMOD_UW:1943FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));1944FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));1945FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));1946FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */1947if (op == SLJIT_DIVMOD_UW)1948return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */19491950return SLJIT_SUCCESS;1951case SLJIT_DIV_SW:1952case SLJIT_DIVMOD_SW:1953FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));1954FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));1955FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */1956if (op == SLJIT_DIVMOD_SW)1957return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */19581959return SLJIT_SUCCESS;1960case SLJIT_MEMORY_BARRIER:1961return push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0);1962case SLJIT_ENDBR:1963return SLJIT_SUCCESS;1964case SLJIT_SKIP_FRAMES_BEFORE_RETURN:1965return SLJIT_SUCCESS;1966default:1967SLJIT_UNREACHABLE();1968}1969/* swap result registers */1970FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));1971FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));1972return push_inst(compiler, lgr(arg1, tmp0));1973}19741975static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)1976{1977sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);19781979if ((op & SLJIT_32) && src_r != tmp0) {1980FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));1981src_r = tmp0;1982}19831984if (is_ctz) {1985FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));19861987if (src_r == tmp0)1988FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));1989else1990FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));19911992src_r = tmp0;1993}19941995FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));19961997if (is_ctz)1998FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));19992000if (op & SLJIT_32) {2001if (!is_ctz && dst_r != tmp0)2002return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));20032004FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));2005}20062007if (is_ctz)2008FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));20092010if (dst_r == tmp0)2011return SLJIT_SUCCESS;20122013return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));2014}20152016static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,2017sljit_s32 dst, sljit_sw dstw,2018sljit_s32 src, sljit_sw srcw)2019{2020struct addr addr;2021sljit_gpr reg;2022sljit_ins ins;2023sljit_s32 opcode = GET_OPCODE(op);2024sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);20252026if (dst & SLJIT_MEM) {2027if (src & SLJIT_MEM) {2028FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));2029reg = tmp0;2030} else2031reg = gpr(src);20322033FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));20342035if (is_16bit)2036ins = 0xe3000000003f /* strvh */;2037else2038ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;20392040return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));2041}20422043reg = gpr(dst);20442045if (src & SLJIT_MEM) {2046FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));20472048if (is_16bit)2049ins = 0xe3000000001f /* lrvh */;2050else2051ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;20522053FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));20542055if (opcode == SLJIT_REV)2056return SLJIT_SUCCESS;20572058if (is_16bit) {2059if (op & SLJIT_32)2060ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;2061else2062ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;2063} else2064ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;20652066return push_inst(compiler, ins | R4A(reg) | R0A(reg));2067}20682069ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;2070FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));20712072if (opcode == SLJIT_REV)2073return SLJIT_SUCCESS;20742075if (!is_16bit) {2076ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;2077return push_inst(compiler, ins | R4A(reg) | R0A(reg));2078}20792080if (op & SLJIT_32) {2081ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;2082return push_inst(compiler, ins | R20A(reg) | 16);2083}20842085ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;2086return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));2087}20882089/* LEVAL will be defined later with different parameters as needed */2090#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)20912092SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,2093sljit_s32 dst, sljit_sw dstw,2094sljit_s32 src, sljit_sw srcw)2095{2096sljit_ins ins;2097struct addr mem;2098sljit_gpr dst_r;2099sljit_gpr src_r;2100sljit_s32 opcode = GET_OPCODE(op);21012102CHECK_ERROR();2103CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));2104ADJUST_LOCAL_OFFSET(dst, dstw);2105ADJUST_LOCAL_OFFSET(src, srcw);21062107if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {2108/* LOAD REGISTER */2109if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {2110dst_r = gpr(dst);2111src_r = gpr(src);2112switch (opcode | (op & SLJIT_32)) {2113/* 32-bit */2114case SLJIT_MOV32_U8:2115ins = llcr(dst_r, src_r);2116break;2117case SLJIT_MOV32_S8:2118ins = lbr(dst_r, src_r);2119break;2120case SLJIT_MOV32_U16:2121ins = llhr(dst_r, src_r);2122break;2123case SLJIT_MOV32_S16:2124ins = lhr(dst_r, src_r);2125break;2126case SLJIT_MOV32:2127if (dst_r == src_r)2128return SLJIT_SUCCESS;2129ins = lr(dst_r, src_r);2130break;2131/* 64-bit */2132case SLJIT_MOV_U8:2133ins = llgcr(dst_r, src_r);2134break;2135case SLJIT_MOV_S8:2136ins = lgbr(dst_r, src_r);2137break;2138case SLJIT_MOV_U16:2139ins = llghr(dst_r, src_r);2140break;2141case SLJIT_MOV_S16:2142ins = lghr(dst_r, src_r);2143break;2144case SLJIT_MOV_U32:2145ins = llgfr(dst_r, src_r);2146break;2147case SLJIT_MOV_S32:2148ins = lgfr(dst_r, src_r);2149break;2150case SLJIT_MOV:2151case SLJIT_MOV_P:2152if (dst_r == src_r)2153return SLJIT_SUCCESS;2154ins = lgr(dst_r, src_r);2155break;2156default:2157ins = 0;2158SLJIT_UNREACHABLE();2159break;2160}2161FAIL_IF(push_inst(compiler, ins));2162return SLJIT_SUCCESS;2163}2164/* LOAD IMMEDIATE */2165if (FAST_IS_REG(dst) && src == SLJIT_IMM) {2166switch (opcode) {2167case SLJIT_MOV_U8:2168srcw = (sljit_sw)((sljit_u8)(srcw));2169break;2170case SLJIT_MOV_S8:2171srcw = (sljit_sw)((sljit_s8)(srcw));2172break;2173case SLJIT_MOV_U16:2174srcw = (sljit_sw)((sljit_u16)(srcw));2175break;2176case SLJIT_MOV_S16:2177srcw = (sljit_sw)((sljit_s16)(srcw));2178break;2179case SLJIT_MOV_U32:2180srcw = (sljit_sw)((sljit_u32)(srcw));2181break;2182case SLJIT_MOV_S32:2183case SLJIT_MOV32:2184srcw = (sljit_sw)((sljit_s32)(srcw));2185break;2186}2187return push_load_imm_inst(compiler, gpr(dst), srcw);2188}2189/* LOAD */2190/* TODO(carenas): avoid reg being defined later */2191#define LEVAL(i) EVAL(i, reg, mem)2192if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {2193sljit_gpr reg = gpr(dst);21942195FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));2196/* TODO(carenas): convert all calls below to LEVAL */2197switch (opcode | (op & SLJIT_32)) {2198case SLJIT_MOV32_U8:2199ins = llc(reg, mem.offset, mem.index, mem.base);2200break;2201case SLJIT_MOV32_S8:2202ins = lb(reg, mem.offset, mem.index, mem.base);2203break;2204case SLJIT_MOV32_U16:2205ins = llh(reg, mem.offset, mem.index, mem.base);2206break;2207case SLJIT_MOV32_S16:2208ins = WHEN2(is_u12(mem.offset), lh, lhy);2209break;2210case SLJIT_MOV32:2211ins = WHEN2(is_u12(mem.offset), l, ly);2212break;2213case SLJIT_MOV_U8:2214ins = LEVAL(llgc);2215break;2216case SLJIT_MOV_S8:2217ins = lgb(reg, mem.offset, mem.index, mem.base);2218break;2219case SLJIT_MOV_U16:2220ins = LEVAL(llgh);2221break;2222case SLJIT_MOV_S16:2223ins = lgh(reg, mem.offset, mem.index, mem.base);2224break;2225case SLJIT_MOV_U32:2226ins = LEVAL(llgf);2227break;2228case SLJIT_MOV_S32:2229ins = lgf(reg, mem.offset, mem.index, mem.base);2230break;2231case SLJIT_MOV_P:2232case SLJIT_MOV:2233ins = lg(reg, mem.offset, mem.index, mem.base);2234break;2235default:2236ins = 0;2237SLJIT_UNREACHABLE();2238break;2239}2240FAIL_IF(push_inst(compiler, ins));2241return SLJIT_SUCCESS;2242}2243/* STORE and STORE IMMEDIATE */2244if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {2245struct addr mem;2246sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;22472248if (src == SLJIT_IMM) {2249/* TODO(mundaym): MOVE IMMEDIATE? */2250FAIL_IF(push_load_imm_inst(compiler, reg, srcw));2251}2252FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2253switch (opcode) {2254case SLJIT_MOV_U8:2255case SLJIT_MOV_S8:2256return push_inst(compiler,2257WHEN2(is_u12(mem.offset), stc, stcy));2258case SLJIT_MOV_U16:2259case SLJIT_MOV_S16:2260return push_inst(compiler,2261WHEN2(is_u12(mem.offset), sth, sthy));2262case SLJIT_MOV_U32:2263case SLJIT_MOV_S32:2264case SLJIT_MOV32:2265return push_inst(compiler,2266WHEN2(is_u12(mem.offset), st, sty));2267case SLJIT_MOV_P:2268case SLJIT_MOV:2269FAIL_IF(push_inst(compiler, LEVAL(stg)));2270return SLJIT_SUCCESS;2271default:2272SLJIT_UNREACHABLE();2273}2274}2275#undef LEVAL2276/* MOVE CHARACTERS */2277if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {2278struct addr mem;2279FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));2280switch (opcode) {2281case SLJIT_MOV_U8:2282case SLJIT_MOV_S8:2283FAIL_IF(push_inst(compiler,2284EVAL(llgc, tmp0, mem)));2285FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2286return push_inst(compiler,2287EVAL(stcy, tmp0, mem));2288case SLJIT_MOV_U16:2289case SLJIT_MOV_S16:2290FAIL_IF(push_inst(compiler,2291EVAL(llgh, tmp0, mem)));2292FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2293return push_inst(compiler,2294EVAL(sthy, tmp0, mem));2295case SLJIT_MOV_U32:2296case SLJIT_MOV_S32:2297case SLJIT_MOV32:2298FAIL_IF(push_inst(compiler,2299EVAL(ly, tmp0, mem)));2300FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2301return push_inst(compiler,2302EVAL(sty, tmp0, mem));2303case SLJIT_MOV_P:2304case SLJIT_MOV:2305FAIL_IF(push_inst(compiler,2306EVAL(lg, tmp0, mem)));2307FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2308FAIL_IF(push_inst(compiler,2309EVAL(stg, tmp0, mem)));2310return SLJIT_SUCCESS;2311default:2312SLJIT_UNREACHABLE();2313}2314}2315SLJIT_UNREACHABLE();2316}23172318SLJIT_ASSERT(src != SLJIT_IMM);23192320dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;2321src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;23222323compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);23242325/* TODO(mundaym): optimize loads and stores */2326switch (opcode) {2327case SLJIT_CLZ:2328case SLJIT_CTZ:2329if (src & SLJIT_MEM)2330FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));23312332FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));2333break;2334case SLJIT_REV_U32:2335case SLJIT_REV_S32:2336op |= SLJIT_32;2337/* fallthrough */2338case SLJIT_REV:2339case SLJIT_REV_U16:2340case SLJIT_REV_S16:2341return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);2342default:2343SLJIT_UNREACHABLE();2344}23452346if (dst & SLJIT_MEM)2347return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);23482349return SLJIT_SUCCESS;2350}23512352static SLJIT_INLINE int is_commutative(sljit_s32 op)2353{2354switch (GET_OPCODE(op)) {2355case SLJIT_ADD:2356case SLJIT_ADDC:2357case SLJIT_MUL:2358case SLJIT_AND:2359case SLJIT_OR:2360case SLJIT_XOR:2361return 1;2362}2363return 0;2364}23652366static const struct ins_forms add_forms = {23670x1a00, /* ar */23680xb9080000, /* agr */23690xb9f80000, /* ark */23700xb9e80000, /* agrk */23710x5a000000, /* a */23720xe3000000005a, /* ay */23730xe30000000008, /* ag */2374};23752376static const struct ins_forms logical_add_forms = {23770x1e00, /* alr */23780xb90a0000, /* algr */23790xb9fa0000, /* alrk */23800xb9ea0000, /* algrk */23810x5e000000, /* al */23820xe3000000005e, /* aly */23830xe3000000000a, /* alg */2384};23852386static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,2387sljit_s32 dst, sljit_sw dstw,2388sljit_s32 src1, sljit_sw src1w,2389sljit_s32 src2, sljit_sw src2w)2390{2391int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;2392int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);2393const struct ins_forms *forms;2394sljit_ins ins;23952396if (src2 == SLJIT_IMM) {2397if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {2398if (sets_overflow)2399ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;2400else2401ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;2402return emit_siy(compiler, ins, dst, dstw, src2w);2403}24042405if (is_s16(src2w)) {2406if (sets_overflow)2407ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;2408else2409ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;2410FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));2411goto done;2412}24132414if (!sets_overflow) {2415if ((op & SLJIT_32) || is_u32(src2w)) {2416ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;2417FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2418goto done;2419}2420if (is_u32(-src2w)) {2421FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));2422goto done;2423}2424}2425else if ((op & SLJIT_32) || is_s32(src2w)) {2426ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;2427FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2428goto done;2429}2430}24312432forms = sets_overflow ? &add_forms : &logical_add_forms;2433FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));24342435done:2436if (sets_zero_overflow)2437FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));24382439if (dst & SLJIT_MEM)2440return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);24412442return SLJIT_SUCCESS;2443}24442445static const struct ins_forms sub_forms = {24460x1b00, /* sr */24470xb9090000, /* sgr */24480xb9f90000, /* srk */24490xb9e90000, /* sgrk */24500x5b000000, /* s */24510xe3000000005b, /* sy */24520xe30000000009, /* sg */2453};24542455static const struct ins_forms logical_sub_forms = {24560x1f00, /* slr */24570xb90b0000, /* slgr */24580xb9fb0000, /* slrk */24590xb9eb0000, /* slgrk */24600x5f000000, /* sl */24610xe3000000005f, /* sly */24620xe3000000000b, /* slg */2463};24642465static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,2466sljit_s32 dst, sljit_sw dstw,2467sljit_s32 src1, sljit_sw src1w,2468sljit_s32 src2, sljit_sw src2w)2469{2470sljit_s32 flag_type = GET_FLAG_TYPE(op);2471int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);2472int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);2473const struct ins_forms *forms;2474sljit_ins ins;24752476if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {2477int compare_signed = flag_type >= SLJIT_SIG_LESS;24782479compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;24802481if (src2 == SLJIT_IMM) {2482if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {2483if ((op & SLJIT_32) || is_s32(src2w)) {2484ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;2485return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);2486}2487} else if ((op & SLJIT_32) || is_u32(src2w)) {2488ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;2489return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);2490}2491}2492else if (src2 & SLJIT_MEM) {2493if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {2494ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;2495return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);2496}24972498if (compare_signed)2499ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;2500else2501ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;2502return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);2503}25042505if (compare_signed)2506ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;2507else2508ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;2509return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);2510}25112512if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {2513ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;2514FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));2515goto done;2516}25172518if (src2 == SLJIT_IMM) {2519sljit_sw neg_src2w = -src2w;25202521if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {2522if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {2523if (sets_signed)2524ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;2525else2526ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;2527return emit_siy(compiler, ins, dst, dstw, neg_src2w);2528}25292530if (is_s16(neg_src2w)) {2531if (sets_signed)2532ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;2533else2534ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;2535FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));2536goto done;2537}2538}25392540if (!sets_signed) {2541if ((op & SLJIT_32) || is_u32(src2w)) {2542ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;2543FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2544goto done;2545}2546if (is_u32(neg_src2w)) {2547FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));2548goto done;2549}2550}2551else if ((op & SLJIT_32) || is_s32(neg_src2w)) {2552ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;2553FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));2554goto done;2555}2556}25572558forms = sets_signed ? &sub_forms : &logical_sub_forms;2559FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));25602561done:2562if (sets_signed) {2563sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;25642565if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {2566/* In case of overflow, the sign bit of the two source operands must be different, and2567- the first operand is greater if the sign bit of the result is set2568- the first operand is less if the sign bit of the result is not set2569The -result operation sets the corrent sign, because the result cannot be zero.2570The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */2571FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));2572FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));2573}2574else if (op & SLJIT_SET_Z)2575FAIL_IF(update_zero_overflow(compiler, op, dst_r));2576}25772578if (dst & SLJIT_MEM)2579return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);25802581return SLJIT_SUCCESS;2582}25832584static const struct ins_forms multiply_forms = {25850xb2520000, /* msr */25860xb90c0000, /* msgr */25870xb9fd0000, /* msrkc */25880xb9ed0000, /* msgrkc */25890x71000000, /* ms */25900xe30000000051, /* msy */25910xe3000000000c, /* msg */2592};25932594static const struct ins_forms multiply_overflow_forms = {25950,25960,25970xb9fd0000, /* msrkc */25980xb9ed0000, /* msgrkc */25990,26000xe30000000053, /* msc */26010xe30000000083, /* msgc */2602};26032604static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,2605sljit_s32 dst,2606sljit_s32 src1, sljit_sw src1w,2607sljit_s32 src2, sljit_sw src2w)2608{2609sljit_ins ins;26102611if (HAS_FLAGS(op)) {2612/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:2613FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));2614FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));2615if (dst_r != tmp0) {2616FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));2617}2618FAIL_IF(push_inst(compiler, aih(tmp0, 1)));2619FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));2620FAIL_IF(push_inst(compiler, ipm(tmp1)));2621FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */26222623return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);2624}26252626if (src2 == SLJIT_IMM) {2627if (is_s16(src2w)) {2628ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;2629return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);2630}26312632if (is_s32(src2w)) {2633ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;2634return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);2635}2636}26372638return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);2639}26402641static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,2642sljit_s32 dst,2643sljit_s32 src1, sljit_sw src1w,2644sljit_uw imm, sljit_s32 count16)2645{2646sljit_s32 mode = compiler->mode;2647sljit_gpr dst_r = tmp0;2648sljit_s32 needs_move = 1;26492650if (IS_GPR_REG(dst)) {2651dst_r = gpr(dst & REG_MASK);2652if (dst == src1)2653needs_move = 0;2654}26552656if (needs_move)2657FAIL_IF(emit_move(compiler, dst_r, src1, src1w));26582659if (type == SLJIT_AND) {2660if (!(mode & SLJIT_32))2661FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));2662return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));2663}2664else if (type == SLJIT_OR) {2665if (count16 >= 3) {2666FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));2667return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));2668}26692670if (count16 >= 2) {2671if ((imm & 0x00000000ffffffffull) == 0)2672return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));2673if ((imm & 0xffffffff00000000ull) == 0)2674return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));2675}26762677if ((imm & 0xffff000000000000ull) != 0)2678FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));2679if ((imm & 0x0000ffff00000000ull) != 0)2680FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));2681if ((imm & 0x00000000ffff0000ull) != 0)2682FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));2683if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2684return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));2685return SLJIT_SUCCESS;2686}26872688if ((imm & 0xffffffff00000000ull) != 0)2689FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));2690if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)2691return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));2692return SLJIT_SUCCESS;2693}26942695static const struct ins_forms bitwise_and_forms = {26960x1400, /* nr */26970xb9800000, /* ngr */26980xb9f40000, /* nrk */26990xb9e40000, /* ngrk */27000x54000000, /* n */27010xe30000000054, /* ny */27020xe30000000080, /* ng */2703};27042705static const struct ins_forms bitwise_or_forms = {27060x1600, /* or */27070xb9810000, /* ogr */27080xb9f60000, /* ork */27090xb9e60000, /* ogrk */27100x56000000, /* o */27110xe30000000056, /* oy */27120xe30000000081, /* og */2713};27142715static const struct ins_forms bitwise_xor_forms = {27160x1700, /* xr */27170xb9820000, /* xgr */27180xb9f70000, /* xrk */27190xb9e70000, /* xgrk */27200x57000000, /* x */27210xe30000000057, /* xy */27220xe30000000082, /* xg */2723};27242725static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,2726sljit_s32 dst,2727sljit_s32 src1, sljit_sw src1w,2728sljit_s32 src2, sljit_sw src2w)2729{2730sljit_s32 type = GET_OPCODE(op);2731const struct ins_forms *forms;27322733if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {2734sljit_s32 count16 = 0;2735sljit_uw imm = (sljit_uw)src2w;27362737if (op & SLJIT_32)2738imm &= 0xffffffffull;27392740if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2741count16++;2742if ((imm & 0x00000000ffff0000ull) != 0)2743count16++;2744if ((imm & 0x0000ffff00000000ull) != 0)2745count16++;2746if ((imm & 0xffff000000000000ull) != 0)2747count16++;27482749if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {2750sljit_gpr src_r = tmp1;27512752if (FAST_IS_REG(src1))2753src_r = gpr(src1 & REG_MASK);2754else2755FAIL_IF(emit_move(compiler, tmp1, src1, src1w));27562757if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2758return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);2759if ((imm & 0x00000000ffff0000ull) != 0)2760return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));2761if ((imm & 0x0000ffff00000000ull) != 0)2762return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));2763return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));2764}27652766if (!(op & SLJIT_SET_Z))2767return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);2768}27692770if (type == SLJIT_AND)2771forms = &bitwise_and_forms;2772else if (type == SLJIT_OR)2773forms = &bitwise_or_forms;2774else2775forms = &bitwise_xor_forms;27762777return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);2778}27792780static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,2781sljit_s32 dst,2782sljit_s32 src1, sljit_sw src1w,2783sljit_s32 src2, sljit_sw src2w)2784{2785sljit_s32 type = GET_OPCODE(op);2786sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;2787sljit_gpr src_r = tmp0;2788sljit_gpr base_r = tmp0;2789sljit_ins imm = 0;2790sljit_ins ins;27912792if (FAST_IS_REG(src1))2793src_r = gpr(src1);2794else2795FAIL_IF(emit_move(compiler, tmp0, src1, src1w));27962797if (src2 != SLJIT_IMM) {2798if (FAST_IS_REG(src2))2799base_r = gpr(src2);2800else {2801FAIL_IF(emit_move(compiler, tmp1, src2, src2w));2802base_r = tmp1;2803}28042805if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {2806if (base_r != tmp1) {2807FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));2808base_r = tmp1;2809} else2810FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));2811}2812} else2813imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));28142815if ((op & SLJIT_32) && dst_r == src_r) {2816if (type == SLJIT_SHL || type == SLJIT_MSHL)2817ins = 0x89000000 /* sll */;2818else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)2819ins = 0x88000000 /* srl */;2820else2821ins = 0x8a000000 /* sra */;28222823FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));2824} else {2825if (type == SLJIT_SHL || type == SLJIT_MSHL)2826ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;2827else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)2828ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;2829else2830ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;28312832FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));2833}28342835if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)2836return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));28372838return SLJIT_SUCCESS;2839}28402841static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,2842sljit_s32 dst,2843sljit_s32 src1, sljit_sw src1w,2844sljit_s32 src2, sljit_sw src2w)2845{2846sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;2847sljit_gpr src_r = tmp0;2848sljit_gpr base_r = tmp0;2849sljit_ins imm = 0;2850sljit_ins ins;28512852if (FAST_IS_REG(src1))2853src_r = gpr(src1);2854else2855FAIL_IF(emit_move(compiler, tmp0, src1, src1w));28562857if (src2 != SLJIT_IMM) {2858if (FAST_IS_REG(src2))2859base_r = gpr(src2);2860else {2861FAIL_IF(emit_move(compiler, tmp1, src2, src2w));2862base_r = tmp1;2863}2864}28652866if (GET_OPCODE(op) == SLJIT_ROTR) {2867if (src2 != SLJIT_IMM) {2868ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;2869FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));2870base_r = tmp1;2871} else2872src2w = -src2w;2873}28742875if (src2 == SLJIT_IMM)2876imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));28772878ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;2879return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));2880}28812882static const struct ins_forms addc_forms = {28830xb9980000, /* alcr */28840xb9880000, /* alcgr */28850,28860,28870,28880xe30000000098, /* alc */28890xe30000000088, /* alcg */2890};28912892static const struct ins_forms subc_forms = {28930xb9990000, /* slbr */28940xb9890000, /* slbgr */28950,28960,28970,28980xe30000000099, /* slb */28990xe30000000089, /* slbg */2900};29012902SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,2903sljit_s32 dst, sljit_sw dstw,2904sljit_s32 src1, sljit_sw src1w,2905sljit_s32 src2, sljit_sw src2w)2906{2907CHECK_ERROR();2908CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));2909ADJUST_LOCAL_OFFSET(dst, dstw);2910ADJUST_LOCAL_OFFSET(src1, src1w);2911ADJUST_LOCAL_OFFSET(src2, src2w);29122913compiler->mode = op & SLJIT_32;2914compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);29152916if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {2917src1 ^= src2;2918src2 ^= src1;2919src1 ^= src2;29202921src1w ^= src2w;2922src2w ^= src1w;2923src1w ^= src2w;2924}29252926switch (GET_OPCODE(op)) {2927case SLJIT_ADD:2928compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;2929return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);2930case SLJIT_ADDC:2931compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;2932FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));2933if (dst & SLJIT_MEM)2934return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2935return SLJIT_SUCCESS;2936case SLJIT_SUB:2937compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;2938return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);2939case SLJIT_SUBC:2940compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;2941FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));2942if (dst & SLJIT_MEM)2943return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2944return SLJIT_SUCCESS;2945case SLJIT_MUL:2946FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));2947break;2948case SLJIT_AND:2949case SLJIT_OR:2950case SLJIT_XOR:2951FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));2952break;2953case SLJIT_SHL:2954case SLJIT_MSHL:2955case SLJIT_LSHR:2956case SLJIT_MLSHR:2957case SLJIT_ASHR:2958case SLJIT_MASHR:2959FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));2960break;2961case SLJIT_ROTL:2962case SLJIT_ROTR:2963FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));2964break;2965}29662967if (dst & SLJIT_MEM)2968return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2969return SLJIT_SUCCESS;2970}29712972SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,2973sljit_s32 src1, sljit_sw src1w,2974sljit_s32 src2, sljit_sw src2w)2975{2976sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;29772978CHECK_ERROR();2979CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));29802981SLJIT_SKIP_CHECKS(compiler);2982return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);2983}29842985SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,2986sljit_s32 dst_reg,2987sljit_s32 src1, sljit_sw src1w,2988sljit_s32 src2, sljit_sw src2w)2989{2990CHECK_ERROR();2991CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));29922993switch (GET_OPCODE(op)) {2994case SLJIT_MULADD:2995SLJIT_SKIP_CHECKS(compiler);2996FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));2997return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));2998}29993000return SLJIT_SUCCESS;3001}30023003SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,3004sljit_s32 dst_reg,3005sljit_s32 src1_reg,3006sljit_s32 src2_reg,3007sljit_s32 src3, sljit_sw src3w)3008{3009sljit_s32 is_right;3010sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;3011sljit_gpr dst_r = gpr(dst_reg);3012sljit_gpr src1_r = gpr(src1_reg);3013sljit_gpr src2_r = gpr(src2_reg);3014sljit_gpr src3_r = tmp1;3015sljit_ins ins;30163017CHECK_ERROR();3018CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));30193020is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);30213022if (src1_reg == src2_reg) {3023SLJIT_SKIP_CHECKS(compiler);3024return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);3025}30263027ADJUST_LOCAL_OFFSET(src3, src3w);30283029if (src3 == SLJIT_IMM) {3030src3w &= bit_length - 1;30313032if (src3w == 0)3033return SLJIT_SUCCESS;30343035if (op & SLJIT_32) {3036if (dst_r == src1_r) {3037ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;3038FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));3039} else {3040ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;3041FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));3042}3043} else {3044ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;3045FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));3046}30473048ins = 0xec0000000055 /* risbg */;30493050if (is_right) {3051src3w = bit_length - src3w;3052ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);3053} else3054ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);30553056return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));3057}30583059if (!(src3 & SLJIT_MEM)) {3060src3_r = gpr(src3);30613062if (dst_r == src3_r) {3063FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));3064src3_r = tmp1;3065}3066} else3067FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));30683069if (op & SLJIT_32) {3070if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {3071if (src3_r != tmp1) {3072FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));3073src3_r = tmp1;3074} else3075FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));3076}30773078if (dst_r == src1_r) {3079ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;3080FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));3081} else {3082ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;3083FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));3084}30853086if (src3_r != tmp1) {3087FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));3088FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));3089} else3090FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));30913092ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;3093FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));30943095return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));3096}30973098ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;3099FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));31003101ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;31023103if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {3104if (src3_r != tmp1)3105FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));31063107FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));3108src2_r = tmp0;31093110if (src3_r != tmp1)3111FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));3112else3113FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));3114} else3115FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));31163117FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));3118return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));3119}31203121SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,3122sljit_s32 src, sljit_sw srcw)3123{3124sljit_gpr src_r;3125struct addr addr;31263127CHECK_ERROR();3128CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));3129ADJUST_LOCAL_OFFSET(src, srcw);31303131switch (op) {3132case SLJIT_FAST_RETURN:3133src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;3134if (src & SLJIT_MEM)3135FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));31363137return push_inst(compiler, br(src_r));3138case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:3139return SLJIT_SUCCESS;3140case SLJIT_PREFETCH_L1:3141case SLJIT_PREFETCH_L2:3142case SLJIT_PREFETCH_L3:3143case SLJIT_PREFETCH_ONCE:3144FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));3145return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));3146default:3147return SLJIT_SUCCESS;3148}31493150return SLJIT_SUCCESS;3151}31523153SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,3154sljit_s32 dst, sljit_sw dstw)3155{3156sljit_gpr dst_r = link_r;3157sljit_s32 size;31583159CHECK_ERROR();3160CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));3161ADJUST_LOCAL_OFFSET(dst, dstw);31623163switch (op) {3164case SLJIT_FAST_ENTER:3165if (FAST_IS_REG(dst))3166return push_inst(compiler, lgr(gpr(dst), link_r));3167break;3168case SLJIT_GET_RETURN_ADDRESS:3169dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;31703171size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);3172FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));3173break;3174}31753176if (dst & SLJIT_MEM)3177return store_word(compiler, dst_r, dst, dstw, 0);31783179return SLJIT_SUCCESS;3180}31813182SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)3183{3184CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));31853186if (type == SLJIT_GP_REGISTER)3187return (sljit_s32)gpr(reg);31883189if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128)3190return -1;31913192return (sljit_s32)freg_map[reg];3193}31943195SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,3196void *instruction, sljit_u32 size)3197{3198sljit_ins ins = 0;31993200CHECK_ERROR();3201CHECK(check_sljit_emit_op_custom(compiler, instruction, size));32023203memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);3204return push_inst(compiler, ins);3205}32063207/* --------------------------------------------------------------------- */3208/* Floating point operators */3209/* --------------------------------------------------------------------- */32103211#define FLOAT_LOAD 03212#define FLOAT_STORE 132133214static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,3215sljit_s32 reg,3216sljit_s32 mem, sljit_sw memw)3217{3218struct addr addr;3219sljit_ins ins;32203221SLJIT_ASSERT(mem & SLJIT_MEM);32223223if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {3224FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));32253226if (op & FLOAT_STORE)3227ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;3228else3229ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;32303231return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);3232}32333234FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));32353236if (op & FLOAT_STORE)3237ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;3238else3239ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;32403241return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));3242}32433244static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,3245sljit_s32 reg,3246sljit_s32 src, sljit_sw srcw)3247{3248struct addr addr;32493250if (!(src & SLJIT_MEM))3251return push_inst(compiler, ins_r | F4(reg) | F0(src));32523253FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));3254return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));3255}32563257static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,3258sljit_s32 dst, sljit_sw dstw,3259sljit_s32 src, sljit_sw srcw)3260{3261sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;3262sljit_ins ins;32633264if (src & SLJIT_MEM) {3265FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));3266src = TMP_FREG1;3267}32683269/* M3 is set to 5 */3270if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)3271ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;3272else3273ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;32743275FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));32763277if (dst & SLJIT_MEM)3278return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);32793280return SLJIT_SUCCESS;3281}32823283static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,3284sljit_s32 dst, sljit_sw dstw,3285sljit_s32 src, sljit_sw srcw)3286{3287sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;32883289if (src == SLJIT_IMM) {3290FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));3291src = (sljit_s32)tmp0;3292}3293else if (src & SLJIT_MEM) {3294FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));3295src = (sljit_s32)tmp0;3296}32973298FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));32993300if (dst & SLJIT_MEM)3301return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);33023303return SLJIT_SUCCESS;3304}33053306static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,3307sljit_s32 dst, sljit_sw dstw,3308sljit_s32 src, sljit_sw srcw)3309{3310sljit_ins ins;33113312if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)3313srcw = (sljit_s32)srcw;33143315if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)3316ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;3317else3318ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;33193320return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);3321}33223323static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,3324sljit_s32 dst, sljit_sw dstw,3325sljit_s32 src, sljit_sw srcw)3326{3327sljit_ins ins;33283329if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)3330srcw = (sljit_u32)srcw;33313332if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)3333ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;3334else3335ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;33363337return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);3338}33393340static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,3341sljit_s32 src1, sljit_sw src1w,3342sljit_s32 src2, sljit_sw src2w)3343{3344sljit_ins ins_r, ins;33453346if (src1 & SLJIT_MEM) {3347FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));3348src1 = TMP_FREG1;3349}33503351if (op & SLJIT_32) {3352ins_r = 0xb3090000 /* cebr */;3353ins = 0xed0000000009 /* ceb */;3354} else {3355ins_r = 0xb3190000 /* cdbr */;3356ins = 0xed0000000019 /* cdb */;3357}33583359return emit_float(compiler, ins_r, ins, src1, src2, src2w);3360}33613362SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,3363sljit_s32 dst, sljit_sw dstw,3364sljit_s32 src, sljit_sw srcw)3365{3366sljit_s32 dst_r;3367sljit_ins ins;33683369CHECK_ERROR();33703371SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);33723373dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;33743375if (op == SLJIT_CONV_F64_FROM_F32)3376FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));3377else {3378if (src & SLJIT_MEM) {3379FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));3380src = dst_r;3381}33823383switch (GET_OPCODE(op)) {3384case SLJIT_MOV_F64:3385if (FAST_IS_REG(dst)) {3386if (dst == src)3387return SLJIT_SUCCESS;33883389ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;3390break;3391}3392return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);3393case SLJIT_CONV_F64_FROM_F32:3394/* Only SLJIT_CONV_F32_FROM_F64. */3395ins = 0xb3440000 /* ledbr */;3396break;3397case SLJIT_NEG_F64:3398ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;3399break;3400default:3401SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);3402ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;3403break;3404}34053406FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));3407}34083409if (dst & SLJIT_MEM)3410return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);34113412return SLJIT_SUCCESS;3413}34143415#define FLOAT_MOV(op, dst_r, src_r) \3416(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))34173418SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,3419sljit_s32 dst, sljit_sw dstw,3420sljit_s32 src1, sljit_sw src1w,3421sljit_s32 src2, sljit_sw src2w)3422{3423sljit_s32 dst_r = TMP_FREG1;3424sljit_ins ins_r, ins;34253426CHECK_ERROR();3427CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));3428ADJUST_LOCAL_OFFSET(dst, dstw);3429ADJUST_LOCAL_OFFSET(src1, src1w);3430ADJUST_LOCAL_OFFSET(src2, src2w);34313432do {3433if (FAST_IS_REG(dst)) {3434dst_r = dst;34353436if (dst == src1)3437break;34383439if (dst == src2) {3440if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {3441src2 = src1;3442src2w = src1w;3443src1 = dst;3444break;3445}34463447FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));3448src2 = TMP_FREG1;3449}3450}34513452if (src1 & SLJIT_MEM)3453FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));3454else3455FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));3456} while (0);34573458switch (GET_OPCODE(op)) {3459case SLJIT_ADD_F64:3460ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;3461ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;3462break;3463case SLJIT_SUB_F64:3464ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;3465ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;3466break;3467case SLJIT_MUL_F64:3468ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;3469ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;3470break;3471default:3472SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);3473ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;3474ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;3475break;3476}34773478FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));34793480if (dst & SLJIT_MEM)3481return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);34823483return SLJIT_SUCCESS;3484}34853486SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,3487sljit_s32 dst_freg,3488sljit_s32 src1, sljit_sw src1w,3489sljit_s32 src2, sljit_sw src2w)3490{3491sljit_s32 reg;34923493CHECK_ERROR();3494CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));3495ADJUST_LOCAL_OFFSET(src1, src1w);3496ADJUST_LOCAL_OFFSET(src2, src2w);34973498if (src2 & SLJIT_MEM) {3499FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));3500src2 = TMP_FREG1;3501}35023503if (src1 & SLJIT_MEM) {3504reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;3505FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));3506src1 = reg;3507}35083509return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));3510}35113512SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,3513sljit_s32 freg, sljit_f32 value)3514{3515union {3516sljit_s32 imm;3517sljit_f32 value;3518} u;35193520CHECK_ERROR();3521CHECK(check_sljit_emit_fset32(compiler, freg, value));35223523u.value = value;35243525FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));3526return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));3527}35283529SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,3530sljit_s32 freg, sljit_f64 value)3531{3532union {3533sljit_sw imm;3534sljit_f64 value;3535} u;35363537CHECK_ERROR();3538CHECK(check_sljit_emit_fset64(compiler, freg, value));35393540u.value = value;35413542FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));3543return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));3544}35453546SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,3547sljit_s32 freg, sljit_s32 reg)3548{3549sljit_gpr gen_r;35503551CHECK_ERROR();3552CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));35533554gen_r = gpr(reg);35553556if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {3557if (op & SLJIT_32) {3558FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));3559gen_r = tmp0;3560}35613562return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));3563}35643565FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));35663567if (!(op & SLJIT_32))3568return SLJIT_SUCCESS;35693570return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));3571}35723573/* --------------------------------------------------------------------- */3574/* Conditional instructions */3575/* --------------------------------------------------------------------- */35763577SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)3578{3579struct sljit_label *label;35803581CHECK_ERROR_PTR();3582CHECK_PTR(check_sljit_emit_label(compiler));35833584if (compiler->last_label && compiler->last_label->size == compiler->size)3585return compiler->last_label;35863587label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));3588PTR_FAIL_IF(!label);3589set_label(label, compiler);3590return label;3591}35923593SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)3594{3595struct sljit_jump *jump;3596sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;35973598CHECK_ERROR_PTR();3599CHECK_PTR(check_sljit_emit_jump(compiler, type));36003601/* record jump */3602jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));3603PTR_FAIL_IF(!jump);3604set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);3605jump->addr = compiler->size;36063607/* emit jump instruction */3608type &= 0xff;3609if (type >= SLJIT_FAST_CALL)3610PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));3611else3612PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));36133614return jump;3615}36163617SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,3618sljit_s32 arg_types)3619{3620SLJIT_UNUSED_ARG(arg_types);3621CHECK_ERROR_PTR();3622CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));36233624if (type & SLJIT_CALL_RETURN) {3625PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));3626type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);3627}36283629SLJIT_SKIP_CHECKS(compiler);3630return sljit_emit_jump(compiler, type);3631}36323633SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)3634{3635sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;36363637CHECK_ERROR();3638CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));36393640if (src == SLJIT_IMM) {3641SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */3642FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));3643}3644else if (src & SLJIT_MEM) {3645ADJUST_LOCAL_OFFSET(src, srcw);3646FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));3647}36483649/* emit jump instruction */3650if (type >= SLJIT_FAST_CALL)3651return push_inst(compiler, basr(link_r, src_r));36523653return push_inst(compiler, br(src_r));3654}36553656SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,3657sljit_s32 arg_types,3658sljit_s32 src, sljit_sw srcw)3659{3660SLJIT_UNUSED_ARG(arg_types);36613662CHECK_ERROR();3663CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));36643665SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);36663667if (src & SLJIT_MEM) {3668ADJUST_LOCAL_OFFSET(src, srcw);3669FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));3670src = TMP_REG2;3671srcw = 0;3672}36733674if (type & SLJIT_CALL_RETURN) {3675if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {3676FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));3677src = TMP_REG2;3678srcw = 0;3679}36803681FAIL_IF(emit_stack_frame_release(compiler, r14));3682type = SLJIT_JUMP;3683}36843685SLJIT_SKIP_CHECKS(compiler);3686return sljit_emit_ijump(compiler, type, src, srcw);3687}36883689SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,3690sljit_s32 dst, sljit_sw dstw,3691sljit_s32 type)3692{3693sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;3694sljit_gpr loc_r = tmp1;3695sljit_u8 mask = get_cc(compiler, type);36963697CHECK_ERROR();3698CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));36993700switch (GET_OPCODE(op)) {3701case SLJIT_AND:3702case SLJIT_OR:3703case SLJIT_XOR:3704compiler->status_flags_state = op & SLJIT_SET_Z;37053706/* dst is also source operand */3707if (dst & SLJIT_MEM)3708FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));37093710break;3711case SLJIT_MOV32:3712op |= SLJIT_32;3713/* fallthrough */3714case SLJIT_MOV:3715/* can write straight into destination */3716loc_r = dst_r;3717break;3718default:3719SLJIT_UNREACHABLE();3720}37213722/* TODO(mundaym): fold into cmov helper function? */3723#define LEVAL(i) i(loc_r, 1, mask)3724if (have_lscond2()) {3725FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));3726FAIL_IF(push_inst(compiler,3727WHEN2(op & SLJIT_32, lochi, locghi)));3728} else {3729FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));3730FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));3731FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));3732}3733#undef LEVAL37343735/* apply bitwise op and set condition codes */3736switch (GET_OPCODE(op)) {3737#define LEVAL(i) i(dst_r, loc_r)3738case SLJIT_AND:3739FAIL_IF(push_inst(compiler,3740WHEN2(op & SLJIT_32, nr, ngr)));3741break;3742case SLJIT_OR:3743FAIL_IF(push_inst(compiler,3744WHEN2(op & SLJIT_32, or, ogr)));3745break;3746case SLJIT_XOR:3747FAIL_IF(push_inst(compiler,3748WHEN2(op & SLJIT_32, xr, xgr)));3749break;3750#undef LEVAL3751}37523753/* store result to memory if required */3754if (dst & SLJIT_MEM)3755return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));37563757return SLJIT_SUCCESS;3758}37593760SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,3761sljit_s32 dst_reg,3762sljit_s32 src1, sljit_sw src1w,3763sljit_s32 src2_reg)3764{3765sljit_ins mask;3766sljit_gpr src_r;3767sljit_gpr dst_r = gpr(dst_reg);3768sljit_ins ins;37693770CHECK_ERROR();3771CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));37723773ADJUST_LOCAL_OFFSET(src1, src1w);37743775if (dst_reg != src2_reg) {3776if (src1 == dst_reg) {3777src1 = src2_reg;3778src1w = 0;3779type ^= 0x1;3780} else {3781if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {3782FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));3783src1 = src2_reg;3784src1w = 0;3785type ^= 0x1;3786} else3787FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));3788}3789}37903791mask = get_cc(compiler, type & ~SLJIT_32);37923793if (src1 & SLJIT_MEM) {3794if (src1 & OFFS_REG_MASK) {3795src_r = gpr(OFFS_REG(src1));37963797if (src1w != 0) {3798FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));3799src_r = tmp1;3800}38013802FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));3803src_r = tmp1;3804src1w = 0;3805} else if (!is_s20(src1w)) {3806FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));38073808if (src1 & REG_MASK)3809FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));38103811src_r = tmp1;3812src1w = 0;3813} else3814src_r = gpr(src1 & REG_MASK);38153816ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;3817return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));3818}38193820if (src1 == SLJIT_IMM) {3821if (type & SLJIT_32)3822src1w = (sljit_s32)src1w;38233824if (have_lscond2() && is_s16(src1w)) {3825ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;3826return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);3827}38283829FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));3830src_r = tmp1;3831} else3832src_r = gpr(src1);38333834ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;3835return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));3836}38373838SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,3839sljit_s32 dst_freg,3840sljit_s32 src1, sljit_sw src1w,3841sljit_s32 src2_freg)3842{3843sljit_ins ins;3844struct sljit_label *label;3845struct sljit_jump *jump;38463847CHECK_ERROR();3848CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));38493850ADJUST_LOCAL_OFFSET(src1, src1w);38513852if (dst_freg != src2_freg) {3853if (dst_freg == src1) {3854src1 = src2_freg;3855src1w = 0;3856type ^= 0x1;3857} else {3858ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;3859FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));3860}3861}38623863SLJIT_SKIP_CHECKS(compiler);3864jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);3865FAIL_IF(!jump);38663867if (!(src1 & SLJIT_MEM)) {3868ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;3869FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));3870} else3871FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));38723873SLJIT_SKIP_CHECKS(compiler);3874label = sljit_emit_label(compiler);3875FAIL_IF(!label);38763877sljit_set_label(jump, label);3878return SLJIT_SUCCESS;3879}38803881SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,3882sljit_s32 reg,3883sljit_s32 mem, sljit_sw memw)3884{3885sljit_ins ins, reg1, reg2, base, offs = 0;38863887CHECK_ERROR();3888CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));38893890if (!(reg & REG_PAIR_MASK))3891return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);38923893ADJUST_LOCAL_OFFSET(mem, memw);38943895base = gpr(mem & REG_MASK);3896reg1 = gpr(REG_PAIR_FIRST(reg));3897reg2 = gpr(REG_PAIR_SECOND(reg));38983899if (mem & OFFS_REG_MASK) {3900memw &= 0x3;3901offs = gpr(OFFS_REG(mem));39023903if (memw != 0) {3904FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));3905offs = tmp1;3906} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {3907FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));3908base = tmp1;3909offs = 0;3910}39113912memw = 0;3913} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {3914FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));39153916if (base == 0)3917base = tmp1;3918else3919offs = tmp1;39203921memw = 0;3922}39233924if (offs == 0 && reg2 == (reg1 + 1)) {3925ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;3926return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));3927}39283929ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);39303931if (!(type & SLJIT_MEM_STORE) && base == reg1) {3932FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));3933return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));3934}39353936FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));3937return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));3938}39393940SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,3941sljit_s32 vreg,3942sljit_s32 srcdst, sljit_sw srcdstw)3943{3944sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3945sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3946sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);3947struct addr addr;3948sljit_ins ins;39493950CHECK_ERROR();3951CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));39523953ADJUST_LOCAL_OFFSET(srcdst, srcdstw);39543955if (reg_size != 4)3956return SLJIT_ERR_UNSUPPORTED;39573958if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))3959return SLJIT_ERR_UNSUPPORTED;39603961if (type & SLJIT_SIMD_TEST)3962return SLJIT_SUCCESS;39633964if (!(srcdst & SLJIT_MEM)) {3965if (type & SLJIT_SIMD_STORE)3966ins = F36(srcdst) | F32(vreg);3967else3968ins = F36(vreg) | F32(srcdst);39693970return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);3971}39723973FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));3974ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);39753976if (alignment >= 4)3977ins |= 4 << 12;3978else if (alignment == 3)3979ins |= 3 << 12;39803981return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);3982}39833984SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,3985sljit_s32 vreg,3986sljit_s32 src, sljit_sw srcw)3987{3988sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);3989sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);3990struct addr addr;3991sljit_gpr reg;3992sljit_sw sign_ext;39933994CHECK_ERROR();3995CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));39963997ADJUST_LOCAL_OFFSET(src, srcw);39983999if (reg_size != 4)4000return SLJIT_ERR_UNSUPPORTED;40014002if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4003return SLJIT_ERR_UNSUPPORTED;40044005if (type & SLJIT_SIMD_TEST)4006return SLJIT_SUCCESS;40074008if (src & SLJIT_MEM) {4009FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));4010return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(vreg)4011| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));4012}40134014if (type & SLJIT_SIMD_FLOAT) {4015if (src == SLJIT_IMM)4016return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg));40174018return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)elem_size << 12));4019}40204021if (src == SLJIT_IMM) {4022sign_ext = 0x10000;40234024switch (elem_size) {4025case 0:4026srcw &= 0xff;4027sign_ext = (sljit_s8)srcw;4028break;4029case 1:4030srcw &= 0xffff;4031sign_ext = (sljit_s16)srcw;4032break;4033case 2:4034if ((sljit_s32)srcw == (sljit_s16)srcw) {4035srcw &= 0xffff;4036sign_ext = (sljit_s16)srcw;4037} else4038srcw &= 0xffffffff;4039break;4040default:4041if (srcw == (sljit_s16)srcw) {4042srcw &= 0xffff;4043sign_ext = (sljit_s16)srcw;4044}4045break;4046}40474048if (sign_ext != 0x10000) {4049if (sign_ext == 0 || sign_ext == -1)4050return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)4051| (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));40524053return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(vreg)4054| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));4055}40564057push_load_imm_inst(compiler, tmp0, srcw);4058reg = tmp0;4059} else4060reg = gpr(src);40614062FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ((sljit_ins)elem_size << 12)));4063return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(vreg) | ((sljit_ins)elem_size << 12));4064}40654066SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,4067sljit_s32 vreg, sljit_s32 lane_index,4068sljit_s32 srcdst, sljit_sw srcdstw)4069{4070sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4071sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4072struct addr addr;4073sljit_gpr reg;4074sljit_ins ins = 0;40754076CHECK_ERROR();4077CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));40784079ADJUST_LOCAL_OFFSET(srcdst, srcdstw);40804081if (reg_size != 4)4082return SLJIT_ERR_UNSUPPORTED;40834084if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4085return SLJIT_ERR_UNSUPPORTED;40864087if (type & SLJIT_SIMD_TEST)4088return SLJIT_SUCCESS;40894090if (srcdst & SLJIT_MEM) {4091FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));4092ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);4093}40944095if (type & SLJIT_SIMD_LANE_ZERO) {4096if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))4097return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));40984099if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {4100FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(vreg)));4101srcdst = TMP_FREG1;4102srcdstw = 0;4103}41044105FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)));4106}41074108if (srcdst & SLJIT_MEM) {4109switch (elem_size) {4110case 0:4111ins |= 0xe70000000000 /* vleb */;4112break;4113case 1:4114ins |= 0xe70000000001 /* vleh */;4115break;4116case 2:4117ins |= 0xe70000000003 /* vlef */;4118break;4119default:4120ins |= 0xe70000000002 /* vleg */;4121break;4122}41234124/* Convert to vsteb - vsteg */4125if (type & SLJIT_SIMD_STORE)4126ins |= 0x8;41274128return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));4129}41304131if (type & SLJIT_SIMD_FLOAT) {4132if (type & SLJIT_SIMD_STORE)4133return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(vreg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));41344135if (elem_size == 3) {4136if (lane_index == 0)4137ins = F32(srcdst) | F28(vreg) | (1 << 12);4138else4139ins = F32(vreg) | F28(srcdst);41404141return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(vreg) | ins);4142}41434144FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));4145return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));4146}41474148if (srcdst == SLJIT_IMM) {4149switch (elem_size) {4150case 0:4151ins = 0xe70000000040 /* vleib */;4152srcdstw &= 0xff;4153break;4154case 1:4155ins = 0xe70000000041 /* vleih */;4156srcdstw &= 0xffff;4157break;4158case 2:4159if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {4160srcdstw &= 0xffff;4161ins = 0xe70000000043 /* vleif */;4162} else4163srcdstw &= 0xffffffff;4164break;4165default:4166if (srcdstw == (sljit_s16)srcdstw) {4167srcdstw &= 0xffff;4168ins = 0xe70000000042 /* vleig */;4169}4170break;4171}41724173if (ins != 0)4174return push_inst(compiler, ins | F36(vreg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));41754176push_load_imm_inst(compiler, tmp0, srcdstw);4177reg = tmp0;4178} else4179reg = gpr(srcdst);41804181ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);41824183if (!(type & SLJIT_SIMD_STORE))4184return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ins);41854186FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(vreg) | ins));41874188if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)4189return SLJIT_SUCCESS;41904191switch (elem_size) {4192case 0:4193ins = 0xb9060000 /* lgbr */;4194break;4195case 1:4196ins = 0xb9070000 /* lghr */;4197break;4198default:4199ins = 0xb9140000 /* lgfr */;4200break;4201}42024203return push_inst(compiler, ins | R4A(reg) | R0A(reg));4204}42054206SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,4207sljit_s32 vreg,4208sljit_s32 src, sljit_s32 src_lane_index)4209{4210sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4211sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);42124213CHECK_ERROR();4214CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));42154216if (reg_size != 4)4217return SLJIT_ERR_UNSUPPORTED;42184219if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4220return SLJIT_ERR_UNSUPPORTED;42214222if (type & SLJIT_SIMD_TEST)4223return SLJIT_SUCCESS;42244225return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src)4226| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));4227}42284229SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,4230sljit_s32 vreg,4231sljit_s32 src, sljit_sw srcw)4232{4233sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4234sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4235sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);4236struct addr addr;4237sljit_ins ins;42384239CHECK_ERROR();4240CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));42414242ADJUST_LOCAL_OFFSET(src, srcw);42434244if (reg_size != 4)4245return SLJIT_ERR_UNSUPPORTED;42464247if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4248return SLJIT_ERR_UNSUPPORTED;42494250if (type & SLJIT_SIMD_TEST)4251return SLJIT_SUCCESS;42524253if (src & SLJIT_MEM) {4254FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));4255ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);42564257switch (elem2_size - elem_size) {4258case 1:4259ins |= 0xe70000000002 /* vleg */;4260break;4261case 2:4262ins |= 0xe70000000003 /* vlef */;4263break;4264default:4265ins |= 0xe70000000001 /* vleh */;4266break;4267}42684269FAIL_IF(push_inst(compiler, ins));4270src = vreg;4271}42724273if (type & SLJIT_SIMD_FLOAT) {4274FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(vreg) | F32(src) | (2 << 12)));4275FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(vreg) | F32(vreg) | (32 << 16) | (3 << 12)));4276return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(vreg) | F32(vreg) | (2 << 12));4277}42784279ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(vreg);42804281do {4282FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));4283src = vreg;4284} while (++elem_size < elem2_size);42854286return SLJIT_SUCCESS;4287}42884289SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,4290sljit_s32 vreg,4291sljit_s32 dst, sljit_sw dstw)4292{4293sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4294sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4295sljit_gpr dst_r;42964297CHECK_ERROR();4298CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));42994300ADJUST_LOCAL_OFFSET(dst, dstw);43014302if (reg_size != 4)4303return SLJIT_ERR_UNSUPPORTED;43044305if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4306return SLJIT_ERR_UNSUPPORTED;43074308if (type & SLJIT_SIMD_TEST)4309return SLJIT_SUCCESS;43104311switch (elem_size) {4312case 0:4313push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);4314push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);4315FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));4316break;4317case 1:4318push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);4319break;4320case 2:4321push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);4322break;4323default:4324push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);4325break;4326}43274328if (elem_size != 0)4329FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));43304331FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(vreg) | F28(TMP_FREG1)));43324333dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;4334FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)4335| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));43364337if (dst_r == tmp0)4338return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);43394340return SLJIT_SUCCESS;4341}43424343SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,4344sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)4345{4346sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4347sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4348sljit_s32 alignment;4349struct addr addr;4350sljit_ins ins = 0, load_ins;43514352CHECK_ERROR();4353CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));4354ADJUST_LOCAL_OFFSET(src2, src2w);43554356if (reg_size != 4)4357return SLJIT_ERR_UNSUPPORTED;43584359if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))4360return SLJIT_ERR_UNSUPPORTED;43614362if (type & SLJIT_SIMD_TEST)4363return SLJIT_SUCCESS;43644365switch (SLJIT_SIMD_GET_OPCODE(type)) {4366case SLJIT_SIMD_OP2_AND:4367ins = 0xe70000000068 /* vn */;4368break;4369case SLJIT_SIMD_OP2_OR:4370ins = 0xe7000000006a /* vo */;4371break;4372case SLJIT_SIMD_OP2_XOR:4373ins = 0xe7000000006d /* vx */;4374break;4375case SLJIT_SIMD_OP2_SHUFFLE:4376ins = 0xe7000000008c /* vperm */;4377break;4378}43794380if (src2 & SLJIT_MEM) {4381FAIL_IF(make_addr_bx(compiler, &addr, src2, src2w, tmp1));4382load_ins = 0xe70000000006 /* vl */ | F36(TMP_FREG1) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);4383alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);43844385if (alignment >= 4)4386load_ins |= 4 << 12;4387else if (alignment == 3)4388load_ins |= 3 << 12;43894390FAIL_IF(push_inst(compiler, load_ins));4391src2 = TMP_FREG1;4392}43934394if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE)4395return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src1_vreg) | F12(src2));43964397return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src2));4398}43994400SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,4401sljit_s32 dst_reg,4402sljit_s32 mem_reg)4403{4404CHECK_ERROR();4405CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));44064407if (op & SLJIT_ATOMIC_USE_LS)4408return SLJIT_ERR_UNSUPPORTED;44094410switch (GET_OPCODE(op)) {4411case SLJIT_MOV32:4412case SLJIT_MOV_U32:4413case SLJIT_MOV:4414case SLJIT_MOV_P:4415if (op & SLJIT_ATOMIC_TEST)4416return SLJIT_SUCCESS;44174418SLJIT_SKIP_CHECKS(compiler);4419return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);4420default:4421return SLJIT_ERR_UNSUPPORTED;4422}4423}44244425SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,4426sljit_s32 src_reg,4427sljit_s32 mem_reg,4428sljit_s32 temp_reg)4429{4430sljit_ins ins;4431sljit_gpr tmp_r = gpr(temp_reg);4432sljit_gpr mem_r = gpr(mem_reg);44334434CHECK_ERROR();4435CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));44364437if (op & SLJIT_ATOMIC_USE_LS)4438return SLJIT_ERR_UNSUPPORTED;44394440switch (GET_OPCODE(op)) {4441case SLJIT_MOV32:4442case SLJIT_MOV_U32:4443ins = 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r);4444break;4445case SLJIT_MOV:4446case SLJIT_MOV_P:4447ins = 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r);4448break;4449default:4450return SLJIT_ERR_UNSUPPORTED;4451}44524453if (op & SLJIT_ATOMIC_TEST)4454return SLJIT_SUCCESS;44554456return push_inst(compiler, ins);4457}44584459/* --------------------------------------------------------------------- */4460/* Other instructions */4461/* --------------------------------------------------------------------- */44624463/* On s390x we build a literal pool to hold constants. This has two main4464advantages:446544661. we only need one instruction in the instruction stream (LGRL)44672. we can store 64 bit addresses and use 32 bit offsets44684469To retrofit the extra information needed to build the literal pool we4470add a new sljit_s390x_const struct that contains the initial value but4471can still be cast to a sljit_const. */44724473SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)4474{4475struct sljit_s390x_const *const_;4476sljit_gpr dst_r;44774478CHECK_ERROR_PTR();4479CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));44804481const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,4482sizeof(struct sljit_s390x_const));4483PTR_FAIL_IF(!const_);4484set_const((struct sljit_const*)const_, compiler);4485const_->init_value = init_value;44864487dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;4488if (have_genext())4489PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));4490else {4491PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));4492PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));4493}44944495if (dst & SLJIT_MEM)4496PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));44974498return (struct sljit_const*)const_;4499}45004501SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)4502{4503/* Update the constant pool. */4504sljit_uw *ptr = (sljit_uw *)addr;4505SLJIT_UNUSED_ARG(executable_offset);45064507SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);4508*ptr = new_target;4509SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);4510SLJIT_CACHE_FLUSH(ptr, ptr + 1);4511}45124513SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)4514{4515sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);4516}45174518SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)4519{4520struct sljit_jump *jump;4521sljit_gpr dst_r;45224523CHECK_ERROR_PTR();4524CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));4525ADJUST_LOCAL_OFFSET(dst, dstw);45264527jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));4528PTR_FAIL_IF(!jump);4529set_mov_addr(jump, compiler, 0);45304531dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;45324533if (have_genext())4534PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));4535else {4536PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));4537PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));4538}45394540if (dst & SLJIT_MEM)4541PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));45424543return jump;4544}45454546/* TODO(carenas): EVAL probably should move up or be refactored */4547#undef WHEN24548#undef EVAL45494550#undef tmp14551#undef tmp045524553/* TODO(carenas): undef other macros that spill like is_u12? */455445554556