Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c
22434 views
/*1* Stack-less Just-In-Time compiler2*3* Copyright Zoltan Herczeg ([email protected]). All rights reserved.4*5* Redistribution and use in source and binary forms, with or without modification, are6* permitted provided that the following conditions are met:7*8* 1. Redistributions of source code must retain the above copyright notice, this list of9* conditions and the following disclaimer.10*11* 2. Redistributions in binary form must reproduce the above copyright notice, this list12* of conditions and the following disclaimer in the documentation and/or other materials13* provided with the distribution.14*15* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY16* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES17* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT18* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,19* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED20* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR21* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN22* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN23* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.24*/2526#include <sys/auxv.h>2728#ifdef __ARCH__29#define ENABLE_STATIC_FACILITY_DETECTION 130#else31#define ENABLE_STATIC_FACILITY_DETECTION 032#endif33#define ENABLE_DYNAMIC_FACILITY_DETECTION 13435SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)36{37return "s390x" SLJIT_CPUINFO;38}3940/* Instructions are stored as 64 bit values regardless their size. 
*/41typedef sljit_uw sljit_ins;4243#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)44#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)4546static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {470, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 1448};4950/* there are also a[2-15] available, but they are slower to access and51* their use is limited as mundaym explained:52* https://github.com/zherczeg/sljit/pull/91#discussion_r48689568953*/5455/* General Purpose Registers [0-15]. */56typedef sljit_uw sljit_gpr;5758/*59* WARNING60* the following code is non standard and should be improved for61* consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based62* registers because r0 and r1 are the ABI recommended volatiles.63* there is a gpr() function that maps sljit to physical register numbers64* that should be used instead of the usual index into reg_map[] and65* will be retired ASAP (TODO: carenas)66*/6768static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */69static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */70static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */71static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */72static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */73static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */74static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */75static const sljit_gpr r7 = 7; /* reg_map[6] */76static const sljit_gpr r8 = 8; /* reg_map[7] */77static const sljit_gpr r9 = 9; /* reg_map[8] */78static const sljit_gpr r10 = 10; /* reg_map[9] */79static const sljit_gpr r11 = 11; /* reg_map[10] */80static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */81static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */82static const sljit_gpr r14 = 14; /* reg_map[0]: return address */83static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack 
pointer */8485/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */86/* TODO(carenas): r12 might conflict in PIC code, reserve? */87/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp88* like we do know might be faster though, reserve?89*/9091/* TODO(carenas): should be named TMP_REG[1-2] for consistency */92#define tmp0 r093#define tmp1 r19495/* Link register. */96static const sljit_gpr link_r = 14; /* r14 */9798#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)99100static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {1010, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1102};103104#define R0A(r) (r)105#define R4A(r) ((r) << 4)106#define R8A(r) ((r) << 8)107#define R12A(r) ((r) << 12)108#define R16A(r) ((r) << 16)109#define R20A(r) ((r) << 20)110#define R28A(r) ((r) << 28)111#define R32A(r) ((r) << 32)112#define R36A(r) ((r) << 36)113114#define R0(r) ((sljit_ins)reg_map[r])115116#define F0(r) ((sljit_ins)freg_map[r])117#define F4(r) (R4A((sljit_ins)freg_map[r]))118#define F12(r) (R12A((sljit_ins)freg_map[r]))119#define F20(r) (R20A((sljit_ins)freg_map[r]))120#define F28(r) (R28A((sljit_ins)freg_map[r]))121#define F32(r) (R32A((sljit_ins)freg_map[r]))122#define F36(r) (R36A((sljit_ins)freg_map[r]))123124/* Convert SLJIT register to hardware register. 
*/125static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)126{127SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));128return reg_map[r];129}130131static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)132{133sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));134FAIL_IF(!ibuf);135*ibuf = ins;136137SLJIT_ASSERT(ins <= 0xffffffffffffL);138139compiler->size++;140if (ins & 0xffff00000000L)141compiler->size++;142143if (ins & 0xffffffff0000L)144compiler->size++;145146return SLJIT_SUCCESS;147}148149#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \150(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \151&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))152153/* Map the given type to a 4-bit condition code mask. */154static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {155const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */156const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */157const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */158const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */159160switch (type) {161case SLJIT_EQUAL:162if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {163sljit_s32 flag_type = GET_FLAG_TYPE(compiler->status_flags_state);164if (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_SIG_LESS_EQUAL)165return cc0;166if (flag_type == SLJIT_OVERFLOW)167return (cc0 | cc3);168return (cc0 | cc2);169}170SLJIT_FALLTHROUGH171172case SLJIT_ATOMIC_STORED:173case SLJIT_F_EQUAL:174case SLJIT_ORDERED_EQUAL:175return cc0;176177case SLJIT_NOT_EQUAL:178if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {179sljit_s32 flag_type = GET_FLAG_TYPE(compiler->status_flags_state);180if (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_SIG_LESS_EQUAL)181return (cc1 | cc2 | cc3);182if (flag_type == SLJIT_OVERFLOW)183return (cc1 | cc2);184return (cc1 | cc3);185}186SLJIT_FALLTHROUGH187188case SLJIT_UNORDERED_OR_NOT_EQUAL:189return (cc1 | 
cc2 | cc3);190191case SLJIT_LESS:192case SLJIT_ATOMIC_NOT_STORED:193return cc1;194195case SLJIT_GREATER_EQUAL:196case SLJIT_UNORDERED_OR_GREATER_EQUAL:197return (cc0 | cc2 | cc3);198199case SLJIT_GREATER:200if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)201return cc2;202return cc3;203204case SLJIT_LESS_EQUAL:205if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)206return (cc0 | cc1);207return (cc0 | cc1 | cc2);208209case SLJIT_SIG_LESS:210case SLJIT_F_LESS:211case SLJIT_ORDERED_LESS:212return cc1;213214case SLJIT_NOT_CARRY:215if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)216return (cc2 | cc3);217SLJIT_FALLTHROUGH218219case SLJIT_SIG_LESS_EQUAL:220case SLJIT_F_LESS_EQUAL:221case SLJIT_ORDERED_LESS_EQUAL:222return (cc0 | cc1);223224case SLJIT_CARRY:225if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)226return (cc0 | cc1);227SLJIT_FALLTHROUGH228229case SLJIT_SIG_GREATER:230case SLJIT_UNORDERED_OR_GREATER:231/* Overflow is considered greater, see SLJIT_SUB. 
*/232return cc2 | cc3;233234case SLJIT_SIG_GREATER_EQUAL:235return (cc0 | cc2 | cc3);236237case SLJIT_OVERFLOW:238if (compiler->status_flags_state & SLJIT_SET_Z)239return (cc2 | cc3);240SLJIT_FALLTHROUGH241242case SLJIT_UNORDERED:243return cc3;244245case SLJIT_NOT_OVERFLOW:246if (compiler->status_flags_state & SLJIT_SET_Z)247return (cc0 | cc1);248SLJIT_FALLTHROUGH249250case SLJIT_ORDERED:251return (cc0 | cc1 | cc2);252253case SLJIT_F_NOT_EQUAL:254case SLJIT_ORDERED_NOT_EQUAL:255return (cc1 | cc2);256257case SLJIT_F_GREATER:258case SLJIT_ORDERED_GREATER:259return cc2;260261case SLJIT_F_GREATER_EQUAL:262case SLJIT_ORDERED_GREATER_EQUAL:263return (cc0 | cc2);264265case SLJIT_UNORDERED_OR_LESS_EQUAL:266return (cc0 | cc1 | cc3);267268case SLJIT_UNORDERED_OR_EQUAL:269return (cc0 | cc3);270271case SLJIT_UNORDERED_OR_LESS:272return (cc1 | cc3);273}274275SLJIT_UNREACHABLE();276return (sljit_u8)-1;277}278279/* Facility to bit index mappings.280Note: some facilities share the same bit index. */281typedef sljit_uw facility_bit;282#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7283#define FAST_LONG_DISPLACEMENT_FACILITY 19284#define EXTENDED_IMMEDIATE_FACILITY 21285#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34286#define DISTINCT_OPERAND_FACILITY 45287#define HIGH_WORD_FACILITY 45288#define POPULATION_COUNT_FACILITY 45289#define LOAD_STORE_ON_CONDITION_1_FACILITY 45290#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49291#define LOAD_STORE_ON_CONDITION_2_FACILITY 53292#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58293#define VECTOR_FACILITY 129294#define VECTOR_ENHANCEMENTS_1_FACILITY 135295296/* Report whether a facility is known to be present due to the compiler297settings. This function should always be compiled to a constant298value given a constant argument. 
*/299static SLJIT_INLINE int have_facility_static(facility_bit x)300{301#if ENABLE_STATIC_FACILITY_DETECTION302switch (x) {303case FAST_LONG_DISPLACEMENT_FACILITY:304return (__ARCH__ >= 6 /* z990 */);305case EXTENDED_IMMEDIATE_FACILITY:306case STORE_FACILITY_LIST_EXTENDED_FACILITY:307return (__ARCH__ >= 7 /* z9-109 */);308case GENERAL_INSTRUCTION_EXTENSION_FACILITY:309return (__ARCH__ >= 8 /* z10 */);310case DISTINCT_OPERAND_FACILITY:311return (__ARCH__ >= 9 /* z196 */);312case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:313return (__ARCH__ >= 10 /* zEC12 */);314case LOAD_STORE_ON_CONDITION_2_FACILITY:315case VECTOR_FACILITY:316return (__ARCH__ >= 11 /* z13 */);317case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:318case VECTOR_ENHANCEMENTS_1_FACILITY:319return (__ARCH__ >= 12 /* z14 */);320default:321SLJIT_UNREACHABLE();322}323#endif324return 0;325}326327static SLJIT_INLINE unsigned long get_hwcap(void)328{329static unsigned long hwcap = 0;330if (SLJIT_UNLIKELY(!hwcap)) {331hwcap = getauxval(AT_HWCAP);332SLJIT_ASSERT(hwcap != 0);333}334return hwcap;335}336337static SLJIT_INLINE int have_stfle(void)338{339if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))340return 1;341342return (get_hwcap() & HWCAP_S390_STFLE);343}344345/* Report whether the given facility is available. This function always346performs a runtime check. 
*/347static int have_facility_dynamic(facility_bit x)348{349#if ENABLE_DYNAMIC_FACILITY_DETECTION350static struct {351sljit_uw bits[4];352} cpu_features;353size_t size = sizeof(cpu_features);354const sljit_uw word_index = x >> 6;355const sljit_uw bit_index = ((1UL << 63) >> (x & 63));356357SLJIT_ASSERT(x < size * 8);358if (SLJIT_UNLIKELY(!have_stfle()))359return 0;360361if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {362__asm__ __volatile__ (363"lgr %%r0, %0;"364"stfle 0(%1);"365/* outputs */:366/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)367/* clobbers */: "r0", "cc", "memory"368);369SLJIT_ASSERT(cpu_features.bits[0] != 0);370}371return (cpu_features.bits[word_index] & bit_index) != 0;372#else373return 0;374#endif375}376377#define HAVE_FACILITY(name, bit) \378static SLJIT_INLINE int name() \379{ \380static int have = -1; \381/* Static check first. May allow the function to be optimized away. */ \382if (have_facility_static(bit)) \383have = 1; \384else if (SLJIT_UNLIKELY(have < 0)) \385have = have_facility_dynamic(bit) ? 
1 : 0; \386\387return have; \388}389390HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)391HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)392HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)393HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)394HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)395HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)396#undef HAVE_FACILITY397398#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)399#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)400401#define CHECK_SIGNED(v, bitlen) \402((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))403404#define is_s8(d) ((sljit_sw)(d) == (sljit_s8)(d))405#define is_s16(d) ((sljit_sw)(d) == (sljit_s16)(d))406#define is_s20(d) CHECK_SIGNED((d), 20)407#define is_s32(d) ((sljit_sw)(d) == (sljit_s32)(d))408409static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)410{411sljit_uw dh, dl;412413SLJIT_ASSERT(is_s20(d));414415dh = (d >> 12) & 0xff;416dl = ((sljit_uw)d << 8) & 0xfff00;417return (dh | dl) << 8;418}419420/* TODO(carenas): variadic macro is not strictly needed */421#define SLJIT_S390X_INSTRUCTION(op, ...) \422static SLJIT_INLINE sljit_ins op(__VA_ARGS__)423424/* RR form instructions. */425#define SLJIT_S390X_RR(name, pattern) \426SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \427{ \428return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \429}430431/* AND */432SLJIT_S390X_RR(nr, 0x1400)433434/* BRANCH AND SAVE */435SLJIT_S390X_RR(basr, 0x0d00)436437/* BRANCH ON CONDITION */438SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? 
*/439440/* DIVIDE */441SLJIT_S390X_RR(dr, 0x1d00)442443/* EXCLUSIVE OR */444SLJIT_S390X_RR(xr, 0x1700)445446/* LOAD */447SLJIT_S390X_RR(lr, 0x1800)448449/* LOAD COMPLEMENT */450SLJIT_S390X_RR(lcr, 0x1300)451452/* OR */453SLJIT_S390X_RR(or, 0x1600)454455#undef SLJIT_S390X_RR456457/* RRE form instructions */458#define SLJIT_S390X_RRE(name, pattern) \459SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \460{ \461return (pattern) | R4A(dst) | R0A(src); \462}463464/* AND */465SLJIT_S390X_RRE(ngr, 0xb9800000)466467/* DIVIDE LOGICAL */468SLJIT_S390X_RRE(dlr, 0xb9970000)469SLJIT_S390X_RRE(dlgr, 0xb9870000)470471/* DIVIDE SINGLE */472SLJIT_S390X_RRE(dsgr, 0xb90d0000)473474/* EXCLUSIVE OR */475SLJIT_S390X_RRE(xgr, 0xb9820000)476477/* LOAD */478SLJIT_S390X_RRE(lgr, 0xb9040000)479SLJIT_S390X_RRE(lgfr, 0xb9140000)480481/* LOAD BYTE */482SLJIT_S390X_RRE(lbr, 0xb9260000)483SLJIT_S390X_RRE(lgbr, 0xb9060000)484485/* LOAD COMPLEMENT */486SLJIT_S390X_RRE(lcgr, 0xb9030000)487488/* LOAD HALFWORD */489SLJIT_S390X_RRE(lhr, 0xb9270000)490SLJIT_S390X_RRE(lghr, 0xb9070000)491492/* LOAD LOGICAL */493SLJIT_S390X_RRE(llgfr, 0xb9160000)494495/* LOAD LOGICAL CHARACTER */496SLJIT_S390X_RRE(llcr, 0xb9940000)497SLJIT_S390X_RRE(llgcr, 0xb9840000)498499/* LOAD LOGICAL HALFWORD */500SLJIT_S390X_RRE(llhr, 0xb9950000)501SLJIT_S390X_RRE(llghr, 0xb9850000)502503/* MULTIPLY LOGICAL */504SLJIT_S390X_RRE(mlgr, 0xb9860000)505506/* MULTIPLY SINGLE */507SLJIT_S390X_RRE(msgfr, 0xb91c0000)508509/* OR */510SLJIT_S390X_RRE(ogr, 0xb9810000)511512/* SUBTRACT */513SLJIT_S390X_RRE(sgr, 0xb9090000)514515#undef SLJIT_S390X_RRE516517/* RI-a form instructions */518#define SLJIT_S390X_RIA(name, pattern, imm_type) \519SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \520{ \521return (pattern) | R20A(reg) | (imm & 0xffff); \522}523524/* ADD HALFWORD IMMEDIATE */525SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)526527/* LOAD HALFWORD IMMEDIATE */528SLJIT_S390X_RIA(lhi, 0xa7080000, 
sljit_s16)529SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)530531/* LOAD LOGICAL IMMEDIATE */532SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)533SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)534SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)535SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)536537/* MULTIPLY HALFWORD IMMEDIATE */538SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)539SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)540541/* OR IMMEDIATE */542SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)543544#undef SLJIT_S390X_RIA545546/* RIL-a form instructions (requires extended immediate facility) */547#define SLJIT_S390X_RILA(name, pattern, imm_type) \548SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \549{ \550SLJIT_ASSERT(have_eimm()); \551return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \552}553554/* ADD IMMEDIATE */555SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)556557/* ADD IMMEDIATE HIGH */558SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? 
*/559560/* AND IMMEDIATE */561SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)562563/* EXCLUSIVE OR IMMEDIATE */564SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)565566/* INSERT IMMEDIATE */567SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)568SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)569570/* LOAD IMMEDIATE */571SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)572573/* LOAD LOGICAL IMMEDIATE */574SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)575SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)576577/* SUBTRACT LOGICAL IMMEDIATE */578SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)579580#undef SLJIT_S390X_RILA581582/* RX-a form instructions */583#define SLJIT_S390X_RXA(name, pattern) \584SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \585{ \586SLJIT_ASSERT((d & 0xfff) == d); \587\588return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \589}590591/* LOAD */592SLJIT_S390X_RXA(l, 0x58000000)593594/* LOAD ADDRESS */595SLJIT_S390X_RXA(la, 0x41000000)596597/* LOAD HALFWORD */598SLJIT_S390X_RXA(lh, 0x48000000)599600/* MULTIPLY SINGLE */601SLJIT_S390X_RXA(ms, 0x71000000)602603/* STORE */604SLJIT_S390X_RXA(st, 0x50000000)605606/* STORE CHARACTER */607SLJIT_S390X_RXA(stc, 0x42000000)608609/* STORE HALFWORD */610SLJIT_S390X_RXA(sth, 0x40000000)611612#undef SLJIT_S390X_RXA613614/* RXY-a instructions */615#define SLJIT_S390X_RXYA(name, pattern, cond) \616SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \617{ \618SLJIT_ASSERT(cond); \619\620return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \621}622623/* LOAD */624SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())625SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)626SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)627628/* LOAD BYTE */629SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())630SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())631632/* LOAD HALFWORD */633SLJIT_S390X_RXYA(lhy, 0xe30000000078, 
have_ldisp())634SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)635636/* LOAD LOGICAL */637SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)638639/* LOAD LOGICAL CHARACTER */640SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())641SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)642643/* LOAD LOGICAL HALFWORD */644SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())645SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)646647/* MULTIPLY SINGLE */648SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())649SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)650651/* STORE */652SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())653SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)654655/* STORE CHARACTER */656SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())657658/* STORE HALFWORD */659SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())660661#undef SLJIT_S390X_RXYA662663/* RSY-a instructions */664#define SLJIT_S390X_RSYA(name, pattern, cond) \665SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \666{ \667SLJIT_ASSERT(cond); \668\669return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \670}671672/* LOAD MULTIPLE */673SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)674675/* SHIFT LEFT LOGICAL */676SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)677678/* SHIFT RIGHT SINGLE */679SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)680681/* STORE MULTIPLE */682SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)683684#undef SLJIT_S390X_RSYA685686/* RIE-f instructions (require general-instructions-extension facility) */687#define SLJIT_S390X_RIEF(name, pattern) \688SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \689{ \690sljit_ins i3, i4, i5; \691\692SLJIT_ASSERT(have_genext()); \693i3 = (sljit_ins)start << 24; \694i4 = (sljit_ins)end << 16; \695i5 = (sljit_ins)rot << 8; \696\697return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \698}699700/* ROTATE THEN AND SELECTED BITS */701/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */702703/* ROTATE THEN 
EXCLUSIVE OR SELECTED BITS */704/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */705706/* ROTATE THEN OR SELECTED BITS */707SLJIT_S390X_RIEF(rosbg, 0xec0000000056)708709/* ROTATE THEN INSERT SELECTED BITS */710/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */711/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */712713/* ROTATE THEN INSERT SELECTED BITS HIGH */714SLJIT_S390X_RIEF(risbhg, 0xec000000005d)715716/* ROTATE THEN INSERT SELECTED BITS LOW */717/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */718719#undef SLJIT_S390X_RIEF720721/* RRF-c instructions (require load/store-on-condition 1 facility) */722#define SLJIT_S390X_RRFC(name, pattern) \723SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \724{ \725sljit_ins m3; \726\727SLJIT_ASSERT(have_lscond1()); \728m3 = (sljit_ins)(mask & 0xf) << 12; \729\730return (pattern) | m3 | R4A(dst) | R0A(src); \731}732733/* LOAD HALFWORD IMMEDIATE ON CONDITION */734SLJIT_S390X_RRFC(locr, 0xb9f20000)735SLJIT_S390X_RRFC(locgr, 0xb9e20000)736737#undef SLJIT_S390X_RRFC738739/* RIE-g instructions (require load/store-on-condition 2 facility) */740#define SLJIT_S390X_RIEG(name, pattern) \741SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \742{ \743sljit_ins m3, i2; \744\745SLJIT_ASSERT(have_lscond2()); \746m3 = (sljit_ins)(mask & 0xf) << 32; \747i2 = (sljit_ins)(imm & 0xffffL) << 16; \748\749return (pattern) | R36A(reg) | m3 | i2; \750}751752/* LOAD HALFWORD IMMEDIATE ON CONDITION */753SLJIT_S390X_RIEG(lochi, 0xec0000000042)754SLJIT_S390X_RIEG(locghi, 0xec0000000046)755756#undef SLJIT_S390X_RIEG757758#define SLJIT_S390X_RILB(name, pattern, cond) \759SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \760{ \761SLJIT_ASSERT(cond); \762\763return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \764}765766/* BRANCH RELATIVE AND SAVE LONG */767SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)768769/* LOAD ADDRESS RELATIVE LONG */770SLJIT_S390X_RILB(larl, 0xc00000000000, 1)771772/* LOAD RELATIVE 
LONG */773SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())774775#undef SLJIT_S390X_RILB776777SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)778{779return 0x07f0 | target;780}781782SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)783{784sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;785sljit_ins ri2 = (sljit_ins)target & 0xffff;786return 0xa7040000L | m1 | ri2;787}788789SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)790{791sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;792sljit_ins ri2 = (sljit_ins)target & 0xffffffff;793return 0xc00400000000L | m1 | ri2;794}795796SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)797{798SLJIT_ASSERT(have_eimm());799return 0xb9830000 | R8A(dst) | R0A(src);800}801802/* INSERT PROGRAM MASK */803SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)804{805return 0xb2220000 | R4A(dst);806}807808/* SET PROGRAM MASK */809SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)810{811return 0x0400 | R4A(dst);812}813814/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */815SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)816{817return risbhg(dst, src, start, 0x8 | end, rot);818}819820#undef SLJIT_S390X_INSTRUCTION821822static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)823{824/* Condition codes: bits 18 and 19.825Transformation:8260 (zero and no overflow) : unchanged8271 (non-zero and no overflow) : unchanged8282 (zero and overflow) : decreased by 18293 (non-zero and overflow) : decreased by 1 if non-zero */830FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));831FAIL_IF(push_inst(compiler, ipm(tmp1)));832FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? 
or(dst_r, dst_r) : ogr(dst_r, dst_r)));833FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));834FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));835FAIL_IF(push_inst(compiler, spm(tmp1)));836return SLJIT_SUCCESS;837}838839/* load 64-bit immediate into register without clobbering flags */840static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)841{842/* 4 byte instructions */843if (is_s16(v))844return push_inst(compiler, lghi(target, (sljit_s16)v));845846if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)847return push_inst(compiler, llill(target, (sljit_u16)v));848849if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)850return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));851852if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)853return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));854855if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)856return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));857858if (is_s32(v))859return push_inst(compiler, lgfi(target, (sljit_s32)v));860861if (((sljit_uw)v >> 32) == 0)862return push_inst(compiler, llilf(target, (sljit_u32)v));863864if (((sljit_uw)v << 32) == 0)865return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));866867FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));868return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));869}870871struct addr {872sljit_gpr base;873sljit_gpr index;874sljit_s32 offset;875};876877/* transform memory operand into D(X,B) form with a signed 20-bit offset */878static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,879struct addr *addr, sljit_s32 mem, sljit_sw off,880sljit_gpr tmp /* clobbered, must not be r0 */)881{882sljit_gpr base = r0;883sljit_gpr index = r0;884885SLJIT_ASSERT(tmp != r0);886if (mem & REG_MASK)887base = gpr(mem & REG_MASK);888889if (mem & OFFS_REG_MASK) {890index = gpr(OFFS_REG(mem));891if (off != 0) {892/* shift and put the result into tmp 
*/893SLJIT_ASSERT(0 <= off && off < 64);894FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));895index = tmp;896off = 0; /* clear offset */897}898}899else if (!is_s20(off)) {900FAIL_IF(push_load_imm_inst(compiler, tmp, off));901index = tmp;902off = 0; /* clear offset */903}904addr->base = base;905addr->index = index;906addr->offset = (sljit_s32)off;907return SLJIT_SUCCESS;908}909910/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */911static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,912struct addr *addr, sljit_s32 mem, sljit_sw off,913sljit_gpr tmp /* clobbered, must not be r0 */)914{915sljit_gpr base = r0;916sljit_gpr index = r0;917918SLJIT_ASSERT(tmp != r0);919if (mem & REG_MASK)920base = gpr(mem & REG_MASK);921922if (mem & OFFS_REG_MASK) {923index = gpr(OFFS_REG(mem));924if (off != 0) {925/* shift and put the result into tmp */926SLJIT_ASSERT(0 <= off && off < 64);927FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));928index = tmp;929off = 0; /* clear offset */930}931}932else if (!is_u12(off)) {933FAIL_IF(push_load_imm_inst(compiler, tmp, off));934index = tmp;935off = 0; /* clear offset */936}937addr->base = base;938addr->index = index;939addr->offset = (sljit_s32)off;940return SLJIT_SUCCESS;941}942943#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)944#define WHEN(cond, r, i1, i2, addr) \945(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)946947/* May clobber tmp1. 
*/948static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,949sljit_s32 mem, sljit_sw memw,950sljit_s32 is_32bit, const sljit_ins* forms)951{952struct addr addr;953954SLJIT_ASSERT(mem & SLJIT_MEM);955956if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {957FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));958return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);959}960961FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));962return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));963}964965static const sljit_ins load_forms[3] = {9660x58000000 /* l */,9670xe30000000058 /* ly */,9680xe30000000004 /* lg */969};970971static const sljit_ins store_forms[3] = {9720x50000000 /* st */,9730xe30000000050 /* sty */,9740xe30000000024 /* stg */975};976977static const sljit_ins store_byte_forms[3] = {9780x42000000 /* stc */,9790xe30000000072 /* stcy */,9800981};982983static const sljit_ins load_halfword_forms[3] = {9840x48000000 /* lh */,9850xe30000000078 /* lhy */,9860xe30000000015 /* lgh */987};988989/* May clobber tmp1. */990static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,991sljit_s32 src, sljit_sw srcw,992sljit_s32 is_32bit)993{994return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);995}996997/* May clobber tmp1. */998static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,999sljit_s32 src, sljit_sw srcw,1000sljit_s32 is_32bit)1001{1002struct addr addr;1003sljit_ins ins;10041005SLJIT_ASSERT(src & SLJIT_MEM);10061007FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));10081009ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;1010return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));1011}10121013/* May clobber tmp1. 
*/1014static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,1015sljit_s32 dst, sljit_sw dstw,1016sljit_s32 is_32bit)1017{1018return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);1019}10201021/* May clobber tmp1. */1022static SLJIT_INLINE sljit_s32 store_byte(struct sljit_compiler *compiler, sljit_gpr src_r,1023sljit_s32 dst, sljit_sw dstw)1024{1025return load_store_op(compiler, src_r, dst, dstw, 1, store_byte_forms);1026}10271028#undef WHEN10291030static sljit_s32 emit_move(struct sljit_compiler *compiler,1031sljit_gpr dst_r,1032sljit_s32 src, sljit_sw srcw)1033{1034sljit_gpr src_r;10351036SLJIT_ASSERT(!FAST_IS_REG(src) || dst_r != gpr(src & REG_MASK));10371038if (src == SLJIT_IMM)1039return push_load_imm_inst(compiler, dst_r, srcw);10401041if (src & SLJIT_MEM)1042return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);10431044src_r = gpr(src & REG_MASK);1045return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));1046}10471048static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,1049sljit_s32 dst,1050sljit_s32 src1, sljit_sw src1w,1051sljit_s32 src2, sljit_sw src2w)1052{1053sljit_gpr dst_r = tmp0;1054sljit_gpr src_r = tmp1;1055sljit_s32 needs_move = 1;10561057if (FAST_IS_REG(dst)) {1058dst_r = gpr(dst);10591060if (dst == src1)1061needs_move = 0;1062else if (dst == src2) {1063dst_r = tmp0;1064needs_move = 2;1065}1066}10671068if (needs_move)1069FAIL_IF(emit_move(compiler, dst_r, src1, src1w));10701071if (FAST_IS_REG(src2))1072src_r = gpr(src2);1073else1074FAIL_IF(emit_move(compiler, tmp1, src2, src2w));10751076FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));10771078if (needs_move != 2)1079return SLJIT_SUCCESS;10801081dst_r = gpr(dst & REG_MASK);1082return push_inst(compiler, (compiler->mode & SLJIT_32) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0));1083}10841085static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,1086sljit_s32 dst,1087sljit_s32 src1, sljit_sw src1w)1088{1089sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;1090sljit_gpr src_r = tmp1;10911092if (FAST_IS_REG(src1))1093src_r = gpr(src1);1094else1095FAIL_IF(emit_move(compiler, tmp1, src1, src1w));10961097return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));1098}10991100static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,1101sljit_s32 dst,1102sljit_s32 src1, sljit_sw src1w,1103sljit_s32 src2, sljit_sw src2w)1104{1105sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;1106sljit_gpr src1_r = tmp0;1107sljit_gpr src2_r = tmp1;11081109if (FAST_IS_REG(src1))1110src1_r = gpr(src1);1111else1112FAIL_IF(emit_move(compiler, tmp0, src1, src1w));11131114if (FAST_IS_REG(src2))1115src2_r = gpr(src2);1116else1117FAIL_IF(emit_move(compiler, tmp1, src2, src2w));11181119return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));1120}11211122typedef enum {1123RI_A,1124RIL_A,1125} emit_ril_type;11261127static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,1128sljit_s32 dst,1129sljit_s32 src1, sljit_sw src1w,1130sljit_sw src2w,1131emit_ril_type type)1132{1133sljit_gpr dst_r = tmp0;1134sljit_s32 needs_move = 1;11351136if (FAST_IS_REG(dst)) {1137dst_r = gpr(dst);11381139if (dst == src1)1140needs_move = 0;1141}11421143if (needs_move)1144FAIL_IF(emit_move(compiler, dst_r, src1, src1w));11451146if (type == RIL_A)1147return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));1148return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));1149}11501151static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,1152sljit_s32 dst,1153sljit_s32 src1, sljit_sw src1w,1154sljit_sw src2w)1155{1156sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0;1157sljit_gpr src_r = tmp0;11581159if (!FAST_IS_REG(src1))1160FAIL_IF(emit_move(compiler, tmp0, src1, src1w));1161else1162src_r = gpr(src1 & REG_MASK);11631164return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);1165}11661167typedef enum {1168RX_A,1169RXY_A,1170} emit_rx_type;11711172static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,1173sljit_s32 dst,1174sljit_s32 src1, sljit_sw src1w,1175sljit_s32 src2, sljit_sw src2w,1176emit_rx_type type)1177{1178sljit_gpr dst_r = tmp0;1179sljit_s32 needs_move = 1;1180sljit_gpr base, index;11811182SLJIT_ASSERT(src2 & SLJIT_MEM);11831184if (FAST_IS_REG(dst)) {1185dst_r = gpr(dst);11861187if (dst == src1)1188needs_move = 0;1189else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {1190dst_r = tmp0;1191needs_move = 2;1192}1193}11941195if (needs_move)1196FAIL_IF(emit_move(compiler, dst_r, src1, src1w));11971198base = gpr(src2 & REG_MASK);1199index = tmp0;12001201if (src2 & OFFS_REG_MASK) {1202index = gpr(OFFS_REG(src2));12031204if (src2w != 0) {1205FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));1206src2w = 0;1207index = tmp1;1208}1209} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {1210FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));12111212if (src2 & REG_MASK)1213index = tmp1;1214else1215base = tmp1;1216src2w = 0;1217}12181219if (type == RX_A)1220ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;1221else1222ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);12231224FAIL_IF(push_inst(compiler, ins));12251226if (needs_move != 2)1227return SLJIT_SUCCESS;12281229dst_r = gpr(dst);1230return push_inst(compiler, (compiler->mode & SLJIT_32) ? 
lr(dst_r, tmp0) : lgr(dst_r, tmp0));1231}12321233static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,1234sljit_s32 dst, sljit_sw dstw,1235sljit_sw srcw)1236{1237sljit_gpr dst_r = tmp1;12381239SLJIT_ASSERT(dst & SLJIT_MEM);12401241if (dst & OFFS_REG_MASK) {1242sljit_gpr index = tmp1;12431244if ((dstw & 0x3) == 0)1245index = gpr(OFFS_REG(dst));1246else1247FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));12481249FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));1250dstw = 0;1251}1252else if (!is_s20(dstw)) {1253FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));12541255if (dst & REG_MASK)1256FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));12571258dstw = 0;1259}1260else1261dst_r = gpr(dst & REG_MASK);12621263return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));1264}12651266struct ins_forms {1267sljit_ins op_r;1268sljit_ins op_gr;1269sljit_ins op_rk;1270sljit_ins op_grk;1271sljit_ins op;1272sljit_ins op_y;1273sljit_ins op_g;1274};12751276static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,1277sljit_s32 dst,1278sljit_s32 src1, sljit_sw src1w,1279sljit_s32 src2, sljit_sw src2w)1280{1281sljit_s32 mode = compiler->mode;1282sljit_ins ins, ins_k;12831284if ((src1 | src2) & SLJIT_MEM) {1285sljit_ins ins12, ins20;12861287if (mode & SLJIT_32) {1288ins12 = forms->op;1289ins20 = forms->op_y;1290}1291else {1292ins12 = 0;1293ins20 = forms->op_g;1294}12951296if (ins12 && ins20) {1297/* Extra instructions needed for address computation can be executed independently. */1298if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)1299|| ((src1 & OFFS_REG_MASK) ? 
(src1w & 0x3) == 0 : is_s20(src1w)))) {1300if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))1301return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);13021303return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1304}13051306if (src1 & SLJIT_MEM) {1307if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))1308return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);13091310return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);1311}1312}1313else if (ins12 || ins20) {1314emit_rx_type rx_type;13151316if (ins12) {1317rx_type = RX_A;1318ins = ins12;1319}1320else {1321rx_type = RXY_A;1322ins = ins20;1323}13241325if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)1326|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))1327return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);13281329if (src1 & SLJIT_MEM)1330return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);1331}1332}13331334if (mode & SLJIT_32) {1335ins = forms->op_r;1336ins_k = forms->op_rk;1337}1338else {1339ins = forms->op_gr;1340ins_k = forms->op_grk;1341}13421343SLJIT_ASSERT(ins != 0 || ins_k != 0);13441345if (ins && FAST_IS_REG(dst)) {1346if (dst == src1)1347return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);13481349if (dst == src2)1350return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);1351}13521353if (ins_k == 0)1354return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);13551356return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);1357}13581359static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,1360sljit_s32 dst,1361sljit_s32 src1, sljit_sw src1w,1362sljit_s32 src2, sljit_sw src2w)1363{1364sljit_s32 mode = compiler->mode;1365sljit_ins ins;13661367if (src2 & SLJIT_MEM) {1368sljit_ins ins12, ins20;13691370if (mode & SLJIT_32) {1371ins12 = forms->op;1372ins20 = 
forms->op_y;1373}1374else {1375ins12 = 0;1376ins20 = forms->op_g;1377}13781379if (ins12 && ins20) {1380if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))1381return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);13821383return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1384}1385else if (ins12)1386return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);1387else if (ins20)1388return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);1389}13901391ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;13921393if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))1394return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);13951396return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);1397}13981399static SLJIT_INLINE sljit_u16 *process_extended_label(sljit_u16 *code_ptr, struct sljit_extended_label *ext_label)1400{1401SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);1402return (sljit_u16*)((sljit_uw)code_ptr & ~(ext_label->data));1403}14041405SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)1406{1407struct sljit_label *label;1408struct sljit_jump *jump;1409struct sljit_const *const_;1410sljit_sw executable_offset;1411sljit_uw ins_size = compiler->size << 1;1412sljit_uw pool_size = 0; /* literal pool */1413sljit_uw pad_size;1414sljit_uw half_count;1415SLJIT_NEXT_DEFINE_TYPES;1416struct sljit_memory_fragment *buf;1417sljit_ins *buf_ptr;1418sljit_ins *buf_end;1419sljit_u16 *code;1420sljit_u16 *code_ptr;1421sljit_uw *pool, *pool_ptr;1422sljit_ins ins;1423sljit_sw source, offset;14241425CHECK_ERROR_PTR();1426CHECK_PTR(check_sljit_generate_code(compiler, options));1427reverse_buf(compiler);14281429jump = compiler->jumps;1430while (jump != NULL) {1431if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {1432/* encoded: */1433/* brasl %r14, <rel_addr> (or brcl 
<mask>, <rel_addr>) */1434/* replace with: */1435/* lgrl %r1, <pool_addr> */1436/* bras %r14, %r1 (or bcr <mask>, %r1) */1437if (((jump->flags & SLJIT_REWRITABLE_JUMP) || !is_s32(jump->u.target)))1438pool_size += sizeof(sljit_uw);1439else1440jump->flags |= PATCH_IMM32;14411442if (!(jump->flags & JUMP_MOV_ADDR))1443ins_size += 2;1444}1445jump = jump->next;1446}14471448/* pad code size to 8 bytes so is accessible with half word offsets */1449/* the literal pool needs to be doubleword aligned */1450pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;1451SLJIT_ASSERT(pad_size < 8UL);14521453/* allocate target buffer */1454code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);1455PTR_FAIL_WITH_EXEC_IF(code);1456code_ptr = code;14571458/* TODO(carenas): pool is optional, and the ABI recommends it to1459* be created before the function code, instead of1460* globally; if generated code is too big could1461* need offsets bigger than 32bit words and asser()1462*/1463pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);1464pool_ptr = pool;1465buf = compiler->buf;1466half_count = 0;14671468label = compiler->labels;1469jump = compiler->jumps;1470const_ = compiler->consts;1471SLJIT_NEXT_INIT_TYPES();1472SLJIT_GET_NEXT_MIN();14731474do {1475buf_ptr = (sljit_ins*)buf->memory;1476buf_end = buf_ptr + (buf->used_size >> 3);1477do {1478ins = *buf_ptr++;14791480if (next_min_addr == half_count) {1481SLJIT_ASSERT(!label || label->size >= half_count);1482SLJIT_ASSERT(!jump || jump->addr >= half_count);1483SLJIT_ASSERT(!const_ || const_->addr >= half_count);14841485if (next_min_addr == next_label_size) {1486if (label->u.index >= SLJIT_LABEL_ALIGNED)1487code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);14881489label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1490label = label->next;1491next_label_size = SLJIT_GET_NEXT_SIZE(label);1492}14931494if 
(next_min_addr == next_jump_addr) {1495jump->addr = (sljit_uw)code_ptr;14961497if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {1498if (jump->flags & PATCH_IMM32) {1499SLJIT_ASSERT((jump->flags & JUMP_ADDR) && is_s32(jump->u.target));1500ins = 0xc00100000000 /* lgfi */ | (ins & 0xf000000000);1501} else if (jump->flags & JUMP_ADDR) {1502source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1503offset = (sljit_sw)(jump->u.target - (sljit_uw)source);15041505if ((offset & 0x1) != 0 || offset > 0xffffffffl || offset < -0x100000000l) {1506jump->addr = (sljit_uw)pool_ptr;1507jump->flags |= PATCH_POOL;15081509/* store target into pool */1510offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;1511pool_ptr++;15121513SLJIT_ASSERT(!(offset & 1));1514offset >>= 1;1515SLJIT_ASSERT(is_s32(offset));1516ins = 0xc40800000000 /* lgrl */ | (ins & 0xf000000000) | (sljit_ins)(offset & 0xffffffff);1517}1518}1519} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {1520source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);15211522if (jump->flags & PATCH_IMM32) {1523SLJIT_ASSERT((jump->flags & JUMP_ADDR) && is_s32(jump->u.target));1524code_ptr[0] = (sljit_u16)(0xc001 /* lgfi */ | R4A(tmp1));1525code_ptr += 3;1526} else if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {1527offset = (sljit_sw)(jump->u.target - (sljit_uw)source);15281529if ((offset & 0x1) != 0 || offset > 0xffffffffl || offset < -0x100000000l)1530jump->flags |= PATCH_POOL;1531} else1532jump->flags |= PATCH_POOL;15331534if (jump->flags & PATCH_POOL) {1535jump->addr = (sljit_uw)pool_ptr;15361537/* load address into tmp1 */1538offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;15391540SLJIT_ASSERT(!(offset & 1));1541offset >>= 1;1542SLJIT_ASSERT(is_s32(offset));15431544code_ptr[0] = (sljit_u16)(0xc408 /* lgrl */ | R4A(tmp1));1545code_ptr[1] = (sljit_u16)(offset >> 16);1546code_ptr[2] = (sljit_u16)offset;1547code_ptr += 
3;1548pool_ptr++;1549}15501551if (jump->flags & (PATCH_POOL | PATCH_IMM32)) {1552/* branch to tmp1 */1553if (((ins >> 32) & 0xf) == 4) {1554/* brcl -> bcr */1555ins = 0x0700 /* bcr */ | ((ins >> 32) & 0xf0) | R0A(tmp1);1556} else {1557SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);1558/* brasl -> basr */1559ins = 0x0d00 /* basr */ | ((ins >> 32) & 0xf0) | R0A(tmp1);1560}15611562/* Adjust half_count. */1563half_count += 2;1564}1565}15661567jump = jump->next;1568next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);1569} else if (next_min_addr == next_const_addr) {1570const_->addr = (sljit_uw)code_ptr;1571const_ = const_->next;1572next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);1573}15741575SLJIT_GET_NEXT_MIN();1576}15771578if (ins & 0xffff00000000L) {1579*code_ptr++ = (sljit_u16)(ins >> 32);1580half_count++;1581}15821583if (ins & 0xffffffff0000L) {1584*code_ptr++ = (sljit_u16)(ins >> 16);1585half_count++;1586}15871588*code_ptr++ = (sljit_u16)ins;1589half_count++;1590} while (buf_ptr < buf_end);15911592buf = buf->next;1593} while (buf);15941595if (next_label_size == half_count) {1596if (label->u.index >= SLJIT_LABEL_ALIGNED)1597code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);15981599label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1600label = label->next;1601}16021603SLJIT_ASSERT(!label);1604SLJIT_ASSERT(!jump);1605SLJIT_ASSERT(!const_);1606SLJIT_ASSERT(code_ptr <= code + (ins_size >> 1));1607SLJIT_ASSERT((sljit_u8 *)pool_ptr <= (sljit_u8 *)pool + pool_size);16081609jump = compiler->jumps;1610while (jump != NULL) {1611offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);16121613if (!(jump->flags & (PATCH_POOL | PATCH_IMM32))) {1614code_ptr = (sljit_u16*)jump->addr;1615offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);16161617/* Offset must be halfword aligned. 
*/1618SLJIT_ASSERT(!(offset & 1));1619offset >>= 1;1620SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */16211622code_ptr[1] = (sljit_u16)(offset >> 16);1623code_ptr[2] = (sljit_u16)offset;1624} else if (jump->flags & PATCH_POOL) {1625/* Store jump target into pool. */1626*(sljit_uw*)(jump->addr) = (sljit_uw)offset;1627} else {1628SLJIT_ASSERT(is_s32(offset));1629code_ptr = (sljit_u16*)jump->addr;1630code_ptr[1] = (sljit_u16)(offset >> 16);1631code_ptr[2] = (sljit_u16)offset;1632}1633jump = jump->next;1634}16351636compiler->error = SLJIT_ERR_COMPILED;1637compiler->executable_offset = executable_offset;1638compiler->executable_size = ins_size;1639if (pool_size)1640compiler->executable_size += (pad_size + pool_size);16411642code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);1643code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);1644SLJIT_CACHE_FLUSH(code, code_ptr);1645SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);1646return code;1647}16481649SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)1650{1651/* TODO(mundaym): implement all */1652switch (feature_type) {1653case SLJIT_HAS_FPU:1654#ifdef SLJIT_IS_FPU_AVAILABLE1655return (SLJIT_IS_FPU_AVAILABLE) != 0;1656#else1657return 1;1658#endif /* SLJIT_IS_FPU_AVAILABLE */16591660case SLJIT_HAS_CLZ:1661case SLJIT_HAS_REV:1662case SLJIT_HAS_ROT:1663case SLJIT_HAS_PREFETCH:1664case SLJIT_HAS_COPY_F32:1665case SLJIT_HAS_COPY_F64:1666case SLJIT_HAS_SIMD:1667case SLJIT_HAS_ATOMIC:1668case SLJIT_HAS_MEMORY_BARRIER:1669return 1;16701671case SLJIT_HAS_CTZ:1672return 2;16731674case SLJIT_HAS_CMOV:1675return have_lscond1() ? 
1 : 0;1676}1677return 0;1678}16791680SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)1681{1682SLJIT_UNUSED_ARG(type);1683return 0;1684}16851686/* --------------------------------------------------------------------- */1687/* Entry, exit */1688/* --------------------------------------------------------------------- */16891690SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,1691sljit_s32 options, sljit_s32 arg_types,1692sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)1693{1694sljit_s32 fscratches;1695sljit_s32 fsaveds;1696sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);1697sljit_s32 offset, i, tmp;16981699CHECK_ERROR();1700CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));1701set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);17021703/* Saved registers are stored in callee allocated save area. */1704SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);17051706scratches = ENTER_GET_REGS(scratches);1707saveds = ENTER_GET_REGS(saveds);1708fscratches = compiler->fscratches;1709fsaveds = compiler->fsaveds;17101711offset = 2 * SSIZE_OF(sw);1712if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {1713if (saved_arg_count == 0) {1714FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));1715offset += 9 * SSIZE_OF(sw);1716} else {1717FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));1718offset += (8 - saved_arg_count) * SSIZE_OF(sw);1719}1720} else {1721if (scratches == SLJIT_FIRST_SAVED_REG) {1722FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));1723offset += SSIZE_OF(sw);1724} else if (scratches > SLJIT_FIRST_SAVED_REG) {1725FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));1726offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);1727}17281729if (saved_arg_count == 0) {1730if (saveds == 0) 
{1731FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));1732offset += SSIZE_OF(sw);1733} else {1734FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));1735offset += (saveds + 1) * SSIZE_OF(sw);1736}1737} else if (saveds > saved_arg_count) {1738if (saveds == saved_arg_count + 1) {1739FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));1740offset += SSIZE_OF(sw);1741} else {1742FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));1743offset += (saveds - saved_arg_count) * SSIZE_OF(sw);1744}1745}1746}17471748if (saved_arg_count > 0) {1749FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));1750offset += SSIZE_OF(sw);1751}17521753tmp = SLJIT_FS0 - fsaveds;1754for (i = SLJIT_FS0; i > tmp; i--) {1755FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));1756offset += SSIZE_OF(sw);1757}17581759for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {1760FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));1761offset += SSIZE_OF(sw);1762}17631764local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;1765compiler->local_size = local_size;17661767if (is_s20(-local_size))1768FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));1769else1770FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));17711772if (options & SLJIT_ENTER_REG_ARG)1773return SLJIT_SUCCESS;17741775arg_types >>= SLJIT_ARG_SHIFT;1776saved_arg_count = 0;1777tmp = 0;1778while (arg_types > 0) {1779if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {1780if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {1781FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));1782saved_arg_count++;1783}1784tmp++;1785}17861787arg_types >>= SLJIT_ARG_SHIFT;1788}17891790return 
SLJIT_SUCCESS;1791}17921793SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,1794sljit_s32 options, sljit_s32 arg_types,1795sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)1796{1797CHECK_ERROR();1798CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));1799set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);18001801compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;1802return SLJIT_SUCCESS;1803}18041805static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)1806{1807sljit_s32 offset, i, tmp;1808sljit_s32 local_size = compiler->local_size;1809sljit_s32 saveds = compiler->saveds;1810sljit_s32 scratches = compiler->scratches;1811sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);18121813if (is_u12(local_size))1814FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));1815else if (is_s20(local_size))1816FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));1817else1818FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));18191820offset = 2 * SSIZE_OF(sw);1821if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {1822if (kept_saveds_count == 0) {1823FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));1824offset += 9 * SSIZE_OF(sw);1825} else {1826FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));1827offset += (8 - kept_saveds_count) * SSIZE_OF(sw);1828}1829} else {1830if (scratches == SLJIT_FIRST_SAVED_REG) {1831FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));1832offset += SSIZE_OF(sw);1833} else if (scratches > SLJIT_FIRST_SAVED_REG) {1834FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));1835offset += (scratches - (SLJIT_FIRST_SAVED_REG 
- 1)) * SSIZE_OF(sw);1836}18371838if (kept_saveds_count == 0) {1839if (saveds == 0) {1840if (last_reg == r14)1841FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));1842offset += SSIZE_OF(sw);1843} else if (saveds == 1 && last_reg == r13) {1844FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));1845offset += 2 * SSIZE_OF(sw);1846} else {1847FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));1848offset += (saveds + 1) * SSIZE_OF(sw);1849}1850} else if (saveds > kept_saveds_count) {1851if (saveds == kept_saveds_count + 1) {1852FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));1853offset += SSIZE_OF(sw);1854} else {1855FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));1856offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);1857}1858}1859}18601861if (kept_saveds_count > 0) {1862if (last_reg == r14)1863FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));1864offset += SSIZE_OF(sw);1865}18661867tmp = SLJIT_FS0 - compiler->fsaveds;1868for (i = SLJIT_FS0; i > tmp; i--) {1869FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));1870offset += SSIZE_OF(sw);1871}18721873for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {1874FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));1875offset += SSIZE_OF(sw);1876}18771878return SLJIT_SUCCESS;1879}18801881SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)1882{1883CHECK_ERROR();1884CHECK(check_sljit_emit_return_void(compiler));18851886FAIL_IF(emit_stack_frame_release(compiler, r14));1887return push_inst(compiler, br(r14)); /* return */1888}18891890SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,1891sljit_s32 src, sljit_sw srcw)1892{1893CHECK_ERROR();1894CHECK(check_sljit_emit_return_to(compiler, src, srcw));18951896if (src & SLJIT_MEM) 
{1897ADJUST_LOCAL_OFFSET(src, srcw);1898FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));1899src = TMP_REG2;1900srcw = 0;1901} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {1902FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));1903src = TMP_REG2;1904srcw = 0;1905}19061907FAIL_IF(emit_stack_frame_release(compiler, r13));19081909SLJIT_SKIP_CHECKS(compiler);1910return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);1911}19121913/* --------------------------------------------------------------------- */1914/* Operators */1915/* --------------------------------------------------------------------- */19161917SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)1918{1919sljit_gpr arg0 = gpr(SLJIT_R0);1920sljit_gpr arg1 = gpr(SLJIT_R1);19211922CHECK_ERROR();1923CHECK(check_sljit_emit_op0(compiler, op));19241925op = GET_OPCODE(op) | (op & SLJIT_32);1926switch (op) {1927case SLJIT_BREAKPOINT:1928/* The following invalid instruction is emitted by gdb. */1929return push_inst(compiler, 0x0001 /* 2-byte trap */);1930case SLJIT_NOP:1931return push_inst(compiler, 0x0700 /* 2-byte nop */);1932case SLJIT_LMUL_UW:1933FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));1934break;1935case SLJIT_LMUL_SW:1936/* signed multiplication from: */1937/* Hacker's Delight, Second Edition: Chapter 8-3. 
*/1938FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));1939FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));1940FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));1941FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));19421943/* unsigned multiplication */1944FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));19451946FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));1947FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));1948break;1949case SLJIT_DIV_U32:1950case SLJIT_DIVMOD_U32:1951FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));1952FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));1953FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));1954FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */1955if (op == SLJIT_DIVMOD_U32)1956return push_inst(compiler, lr(arg1, tmp0)); /* remainder */19571958return SLJIT_SUCCESS;1959case SLJIT_DIV_S32:1960case SLJIT_DIVMOD_S32:1961FAIL_IF(push_inst(compiler, 0xeb00000000dc /* srak */ | R36A(tmp0) | R32A(arg0) | (31 << 16)));1962FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));1963FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));1964FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */1965if (op == SLJIT_DIVMOD_S32)1966return push_inst(compiler, lr(arg1, tmp0)); /* remainder */19671968return SLJIT_SUCCESS;1969case SLJIT_DIV_UW:1970case SLJIT_DIVMOD_UW:1971FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));1972FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));1973FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));1974FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */1975if (op == SLJIT_DIVMOD_UW)1976return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */19771978return SLJIT_SUCCESS;1979case SLJIT_DIV_SW:1980case SLJIT_DIVMOD_SW:1981FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));1982FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));1983FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */1984if (op == SLJIT_DIVMOD_SW)1985return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */19861987return SLJIT_SUCCESS;1988case SLJIT_MEMORY_BARRIER:1989return 
push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0);1990case SLJIT_ENDBR:1991return SLJIT_SUCCESS;1992case SLJIT_SKIP_FRAMES_BEFORE_RETURN:1993return SLJIT_SUCCESS;1994default:1995SLJIT_UNREACHABLE();1996}1997/* swap result registers */1998FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));1999FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));2000return push_inst(compiler, lgr(arg1, tmp0));2001}20022003static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)2004{2005sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);20062007if ((op & SLJIT_32) && src_r != tmp0) {2008FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));2009src_r = tmp0;2010}20112012if (is_ctz) {2013FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));20142015if (src_r == tmp0)2016FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));2017else2018FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));20192020src_r = tmp0;2021}20222023FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));20242025if (is_ctz)2026FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));20272028if (op & SLJIT_32) {2029if (!is_ctz && dst_r != tmp0)2030return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));20312032FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));2033}20342035if (is_ctz)2036FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));20372038if (dst_r == tmp0)2039return SLJIT_SUCCESS;20402041return push_inst(compiler, ((op & SLJIT_32) ? 
0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));2042}20432044static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,2045sljit_s32 dst, sljit_sw dstw,2046sljit_s32 src, sljit_sw srcw)2047{2048struct addr addr;2049sljit_gpr reg;2050sljit_ins ins;2051sljit_s32 opcode = GET_OPCODE(op);2052sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);20532054if (dst & SLJIT_MEM) {2055if (src & SLJIT_MEM) {2056FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));2057reg = tmp0;2058} else2059reg = gpr(src);20602061FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));20622063if (is_16bit)2064ins = 0xe3000000003f /* strvh */;2065else2066ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;20672068return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));2069}20702071reg = gpr(dst);20722073if (src & SLJIT_MEM) {2074FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));20752076if (is_16bit)2077ins = 0xe3000000001f /* lrvh */;2078else2079ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;20802081FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));20822083if (opcode == SLJIT_REV)2084return SLJIT_SUCCESS;20852086if (is_16bit) {2087if (op & SLJIT_32)2088ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;2089else2090ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;2091} else2092ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;20932094return push_inst(compiler, ins | R4A(reg) | R0A(reg));2095}20962097ins = (op & SLJIT_32) ? 
0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;2098FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));20992100if (opcode == SLJIT_REV)2101return SLJIT_SUCCESS;21022103if (!is_16bit) {2104ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;2105return push_inst(compiler, ins | R4A(reg) | R0A(reg));2106}21072108if (op & SLJIT_32) {2109ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;2110return push_inst(compiler, ins | R20A(reg) | 16);2111}21122113ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;2114return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));2115}21162117/* LEVAL will be defined later with different parameters as needed */2118#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)21192120SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,2121sljit_s32 dst, sljit_sw dstw,2122sljit_s32 src, sljit_sw srcw)2123{2124sljit_ins ins;2125struct addr mem;2126sljit_gpr dst_r;2127sljit_gpr src_r;2128sljit_s32 opcode = GET_OPCODE(op);21292130CHECK_ERROR();2131CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));2132ADJUST_LOCAL_OFFSET(dst, dstw);2133ADJUST_LOCAL_OFFSET(src, srcw);21342135if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {2136/* LOAD REGISTER */2137if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {2138dst_r = gpr(dst);2139src_r = gpr(src);2140switch (opcode | (op & SLJIT_32)) {2141/* 32-bit */2142case SLJIT_MOV32_U8:2143ins = llcr(dst_r, src_r);2144break;2145case SLJIT_MOV32_S8:2146ins = lbr(dst_r, src_r);2147break;2148case SLJIT_MOV32_U16:2149ins = llhr(dst_r, src_r);2150break;2151case SLJIT_MOV32_S16:2152ins = lhr(dst_r, src_r);2153break;2154case SLJIT_MOV32:2155if (dst_r == src_r)2156return SLJIT_SUCCESS;2157ins = lr(dst_r, src_r);2158break;2159/* 64-bit */2160case SLJIT_MOV_U8:2161ins = llgcr(dst_r, src_r);2162break;2163case SLJIT_MOV_S8:2164ins = lgbr(dst_r, 
src_r);2165break;2166case SLJIT_MOV_U16:2167ins = llghr(dst_r, src_r);2168break;2169case SLJIT_MOV_S16:2170ins = lghr(dst_r, src_r);2171break;2172case SLJIT_MOV_U32:2173ins = llgfr(dst_r, src_r);2174break;2175case SLJIT_MOV_S32:2176ins = lgfr(dst_r, src_r);2177break;2178case SLJIT_MOV:2179case SLJIT_MOV_P:2180if (dst_r == src_r)2181return SLJIT_SUCCESS;2182ins = lgr(dst_r, src_r);2183break;2184default:2185ins = 0;2186SLJIT_UNREACHABLE();2187break;2188}2189FAIL_IF(push_inst(compiler, ins));2190return SLJIT_SUCCESS;2191}2192/* LOAD IMMEDIATE */2193if (FAST_IS_REG(dst) && src == SLJIT_IMM) {2194switch (opcode) {2195case SLJIT_MOV_U8:2196srcw = (sljit_sw)((sljit_u8)(srcw));2197break;2198case SLJIT_MOV_S8:2199srcw = (sljit_sw)((sljit_s8)(srcw));2200break;2201case SLJIT_MOV_U16:2202srcw = (sljit_sw)((sljit_u16)(srcw));2203break;2204case SLJIT_MOV_S16:2205srcw = (sljit_sw)((sljit_s16)(srcw));2206break;2207case SLJIT_MOV_U32:2208srcw = (sljit_sw)((sljit_u32)(srcw));2209break;2210case SLJIT_MOV_S32:2211case SLJIT_MOV32:2212srcw = (sljit_sw)((sljit_s32)(srcw));2213break;2214}2215return push_load_imm_inst(compiler, gpr(dst), srcw);2216}2217/* LOAD */2218/* TODO(carenas): avoid reg being defined later */2219#define LEVAL(i) EVAL(i, reg, mem)2220if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {2221sljit_gpr reg = gpr(dst);22222223FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));2224/* TODO(carenas): convert all calls below to LEVAL */2225switch (opcode | (op & SLJIT_32)) {2226case SLJIT_MOV32_U8:2227ins = llc(reg, mem.offset, mem.index, mem.base);2228break;2229case SLJIT_MOV32_S8:2230ins = lb(reg, mem.offset, mem.index, mem.base);2231break;2232case SLJIT_MOV32_U16:2233ins = llh(reg, mem.offset, mem.index, mem.base);2234break;2235case SLJIT_MOV32_S16:2236ins = WHEN2(is_u12(mem.offset), lh, lhy);2237break;2238case SLJIT_MOV32:2239ins = WHEN2(is_u12(mem.offset), l, ly);2240break;2241case SLJIT_MOV_U8:2242ins = LEVAL(llgc);2243break;2244case SLJIT_MOV_S8:2245ins = lgb(reg, 
mem.offset, mem.index, mem.base);2246break;2247case SLJIT_MOV_U16:2248ins = LEVAL(llgh);2249break;2250case SLJIT_MOV_S16:2251ins = lgh(reg, mem.offset, mem.index, mem.base);2252break;2253case SLJIT_MOV_U32:2254ins = LEVAL(llgf);2255break;2256case SLJIT_MOV_S32:2257ins = lgf(reg, mem.offset, mem.index, mem.base);2258break;2259case SLJIT_MOV_P:2260case SLJIT_MOV:2261ins = lg(reg, mem.offset, mem.index, mem.base);2262break;2263default:2264ins = 0;2265SLJIT_UNREACHABLE();2266break;2267}2268FAIL_IF(push_inst(compiler, ins));2269return SLJIT_SUCCESS;2270}2271/* STORE and STORE IMMEDIATE */2272if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {2273sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;22742275if (src == SLJIT_IMM) {2276/* TODO(mundaym): MOVE IMMEDIATE? */2277FAIL_IF(push_load_imm_inst(compiler, reg, srcw));2278}2279FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2280switch (opcode) {2281case SLJIT_MOV_U8:2282case SLJIT_MOV_S8:2283return push_inst(compiler,2284WHEN2(is_u12(mem.offset), stc, stcy));2285case SLJIT_MOV_U16:2286case SLJIT_MOV_S16:2287return push_inst(compiler,2288WHEN2(is_u12(mem.offset), sth, sthy));2289case SLJIT_MOV_U32:2290case SLJIT_MOV_S32:2291case SLJIT_MOV32:2292return push_inst(compiler,2293WHEN2(is_u12(mem.offset), st, sty));2294case SLJIT_MOV_P:2295case SLJIT_MOV:2296FAIL_IF(push_inst(compiler, LEVAL(stg)));2297return SLJIT_SUCCESS;2298default:2299SLJIT_UNREACHABLE();2300}2301}2302#undef LEVAL2303/* MOVE CHARACTERS */2304if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {2305FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));2306switch (opcode) {2307case SLJIT_MOV_U8:2308case SLJIT_MOV_S8:2309FAIL_IF(push_inst(compiler,2310EVAL(llgc, tmp0, mem)));2311FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2312return push_inst(compiler,2313EVAL(stcy, tmp0, mem));2314case SLJIT_MOV_U16:2315case SLJIT_MOV_S16:2316FAIL_IF(push_inst(compiler,2317EVAL(llgh, tmp0, mem)));2318FAIL_IF(make_addr_bxy(compiler, &mem, dst, 
dstw, tmp1));2319return push_inst(compiler,2320EVAL(sthy, tmp0, mem));2321case SLJIT_MOV_U32:2322case SLJIT_MOV_S32:2323case SLJIT_MOV32:2324FAIL_IF(push_inst(compiler,2325EVAL(ly, tmp0, mem)));2326FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2327return push_inst(compiler,2328EVAL(sty, tmp0, mem));2329case SLJIT_MOV_P:2330case SLJIT_MOV:2331FAIL_IF(push_inst(compiler,2332EVAL(lg, tmp0, mem)));2333FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));2334FAIL_IF(push_inst(compiler,2335EVAL(stg, tmp0, mem)));2336return SLJIT_SUCCESS;2337default:2338SLJIT_UNREACHABLE();2339}2340}2341SLJIT_UNREACHABLE();2342}23432344SLJIT_ASSERT(src != SLJIT_IMM);23452346dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;2347src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;23482349compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);23502351/* TODO(mundaym): optimize loads and stores */2352switch (opcode) {2353case SLJIT_CLZ:2354case SLJIT_CTZ:2355if (src & SLJIT_MEM)2356FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));23572358FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));2359break;2360case SLJIT_REV_U32:2361case SLJIT_REV_S32:2362op |= SLJIT_32;2363SLJIT_FALLTHROUGH2364case SLJIT_REV:2365case SLJIT_REV_U16:2366case SLJIT_REV_S16:2367return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);2368default:2369SLJIT_UNREACHABLE();2370}23712372if (dst & SLJIT_MEM)2373return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);23742375return SLJIT_SUCCESS;2376}23772378static SLJIT_INLINE int is_commutative(sljit_s32 op)2379{2380switch (GET_OPCODE(op)) {2381case SLJIT_ADD:2382case SLJIT_ADDC:2383case SLJIT_MUL:2384case SLJIT_AND:2385case SLJIT_OR:2386case SLJIT_XOR:2387return 1;2388}2389return 0;2390}23912392static const struct ins_forms add_forms = {23930x1a00, /* ar */23940xb9080000, /* agr */23950xb9f80000, /* ark */23960xb9e80000, /* agrk */23970x5a000000, /* a */23980xe3000000005a, /* ay */23990xe30000000008, /* ag */2400};24012402static 
const struct ins_forms logical_add_forms = {24030x1e00, /* alr */24040xb90a0000, /* algr */24050xb9fa0000, /* alrk */24060xb9ea0000, /* algrk */24070x5e000000, /* al */24080xe3000000005e, /* aly */24090xe3000000000a, /* alg */2410};24112412static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,2413sljit_s32 dst, sljit_sw dstw,2414sljit_s32 src1, sljit_sw src1w,2415sljit_s32 src2, sljit_sw src2w)2416{2417int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;2418int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);2419const struct ins_forms *forms;2420sljit_ins ins;24212422if (src2 == SLJIT_IMM) {2423if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {2424if (sets_overflow)2425ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;2426else2427ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;2428return emit_siy(compiler, ins, dst, dstw, src2w);2429}24302431if (is_s16(src2w)) {2432if (sets_overflow)2433ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;2434else2435ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;2436FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));2437goto done;2438}24392440if (!sets_overflow) {2441if ((op & SLJIT_32) || is_u32(src2w)) {2442ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;2443FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2444goto done;2445}2446if (is_u32(-src2w)) {2447FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));2448goto done;2449}2450}2451else if ((op & SLJIT_32) || is_s32(src2w)) {2452ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;2453FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2454goto done;2455}2456}24572458forms = sets_overflow ? 
&add_forms : &logical_add_forms;2459FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));24602461done:2462if (sets_zero_overflow)2463FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));24642465if (dst & SLJIT_MEM)2466return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);24672468return SLJIT_SUCCESS;2469}24702471static const struct ins_forms sub_forms = {24720x1b00, /* sr */24730xb9090000, /* sgr */24740xb9f90000, /* srk */24750xb9e90000, /* sgrk */24760x5b000000, /* s */24770xe3000000005b, /* sy */24780xe30000000009, /* sg */2479};24802481static const struct ins_forms logical_sub_forms = {24820x1f00, /* slr */24830xb90b0000, /* slgr */24840xb9fb0000, /* slrk */24850xb9eb0000, /* slgrk */24860x5f000000, /* sl */24870xe3000000005f, /* sly */24880xe3000000000b, /* slg */2489};24902491static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,2492sljit_s32 dst, sljit_sw dstw,2493sljit_s32 src1, sljit_sw src1w,2494sljit_s32 src2, sljit_sw src2w)2495{2496sljit_s32 flag_type = GET_FLAG_TYPE(op);2497int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);2498int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);2499const struct ins_forms *forms;2500sljit_ins ins;25012502if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {2503int compare_signed = flag_type >= SLJIT_SIG_LESS;25042505compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;25062507if (src2 == SLJIT_IMM) {2508if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {2509if ((op & SLJIT_32) || is_s32(src2w)) {2510ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;2511return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);2512}2513} else if ((op & SLJIT_32) || is_u32(src2w)) {2514ins = (op & SLJIT_32) ? 
0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;2515return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);2516}2517}2518else if (src2 & SLJIT_MEM) {2519if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {2520ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;2521return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);2522}25232524if (compare_signed)2525ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;2526else2527ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;2528return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);2529}25302531if (compare_signed)2532ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;2533else2534ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;2535return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);2536}25372538if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {2539ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;2540FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));2541goto done;2542}25432544if (src2 == SLJIT_IMM) {2545sljit_sw neg_src2w = -src2w;25462547if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {2548if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {2549if (sets_signed)2550ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;2551else2552ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;2553return emit_siy(compiler, ins, dst, dstw, neg_src2w);2554}25552556if (is_s16(neg_src2w)) {2557if (sets_signed)2558ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;2559else2560ins = (op & SLJIT_32) ? 
0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;2561FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));2562goto done;2563}2564}25652566if (!sets_signed) {2567if ((op & SLJIT_32) || is_u32(src2w)) {2568ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;2569FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));2570goto done;2571}2572if (is_u32(neg_src2w)) {2573FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));2574goto done;2575}2576}2577else if ((op & SLJIT_32) || is_s32(neg_src2w)) {2578ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;2579FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));2580goto done;2581}2582}25832584forms = sets_signed ? &sub_forms : &logical_sub_forms;2585FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));25862587done:2588if (sets_signed) {2589sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;25902591if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {2592/* In case of overflow, the sign bit of the two source operands must be different, and2593- the first operand is greater if the sign bit of the result is set2594- the first operand is less if the sign bit of the result is not set2595The -result operation sets the corrent sign, because the result cannot be zero.2596The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */2597FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));2598FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? 
lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));2599}2600else if (op & SLJIT_SET_Z)2601FAIL_IF(update_zero_overflow(compiler, op, dst_r));2602}26032604if (dst & SLJIT_MEM)2605return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);26062607return SLJIT_SUCCESS;2608}26092610static const struct ins_forms multiply_forms = {26110xb2520000, /* msr */26120xb90c0000, /* msgr */26130xb9fd0000, /* msrkc */26140xb9ed0000, /* msgrkc */26150x71000000, /* ms */26160xe30000000051, /* msy */26170xe3000000000c, /* msg */2618};26192620static const struct ins_forms multiply_overflow_forms = {26210,26220,26230xb9fd0000, /* msrkc */26240xb9ed0000, /* msgrkc */26250,26260xe30000000053, /* msc */26270xe30000000083, /* msgc */2628};26292630static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,2631sljit_s32 dst,2632sljit_s32 src1, sljit_sw src1w,2633sljit_s32 src2, sljit_sw src2w)2634{2635sljit_ins ins;26362637if (HAS_FLAGS(op)) {2638/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:2639FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));2640FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));2641if (dst_r != tmp0) {2642FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));2643}2644FAIL_IF(push_inst(compiler, aih(tmp0, 1)));2645FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));2646FAIL_IF(push_inst(compiler, ipm(tmp1)));2647FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */26482649return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);2650}26512652if (src2 == SLJIT_IMM) {2653if (is_s16(src2w)) {2654ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;2655return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);2656}26572658if (is_s32(src2w)) {2659ins = (op & SLJIT_32) ? 
0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;2660return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);2661}2662}26632664return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);2665}26662667static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,2668sljit_s32 dst,2669sljit_s32 src1, sljit_sw src1w,2670sljit_uw imm, sljit_s32 count16)2671{2672sljit_s32 mode = compiler->mode;2673sljit_gpr dst_r = tmp0;2674sljit_s32 needs_move = 1;26752676if (FAST_IS_REG(dst)) {2677dst_r = gpr(dst & REG_MASK);2678if (dst == src1)2679needs_move = 0;2680}26812682if (needs_move)2683FAIL_IF(emit_move(compiler, dst_r, src1, src1w));26842685if (type == SLJIT_AND) {2686if (!(mode & SLJIT_32))2687FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));2688return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));2689}2690else if (type == SLJIT_OR) {2691if (count16 >= 3) {2692FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));2693return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));2694}26952696if (count16 >= 2) {2697if ((imm & 0x00000000ffffffffull) == 0)2698return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));2699if ((imm & 0xffffffff00000000ull) == 0)2700return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));2701}27022703if ((imm & 0xffff000000000000ull) != 0)2704FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));2705if ((imm & 0x0000ffff00000000ull) != 0)2706FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));2707if ((imm & 0x00000000ffff0000ull) != 0)2708FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));2709if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2710return push_inst(compiler, 0xa50b0000 /* oill */ | 
R20A(dst_r) | (imm & 0xffff));2711return SLJIT_SUCCESS;2712}27132714if ((imm & 0xffffffff00000000ull) != 0)2715FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));2716if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)2717return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));2718return SLJIT_SUCCESS;2719}27202721static const struct ins_forms bitwise_and_forms = {27220x1400, /* nr */27230xb9800000, /* ngr */27240xb9f40000, /* nrk */27250xb9e40000, /* ngrk */27260x54000000, /* n */27270xe30000000054, /* ny */27280xe30000000080, /* ng */2729};27302731static const struct ins_forms bitwise_or_forms = {27320x1600, /* or */27330xb9810000, /* ogr */27340xb9f60000, /* ork */27350xb9e60000, /* ogrk */27360x56000000, /* o */27370xe30000000056, /* oy */27380xe30000000081, /* og */2739};27402741static const struct ins_forms bitwise_xor_forms = {27420x1700, /* xr */27430xb9820000, /* xgr */27440xb9f70000, /* xrk */27450xb9e70000, /* xgrk */27460x57000000, /* x */27470xe30000000057, /* xy */27480xe30000000082, /* xg */2749};27502751static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,2752sljit_s32 dst,2753sljit_s32 src1, sljit_sw src1w,2754sljit_s32 src2, sljit_sw src2w)2755{2756sljit_s32 type = GET_OPCODE(op);2757const struct ins_forms *forms;27582759if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {2760sljit_s32 count16 = 0;2761sljit_uw imm = (sljit_uw)src2w;27622763if (op & SLJIT_32)2764imm &= 0xffffffffull;27652766if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2767count16++;2768if ((imm & 0x00000000ffff0000ull) != 0)2769count16++;2770if ((imm & 0x0000ffff00000000ull) != 0)2771count16++;2772if ((imm & 0xffff000000000000ull) != 0)2773count16++;27742775if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {2776sljit_gpr src_r = tmp1;27772778if (FAST_IS_REG(src1))2779src_r = gpr(src1 & REG_MASK);2780else2781FAIL_IF(emit_move(compiler, 
tmp1, src1, src1w));27822783if ((imm & 0x000000000000ffffull) != 0 || imm == 0)2784return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);2785if ((imm & 0x00000000ffff0000ull) != 0)2786return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));2787if ((imm & 0x0000ffff00000000ull) != 0)2788return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));2789return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));2790}27912792if (!(op & SLJIT_SET_Z))2793return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);2794}27952796if (type == SLJIT_AND)2797forms = &bitwise_and_forms;2798else if (type == SLJIT_OR)2799forms = &bitwise_or_forms;2800else2801forms = &bitwise_xor_forms;28022803return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);2804}28052806static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,2807sljit_s32 dst,2808sljit_s32 src1, sljit_sw src1w,2809sljit_s32 src2, sljit_sw src2w)2810{2811sljit_s32 type = GET_OPCODE(op);2812sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;2813sljit_gpr src_r = tmp0;2814sljit_gpr base_r = tmp0;2815sljit_ins imm = 0;2816sljit_ins ins;28172818if (FAST_IS_REG(src1))2819src_r = gpr(src1);2820else2821FAIL_IF(emit_move(compiler, tmp0, src1, src1w));28222823if (src2 != SLJIT_IMM) {2824if (FAST_IS_REG(src2))2825base_r = gpr(src2);2826else {2827FAIL_IF(emit_move(compiler, tmp1, src2, src2w));2828base_r = tmp1;2829}28302831if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {2832if (base_r != tmp1) {2833FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));2834base_r = tmp1;2835} else2836FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));2837}2838} else2839imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 
0x1f : 0x3f));28402841if ((op & SLJIT_32) && dst_r == src_r) {2842if (type == SLJIT_SHL || type == SLJIT_MSHL)2843ins = 0x89000000 /* sll */;2844else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)2845ins = 0x88000000 /* srl */;2846else2847ins = 0x8a000000 /* sra */;28482849FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));2850} else {2851if (type == SLJIT_SHL || type == SLJIT_MSHL)2852ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;2853else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)2854ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;2855else2856ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;28572858FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));2859}28602861if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)2862return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));28632864return SLJIT_SUCCESS;2865}28662867static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,2868sljit_s32 dst,2869sljit_s32 src1, sljit_sw src1w,2870sljit_s32 src2, sljit_sw src2w)2871{2872sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;2873sljit_gpr src_r = tmp0;2874sljit_gpr base_r = tmp0;2875sljit_ins imm = 0;2876sljit_ins ins;28772878if (FAST_IS_REG(src1))2879src_r = gpr(src1);2880else2881FAIL_IF(emit_move(compiler, tmp0, src1, src1w));28822883if (src2 != SLJIT_IMM) {2884if (FAST_IS_REG(src2))2885base_r = gpr(src2);2886else {2887FAIL_IF(emit_move(compiler, tmp1, src2, src2w));2888base_r = tmp1;2889}2890}28912892if (GET_OPCODE(op) == SLJIT_ROTR) {2893if (src2 != SLJIT_IMM) {2894ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;2895FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));2896base_r = tmp1;2897} else2898src2w = -src2w;2899}29002901if (src2 == SLJIT_IMM)2902imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));29032904ins = (op & SLJIT_32) ? 
0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;2905return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));2906}29072908static const struct ins_forms addc_forms = {29090xb9980000, /* alcr */29100xb9880000, /* alcgr */29110,29120,29130,29140xe30000000098, /* alc */29150xe30000000088, /* alcg */2916};29172918static const struct ins_forms subc_forms = {29190xb9990000, /* slbr */29200xb9890000, /* slbgr */29210,29220,29230,29240xe30000000099, /* slb */29250xe30000000089, /* slbg */2926};29272928SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,2929sljit_s32 dst, sljit_sw dstw,2930sljit_s32 src1, sljit_sw src1w,2931sljit_s32 src2, sljit_sw src2w)2932{2933CHECK_ERROR();2934CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));2935ADJUST_LOCAL_OFFSET(dst, dstw);2936ADJUST_LOCAL_OFFSET(src1, src1w);2937ADJUST_LOCAL_OFFSET(src2, src2w);29382939compiler->mode = op & SLJIT_32;2940compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);29412942if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {2943src1 ^= src2;2944src2 ^= src1;2945src1 ^= src2;29462947src1w ^= src2w;2948src2w ^= src1w;2949src1w ^= src2w;2950}29512952switch (GET_OPCODE(op)) {2953case SLJIT_ADD:2954compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;2955return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);2956case SLJIT_ADDC:2957compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;2958FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));2959if (dst & SLJIT_MEM)2960return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2961return SLJIT_SUCCESS;2962case SLJIT_SUB:2963compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;2964return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);2965case SLJIT_SUBC:2966compiler->status_flags_state |= 
SLJIT_CURRENT_FLAGS_SUB;2967FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));2968if (dst & SLJIT_MEM)2969return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2970return SLJIT_SUCCESS;2971case SLJIT_MUL:2972FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));2973break;2974case SLJIT_AND:2975case SLJIT_OR:2976case SLJIT_XOR:2977FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));2978break;2979case SLJIT_SHL:2980case SLJIT_MSHL:2981case SLJIT_LSHR:2982case SLJIT_MLSHR:2983case SLJIT_ASHR:2984case SLJIT_MASHR:2985FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));2986break;2987case SLJIT_ROTL:2988case SLJIT_ROTR:2989FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));2990break;2991}29922993if (dst & SLJIT_MEM)2994return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);2995return SLJIT_SUCCESS;2996}29972998SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,2999sljit_s32 src1, sljit_sw src1w,3000sljit_s32 src2, sljit_sw src2w)3001{3002sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;30033004CHECK_ERROR();3005CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));30063007SLJIT_SKIP_CHECKS(compiler);3008return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);3009}30103011SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,3012sljit_s32 dst_reg,3013sljit_s32 src1, sljit_sw src1w,3014sljit_s32 src2, sljit_sw src2w)3015{3016CHECK_ERROR();3017CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));30183019switch (GET_OPCODE(op)) {3020case SLJIT_MULADD:3021SLJIT_SKIP_CHECKS(compiler);3022FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));3023return push_inst(compiler, ((op & SLJIT_32) ? 
0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));3024}30253026return SLJIT_SUCCESS;3027}30283029SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,3030sljit_s32 dst_reg,3031sljit_s32 src1_reg,3032sljit_s32 src2_reg,3033sljit_s32 src3, sljit_sw src3w)3034{3035sljit_s32 is_right;3036sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;3037sljit_gpr dst_r = gpr(dst_reg);3038sljit_gpr src1_r = gpr(src1_reg);3039sljit_gpr src2_r = gpr(src2_reg);3040sljit_gpr src3_r = tmp1;3041sljit_ins ins;30423043CHECK_ERROR();3044CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));30453046is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);30473048if (src1_reg == src2_reg) {3049SLJIT_SKIP_CHECKS(compiler);3050return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);3051}30523053ADJUST_LOCAL_OFFSET(src3, src3w);30543055if (src3 == SLJIT_IMM) {3056src3w &= bit_length - 1;30573058if (src3w == 0)3059return SLJIT_SUCCESS;30603061if (op & SLJIT_32) {3062if (dst_r == src1_r) {3063ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;3064FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));3065} else {3066ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;3067FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));3068}3069} else {3070ins = is_right ? 
0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;3071FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));3072}30733074ins = 0xec0000000055 /* risbg */;30753076if (is_right) {3077src3w = bit_length - src3w;3078ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);3079} else3080ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);30813082return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));3083}30843085if (!(src3 & SLJIT_MEM)) {3086src3_r = gpr(src3);30873088if (dst_r == src3_r) {3089FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));3090src3_r = tmp1;3091}3092} else3093FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));30943095if (op & SLJIT_32) {3096if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {3097if (src3_r != tmp1) {3098FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));3099src3_r = tmp1;3100} else3101FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));3102}31033104if (dst_r == src1_r) {3105ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;3106FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));3107} else {3108ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;3109FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));3110}31113112if (src3_r != tmp1) {3113FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));3114FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));3115} else3116FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));31173118ins = is_right ? 
0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;3119FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));31203121return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));3122}31233124ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;3125FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));31263127ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;31283129if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {3130if (src3_r != tmp1)3131FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));31323133FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));3134src2_r = tmp0;31353136if (src3_r != tmp1)3137FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));3138else3139FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));3140} else3141FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));31423143FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));3144return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));3145}31463147SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,3148sljit_s32 dst, sljit_sw dstw,3149sljit_s32 src1, sljit_sw src1w,3150sljit_s32 src2, sljit_sw src2w,3151sljit_sw shift_arg)3152{3153sljit_gpr dst_r, tmp_r, src_r;3154struct addr addr;31553156CHECK_ERROR();3157CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));3158ADJUST_LOCAL_OFFSET(dst, dstw);3159ADJUST_LOCAL_OFFSET(src1, src1w);3160ADJUST_LOCAL_OFFSET(src2, src2w);31613162shift_arg &= 0x3f;31633164if (src2 == SLJIT_IMM) {3165src2w = src2w << shift_arg;3166shift_arg = 0;3167}31683169if (shift_arg == 0) {3170SLJIT_SKIP_CHECKS(compiler);3171return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);3172}31733174tmp_r = FAST_IS_REG(dst) 
&& (dst != src1) ? gpr(dst) : tmp0;31753176if (src2 & SLJIT_MEM) {3177FAIL_IF(load_word(compiler, tmp_r, src2, src2w, 0 /* 64-bit */));3178src_r = tmp_r;3179} else {3180src_r = gpr(src2);3181}31823183FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp_r) | R32A(src_r) | ((sljit_ins)shift_arg << 16)));31843185if (src1 & SLJIT_MEM) {3186FAIL_IF(make_addr_bxy(compiler, &addr, src1, src1w, tmp1));3187FAIL_IF(push_inst(compiler, 0xe30000000008 /* ag */ | R36A(tmp_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));3188src_r = tmp_r;3189} else if (src1 == SLJIT_IMM) {3190if (is_s32(src1w)) {3191FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp_r) | (sljit_u32)src1w));3192src_r = tmp_r;3193} else {3194src_r = tmp_r != tmp0 ? tmp0 : tmp1;3195FAIL_IF(push_load_imm_inst(compiler, src_r, src1w));3196}3197} else {3198src_r = gpr(src1);3199}32003201dst_r = (FAST_IS_REG(dst) ? gpr(dst) : tmp0);32023203if (src_r != tmp_r) {3204if (src_r == dst_r) {3205FAIL_IF(push_inst(compiler, 0xb9080000 /* agr */ | R4A(dst_r) | R0A(tmp_r)));3206} else {3207FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp_r) | R4A(dst_r) | R0A(src_r)));3208}3209}32103211if (dst & SLJIT_MEM)3212return store_word(compiler, dst_r, dst, dstw, 0 /* 64-bit */);3213return SLJIT_SUCCESS;3214}32153216SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,3217sljit_s32 src, sljit_sw srcw)3218{3219sljit_gpr src_r;3220struct addr addr;32213222CHECK_ERROR();3223CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));3224ADJUST_LOCAL_OFFSET(src, srcw);32253226switch (op) {3227case SLJIT_FAST_RETURN:3228if (FAST_IS_REG(src)) {3229src_r = gpr(src);3230if (src_r != link_r)3231FAIL_IF(push_inst(compiler, lgr(link_r, src_r)));3232} else3233FAIL_IF(load_word(compiler, link_r, src, srcw, 0));32343235return push_inst(compiler, br(link_r));3236case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:3237return SLJIT_SUCCESS;3238case 
SLJIT_PREFETCH_L1:3239case SLJIT_PREFETCH_L2:3240case SLJIT_PREFETCH_L3:3241case SLJIT_PREFETCH_ONCE:3242FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));3243return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));3244default:3245return SLJIT_SUCCESS;3246}32473248return SLJIT_SUCCESS;3249}32503251SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,3252sljit_s32 dst, sljit_sw dstw)3253{3254sljit_gpr dst_r = link_r;3255sljit_s32 size;32563257CHECK_ERROR();3258CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));3259ADJUST_LOCAL_OFFSET(dst, dstw);32603261switch (op) {3262case SLJIT_FAST_ENTER:3263if (FAST_IS_REG(dst)) {3264dst_r = gpr(dst);32653266if (dst_r == link_r)3267return SLJIT_SUCCESS;3268return push_inst(compiler, lgr(dst_r, link_r));3269}3270break;3271case SLJIT_GET_RETURN_ADDRESS:3272dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;32733274size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);3275FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));3276break;3277}32783279if (dst & SLJIT_MEM)3280return store_word(compiler, dst_r, dst, dstw, 0);32813282return SLJIT_SUCCESS;3283}32843285SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)3286{3287CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));32883289if (type == SLJIT_GP_REGISTER)3290return (sljit_s32)gpr(reg);32913292if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128)3293return -1;32943295return (sljit_s32)freg_map[reg];3296}32973298SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,3299void *instruction, sljit_u32 size)3300{3301sljit_ins ins = 0;33023303CHECK_ERROR();3304CHECK(check_sljit_emit_op_custom(compiler, instruction, size));33053306memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, 
size);3307return push_inst(compiler, ins);3308}33093310/* --------------------------------------------------------------------- */3311/* Floating point operators */3312/* --------------------------------------------------------------------- */33133314#define FLOAT_LOAD 03315#define FLOAT_STORE 133163317static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,3318sljit_s32 reg,3319sljit_s32 mem, sljit_sw memw)3320{3321struct addr addr;3322sljit_ins ins;33233324SLJIT_ASSERT(mem & SLJIT_MEM);33253326if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {3327FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));33283329if (op & FLOAT_STORE)3330ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;3331else3332ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;33333334return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);3335}33363337FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));33383339if (op & FLOAT_STORE)3340ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;3341else3342ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;33433344return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));3345}33463347static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,3348sljit_s32 reg,3349sljit_s32 src, sljit_sw srcw)3350{3351struct addr addr;33523353if (!(src & SLJIT_MEM))3354return push_inst(compiler, ins_r | F4(reg) | F0(src));33553356FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));3357return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));3358}33593360static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,3361sljit_s32 dst, sljit_sw dstw,3362sljit_s32 src, sljit_sw srcw)3363{3364sljit_ins dst_r = FAST_IS_REG(dst) ? 
gpr(dst) : tmp0;3365sljit_ins ins;33663367if (src & SLJIT_MEM) {3368FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));3369src = TMP_FREG1;3370}33713372/* M3 is set to 5 */3373if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)3374ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;3375else3376ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;33773378FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));33793380if (dst & SLJIT_MEM)3381return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);33823383return SLJIT_SUCCESS;3384}33853386static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,3387sljit_s32 dst, sljit_sw dstw,3388sljit_s32 src, sljit_sw srcw)3389{3390sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;33913392if (src == SLJIT_IMM) {3393FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));3394src = (sljit_s32)tmp0;3395}3396else if (src & SLJIT_MEM) {3397FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));3398src = (sljit_s32)tmp0;3399}34003401FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));34023403if (dst & SLJIT_MEM)3404return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);34053406return SLJIT_SUCCESS;3407}34083409static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,3410sljit_s32 dst, sljit_sw dstw,3411sljit_s32 src, sljit_sw srcw)3412{3413sljit_ins ins;34143415if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)3416srcw = (sljit_s32)srcw;34173418if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)3419ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;3420else3421ins = (op & SLJIT_32) ? 
0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;34223423return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);3424}34253426static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,3427sljit_s32 dst, sljit_sw dstw,3428sljit_s32 src, sljit_sw srcw)3429{3430sljit_ins ins;34313432if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)3433srcw = (sljit_u32)srcw;34343435if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)3436ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;3437else3438ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;34393440return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);3441}34423443static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,3444sljit_s32 src1, sljit_sw src1w,3445sljit_s32 src2, sljit_sw src2w)3446{3447sljit_ins ins_r, ins;34483449if (src1 & SLJIT_MEM) {3450FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));3451src1 = TMP_FREG1;3452}34533454if (op & SLJIT_32) {3455ins_r = 0xb3090000 /* cebr */;3456ins = 0xed0000000009 /* ceb */;3457} else {3458ins_r = 0xb3190000 /* cdbr */;3459ins = 0xed0000000019 /* cdb */;3460}34613462return emit_float(compiler, ins_r, ins, src1, src2, src2w);3463}34643465SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,3466sljit_s32 dst, sljit_sw dstw,3467sljit_s32 src, sljit_sw srcw)3468{3469sljit_s32 dst_r;3470sljit_ins ins;34713472CHECK_ERROR();34733474SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);34753476dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;34773478if (op == SLJIT_CONV_F64_FROM_F32)3479FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));3480else {3481if (src & SLJIT_MEM) {3482FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 
/* Encodes a register to register floating point move: ler when SLJIT_32 is
   set in op, ldr otherwise. The op argument is parenthesized so that any
   expression may be passed without operator precedence surprises. */
#define FLOAT_MOV(op, dst_r, src_r) \
	((((op) & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
0xed000000000d /* deb */ : 0xed000000001d /* ddb */;3578break;3579}35803581FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));35823583if (dst & SLJIT_MEM)3584return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);35853586return SLJIT_SUCCESS;3587}35883589SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,3590sljit_s32 dst_freg,3591sljit_s32 src1, sljit_sw src1w,3592sljit_s32 src2, sljit_sw src2w)3593{3594sljit_s32 reg;35953596CHECK_ERROR();3597CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));3598ADJUST_LOCAL_OFFSET(src1, src1w);3599ADJUST_LOCAL_OFFSET(src2, src2w);36003601if (src2 & SLJIT_MEM) {3602FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));3603src2 = TMP_FREG1;3604}36053606if (src1 & SLJIT_MEM) {3607reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;3608FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));3609src1 = reg;3610}36113612return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));3613}36143615SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,3616sljit_s32 freg, sljit_f32 value)3617{3618union {3619sljit_s32 imm;3620sljit_f32 value;3621} u;36223623CHECK_ERROR();3624CHECK(check_sljit_emit_fset32(compiler, freg, value));36253626u.value = value;36273628FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));3629return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));3630}36313632SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,3633sljit_s32 freg, sljit_f64 value)3634{3635union {3636sljit_sw imm;3637sljit_f64 value;3638} u;36393640CHECK_ERROR();3641CHECK(check_sljit_emit_fset64(compiler, freg, value));36423643u.value = value;36443645FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));3646return push_inst(compiler, 0xb3c10000 /* ldgr */ | 
F4(freg) | R0A(tmp1));3647}36483649SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,3650sljit_s32 freg, sljit_s32 reg)3651{3652sljit_gpr gen_r;36533654CHECK_ERROR();3655CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));36563657gen_r = gpr(reg);36583659if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {3660if (op & SLJIT_32) {3661FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));3662gen_r = tmp0;3663}36643665return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));3666}36673668FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));36693670if (!(op & SLJIT_32))3671return SLJIT_SUCCESS;36723673return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));3674}36753676/* --------------------------------------------------------------------- */3677/* Conditional instructions */3678/* --------------------------------------------------------------------- */36793680SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)3681{3682struct sljit_label *label;36833684CHECK_ERROR_PTR();3685CHECK_PTR(check_sljit_emit_label(compiler));36863687if (compiler->last_label && compiler->last_label->size == compiler->size)3688return compiler->last_label;36893690label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));3691PTR_FAIL_IF(!label);3692set_label(label, compiler);3693return label;3694}36953696SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,3697sljit_s32 alignment, struct sljit_read_only_buffer *buffers)3698{3699sljit_uw mask, i;3700struct sljit_label *label;3701struct sljit_label *next_label;3702struct sljit_extended_label *ext_label;37033704CHECK_ERROR_PTR();3705CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));37063707sljit_reset_read_only_buffers(buffers);37083709if (alignment <= 
SLJIT_LABEL_ALIGN_2) {3710SLJIT_SKIP_CHECKS(compiler);3711label = sljit_emit_label(compiler);3712PTR_FAIL_IF(!label);3713} else {3714/* The used space is filled with NOPs. */3715mask = ((sljit_uw)1 << alignment) - sizeof(sljit_u16);37163717for (i = (mask >> 1); i != 0; i--)3718PTR_FAIL_IF(push_inst(compiler, 0x0700 /* 2-byte nop */));37193720ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));3721PTR_FAIL_IF(!ext_label);3722set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);3723label = &ext_label->label;3724}37253726if (buffers == NULL)3727return label;37283729next_label = label;37303731while (1) {3732buffers->u.label = next_label;37333734for (i = (buffers->size + 1) >> 1; i > 0; i--)3735PTR_FAIL_IF(push_inst(compiler, 0x0700 /* 2-byte nop */));37363737buffers = buffers->next;37383739if (buffers == NULL)3740break;37413742SLJIT_SKIP_CHECKS(compiler);3743next_label = sljit_emit_label(compiler);3744PTR_FAIL_IF(!next_label);3745}37463747return label;3748}37493750SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)3751{3752struct sljit_jump *jump;3753sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? 
get_cc(compiler, type & 0xff) : 0xf;37543755CHECK_ERROR_PTR();3756CHECK_PTR(check_sljit_emit_jump(compiler, type));37573758/* record jump */3759jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));3760PTR_FAIL_IF(!jump);3761set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);3762jump->addr = compiler->size;37633764/* emit jump instruction */3765type &= 0xff;3766if (type >= SLJIT_FAST_CALL)3767PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));3768else3769PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));37703771return jump;3772}37733774SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,3775sljit_s32 arg_types)3776{3777SLJIT_UNUSED_ARG(arg_types);3778CHECK_ERROR_PTR();3779CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));37803781if (type & SLJIT_CALL_RETURN) {3782PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));3783type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);3784}37853786SLJIT_SKIP_CHECKS(compiler);3787return sljit_emit_jump(compiler, type);3788}37893790SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)3791{3792struct sljit_jump *jump;3793sljit_gpr src_r = FAST_IS_REG(src) ? 
gpr(src) : tmp1;37943795CHECK_ERROR();3796CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));37973798if (src != SLJIT_IMM) {3799if (src & SLJIT_MEM) {3800ADJUST_LOCAL_OFFSET(src, srcw);3801FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));3802}38033804/* emit jump instruction */3805if (type >= SLJIT_FAST_CALL)3806return push_inst(compiler, basr(link_r, src_r));38073808return push_inst(compiler, br(src_r));3809}38103811jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));3812FAIL_IF(!jump);3813set_jump(jump, compiler, JUMP_ADDR);3814jump->addr = compiler->size;3815jump->u.target = (sljit_uw)srcw;38163817type &= 0xff;3818if (type >= SLJIT_FAST_CALL)3819return push_inst(compiler, brasl(link_r, 0));38203821return push_inst(compiler, brcl(0xf, 0));3822}38233824SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,3825sljit_s32 arg_types,3826sljit_s32 src, sljit_sw srcw)3827{3828SLJIT_UNUSED_ARG(arg_types);38293830CHECK_ERROR();3831CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));38323833SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);38343835if (src & SLJIT_MEM) {3836ADJUST_LOCAL_OFFSET(src, srcw);3837FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));3838src = TMP_REG2;3839srcw = 0;3840}38413842if (type & SLJIT_CALL_RETURN) {3843if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {3844FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));3845src = TMP_REG2;3846srcw = 0;3847}38483849FAIL_IF(emit_stack_frame_release(compiler, r14));3850type = SLJIT_JUMP;3851}38523853SLJIT_SKIP_CHECKS(compiler);3854return sljit_emit_ijump(compiler, type, src, srcw);3855}38563857SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,3858sljit_s32 dst, sljit_sw dstw,3859sljit_s32 type)3860{3861sljit_gpr dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0;3862sljit_gpr loc_r = tmp1;3863sljit_u8 mask = get_cc(compiler, type);38643865CHECK_ERROR();3866CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));38673868switch (GET_OPCODE(op)) {3869case SLJIT_AND:3870case SLJIT_OR:3871case SLJIT_XOR:3872compiler->status_flags_state = op & SLJIT_SET_Z;38733874/* dst is also source operand */3875if (dst & SLJIT_MEM)3876FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));38773878break;3879case SLJIT_MOV32:3880op |= SLJIT_32;3881SLJIT_FALLTHROUGH3882case SLJIT_MOV:3883/* can write straight into destination */3884loc_r = dst_r;3885break;3886default:3887SLJIT_UNREACHABLE();3888}38893890/* TODO(mundaym): fold into cmov helper function? */3891#define LEVAL(i) i(loc_r, 1, mask)3892if (have_lscond2()) {3893FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));3894FAIL_IF(push_inst(compiler,3895WHEN2(op & SLJIT_32, lochi, locghi)));3896} else {3897FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));3898FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));3899FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));3900}3901#undef LEVAL39023903/* apply bitwise op and set condition codes */3904switch (GET_OPCODE(op)) {3905#define LEVAL(i) i(dst_r, loc_r)3906case SLJIT_AND:3907FAIL_IF(push_inst(compiler,3908WHEN2(op & SLJIT_32, nr, ngr)));3909break;3910case SLJIT_OR:3911FAIL_IF(push_inst(compiler,3912WHEN2(op & SLJIT_32, or, ogr)));3913break;3914case SLJIT_XOR:3915FAIL_IF(push_inst(compiler,3916WHEN2(op & SLJIT_32, xr, xgr)));3917break;3918#undef LEVAL3919}39203921/* store result to memory if required */3922if (dst & SLJIT_MEM)3923return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));39243925return SLJIT_SUCCESS;3926}39273928SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,3929sljit_s32 dst_reg,3930sljit_s32 src1, sljit_sw src1w,3931sljit_s32 src2_reg)3932{3933sljit_ins mask;3934sljit_gpr src_r;3935sljit_gpr dst_r = gpr(dst_reg);3936sljit_s32 is_32bit = 
(type & SLJIT_32) != 0;3937sljit_ins ins;39383939CHECK_ERROR();3940CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));39413942ADJUST_LOCAL_OFFSET(src1, src1w);39433944type &= ~SLJIT_32;3945if (src1 == SLJIT_IMM && is_32bit)3946src1w = (sljit_s32)src1w;39473948if (type & SLJIT_COMPARE_SELECT) {3949type ^= SLJIT_COMPARE_SELECT;3950compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE;39513952if (src1 & SLJIT_MEM) {3953FAIL_IF(load_word(compiler, tmp0, src1, src1w, is_32bit));3954src1 = TMP_REG1;3955src1w = 0;3956} else if (src1 == SLJIT_IMM) {3957if (type >= SLJIT_LESS && type <= SLJIT_LESS_EQUAL && src1w >= 0 && src1w <= 0x7fff) {3958ins = is_32bit ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;3959FAIL_IF(push_inst(compiler, ins | R36A(gpr(src2_reg)) | (sljit_ins)src1w));3960type ^= 0x1;3961} else if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL && is_s20(src1w)) {3962ins = is_32bit ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;3963FAIL_IF(push_inst(compiler, ins | R36A(gpr(src2_reg)) | ((sljit_ins)src1w & 0xffffffff)));3964type ^= 0x1;3965} else {3966FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));3967src1 = TMP_REG1;3968src1w = 0;3969}3970}39713972if (FAST_IS_REG(src1)) {3973if (type >= SLJIT_LESS && type <= SLJIT_LESS_EQUAL)3974ins = is_32bit ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;3975else3976ins = is_32bit ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;3977FAIL_IF(push_inst(compiler, ins | R4A(gpr(src1)) | R0A(gpr(src2_reg))));3978}3979}39803981if (dst_reg != src2_reg) {3982if (src1 == dst_reg) {3983src1 = src2_reg;3984src1w = 0;3985type ^= 0x1;3986} else {3987if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {3988FAIL_IF(load_word(compiler, dst_r, src1, src1w, is_32bit));3989src1 = src2_reg;3990src1w = 0;3991type ^= 0x1;3992} else3993FAIL_IF(push_inst(compiler, (is_32bit ? 
0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));3994}3995}39963997mask = get_cc(compiler, type);39983999if (src1 & SLJIT_MEM) {4000if (src1 & OFFS_REG_MASK) {4001src_r = gpr(OFFS_REG(src1));40024003if (src1w != 0) {4004FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));4005src_r = tmp1;4006}40074008FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));4009src_r = tmp1;4010src1w = 0;4011} else if (!is_s20(src1w)) {4012FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));40134014if (src1 & REG_MASK)4015FAIL_IF(push_inst(compiler, 0xb9080000 /* agr */ | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));40164017src_r = tmp1;4018src1w = 0;4019} else4020src_r = gpr(src1 & REG_MASK);40214022ins = is_32bit ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;4023return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));4024}40254026if (src1 == SLJIT_IMM) {4027if (have_lscond2() && is_s16(src1w)) {4028ins = is_32bit ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;4029return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);4030}40314032FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));4033src_r = tmp1;4034} else4035src_r = gpr(src1);40364037ins = is_32bit ? 
0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;4038return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));4039}40404041SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,4042sljit_s32 dst_freg,4043sljit_s32 src1, sljit_sw src1w,4044sljit_s32 src2_freg)4045{4046sljit_ins ins;4047struct sljit_label *label;4048struct sljit_jump *jump;40494050CHECK_ERROR();4051CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));40524053ADJUST_LOCAL_OFFSET(src1, src1w);40544055if (dst_freg != src2_freg) {4056if (dst_freg == src1) {4057src1 = src2_freg;4058src1w = 0;4059type ^= 0x1;4060} else {4061ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;4062FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));4063}4064}40654066SLJIT_SKIP_CHECKS(compiler);4067jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);4068FAIL_IF(!jump);40694070if (!(src1 & SLJIT_MEM)) {4071ins = (type & SLJIT_32) ? 
0x3800 /* ler */ : 0x2800 /* ldr */;4072FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));4073} else4074FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));40754076SLJIT_SKIP_CHECKS(compiler);4077label = sljit_emit_label(compiler);4078FAIL_IF(!label);40794080sljit_set_label(jump, label);4081return SLJIT_SUCCESS;4082}40834084SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,4085sljit_s32 reg,4086sljit_s32 mem, sljit_sw memw)4087{4088sljit_ins ins, reg1, reg2, base, offs = 0;40894090CHECK_ERROR();4091CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));40924093if (!(reg & REG_PAIR_MASK))4094return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);40954096ADJUST_LOCAL_OFFSET(mem, memw);40974098base = gpr(mem & REG_MASK);4099reg1 = gpr(REG_PAIR_FIRST(reg));4100reg2 = gpr(REG_PAIR_SECOND(reg));41014102if (mem & OFFS_REG_MASK) {4103memw &= 0x3;4104offs = gpr(OFFS_REG(mem));41054106if (memw != 0) {4107FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));4108offs = tmp1;4109} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {4110FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));4111base = tmp1;4112offs = 0;4113}41144115memw = 0;4116} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {4117FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));41184119if (base == 0)4120base = tmp1;4121else4122offs = tmp1;41234124memw = 0;4125}41264127if (offs == 0 && reg2 == (reg1 + 1)) {4128ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;4129return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));4130}41314132ins = ((type & SLJIT_MEM_STORE) ? 
0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);41334134if (!(type & SLJIT_MEM_STORE) && base == reg1) {4135FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));4136return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));4137}41384139FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));4140return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));4141}41424143SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,4144sljit_s32 vreg,4145sljit_s32 srcdst, sljit_sw srcdstw)4146{4147sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4148sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4149sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);4150struct addr addr;4151sljit_ins ins;41524153CHECK_ERROR();4154CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));41554156ADJUST_LOCAL_OFFSET(srcdst, srcdstw);41574158if (reg_size != 4)4159return SLJIT_ERR_UNSUPPORTED;41604161if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))4162return SLJIT_ERR_UNSUPPORTED;41634164if (type & SLJIT_SIMD_TEST)4165return SLJIT_SUCCESS;41664167if (!(srcdst & SLJIT_MEM)) {4168if (type & SLJIT_SIMD_STORE)4169ins = F36(srcdst) | F32(vreg);4170else4171ins = F36(vreg) | F32(srcdst);41724173return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);4174}41754176FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));4177ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);41784179if (alignment >= 4)4180ins |= 4 << 12;4181else if (alignment == 3)4182ins |= 3 << 12;41834184return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 
0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);4185}41864187SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,4188sljit_s32 vreg,4189sljit_s32 src, sljit_sw srcw)4190{4191sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4192sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4193struct addr addr;4194sljit_gpr reg;4195sljit_sw sign_ext;41964197CHECK_ERROR();4198CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));41994200ADJUST_LOCAL_OFFSET(src, srcw);42014202if (reg_size != 4)4203return SLJIT_ERR_UNSUPPORTED;42044205if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4206return SLJIT_ERR_UNSUPPORTED;42074208if (type & SLJIT_SIMD_TEST)4209return SLJIT_SUCCESS;42104211if (src & SLJIT_MEM) {4212FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));4213return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(vreg)4214| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));4215}42164217if (type & SLJIT_SIMD_FLOAT) {4218if (src == SLJIT_IMM)4219return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg));42204221return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)elem_size << 12));4222}42234224if (src == SLJIT_IMM) {4225sign_ext = 0x10000;42264227switch (elem_size) {4228case 0:4229srcw &= 0xff;4230sign_ext = (sljit_s8)srcw;4231break;4232case 1:4233srcw &= 0xffff;4234sign_ext = (sljit_s16)srcw;4235break;4236case 2:4237if ((sljit_s32)srcw == (sljit_s16)srcw) {4238srcw &= 0xffff;4239sign_ext = (sljit_s16)srcw;4240} else4241srcw &= 0xffffffff;4242break;4243default:4244if (srcw == (sljit_s16)srcw) {4245srcw &= 0xffff;4246sign_ext = (sljit_s16)srcw;4247}4248break;4249}42504251if (sign_ext != 0x10000) {4252if (sign_ext == 0 || sign_ext == -1)4253return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)4254| (sign_ext == 0 ? 
0 : ((sljit_ins)0xffff << 16)));42554256return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(vreg)4257| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));4258}42594260push_load_imm_inst(compiler, tmp0, srcw);4261reg = tmp0;4262} else4263reg = gpr(src);42644265FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ((sljit_ins)elem_size << 12)));4266return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(vreg) | ((sljit_ins)elem_size << 12));4267}42684269SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,4270sljit_s32 vreg, sljit_s32 lane_index,4271sljit_s32 srcdst, sljit_sw srcdstw)4272{4273sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4274sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4275struct addr addr;4276sljit_gpr reg;4277sljit_ins ins = 0;42784279CHECK_ERROR();4280CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));42814282ADJUST_LOCAL_OFFSET(srcdst, srcdstw);42834284if (reg_size != 4)4285return SLJIT_ERR_UNSUPPORTED;42864287if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4288return SLJIT_ERR_UNSUPPORTED;42894290if (type & SLJIT_SIMD_TEST)4291return SLJIT_SUCCESS;42924293if (srcdst & SLJIT_MEM) {4294FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));4295ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);4296}42974298if (type & SLJIT_SIMD_LANE_ZERO) {4299if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))4300return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));43014302if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {4303FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(vreg)));4304srcdst = TMP_FREG1;4305srcdstw = 0;4306}43074308FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)));4309}43104311if (srcdst & SLJIT_MEM) {4312switch (elem_size) {4313case 0:4314ins |= 
0xe70000000000 /* vleb */;4315break;4316case 1:4317ins |= 0xe70000000001 /* vleh */;4318break;4319case 2:4320ins |= 0xe70000000003 /* vlef */;4321break;4322default:4323ins |= 0xe70000000002 /* vleg */;4324break;4325}43264327/* Convert to vsteb - vsteg */4328if (type & SLJIT_SIMD_STORE)4329ins |= 0x8;43304331return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));4332}43334334if (type & SLJIT_SIMD_FLOAT) {4335if (type & SLJIT_SIMD_STORE)4336return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(vreg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));43374338if (elem_size == 3) {4339if (lane_index == 0)4340ins = F32(srcdst) | F28(vreg) | (1 << 12);4341else4342ins = F32(vreg) | F28(srcdst);43434344return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(vreg) | ins);4345}43464347FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));4348return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));4349}43504351if (srcdst == SLJIT_IMM) {4352switch (elem_size) {4353case 0:4354ins = 0xe70000000040 /* vleib */;4355srcdstw &= 0xff;4356break;4357case 1:4358ins = 0xe70000000041 /* vleih */;4359srcdstw &= 0xffff;4360break;4361case 2:4362if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {4363srcdstw &= 0xffff;4364ins = 0xe70000000043 /* vleif */;4365} else4366srcdstw &= 0xffffffff;4367break;4368default:4369if (srcdstw == (sljit_s16)srcdstw) {4370srcdstw &= 0xffff;4371ins = 0xe70000000042 /* vleig */;4372}4373break;4374}43754376if (ins != 0)4377return push_inst(compiler, ins | F36(vreg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));43784379push_load_imm_inst(compiler, tmp0, srcdstw);4380reg = tmp0;4381} else4382reg = gpr(srcdst);43834384ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);43854386if (!(type & SLJIT_SIMD_STORE))4387return push_inst(compiler, 0xe70000000022 /* vlvg */ | 
F36(vreg) | R32A(reg) | ins);43884389FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(vreg) | ins));43904391if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)4392return SLJIT_SUCCESS;43934394switch (elem_size) {4395case 0:4396ins = 0xb9060000 /* lgbr */;4397break;4398case 1:4399ins = 0xb9070000 /* lghr */;4400break;4401default:4402ins = 0xb9140000 /* lgfr */;4403break;4404}44054406return push_inst(compiler, ins | R4A(reg) | R0A(reg));4407}44084409SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,4410sljit_s32 vreg,4411sljit_s32 src, sljit_s32 src_lane_index)4412{4413sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4414sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);44154416CHECK_ERROR();4417CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));44184419if (reg_size != 4)4420return SLJIT_ERR_UNSUPPORTED;44214422if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4423return SLJIT_ERR_UNSUPPORTED;44244425if (type & SLJIT_SIMD_TEST)4426return SLJIT_SUCCESS;44274428return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src)4429| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));4430}44314432SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,4433sljit_s32 vreg,4434sljit_s32 src, sljit_sw srcw)4435{4436sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4437sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4438sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);4439struct addr addr;4440sljit_ins ins;44414442CHECK_ERROR();4443CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));44444445ADJUST_LOCAL_OFFSET(src, srcw);44464447if (reg_size != 4)4448return SLJIT_ERR_UNSUPPORTED;44494450if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4451return SLJIT_ERR_UNSUPPORTED;44524453if (type & SLJIT_SIMD_TEST)4454return SLJIT_SUCCESS;44554456if (src & 
SLJIT_MEM) {4457FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));4458ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);44594460switch (elem2_size - elem_size) {4461case 1:4462ins |= 0xe70000000002 /* vleg */;4463break;4464case 2:4465ins |= 0xe70000000003 /* vlef */;4466break;4467default:4468ins |= 0xe70000000001 /* vleh */;4469break;4470}44714472FAIL_IF(push_inst(compiler, ins));4473src = vreg;4474}44754476if (type & SLJIT_SIMD_FLOAT) {4477FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(vreg) | F32(src) | (2 << 12)));4478FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(vreg) | F32(vreg) | (32 << 16) | (3 << 12)));4479return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(vreg) | F32(vreg) | (2 << 12));4480}44814482ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(vreg);44834484do {4485FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));4486src = vreg;4487} while (++elem_size < elem2_size);44884489return SLJIT_SUCCESS;4490}44914492SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,4493sljit_s32 vreg,4494sljit_s32 dst, sljit_sw dstw)4495{4496sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4497sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4498sljit_gpr dst_r;44994500CHECK_ERROR();4501CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));45024503ADJUST_LOCAL_OFFSET(dst, dstw);45044505if (reg_size != 4)4506return SLJIT_ERR_UNSUPPORTED;45074508if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)4509return SLJIT_ERR_UNSUPPORTED;45104511if (type & SLJIT_SIMD_TEST)4512return SLJIT_SUCCESS;45134514switch (elem_size) {4515case 0:4516push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);4517push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);4518FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | 
R28A(tmp0)));4519break;4520case 1:4521push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);4522break;4523case 2:4524push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);4525break;4526default:4527push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);4528break;4529}45304531if (elem_size != 0)4532FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));45334534FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(vreg) | F28(TMP_FREG1)));45354536dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;4537FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)4538| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));45394540if (dst_r == tmp0)4541return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);45424543return SLJIT_SUCCESS;4544}45454546SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,4547sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)4548{4549sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);4550sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);4551sljit_s32 alignment;4552struct addr addr;4553sljit_ins ins = 0, load_ins;45544555CHECK_ERROR();4556CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));4557ADJUST_LOCAL_OFFSET(src2, src2w);45584559if (reg_size != 4)4560return SLJIT_ERR_UNSUPPORTED;45614562if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))4563return SLJIT_ERR_UNSUPPORTED;45644565if (type & SLJIT_SIMD_TEST)4566return SLJIT_SUCCESS;45674568switch (SLJIT_SIMD_GET_OPCODE(type)) {4569case SLJIT_SIMD_OP2_AND:4570ins = 0xe70000000068 /* vn */;4571break;4572case SLJIT_SIMD_OP2_OR:4573ins = 0xe7000000006a /* vo */;4574break;4575case SLJIT_SIMD_OP2_XOR:4576ins = 0xe7000000006d /* vx */;4577break;4578case SLJIT_SIMD_OP2_SHUFFLE:4579ins = 0xe7000000008c /* vperm */;4580break;4581}45824583if (src2 & 
SLJIT_MEM) {4584FAIL_IF(make_addr_bx(compiler, &addr, src2, src2w, tmp1));4585load_ins = 0xe70000000006 /* vl */ | F36(TMP_FREG1) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);4586alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);45874588if (alignment >= 4)4589load_ins |= 4 << 12;4590else if (alignment == 3)4591load_ins |= 3 << 12;45924593FAIL_IF(push_inst(compiler, load_ins));4594src2 = TMP_FREG1;4595}45964597if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE)4598return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src1_vreg) | F12(src2));45994600return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src2));4601}46024603SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,4604sljit_s32 dst_reg,4605sljit_s32 mem_reg)4606{4607CHECK_ERROR();4608CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));46094610if (op & SLJIT_ATOMIC_USE_LS)4611return SLJIT_ERR_UNSUPPORTED;46124613switch (GET_OPCODE(op)) {4614case SLJIT_MOV32:4615case SLJIT_MOV_U32:4616case SLJIT_MOV:4617case SLJIT_MOV_P:4618if (op & SLJIT_ATOMIC_TEST)4619return SLJIT_SUCCESS;46204621SLJIT_SKIP_CHECKS(compiler);4622return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);4623default:4624return SLJIT_ERR_UNSUPPORTED;4625}4626}46274628SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,4629sljit_s32 src_reg,4630sljit_s32 mem_reg,4631sljit_s32 temp_reg)4632{4633sljit_ins ins;4634sljit_gpr tmp_r = gpr(temp_reg);4635sljit_gpr mem_r = gpr(mem_reg);46364637CHECK_ERROR();4638CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));46394640if (op & SLJIT_ATOMIC_USE_LS)4641return SLJIT_ERR_UNSUPPORTED;46424643switch (GET_OPCODE(op)) {4644case SLJIT_MOV32:4645case SLJIT_MOV_U32:4646ins = 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r);4647break;4648case SLJIT_MOV:4649case 
SLJIT_MOV_P:4650ins = 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r);4651break;4652default:4653return SLJIT_ERR_UNSUPPORTED;4654}46554656if (op & SLJIT_ATOMIC_TEST)4657return SLJIT_SUCCESS;46584659return push_inst(compiler, ins);4660}46614662/* --------------------------------------------------------------------- */4663/* Other instructions */4664/* --------------------------------------------------------------------- */46654666SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,4667sljit_s32 dst, sljit_sw dstw,4668sljit_sw init_value)4669{4670struct sljit_const *const_;4671sljit_gpr dst_r;4672int is_32 = 0;46734674CHECK_ERROR_PTR();4675CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));4676ADJUST_LOCAL_OFFSET(dst, dstw);46774678const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));4679PTR_FAIL_IF(!const_);4680set_const((struct sljit_const*)const_, compiler);46814682dst_r = FAST_IS_REG(dst) ? 
gpr(dst & REG_MASK) : tmp0;46834684switch (GET_OPCODE(op)) {4685case SLJIT_MOV_U8:4686if (init_value & 0x100)4687init_value |= 0xff00;4688else4689init_value &= 0xff;46904691PTR_FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(dst_r) | (sljit_ins)(init_value & 0xffff)));46924693if (dst & SLJIT_MEM)4694PTR_FAIL_IF(store_byte(compiler, dst_r, dst, dstw));4695return (struct sljit_const*)const_;46964697case SLJIT_MOV32:4698is_32 = 1;4699SLJIT_FALLTHROUGH4700case SLJIT_MOV_S32:4701PTR_FAIL_IF(push_inst(compiler, 0xc00100000000 /* lgfi */ | R36A(dst_r) | (sljit_ins)(init_value & 0xffffffff)));4702break;47034704default:4705PTR_FAIL_IF(push_inst(compiler, 0xc00f00000000 /* llilf */ | R36A(dst_r) | (sljit_ins)(init_value & 0xffffffff)));4706PTR_FAIL_IF(push_inst(compiler, 0xc00800000000 /* iihf */ | R36A(dst_r) | (sljit_ins)((init_value >> 32) & 0xffffffff)));4707break;4708}47094710if (dst & SLJIT_MEM)4711PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, is_32));47124713return (struct sljit_const*)const_;4714}47154716SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)4717{4718/* Update the constant pool. 
*/4719sljit_uw *ptr = (sljit_uw *)addr;4720SLJIT_UNUSED_ARG(executable_offset);47214722SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);4723*ptr = new_target;4724SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);4725SLJIT_CACHE_FLUSH(ptr, ptr + 1);4726}47274728SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)4729{4730sljit_u16 *inst = (sljit_u16*)addr;4731SLJIT_UNUSED_ARG(executable_offset);47324733switch (GET_OPCODE(op)) {4734case SLJIT_MOV_U8:4735SLJIT_ASSERT((inst[0] & 0xff0f) == 0xa709 /* lghi */);47364737if (new_constant & 0x100)4738new_constant |= 0xff00;4739else4740new_constant &= 0xff;47414742SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);4743inst[1] = (sljit_u16)new_constant;4744SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);4745inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);4746SLJIT_CACHE_FLUSH(inst, inst + 2);4747return;47484749case SLJIT_MOV32:4750case SLJIT_MOV_S32:4751SLJIT_ASSERT((inst[0] & 0xff0f) == 0xc001 /* lgfi */);47524753SLJIT_UPDATE_WX_FLAGS(inst, inst + 3, 0);4754inst[1] = (sljit_u16)(new_constant >> 16);4755inst[2] = (sljit_u16)new_constant;4756SLJIT_UPDATE_WX_FLAGS(inst, inst + 3, 1);4757inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);4758SLJIT_CACHE_FLUSH(inst, inst + 3);4759return;47604761default:4762SLJIT_ASSERT((inst[0] & 0xff0f) == 0xc00f /* llilf */ && (inst[3] & 0xff0f) == 0xc008 /* iihf */);47634764SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0);4765inst[1] = (sljit_u16)(new_constant >> 16);4766inst[2] = (sljit_u16)new_constant;4767inst[4] = (sljit_u16)(new_constant >> 48);4768inst[5] = (sljit_u16)(new_constant >> 32);4769SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1);4770inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);4771SLJIT_CACHE_FLUSH(inst, inst + 6);4772return;4773}4774}47754776SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,4777sljit_s32 dst, sljit_sw dstw)4778{4779struct 
sljit_jump *jump;4780sljit_gpr dst_r, target_r;4781SLJIT_UNUSED_ARG(op);47824783CHECK_ERROR_PTR();4784CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));4785ADJUST_LOCAL_OFFSET(dst, dstw);47864787dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;47884789if (op != SLJIT_ADD_ABS_ADDR)4790target_r = dst_r;4791else {4792target_r = tmp1;47934794if (dst & SLJIT_MEM)4795PTR_FAIL_IF(load_word(compiler, dst_r, dst, dstw, 0));4796}47974798jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));4799PTR_FAIL_IF(!jump);4800set_mov_addr(jump, compiler, 0);48014802/* Might be converted to lgrl. */4803PTR_FAIL_IF(push_inst(compiler, 0xc00000000000 /* larl */ | R36A(target_r)));48044805if (op == SLJIT_ADD_ABS_ADDR)4806PTR_FAIL_IF(push_inst(compiler, 0xb90a0000 /* algr */ | R4A(dst_r) | R0A(tmp1)));48074808if (dst & SLJIT_MEM)4809PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));48104811return jump;4812}48134814/* TODO(carenas): EVAL probably should move up or be refactored */4815#undef WHEN24816#undef EVAL48174818#undef tmp14819#undef tmp048204821/* TODO(carenas): undef other macros that spill like is_u12? */482248234824