GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeLOONGARCH_64.c
/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright Zoltan Herczeg ([email protected]). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "LOONGARCH" SLJIT_CPUINFO;
}

typedef sljit_u32 sljit_ins;

#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_ZERO 0

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
#define RETURN_ADDR_REG TMP_REG2
#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
};

static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
};

/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */

/*
   LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):

   | Format name | Composition                |
   | 2R          | Opcode + Rj + Rd           |
   | 3R          | Opcode + Rk + Rj + Rd      |
   | 4R          | Opcode + Ra + Rk + Rj + Rd |
   | 2RI8        | Opcode + I8 + Rj + Rd      |
   | 2RI12       | Opcode + I12 + Rj + Rd     |
   | 2RI14       | Opcode + I14 + Rj + Rd     |
   | 2RI16       | Opcode + I16 + Rj + Rd     |
   | 1RI21       | Opcode + I21L + Rj + I21H  |
   | I26         | Opcode + I26L + I26H       |

   Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
   I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
   lower parts in the instruction word, denoted by the “L” and “H” suffixes. */

#define RD(rd) ((sljit_ins)reg_map[rd])
#define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
#define RK(rk) ((sljit_ins)reg_map[rk] << 10)
#define RA(ra) ((sljit_ins)reg_map[ra] << 15)

#define FD(fd) ((sljit_ins)reg_map[fd])
#define FRD(fd) ((sljit_ins)freg_map[fd])
#define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
#define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
#define FRA(fa) ((sljit_ins)freg_map[fa] << 15)

#define IMM_V(imm) ((sljit_ins)(imm) << 10)
#define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
#define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
#define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
#define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
#define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
#define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
#define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))

#define OPC_I26(opc) ((sljit_ins)(opc) << 26)
#define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
#define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
#define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
#define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
#define OPC_4R(opc) ((sljit_ins)(opc) << 20)
#define OPC_3R(opc) ((sljit_ins)(opc) << 15)
#define OPC_2R(opc) ((sljit_ins)(opc) << 10)
#define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)

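/* Illustration (added for exposition): with the macros above, a 3R-format
   instruction such as add.d is assembled as

     sljit_ins ins = ADD_D | RD(dst) | RJ(src1) | RK(src2);

   OPC_3R() places the opcode at bit 15, while RD(), RJ() and RK() place the
   mapped machine register numbers at bits 0, 5 and 10. A 2RI12 load such as
   ld.d is LD_D | RD(reg) | RJ(base) | IMM_I12(offset), with the signed 12-bit
   offset stored in bits 10..21. */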
108
/* Arithmetic operation instructions */
109
#define ADD_W OPC_3R(0x20)
110
#define ADD_D OPC_3R(0x21)
111
#define SUB_W OPC_3R(0x22)
112
#define SUB_D OPC_3R(0x23)
113
#define ADDI_W OPC_2RI12(0xa)
114
#define ADDI_D OPC_2RI12(0xb)
115
#define ANDI OPC_2RI12(0xd)
116
#define ORI OPC_2RI12(0xe)
117
#define XORI OPC_2RI12(0xf)
118
#define ADDU16I_D OPC_2RI16(0x4)
119
#define LU12I_W OPC_1RI20(0xa)
120
#define LU32I_D OPC_1RI20(0xb)
121
#define LU52I_D OPC_2RI12(0xc)
122
#define SLT OPC_3R(0x24)
123
#define SLTU OPC_3R(0x25)
124
#define SLTI OPC_2RI12(0x8)
125
#define SLTUI OPC_2RI12(0x9)
126
#define PCADDI OPC_1RI20(0xc)
127
#define PCALAU12I OPC_1RI20(0xd)
128
#define PCADDU12I OPC_1RI20(0xe)
129
#define PCADDU18I OPC_1RI20(0xf)
130
#define NOR OPC_3R(0x28)
131
#define AND OPC_3R(0x29)
132
#define OR OPC_3R(0x2a)
133
#define XOR OPC_3R(0x2b)
134
#define ORN OPC_3R(0x2c)
135
#define ANDN OPC_3R(0x2d)
136
#define MUL_W OPC_3R(0x38)
137
#define MULH_W OPC_3R(0x39)
138
#define MULH_WU OPC_3R(0x3a)
139
#define MUL_D OPC_3R(0x3b)
140
#define MULH_D OPC_3R(0x3c)
141
#define MULH_DU OPC_3R(0x3d)
142
#define MULW_D_W OPC_3R(0x3e)
143
#define MULW_D_WU OPC_3R(0x3f)
144
#define DIV_W OPC_3R(0x40)
145
#define MOD_W OPC_3R(0x41)
146
#define DIV_WU OPC_3R(0x42)
147
#define MOD_WU OPC_3R(0x43)
148
#define DIV_D OPC_3R(0x44)
149
#define MOD_D OPC_3R(0x45)
150
#define DIV_DU OPC_3R(0x46)
151
#define MOD_DU OPC_3R(0x47)
152
153
/* Bit-shift instructions */
154
#define SLL_W OPC_3R(0x2e)
155
#define SRL_W OPC_3R(0x2f)
156
#define SRA_W OPC_3R(0x30)
157
#define SLL_D OPC_3R(0x31)
158
#define SRL_D OPC_3R(0x32)
159
#define SRA_D OPC_3R(0x33)
160
#define ROTR_W OPC_3R(0x36)
161
#define ROTR_D OPC_3R(0x37)
162
#define SLLI_W OPC_3R(0x81)
163
#define SLLI_D ((sljit_ins)(0x41) << 16)
164
#define SRLI_W OPC_3R(0x89)
165
#define SRLI_D ((sljit_ins)(0x45) << 16)
166
#define SRAI_W OPC_3R(0x91)
167
#define SRAI_D ((sljit_ins)(0x49) << 16)
168
#define ROTRI_W OPC_3R(0x99)
169
#define ROTRI_D ((sljit_ins)(0x4d) << 16)
170
171
/* Bit-manipulation instructions */
172
#define CLO_W OPC_2R(0x4)
173
#define CLZ_W OPC_2R(0x5)
174
#define CTO_W OPC_2R(0x6)
175
#define CTZ_W OPC_2R(0x7)
176
#define CLO_D OPC_2R(0x8)
177
#define CLZ_D OPC_2R(0x9)
178
#define CTO_D OPC_2R(0xa)
179
#define CTZ_D OPC_2R(0xb)
180
#define REVB_2H OPC_2R(0xc)
181
#define REVB_4H OPC_2R(0xd)
182
#define REVB_2W OPC_2R(0xe)
183
#define REVB_D OPC_2R(0xf)
184
#define REVH_2W OPC_2R(0x10)
185
#define REVH_D OPC_2R(0x11)
186
#define BITREV_4B OPC_2R(0x12)
187
#define BITREV_8B OPC_2R(0x13)
188
#define BITREV_W OPC_2R(0x14)
189
#define BITREV_D OPC_2R(0x15)
190
#define EXT_W_H OPC_2R(0x16)
191
#define EXT_W_B OPC_2R(0x17)
192
#define BSTRINS_W (0x1 << 22 | 1 << 21)
193
#define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
194
#define BSTRINS_D (0x2 << 22)
195
#define BSTRPICK_D (0x3 << 22)
196
197
/* Branch instructions */
198
#define BEQZ OPC_1RI21(0x10)
199
#define BNEZ OPC_1RI21(0x11)
200
#define JIRL OPC_2RI16(0x13)
201
#define B OPC_I26(0x14)
202
#define BL OPC_I26(0x15)
203
#define BEQ OPC_2RI16(0x16)
204
#define BNE OPC_2RI16(0x17)
205
#define BLT OPC_2RI16(0x18)
206
#define BGE OPC_2RI16(0x19)
207
#define BLTU OPC_2RI16(0x1a)
208
#define BGEU OPC_2RI16(0x1b)
209
210
/* Memory access instructions */
211
#define LD_B OPC_2RI12(0xa0)
212
#define LD_H OPC_2RI12(0xa1)
213
#define LD_W OPC_2RI12(0xa2)
214
#define LD_D OPC_2RI12(0xa3)
215
216
#define ST_B OPC_2RI12(0xa4)
217
#define ST_H OPC_2RI12(0xa5)
218
#define ST_W OPC_2RI12(0xa6)
219
#define ST_D OPC_2RI12(0xa7)
220
221
#define LD_BU OPC_2RI12(0xa8)
222
#define LD_HU OPC_2RI12(0xa9)
223
#define LD_WU OPC_2RI12(0xaa)
224
225
#define LDX_B OPC_3R(0x7000)
226
#define LDX_H OPC_3R(0x7008)
227
#define LDX_W OPC_3R(0x7010)
228
#define LDX_D OPC_3R(0x7018)
229
230
#define STX_B OPC_3R(0x7020)
231
#define STX_H OPC_3R(0x7028)
232
#define STX_W OPC_3R(0x7030)
233
#define STX_D OPC_3R(0x7038)
234
235
#define LDX_BU OPC_3R(0x7040)
236
#define LDX_HU OPC_3R(0x7048)
237
#define LDX_WU OPC_3R(0x7050)
238
239
#define PRELD OPC_2RI12(0xab)
240
241
/* Atomic memory access instructions */
242
#define LL_W OPC_2RI14(0x20)
243
#define SC_W OPC_2RI14(0x21)
244
#define LL_D OPC_2RI14(0x22)
245
#define SC_D OPC_2RI14(0x23)
246
247
/* LoongArch V1.10 Instructions */
248
#define AMCAS_B OPC_3R(0x70B0)
249
#define AMCAS_H OPC_3R(0x70B1)
250
#define AMCAS_W OPC_3R(0x70B2)
251
#define AMCAS_D OPC_3R(0x70B3)
252
253
/* Memory barrier instructions */
254
#define DBAR OPC_3R(0x70e4)
255
256
/* Other instructions */
257
#define BREAK OPC_3R(0x54)
258
#define DBGCALL OPC_3R(0x55)
259
#define SYSCALL OPC_3R(0x56)
260
261
/* Basic Floating-Point Instructions */
262
/* Floating-Point Arithmetic Operation Instructions */
263
#define FADD_S OPC_3R(0x201)
264
#define FADD_D OPC_3R(0x202)
265
#define FSUB_S OPC_3R(0x205)
266
#define FSUB_D OPC_3R(0x206)
267
#define FMUL_S OPC_3R(0x209)
268
#define FMUL_D OPC_3R(0x20a)
269
#define FDIV_S OPC_3R(0x20d)
270
#define FDIV_D OPC_3R(0x20e)
271
#define FCMP_COND_S OPC_4R(0xc1)
272
#define FCMP_COND_D OPC_4R(0xc2)
273
#define FCOPYSIGN_S OPC_3R(0x225)
274
#define FCOPYSIGN_D OPC_3R(0x226)
275
#define FSEL OPC_4R(0xd0)
276
#define FABS_S OPC_2R(0x4501)
277
#define FABS_D OPC_2R(0x4502)
278
#define FNEG_S OPC_2R(0x4505)
279
#define FNEG_D OPC_2R(0x4506)
280
#define FMOV_S OPC_2R(0x4525)
281
#define FMOV_D OPC_2R(0x4526)
282
283
/* Floating-Point Conversion Instructions */
284
#define FCVT_S_D OPC_2R(0x4646)
285
#define FCVT_D_S OPC_2R(0x4649)
286
#define FTINTRZ_W_S OPC_2R(0x46a1)
287
#define FTINTRZ_W_D OPC_2R(0x46a2)
288
#define FTINTRZ_L_S OPC_2R(0x46a9)
289
#define FTINTRZ_L_D OPC_2R(0x46aa)
290
#define FFINT_S_W OPC_2R(0x4744)
291
#define FFINT_S_L OPC_2R(0x4746)
292
#define FFINT_D_W OPC_2R(0x4748)
293
#define FFINT_D_L OPC_2R(0x474a)
294
295
/* Floating-Point Move Instructions */
296
#define FMOV_S OPC_2R(0x4525)
297
#define FMOV_D OPC_2R(0x4526)
298
#define MOVGR2FR_W OPC_2R(0x4529)
299
#define MOVGR2FR_D OPC_2R(0x452a)
300
#define MOVGR2FRH_W OPC_2R(0x452b)
301
#define MOVFR2GR_S OPC_2R(0x452d)
302
#define MOVFR2GR_D OPC_2R(0x452e)
303
#define MOVFRH2GR_S OPC_2R(0x452f)
304
#define MOVGR2FCSR OPC_2R(0x4530)
305
#define MOVFCSR2GR OPC_2R(0x4532)
306
#define MOVFR2CF OPC_2R(0x4534)
307
#define MOVCF2FR OPC_2R(0x4535)
308
#define MOVGR2CF OPC_2R(0x4536)
309
#define MOVCF2GR OPC_2R(0x4537)
310
311
/* Floating-Point Branch Instructions */
312
#define BCEQZ OPC_I26(0x12)
313
#define BCNEZ OPC_I26(0x12)
314
315
/* Floating-Point Common Memory Access Instructions */
316
#define FLD_S OPC_2RI12(0xac)
317
#define FLD_D OPC_2RI12(0xae)
318
#define FST_S OPC_2RI12(0xad)
319
#define FST_D OPC_2RI12(0xaf)
320
321
#define FLDX_S OPC_3R(0x7060)
322
#define FLDX_D OPC_3R(0x7068)
323
#define FSTX_S OPC_3R(0x7070)
324
#define FSTX_D OPC_3R(0x7078)
325
326
/* Vector Instructions */
327
328
/* Vector Arithmetic Instructions */
329
#define VOR_V OPC_3R(0xe24d)
330
#define VXOR_V OPC_3R(0xe24e)
331
#define VAND_V OPC_3R(0xe24c)
332
#define VMSKLTZ OPC_2R(0x1ca710)
333
334
/* Vector Memory Access Instructions */
335
#define VLD OPC_2RI12(0xb0)
336
#define VST OPC_2RI12(0xb1)
337
#define XVLD OPC_2RI12(0xb2)
338
#define XVST OPC_2RI12(0xb3)
339
#define VSTELM OPC_2RI8(0xc40)
340
341
/* Vector Float Conversion Instructions */
342
#define VFCVTL_D_S OPC_2R(0x1ca77c)
343
344
/* Vector Bit Manipulate Instructions */
345
#define VSLLWIL OPC_2R(0x1cc200)
346
347
/* Vector Move And Shuffle Instructions */
348
#define VLDREPL OPC_2R(0xc0000)
349
#define VINSGR2VR OPC_2R(0x1cbac0)
350
#define VPICKVE2GR_U OPC_2R(0x1cbce0)
351
#define VREPLGR2VR OPC_2R(0x1ca7c0)
352
#define VREPLVE OPC_3R(0xe244)
353
#define VREPLVEI OPC_2R(0x1cbde0)
354
#define VSHUF_B OPC_4R(0xd5)
355
#define XVPERMI OPC_2RI8(0x1dfa)

#define I12_MAX (0x7ff)
#define I12_MIN (-0x800)
#define BRANCH16_MAX (0x7fff << 2)
#define BRANCH16_MIN (-(0x8000 << 2))
#define BRANCH21_MAX (0xfffff << 2)
#define BRANCH21_MIN (-(0x100000 << 2))
#define JUMP_MAX (0x1ffffff << 2)
#define JUMP_MIN (-(0x2000000 << 2))
#define JIRL_MAX (0x7fff << 2)
#define JIRL_MIN (-(0x8000 << 2))

#define S32_MAX (0x7fffffffl)
#define S32_MIN (-0x80000000l)
#define S52_MAX (0x7ffffffffffffl)

#define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))

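/* Note (added for exposition): the BRANCH/JUMP/JIRL limits above are byte
   offsets. A conditional branch (2RI16) reaches about +/-128KB from the branch
   instruction, b/bl (I26) about +/-128MB; anything further away is handled by
   loading the target address into a register and jumping with jirl. */
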
/* LoongArch CPUCFG register for feature detection */
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_CFG2_LAMCAS (1 << 28)

static sljit_u32 cfg2_feature_list = 0;

/* According to the Software Development and Build Convention for LoongArch Architectures,
   the status of the LSX and LASX extensions must be checked through HWCAP. */
#include <sys/auxv.h>

#define LOONGARCH_HWCAP_LSX (1 << 4)
#define LOONGARCH_HWCAP_LASX (1 << 5)

static sljit_u32 hwcap_feature_list = 0;

/* Feature type */
#define GET_CFG2 0
#define GET_HWCAP 1

#define LOONGARCH_SUPPORT_AMCAS (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2))

static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
{
	if (cfg2_feature_list == 0)
		__asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
	if (hwcap_feature_list == 0)
		hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);

	return feature_type ? hwcap_feature_list : cfg2_feature_list;
}

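/* Usage (added for exposition): LOONGARCH_SUPPORT_AMCAS above queries the
   CPUCFG word, while sljit_has_cpu_feature() further below queries the HWCAP
   word for the LSX (SLJIT_HAS_SIMD) and LASX (SLJIT_HAS_LASX) checks. */
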
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	*ptr = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
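
/* Usage (added for exposition): every emitter below funnels through
   push_inst(), e.g. push_inst(compiler, ADDI_D | RD(dst) | RJ(src) | IMM_I12(imm))
   appends one encoded instruction word to the instruction buffer and advances
   compiler->size by one. */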
413
414
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
415
{
416
sljit_sw diff;
417
sljit_uw target_addr;
418
sljit_uw jump_addr = (sljit_uw)code_ptr;
419
sljit_uw orig_addr = jump->addr;
420
SLJIT_UNUSED_ARG(executable_offset);
421
422
jump->addr = jump_addr;
423
if (jump->flags & SLJIT_REWRITABLE_JUMP)
424
goto exit;
425
426
if (jump->flags & JUMP_ADDR)
427
target_addr = jump->u.target;
428
else {
429
SLJIT_ASSERT(jump->u.label != NULL);
430
target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
431
432
if (jump->u.label->size > orig_addr)
433
jump_addr = (sljit_uw)(code + orig_addr);
434
}
435
436
diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);
437
438
if (jump->flags & IS_COND) {
439
diff += SSIZE_OF(ins);
440
441
if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
442
code_ptr--;
443
code_ptr[0] = (code_ptr[0] & 0xfc0003ff) ^ 0x4000000;
444
jump->flags |= PATCH_B;
445
jump->addr = (sljit_uw)code_ptr;
446
return code_ptr;
447
}
448
449
diff -= SSIZE_OF(ins);
450
}
451
452
if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
453
if (jump->flags & IS_COND) {
454
code_ptr[-1] |= (sljit_ins)IMM_I16(2);
455
}
456
457
jump->flags |= PATCH_J;
458
return code_ptr;
459
}
460
461
if (diff >= S32_MIN && diff <= S32_MAX) {
462
if (jump->flags & IS_COND)
463
code_ptr[-1] |= (sljit_ins)IMM_I16(3);
464
465
jump->flags |= PATCH_REL32;
466
code_ptr[1] = code_ptr[0];
467
return code_ptr + 1;
468
}
469
470
if (target_addr <= (sljit_uw)S32_MAX) {
471
if (jump->flags & IS_COND)
472
code_ptr[-1] |= (sljit_ins)IMM_I16(3);
473
474
jump->flags |= PATCH_ABS32;
475
code_ptr[1] = code_ptr[0];
476
return code_ptr + 1;
477
}
478
479
if (target_addr <= S52_MAX) {
480
if (jump->flags & IS_COND)
481
code_ptr[-1] |= (sljit_ins)IMM_I16(4);
482
483
jump->flags |= PATCH_ABS52;
484
code_ptr[2] = code_ptr[0];
485
return code_ptr + 2;
486
}
487
488
exit:
489
if (jump->flags & IS_COND)
490
code_ptr[-1] |= (sljit_ins)IMM_I16(5);
491
code_ptr[3] = code_ptr[0];
492
return code_ptr + 3;
493
}
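
/* Summary (added for exposition): detect_jump_type() picks the shortest form
   that can reach the target: a conditional branch patched in place (PATCH_B,
   about +/-128KB), a single b/bl (PATCH_J, about +/-128MB), pcaddu12i + jirl
   for a 32-bit PC-relative offset (PATCH_REL32), or an absolute address built
   with lu12i.w (+lu32i.d, +lu52i.d) and finished with jirl for 32-, 52- and
   full 64-bit targets. */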
494
495
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
496
{
497
sljit_uw addr;
498
sljit_uw jump_addr = (sljit_uw)code_ptr;
499
sljit_sw diff;
500
SLJIT_UNUSED_ARG(executable_offset);
501
502
SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
503
if (jump->flags & JUMP_ADDR)
504
addr = jump->u.target;
505
else {
506
addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
507
508
if (jump->u.label->size > jump->addr)
509
jump_addr = (sljit_uw)(code + jump->addr);
510
}
511
512
diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);
513
514
if (diff >= S32_MIN && diff <= S32_MAX) {
515
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
516
jump->flags |= PATCH_REL32;
517
return 1;
518
}
519
520
if (addr <= S32_MAX) {
521
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
522
jump->flags |= PATCH_ABS32;
523
return 1;
524
}
525
526
if (addr <= S52_MAX) {
527
SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
528
jump->flags |= PATCH_ABS52;
529
return 2;
530
}
531
532
SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
533
return 3;
534
}
535
536
static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
537
{
538
sljit_uw flags = jump->flags;
539
sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
540
sljit_ins *ins = (sljit_ins*)jump->addr;
541
sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
542
SLJIT_UNUSED_ARG(executable_offset);
543
544
if (flags & PATCH_REL32) {
545
addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);
546
547
SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
548
549
if ((addr & 0x800) != 0)
550
addr += 0x1000;
551
552
ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);
553
554
if (!(flags & JUMP_MOV_ADDR)) {
555
SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
556
ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
557
} else
558
ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
559
return;
560
}
561
562
if (flags & PATCH_ABS32) {
563
SLJIT_ASSERT(addr <= S32_MAX);
564
ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
565
} else if (flags & PATCH_ABS52) {
566
ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
567
ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
568
ins += 1;
569
} else {
570
ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
571
ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
572
ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
573
ins += 2;
574
}
575
576
if (!(flags & JUMP_MOV_ADDR)) {
577
SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
578
ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
579
} else
580
ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
581
}
582
583
static void reduce_code_size(struct sljit_compiler *compiler)
584
{
585
struct sljit_label *label;
586
struct sljit_jump *jump;
587
struct sljit_const *const_;
588
SLJIT_NEXT_DEFINE_TYPES;
589
sljit_uw total_size;
590
sljit_uw size_reduce = 0;
591
sljit_sw diff;
592
593
label = compiler->labels;
594
jump = compiler->jumps;
595
const_ = compiler->consts;
596
597
SLJIT_NEXT_INIT_TYPES();
598
599
while (1) {
600
SLJIT_GET_NEXT_MIN();
601
602
if (next_min_addr == SLJIT_MAX_ADDRESS)
603
break;
604
605
if (next_min_addr == next_label_size) {
606
label->size -= size_reduce;
607
608
label = label->next;
609
next_label_size = SLJIT_GET_NEXT_SIZE(label);
610
}
611
612
if (next_min_addr == next_const_addr) {
613
const_->addr -= size_reduce;
614
const_ = const_->next;
615
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
616
continue;
617
}
618
619
if (next_min_addr != next_jump_addr)
620
continue;
621
622
jump->addr -= size_reduce;
623
if (!(jump->flags & JUMP_MOV_ADDR)) {
624
total_size = JUMP_MAX_SIZE;
625
626
if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
627
if (jump->flags & JUMP_ADDR) {
628
if (jump->u.target <= S32_MAX)
629
total_size = 2;
630
else if (jump->u.target <= S52_MAX)
631
total_size = 3;
632
} else {
633
/* Unit size: instruction. */
634
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
635
if (jump->u.label->size > jump->addr) {
636
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
637
diff -= (sljit_sw)size_reduce;
638
}
639
640
if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
641
total_size = 0;
642
else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
643
total_size = 1;
644
else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
645
total_size = 2;
646
}
647
}
648
649
size_reduce += JUMP_MAX_SIZE - total_size;
650
jump->flags |= total_size << JUMP_SIZE_SHIFT;
651
} else {
652
total_size = 3;
653
654
if (!(jump->flags & JUMP_ADDR)) {
655
/* Real size minus 1. Unit size: instruction. */
656
diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
657
if (jump->u.label->size > jump->addr) {
658
SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
659
diff -= (sljit_sw)size_reduce;
660
}
661
662
if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
663
total_size = 1;
664
} else if (jump->u.target < S32_MAX)
665
total_size = 1;
666
else if (jump->u.target <= S52_MAX)
667
total_size = 2;
668
669
size_reduce += 3 - total_size;
670
jump->flags |= total_size << JUMP_SIZE_SHIFT;
671
}
672
673
jump = jump->next;
674
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
675
}
676
677
compiler->size -= size_reduce;
678
}
679
680
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
681
{
682
struct sljit_memory_fragment *buf;
683
sljit_ins *code;
684
sljit_ins *code_ptr;
685
sljit_ins *buf_ptr;
686
sljit_ins *buf_end;
687
sljit_uw word_count;
688
SLJIT_NEXT_DEFINE_TYPES;
689
sljit_sw executable_offset;
690
sljit_uw addr;
691
692
struct sljit_label *label;
693
struct sljit_jump *jump;
694
struct sljit_const *const_;
695
696
CHECK_ERROR_PTR();
697
CHECK_PTR(check_sljit_generate_code(compiler));
698
699
reduce_code_size(compiler);
700
701
code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
702
PTR_FAIL_WITH_EXEC_IF(code);
703
704
reverse_buf(compiler);
705
buf = compiler->buf;
706
707
code_ptr = code;
708
word_count = 0;
709
label = compiler->labels;
710
jump = compiler->jumps;
711
const_ = compiler->consts;
712
SLJIT_NEXT_INIT_TYPES();
713
SLJIT_GET_NEXT_MIN();
714
715
do {
716
buf_ptr = (sljit_ins*)buf->memory;
717
buf_end = buf_ptr + (buf->used_size >> 2);
718
do {
719
*code_ptr = *buf_ptr++;
720
if (next_min_addr == word_count) {
721
SLJIT_ASSERT(!label || label->size >= word_count);
722
SLJIT_ASSERT(!jump || jump->addr >= word_count);
723
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
724
725
/* These structures are ordered by their address. */
726
if (next_min_addr == next_label_size) {
727
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
728
label->size = (sljit_uw)(code_ptr - code);
729
label = label->next;
730
next_label_size = SLJIT_GET_NEXT_SIZE(label);
731
}
732
733
if (next_min_addr == next_jump_addr) {
734
if (!(jump->flags & JUMP_MOV_ADDR)) {
735
word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
736
code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
737
SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
738
} else {
739
word_count += jump->flags >> JUMP_SIZE_SHIFT;
740
addr = (sljit_uw)code_ptr;
741
code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
742
jump->addr = addr;
743
}
744
jump = jump->next;
745
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
746
} else if (next_min_addr == next_const_addr) {
747
const_->addr = (sljit_uw)code_ptr;
748
const_ = const_->next;
749
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
750
}
751
752
SLJIT_GET_NEXT_MIN();
753
}
754
code_ptr++;
755
word_count++;
756
} while (buf_ptr < buf_end);
757
758
buf = buf->next;
759
} while (buf);
760
761
if (label && label->size == word_count) {
762
label->u.addr = (sljit_uw)code_ptr;
763
label->size = (sljit_uw)(code_ptr - code);
764
label = label->next;
765
}
766
767
SLJIT_ASSERT(!label);
768
SLJIT_ASSERT(!jump);
769
SLJIT_ASSERT(!const_);
770
SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
771
772
jump = compiler->jumps;
773
while (jump) {
774
do {
775
if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
776
load_addr_to_reg(jump, executable_offset);
777
break;
778
}
779
780
addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
781
buf_ptr = (sljit_ins *)jump->addr;
782
addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
783
784
if (jump->flags & PATCH_B) {
785
SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
786
buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
787
break;
788
}
789
790
SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
791
if (jump->flags & IS_CALL)
792
buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
793
else
794
buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
795
} while (0);
796
jump = jump->next;
797
}
798
799
compiler->error = SLJIT_ERR_COMPILED;
800
compiler->executable_offset = executable_offset;
801
compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
802
803
code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
804
code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
805
806
SLJIT_CACHE_FLUSH(code, code_ptr);
807
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
808
return code;
809
}
810
811
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
812
{
813
switch (feature_type)
814
{
815
case SLJIT_HAS_FPU:
816
#ifdef SLJIT_IS_FPU_AVAILABLE
817
return (SLJIT_IS_FPU_AVAILABLE) != 0;
818
#else
819
/* Available by default. */
820
return 1;
821
#endif
822
823
case SLJIT_HAS_LASX:
824
return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
825
826
case SLJIT_HAS_SIMD:
827
return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
828
829
case SLJIT_HAS_CLZ:
830
case SLJIT_HAS_CTZ:
831
case SLJIT_HAS_REV:
832
case SLJIT_HAS_ROT:
833
case SLJIT_HAS_PREFETCH:
834
case SLJIT_HAS_COPY_F32:
835
case SLJIT_HAS_COPY_F64:
836
case SLJIT_HAS_ATOMIC:
837
case SLJIT_HAS_MEMORY_BARRIER:
838
return 1;
839
840
default:
841
return 0;
842
}
843
}
844
845
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
846
{
847
SLJIT_UNUSED_ARG(type);
848
849
return 0;
850
}
851
852
/* --------------------------------------------------------------------- */
853
/* Entry, exit */
854
/* --------------------------------------------------------------------- */
855
856
/* Creates an index in data_transfer_insts array. */
857
#define LOAD_DATA 0x01
858
#define WORD_DATA 0x00
859
#define BYTE_DATA 0x02
860
#define HALF_DATA 0x04
861
#define INT_DATA 0x06
862
#define SIGNED_DATA 0x08
863
/* Separates integer and floating point registers */
864
#define GPR_REG 0x0f
865
#define DOUBLE_DATA 0x10
866
#define SINGLE_DATA 0x12
867
868
#define MEM_MASK 0x1f
869
870
#define ARG_TEST 0x00020
871
#define ALT_KEEP_CACHE 0x00040
872
#define CUMULATIVE_OP 0x00080
873
#define IMM_OP 0x00100
874
#define MOVE_OP 0x00200
875
#define SRC2_IMM 0x00400
876
877
#define UNUSED_DEST 0x00800
878
#define REG_DEST 0x01000
879
#define REG1_SOURCE 0x02000
880
#define REG2_SOURCE 0x04000
881
#define SLOW_SRC1 0x08000
882
#define SLOW_SRC2 0x10000
883
#define SLOW_DEST 0x20000
884
#define MEM_USE_TMP2 0x40000
885
886
#define STACK_STORE ST_D
887
#define STACK_LOAD LD_D
888
889
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
890
{
891
if (imm <= I12_MAX && imm >= I12_MIN)
892
return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));
893
894
if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
895
FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
896
return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));
897
} else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {
898
FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
899
FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
900
return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));
901
}
902
FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
903
FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
904
FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));
905
return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
906
}
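
/* Illustration (added for exposition): for imm = 0x0123456789abcdef the
   general path above emits four instructions:
     LU12I_W  dst, 0x89abc          bits 31..12 (sign-extended to 64 bits)
     ORI      dst, dst, 0xdef       bits 11..0
     LU32I_D  dst, 0x34567          bits 51..32
     LU52I_D  dst, dst, 0x012       bits 63..52
   Constants that fit in 12, 32 or 52 bits stop after the first one, two or
   three instructions respectively. */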
907
908
#define STACK_MAX_DISTANCE (-I12_MIN)
909
910
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
911
912
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
913
sljit_s32 options, sljit_s32 arg_types,
914
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
915
{
916
sljit_s32 fscratches;
917
sljit_s32 fsaveds;
918
sljit_s32 i, tmp, offset;
919
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
920
921
CHECK_ERROR();
922
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
923
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
924
925
scratches = ENTER_GET_REGS(scratches);
926
saveds = ENTER_GET_REGS(saveds);
927
fscratches = compiler->fscratches;
928
fsaveds = compiler->fsaveds;
929
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
930
local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
931
932
local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
933
compiler->local_size = local_size;
934
935
if (local_size <= STACK_MAX_DISTANCE) {
936
/* Frequent case. */
937
FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
938
offset = local_size - SSIZE_OF(sw);
939
local_size = 0;
940
} else {
941
FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
942
local_size -= STACK_MAX_DISTANCE;
943
944
if (local_size > STACK_MAX_DISTANCE)
945
FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
946
offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
947
}
948
949
FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
950
951
tmp = SLJIT_S0 - saveds;
952
for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
953
offset -= SSIZE_OF(sw);
954
FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
955
}
956
957
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
958
offset -= SSIZE_OF(sw);
959
FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
960
}
961
962
tmp = SLJIT_FS0 - fsaveds;
963
for (i = SLJIT_FS0; i > tmp; i--) {
964
offset -= SSIZE_OF(f64);
965
FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
966
}
967
968
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
969
offset -= SSIZE_OF(f64);
970
FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
971
}
972
973
if (local_size > STACK_MAX_DISTANCE)
974
FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
975
else if (local_size > 0)
976
FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
977
978
if (options & SLJIT_ENTER_REG_ARG)
979
return SLJIT_SUCCESS;
980
981
arg_types >>= SLJIT_ARG_SHIFT;
982
saved_arg_count = 0;
983
tmp = SLJIT_R0;
984
985
while (arg_types > 0) {
986
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
987
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
988
FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
989
saved_arg_count++;
990
}
991
tmp++;
992
}
993
994
arg_types >>= SLJIT_ARG_SHIFT;
995
}
996
997
return SLJIT_SUCCESS;
998
}
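
/* Frame layout established above (added for exposition): the stack pointer is
   lowered by the 16-byte aligned local_size, the return address is stored at
   the top of the new frame, followed downwards by the saved registers, the
   saved scratch registers and the saved float registers; the space below them
   is the local area addressed relative to SLJIT_SP. */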
999
1000
#undef STACK_MAX_DISTANCE
1001
1002
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1003
sljit_s32 options, sljit_s32 arg_types,
1004
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
1005
{
1006
sljit_s32 fscratches;
1007
sljit_s32 fsaveds;
1008
1009
CHECK_ERROR();
1010
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
1011
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
1012
1013
scratches = ENTER_GET_REGS(scratches);
1014
saveds = ENTER_GET_REGS(saveds);
1015
fscratches = compiler->fscratches;
1016
fsaveds = compiler->fsaveds;
1017
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1018
local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
1019
1020
compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
1021
1022
return SLJIT_SUCCESS;
1023
}
1024
1025
#define STACK_MAX_DISTANCE (-I12_MIN - 16)
1026
1027
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
1028
{
1029
sljit_s32 i, tmp, offset;
1030
sljit_s32 local_size = compiler->local_size;
1031
1032
if (local_size > STACK_MAX_DISTANCE) {
1033
local_size -= STACK_MAX_DISTANCE;
1034
1035
if (local_size > STACK_MAX_DISTANCE) {
1036
FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
1037
FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
1038
} else
1039
FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
1040
1041
local_size = STACK_MAX_DISTANCE;
1042
}
1043
1044
SLJIT_ASSERT(local_size > 0);
1045
1046
offset = local_size - SSIZE_OF(sw);
1047
if (!is_return_to)
1048
FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
1049
1050
tmp = SLJIT_S0 - compiler->saveds;
1051
for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
1052
offset -= SSIZE_OF(sw);
1053
FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1054
}
1055
1056
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1057
offset -= SSIZE_OF(sw);
1058
FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1059
}
1060
1061
tmp = SLJIT_FS0 - compiler->fsaveds;
1062
for (i = SLJIT_FS0; i > tmp; i--) {
1063
offset -= SSIZE_OF(f64);
1064
FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1065
}
1066
1067
for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1068
offset -= SSIZE_OF(f64);
1069
FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1070
}
1071
1072
return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
1073
}
1074
1075
#undef STACK_MAX_DISTANCE
1076
1077
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1078
{
1079
CHECK_ERROR();
1080
CHECK(check_sljit_emit_return_void(compiler));
1081
1082
FAIL_IF(emit_stack_frame_release(compiler, 0));
1083
return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1084
}
1085
1086
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1087
sljit_s32 src, sljit_sw srcw)
1088
{
1089
CHECK_ERROR();
1090
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1091
1092
if (src & SLJIT_MEM) {
1093
ADJUST_LOCAL_OFFSET(src, srcw);
1094
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
1095
src = TMP_REG1;
1096
srcw = 0;
1097
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1098
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
1099
src = TMP_REG1;
1100
srcw = 0;
1101
}
1102
1103
FAIL_IF(emit_stack_frame_release(compiler, 1));
1104
1105
SLJIT_SKIP_CHECKS(compiler);
1106
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1107
}
1108
1109
/* --------------------------------------------------------------------- */
1110
/* Operators */
1111
/* --------------------------------------------------------------------- */
1112
1113
static const sljit_ins data_transfer_insts[16 + 4] = {
1114
/* u w s */ ST_D /* st.d */,
1115
/* u w l */ LD_D /* ld.d */,
1116
/* u b s */ ST_B /* st.b */,
1117
/* u b l */ LD_BU /* ld.bu */,
1118
/* u h s */ ST_H /* st.h */,
1119
/* u h l */ LD_HU /* ld.hu */,
1120
/* u i s */ ST_W /* st.w */,
1121
/* u i l */ LD_WU /* ld.wu */,
1122
1123
/* s w s */ ST_D /* st.d */,
1124
/* s w l */ LD_D /* ld.d */,
1125
/* s b s */ ST_B /* st.b */,
1126
/* s b l */ LD_B /* ld.b */,
1127
/* s h s */ ST_H /* st.h */,
1128
/* s h l */ LD_H /* ld.h */,
1129
/* s i s */ ST_W /* st.w */,
1130
/* s i l */ LD_W /* ld.w */,
1131
1132
/* d s */ FST_D /* fst.d */,
1133
/* d l */ FLD_D /* fld.d */,
1134
/* s s */ FST_S /* fst.s */,
1135
/* s l */ FLD_S /* fld.s */,
1136
};
1137
1138
static const sljit_ins data_transfer_insts_x[16 + 4] = {
1139
/* u w s */ STX_D /* stx.d */,
1140
/* u w l */ LDX_D /* ldx.d */,
1141
/* u b s */ STX_B /* stx.b */,
1142
/* u b l */ LDX_BU /* ldx.bu */,
1143
/* u h s */ STX_H /* stx.h */,
1144
/* u h l */ LDX_HU /* ldx.hu */,
1145
/* u i s */ STX_W /* stx.w */,
1146
/* u i l */ LDX_WU /* ldx.wu */,
1147
1148
/* s w s */ STX_D /* stx.d */,
1149
/* s w l */ LDX_D /* ldx.d */,
1150
/* s b s */ STX_B /* stx.b */,
1151
/* s b l */ LDX_B /* ldx.b */,
1152
/* s h s */ STX_H /* stx.h */,
1153
/* s h l */ LDX_H /* ldx.h */,
1154
/* s i s */ STX_W /* stx.w */,
1155
/* s i l */ LDX_W /* ldx.w */,
1156
1157
/* d s */ FSTX_D /* fstx.d */,
1158
/* d l */ FLDX_D /* fldx.d */,
1159
/* s s */ FSTX_S /* fstx.s */,
1160
/* s l */ FLDX_S /* fldx.s */,
1161
};
1162
1163
static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1164
{
1165
sljit_ins ins;
1166
sljit_s32 base = arg & REG_MASK;
1167
1168
SLJIT_ASSERT(arg & SLJIT_MEM);
1169
1170
if (arg & OFFS_REG_MASK) {
1171
sljit_s32 offs = OFFS_REG(arg);
1172
1173
SLJIT_ASSERT(!argw);
1174
ins = data_transfer_insts_x[flags & MEM_MASK] |
1175
((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1176
RJ(base) | RK(offs);
1177
} else {
1178
SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);
1179
1180
ins = data_transfer_insts[flags & MEM_MASK] |
1181
((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1182
RJ(base) | IMM_I12(argw);
1183
}
1184
return push_inst(compiler, ins);
1185
}
1186
1187
/* Can perform an operation using at most 1 instruction. */
1188
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1189
{
1190
SLJIT_ASSERT(arg & SLJIT_MEM);
1191
1192
/* argw == 0 (ldx/stx rd, rj, rk) can be used.
1193
* argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1194
if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1195
/* Works for both absolute and relative addresses. */
1196
if (SLJIT_UNLIKELY(flags & ARG_TEST))
1197
return 1;
1198
1199
FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1200
return -1;
1201
}
1202
return 0;
1203
}
1204
1205
#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
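
/* Illustration (added for exposition): TO_ARGW_HI() rounds an offset to the
   4KB base whose remainder still fits a signed 12-bit immediate, e.g.
   TO_ARGW_HI(0x12fff) == 0x13000 (remainder -1) and
   TO_ARGW_HI(0x12340) == 0x12000 (remainder 0x340). */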
1206
1207
/* See getput_arg below.
1208
Note: can_cache is called only for binary operators. */
1209
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1210
{
1211
SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1212
1213
if (arg & OFFS_REG_MASK)
1214
return 0;
1215
1216
if (arg == next_arg) {
1217
if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1218
|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1219
return 1;
1220
return 0;
1221
}
1222
1223
return 0;
1224
}
1225
1226
/* Emit the necessary instructions. See can_cache above. */
1227
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1228
{
1229
sljit_s32 base = arg & REG_MASK;
1230
sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1231
sljit_sw offset;
1232
1233
SLJIT_ASSERT(arg & SLJIT_MEM);
1234
if (!(next_arg & SLJIT_MEM)) {
1235
next_arg = 0;
1236
next_argw = 0;
1237
}
1238
1239
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1240
argw &= 0x3;
1241
1242
if (SLJIT_UNLIKELY(argw))
1243
FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1244
return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1245
}
1246
1247
if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1248
return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1249
1250
if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1251
offset = argw - compiler->cache_argw;
1252
} else {
1253
sljit_sw argw_hi = TO_ARGW_HI(argw);
1254
compiler->cache_arg = SLJIT_MEM;
1255
1256
if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1257
FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1258
compiler->cache_argw = argw;
1259
offset = 0;
1260
} else {
1261
FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1262
compiler->cache_argw = argw_hi;
1263
offset = argw & 0xfff;
1264
argw = argw_hi;
1265
}
1266
}
1267
1268
if (!base)
1269
return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1270
1271
if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1272
compiler->cache_arg = arg;
1273
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1274
return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1275
}
1276
1277
if (!offset)
1278
return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1279
1280
FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1281
return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1282
}
1283
1284
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1285
{
1286
sljit_s32 base = arg & REG_MASK;
1287
sljit_s32 tmp_r = TMP_REG1;
1288
1289
if (getput_arg_fast(compiler, flags, reg, arg, argw))
1290
return compiler->error;
1291
1292
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1293
tmp_r = reg;
1294
1295
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1296
argw &= 0x3;
1297
1298
if (SLJIT_UNLIKELY(argw))
1299
FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1300
return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1301
} else {
1302
FAIL_IF(load_immediate(compiler, tmp_r, argw));
1303
1304
if (base != 0)
1305
return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1306
return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
1307
}
1308
}
1309
1310
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1311
{
1312
if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1313
return compiler->error;
1314
return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1315
}
1316
1317
#define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? (v) : (32 + (v))))
1318
1319
/* andi/ori/xori are zero-extended */
1320
#define EMIT_LOGICAL(op_imm, op_reg) \
1321
if (flags & SRC2_IMM) { \
1322
if (op & SLJIT_SET_Z) {\
1323
FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1324
FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
1325
} \
1326
if (!(flags & UNUSED_DEST)) { \
1327
if (dst == src1) { \
1328
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1329
FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
1330
} else { \
1331
FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1332
FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \
1333
} \
1334
} \
1335
} else { \
1336
if (op & SLJIT_SET_Z) \
1337
FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
1338
if (!(flags & UNUSED_DEST)) \
1339
FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \
1340
} \
1341
while (0)
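
/* Note (added for exposition): since andi/ori/xori zero-extend their 12-bit
   immediate, EMIT_LOGICAL() first materializes the sign-extended constant with
   addi.d and then uses the register form of the operation, so negative
   immediates such as -1 behave as expected. */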
1342
1343
#define EMIT_SHIFT(imm, reg) \
1344
op_imm = (imm); \
1345
op_reg = (reg)
1346
1347
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1348
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1349
{
1350
sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1351
sljit_ins op_imm, op_reg;
1352
sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1353
1354
switch (GET_OPCODE(op)) {
1355
case SLJIT_MOV:
1356
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1357
if (dst != src2)
1358
return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1359
return SLJIT_SUCCESS;
1360
1361
case SLJIT_MOV_U8:
1362
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1363
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1364
return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1365
SLJIT_ASSERT(dst == src2);
1366
return SLJIT_SUCCESS;
1367
1368
case SLJIT_MOV_S8:
1369
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1370
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1371
return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1372
SLJIT_ASSERT(dst == src2);
1373
return SLJIT_SUCCESS;
1374
1375
case SLJIT_MOV_U16:
1376
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1377
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1378
return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1379
SLJIT_ASSERT(dst == src2);
1380
return SLJIT_SUCCESS;
1381
1382
case SLJIT_MOV_S16:
1383
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1384
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1385
return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1386
SLJIT_ASSERT(dst == src2);
1387
return SLJIT_SUCCESS;
1388
1389
case SLJIT_MOV_U32:
1390
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1391
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1392
return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1393
SLJIT_ASSERT(dst == src2);
1394
return SLJIT_SUCCESS;
1395
1396
case SLJIT_MOV_S32:
1397
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1398
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1399
return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1400
SLJIT_ASSERT(dst == src2);
1401
return SLJIT_SUCCESS;
1402
1403
case SLJIT_CLZ:
1404
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1405
return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1406
1407
case SLJIT_CTZ:
1408
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1409
return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1410
1411
case SLJIT_REV:
1412
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1413
return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1414
1415
case SLJIT_REV_S16:
1416
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1417
FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1418
return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1419
1420
case SLJIT_REV_U16:
1421
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1422
FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1423
return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1424
1425
case SLJIT_REV_S32:
1426
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1427
FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1428
return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1429
1430
case SLJIT_REV_U32:
1431
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1432
FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1433
return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1434
1435
case SLJIT_ADD:
1436
/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1437
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1438
carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1439
1440
if (flags & SRC2_IMM) {
1441
if (is_overflow) {
1442
if (src2 >= 0)
1443
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1444
else {
1445
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1446
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1447
}
1448
} else if (op & SLJIT_SET_Z)
1449
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1450
1451
/* Only the zero flag is needed. */
1452
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1453
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1454
} else {
1455
if (is_overflow)
1456
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1457
else if (op & SLJIT_SET_Z)
1458
FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1459
1460
if (is_overflow || carry_src_r != 0) {
1461
if (src1 != dst)
1462
carry_src_r = (sljit_s32)src1;
1463
else if (src2 != dst)
1464
carry_src_r = (sljit_s32)src2;
1465
else {
1466
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1467
carry_src_r = OTHER_FLAG;
1468
}
1469
}
1470
1471
/* Only the zero flag is needed. */
1472
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1473
FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1474
}
1475
1476
/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1477
if (is_overflow || carry_src_r != 0) {
1478
if (flags & SRC2_IMM)
1479
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1480
else
1481
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1482
}
1483
1484
if (!is_overflow)
1485
return SLJIT_SUCCESS;
1486
1487
FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1488
if (op & SLJIT_SET_Z)
1489
FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1490
FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1491
return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1492
1493
case SLJIT_ADDC:
1494
carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1495
1496
if (flags & SRC2_IMM) {
1497
FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1498
} else {
1499
if (carry_src_r != 0) {
1500
if (src1 != dst)
1501
carry_src_r = (sljit_s32)src1;
1502
else if (src2 != dst)
1503
carry_src_r = (sljit_s32)src2;
1504
else {
1505
FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1506
carry_src_r = EQUAL_FLAG;
1507
}
1508
}
1509
1510
FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1511
}
1512
1513
/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1514
if (carry_src_r != 0) {
1515
if (flags & SRC2_IMM)
1516
FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1517
else
1518
FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1519
}
1520
1521
FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1522
1523
if (carry_src_r == 0)
1524
return SLJIT_SUCCESS;
1525
1526
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1527
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1528
/* Set carry flag. */
1529
return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1530
1531
case SLJIT_SUB:
1532
if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1533
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1534
src2 = TMP_REG2;
1535
flags &= ~SRC2_IMM;
1536
}
1537
1538
is_handled = 0;
1539
1540
if (flags & SRC2_IMM) {
1541
if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1542
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1543
is_handled = 1;
1544
} else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1545
FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1546
is_handled = 1;
1547
}
1548
}
1549
1550
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1551
is_handled = 1;
1552
1553
if (flags & SRC2_IMM) {
1554
reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1555
FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
1556
src2 = reg;
1557
flags &= ~SRC2_IMM;
1558
}
1559
1560
switch (GET_FLAG_TYPE(op)) {
1561
case SLJIT_LESS:
1562
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1563
break;
1564
case SLJIT_GREATER:
1565
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1566
break;
1567
case SLJIT_SIG_LESS:
1568
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1569
break;
1570
case SLJIT_SIG_GREATER:
1571
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1572
break;
1573
}
1574
}
1575
1576
if (is_handled) {
1577
if (flags & SRC2_IMM) {
1578
if (op & SLJIT_SET_Z)
1579
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1580
if (!(flags & UNUSED_DEST))
1581
return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1582
} else {
1583
if (op & SLJIT_SET_Z)
1584
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1585
if (!(flags & UNUSED_DEST))
1586
return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1587
}
1588
return SLJIT_SUCCESS;
1589
}
1590
1591
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1592
is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1593
1594
if (flags & SRC2_IMM) {
1595
if (is_overflow) {
1596
if (src2 >= 0)
1597
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1598
else {
1599
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1600
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1601
}
1602
} else if (op & SLJIT_SET_Z)
1603
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1604
1605
if (is_overflow || is_carry)
1606
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1607
1608
/* Only the zero flag is needed. */
1609
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1610
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1611
} else {
1612
if (is_overflow)
1613
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1614
else if (op & SLJIT_SET_Z)
1615
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1616
1617
if (is_overflow || is_carry)
1618
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1619
1620
/* Only the zero flag is needed. */
1621
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1622
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1623
}
1624
1625
if (!is_overflow)
1626
return SLJIT_SUCCESS;
1627
1628
FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1629
if (op & SLJIT_SET_Z)
1630
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1631
FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1632
return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1633
1634
case SLJIT_SUBC:
1635
if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1636
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1637
src2 = TMP_REG2;
1638
flags &= ~SRC2_IMM;
1639
}
1640
1641
is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1642
1643
if (flags & SRC2_IMM) {
1644
if (is_carry)
1645
FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1646
1647
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1648
} else {
1649
if (is_carry)
1650
FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1651
1652
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1653
}
1654
1655
if (is_carry)
1656
FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1657
1658
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1659
1660
if (!is_carry)
1661
return SLJIT_SUCCESS;
1662
1663
return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1664
1665
case SLJIT_MUL:
1666
SLJIT_ASSERT(!(flags & SRC2_IMM));
1667
1668
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1669
return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1670
1671
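/* Overflow check: compute the full-width product and compare its high part with
the sign extension of the low result. For 32 bit operations the 64 bit product
(MUL_D) is compared with the sign extended 32 bit result (MUL_W); for 64 bit
operations MULH_D supplies the high word and SRAI_D by 63 the expected sign
bits. A non-zero difference left in OTHER_FLAG signals overflow. */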
if (op & SLJIT_32) {
1672
FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1673
FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1674
return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1675
}
1676
1677
FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1678
FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1679
FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(63)));
1680
return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1681
1682
case SLJIT_AND:
1683
EMIT_LOGICAL(ANDI, AND);
1684
return SLJIT_SUCCESS;
1685
1686
case SLJIT_OR:
1687
EMIT_LOGICAL(ORI, OR);
1688
return SLJIT_SUCCESS;
1689
1690
case SLJIT_XOR:
1691
EMIT_LOGICAL(XORI, XOR);
1692
return SLJIT_SUCCESS;
1693
1694
case SLJIT_SHL:
1695
case SLJIT_MSHL:
1696
if (op & SLJIT_32) {
1697
EMIT_SHIFT(SLLI_W, SLL_W);
1698
} else {
1699
EMIT_SHIFT(SLLI_D, SLL_D);
1700
}
1701
break;
1702
1703
case SLJIT_LSHR:
1704
case SLJIT_MLSHR:
1705
if (op & SLJIT_32) {
1706
EMIT_SHIFT(SRLI_W, SRL_W);
1707
} else {
1708
EMIT_SHIFT(SRLI_D, SRL_D);
1709
}
1710
break;
1711
1712
case SLJIT_ASHR:
1713
case SLJIT_MASHR:
1714
if (op & SLJIT_32) {
1715
EMIT_SHIFT(SRAI_W, SRA_W);
1716
} else {
1717
EMIT_SHIFT(SRAI_D, SRA_D);
1718
}
1719
break;
1720
1721
case SLJIT_ROTL:
1722
case SLJIT_ROTR:
1723
if (flags & SRC2_IMM) {
1724
SLJIT_ASSERT(src2 != 0);
1725
1726
if (GET_OPCODE(op) == SLJIT_ROTL)
1727
src2 = word_size - src2;
1728
return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1729
}
1730
1731
if (src2 == TMP_ZERO) {
1732
if (dst != src1)
1733
return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1734
return SLJIT_SUCCESS;
1735
}
1736
1737
if (GET_OPCODE(op) == SLJIT_ROTL) {
1738
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1739
src2 = OTHER_FLAG;
1740
}
1741
return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1742
1743
default:
1744
SLJIT_UNREACHABLE();
1745
return SLJIT_SUCCESS;
1746
}
1747
1748
if (flags & SRC2_IMM) {
1749
if (op & SLJIT_SET_Z)
1750
FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1751
1752
if (flags & UNUSED_DEST)
1753
return SLJIT_SUCCESS;
1754
return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1755
}
1756
1757
if (op & SLJIT_SET_Z)
1758
FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1759
1760
if (flags & UNUSED_DEST)
1761
return SLJIT_SUCCESS;
1762
return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1763
}
1764
1765
#undef IMM_EXTEND
1766
1767
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1768
sljit_s32 dst, sljit_sw dstw,
1769
sljit_s32 src1, sljit_sw src1w,
1770
sljit_s32 src2, sljit_sw src2w)
1771
{
1772
/* arg1 goes to TMP_REG1 or src reg
1773
arg2 goes to TMP_REG2, imm or src reg
1774
TMP_REG3 can be used for caching
1775
result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
1776
sljit_s32 dst_r = TMP_REG2;
1777
sljit_s32 src1_r;
1778
sljit_sw src2_r = 0;
1779
sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;
1780
1781
if (!(flags & ALT_KEEP_CACHE)) {
1782
compiler->cache_arg = 0;
1783
compiler->cache_argw = 0;
1784
}
1785
1786
if (dst == 0) {
1787
SLJIT_ASSERT(HAS_FLAGS(op));
1788
flags |= UNUSED_DEST;
1789
dst = TMP_REG2;
1790
} else if (FAST_IS_REG(dst)) {
1791
dst_r = dst;
1792
flags |= REG_DEST;
1793
if (flags & MOVE_OP)
1794
src2_tmp_reg = dst_r;
1795
} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
1796
flags |= SLOW_DEST;
1797
1798
if (flags & IMM_OP) {
1799
if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
1800
flags |= SRC2_IMM;
1801
src2_r = src2w;
1802
} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
1803
flags |= SRC2_IMM;
1804
src2_r = src1w;
1805
1806
/* And swap arguments. */
1807
src1 = src2;
1808
src1w = src2w;
1809
src2 = SLJIT_IMM;
1810
/* src2w = src2_r unneeded. */
1811
}
1812
}
1813
1814
/* Source 1. */
1815
if (FAST_IS_REG(src1)) {
1816
src1_r = src1;
1817
flags |= REG1_SOURCE;
1818
} else if (src1 == SLJIT_IMM) {
1819
if (src1w) {
1820
FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1821
src1_r = TMP_REG1;
1822
}
1823
else
1824
src1_r = TMP_ZERO;
1825
} else {
1826
if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
1827
FAIL_IF(compiler->error);
1828
else
1829
flags |= SLOW_SRC1;
1830
src1_r = TMP_REG1;
1831
}
1832
1833
/* Source 2. */
1834
if (FAST_IS_REG(src2)) {
1835
src2_r = src2;
1836
flags |= REG2_SOURCE;
1837
if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
1838
dst_r = (sljit_s32)src2_r;
1839
} else if (src2 == SLJIT_IMM) {
1840
if (!(flags & SRC2_IMM)) {
1841
if (src2w) {
1842
FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
1843
src2_r = src2_tmp_reg;
1844
} else {
1845
src2_r = TMP_ZERO;
1846
if (flags & MOVE_OP) {
1847
if (dst & SLJIT_MEM)
1848
dst_r = 0;
1849
else
1850
op = SLJIT_MOV;
1851
}
1852
}
1853
}
1854
} else {
1855
if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
1856
FAIL_IF(compiler->error);
1857
else
1858
flags |= SLOW_SRC2;
1859
1860
src2_r = src2_tmp_reg;
1861
}
1862
1863
if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1864
SLJIT_ASSERT(src2_r == TMP_REG2);
1865
if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
1866
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1867
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
1868
} else {
1869
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1870
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1871
}
1872
}
1873
else if (flags & SLOW_SRC1)
1874
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1875
else if (flags & SLOW_SRC2)
1876
FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));
1877
1878
FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1879
1880
if (dst & SLJIT_MEM) {
1881
if (!(flags & SLOW_DEST)) {
1882
getput_arg_fast(compiler, flags, dst_r, dst, dstw);
1883
return compiler->error;
1884
}
1885
return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
1886
}
1887
1888
return SLJIT_SUCCESS;
1889
}
1890
1891
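/* Zero operand operations. LMUL_UW / LMUL_SW produce the 128 bit product of
R0 and R1: R1 is first saved to TMP_REG1, the high half is written to R1 with
MULH, then the low half to R0 using the saved copy. DIVMOD similarly saves the
dividend to TMP_REG1 so the remainder can still be computed after the quotient
has overwritten R0. */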
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1892
{
1893
CHECK_ERROR();
1894
CHECK(check_sljit_emit_op0(compiler, op));
1895
1896
switch (GET_OPCODE(op)) {
1897
case SLJIT_BREAKPOINT:
1898
return push_inst(compiler, BREAK);
1899
case SLJIT_NOP:
1900
return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
1901
case SLJIT_LMUL_UW:
1902
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1903
FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1904
return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1905
case SLJIT_LMUL_SW:
1906
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1907
FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1908
return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1909
case SLJIT_DIVMOD_UW:
1910
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1911
FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1912
return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1913
case SLJIT_DIVMOD_SW:
1914
FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1915
FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1916
return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1917
case SLJIT_DIV_UW:
1918
return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1919
case SLJIT_DIV_SW:
1920
return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1921
case SLJIT_MEMORY_BARRIER:
1922
return push_inst(compiler, DBAR);
1923
case SLJIT_ENDBR:
1924
case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1925
return SLJIT_SUCCESS;
1926
}
1927
1928
SLJIT_UNREACHABLE();
1929
return SLJIT_ERR_UNSUPPORTED;
1930
}
1931
1932
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1933
sljit_s32 dst, sljit_sw dstw,
1934
sljit_s32 src, sljit_sw srcw)
1935
{
1936
sljit_s32 flags = 0;
1937
1938
CHECK_ERROR();
1939
CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1940
ADJUST_LOCAL_OFFSET(dst, dstw);
1941
ADJUST_LOCAL_OFFSET(src, srcw);
1942
1943
if (op & SLJIT_32)
1944
flags = INT_DATA | SIGNED_DATA;
1945
1946
switch (GET_OPCODE(op)) {
1947
case SLJIT_MOV:
1948
case SLJIT_MOV_P:
1949
return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
1950
1951
case SLJIT_MOV_U32:
1952
return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1953
1954
case SLJIT_MOV_S32:
1955
/* Logical operators have no W variant, so sign extended input is necessary for them. */
1956
case SLJIT_MOV32:
1957
return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1958
1959
case SLJIT_MOV_U8:
1960
return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1961
1962
case SLJIT_MOV_S8:
1963
return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1964
1965
case SLJIT_MOV_U16:
1966
return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1967
1968
case SLJIT_MOV_S16:
1969
return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1970
1971
case SLJIT_CLZ:
1972
case SLJIT_CTZ:
1973
case SLJIT_REV:
1974
return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);
1975
1976
case SLJIT_REV_U16:
1977
case SLJIT_REV_S16:
1978
return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1979
1980
case SLJIT_REV_U32:
1981
case SLJIT_REV_S32:
1982
return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1983
}
1984
1985
SLJIT_UNREACHABLE();
1986
return SLJIT_SUCCESS;
1987
}
1988
1989
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1990
sljit_s32 dst, sljit_sw dstw,
1991
sljit_s32 src1, sljit_sw src1w,
1992
sljit_s32 src2, sljit_sw src2w)
1993
{
1994
sljit_s32 flags = 0;
1995
1996
CHECK_ERROR();
1997
CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1998
ADJUST_LOCAL_OFFSET(dst, dstw);
1999
ADJUST_LOCAL_OFFSET(src1, src1w);
2000
ADJUST_LOCAL_OFFSET(src2, src2w);
2001
2002
if (op & SLJIT_32) {
2003
flags |= INT_DATA | SIGNED_DATA;
2004
if (src1 == SLJIT_IMM)
2005
src1w = (sljit_s32)src1w;
2006
if (src2 == SLJIT_IMM)
2007
src2w = (sljit_s32)src2w;
2008
}
2009
2010
2011
switch (GET_OPCODE(op)) {
2012
case SLJIT_ADD:
2013
case SLJIT_ADDC:
2014
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
2015
return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2016
2017
case SLJIT_SUB:
2018
case SLJIT_SUBC:
2019
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
2020
return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2021
2022
case SLJIT_MUL:
2023
compiler->status_flags_state = 0;
2024
return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
2025
2026
case SLJIT_AND:
2027
case SLJIT_OR:
2028
case SLJIT_XOR:
2029
return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2030
2031
case SLJIT_SHL:
2032
case SLJIT_MSHL:
2033
case SLJIT_LSHR:
2034
case SLJIT_MLSHR:
2035
case SLJIT_ASHR:
2036
case SLJIT_MASHR:
2037
case SLJIT_ROTL:
2038
case SLJIT_ROTR:
2039
if (src2 == SLJIT_IMM) {
2040
if (op & SLJIT_32)
2041
src2w &= 0x1f;
2042
else
2043
src2w &= 0x3f;
2044
}
2045
2046
return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2047
}
2048
2049
SLJIT_UNREACHABLE();
2050
return SLJIT_SUCCESS;
2051
}
2052
2053
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2054
sljit_s32 src1, sljit_sw src1w,
2055
sljit_s32 src2, sljit_sw src2w)
2056
{
2057
CHECK_ERROR();
2058
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2059
2060
SLJIT_SKIP_CHECKS(compiler);
2061
return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
2062
}
2063
2064
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2065
sljit_s32 dst_reg,
2066
sljit_s32 src1, sljit_sw src1w,
2067
sljit_s32 src2, sljit_sw src2w)
2068
{
2069
CHECK_ERROR();
2070
CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2071
2072
switch (GET_OPCODE(op)) {
2073
case SLJIT_MULADD:
2074
SLJIT_SKIP_CHECKS(compiler);
2075
FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
2076
return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
2077
}
2078
2079
return SLJIT_SUCCESS;
2080
}
2081
2082
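/* Shifts src1_reg and fills the vacated bits from src2_reg (a funnel shift).
Equal source registers degenerate into a rotate. Immediate amounts are expanded
into a shift / opposite-shift / OR sequence. Without SLJIT_SHIFT_INTO_NON_ZERO
the second operand is pre-shifted by one, so a zero shift amount still yields
the value of src1_reg. */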
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2083
sljit_s32 dst_reg,
2084
sljit_s32 src1_reg,
2085
sljit_s32 src2_reg,
2086
sljit_s32 src3, sljit_sw src3w)
2087
{
2088
sljit_s32 is_left;
2089
sljit_ins ins1, ins2, ins3;
2090
sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2091
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2092
2093
2094
CHECK_ERROR();
2095
CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2096
2097
is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2098
2099
if (src1_reg == src2_reg) {
2100
SLJIT_SKIP_CHECKS(compiler);
2101
return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
2102
}
2103
2104
ADJUST_LOCAL_OFFSET(src3, src3w);
2105
2106
if (src3 == SLJIT_IMM) {
2107
src3w &= bit_length - 1;
2108
2109
if (src3w == 0)
2110
return SLJIT_SUCCESS;
2111
2112
if (is_left) {
2113
ins1 = INST(SLLI, op) | IMM_I12(src3w);
2114
src3w = bit_length - src3w;
2115
ins2 = INST(SRLI, op) | IMM_I12(src3w);
2116
} else {
2117
ins1 = INST(SRLI, op) | IMM_I12(src3w);
2118
src3w = bit_length - src3w;
2119
ins2 = INST(SLLI, op) | IMM_I12(src3w);
2120
}
2121
2122
FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
2123
FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
2124
return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2125
}
2126
2127
if (src3 & SLJIT_MEM) {
2128
FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
2129
src3 = TMP_REG2;
2130
} else if (dst_reg == src3) {
2131
push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0));
2132
src3 = TMP_REG2;
2133
}
2134
2135
if (is_left) {
2136
ins1 = INST(SLL, op);
2137
ins2 = INST(SRLI, op);
2138
ins3 = INST(SRL, op);
2139
} else {
2140
ins1 = INST(SRL, op);
2141
ins2 = INST(SLLI, op);
2142
ins3 = INST(SLL, op);
2143
}
2144
2145
FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
2146
2147
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
2148
FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
2149
FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
2150
src2_reg = TMP_REG1;
2151
} else
2152
FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
2153
2154
FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
2155
return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2156
}
2157
2158
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2159
sljit_s32 src, sljit_sw srcw)
2160
{
2161
sljit_s32 base = src & REG_MASK;
2162
2163
CHECK_ERROR();
2164
CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2165
ADJUST_LOCAL_OFFSET(src, srcw);
2166
2167
switch (op) {
2168
case SLJIT_FAST_RETURN:
2169
if (FAST_IS_REG(src))
2170
FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
2171
else
2172
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
2173
2174
return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2175
case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2176
return SLJIT_SUCCESS;
2177
case SLJIT_PREFETCH_L1:
2178
case SLJIT_PREFETCH_L2:
2179
case SLJIT_PREFETCH_L3:
2180
case SLJIT_PREFETCH_ONCE:
2181
if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
2182
srcw &= 0x3;
2183
if (SLJIT_UNLIKELY(srcw))
2184
FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
2185
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2186
} else {
2187
if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2188
return push_inst(compiler, PRELD | RJ(base) | IMM_I12(srcw));
2189
2190
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2191
if (base != 0)
2192
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2193
}
2194
return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2195
}
2196
return SLJIT_SUCCESS;
2197
}
2198
2199
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2200
sljit_s32 dst, sljit_sw dstw)
2201
{
2202
sljit_s32 dst_r;
2203
2204
CHECK_ERROR();
2205
CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2206
ADJUST_LOCAL_OFFSET(dst, dstw);
2207
2208
switch (op) {
2209
case SLJIT_FAST_ENTER:
2210
if (FAST_IS_REG(dst))
2211
return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2212
2213
SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2214
break;
2215
case SLJIT_GET_RETURN_ADDRESS:
2216
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2217
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2218
break;
2219
}
2220
2221
if (dst & SLJIT_MEM)
2222
return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2223
2224
return SLJIT_SUCCESS;
2225
}
2226
2227
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2228
{
2229
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2230
2231
if (type == SLJIT_GP_REGISTER)
2232
return reg_map[reg];
2233
2234
if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
2235
return -1;
2236
2237
return freg_map[reg];
2238
}
2239
2240
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2241
void *instruction, sljit_u32 size)
2242
{
2243
SLJIT_UNUSED_ARG(size);
2244
CHECK_ERROR();
2245
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2246
2247
return push_inst(compiler, *(sljit_ins*)instruction);
2248
}
2249
2250
/* --------------------------------------------------------------------- */
2251
/* Floating point operators */
2252
/* --------------------------------------------------------------------- */
2253
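/* Condition codes for FCMP.cond.fmt. SET_COND places the condition in the
instruction's condition field (bit 15); the comments below list the relations
(unordered / equal / less-than / greater-than) for which the condition is true.
The result is written to the condition flag selected by FCD and read back into
a general register with MOVCF2GR. */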
#define SET_COND(cond) (sljit_ins)(cond << 15)
2254
2255
#define COND_CUN SET_COND(0x8) /* UN */
2256
#define COND_CEQ SET_COND(0x4) /* EQ */
2257
#define COND_CUEQ SET_COND(0xc) /* UN EQ */
2258
#define COND_CLT SET_COND(0x2) /* LT */
2259
#define COND_CULT SET_COND(0xa) /* UN LT */
2260
#define COND_CLE SET_COND(0x6) /* LT EQ */
2261
#define COND_CULE SET_COND(0xe) /* UN LT EQ */
2262
#define COND_CNE SET_COND(0x10) /* GT LT */
2263
#define COND_CUNE SET_COND(0x18) /* UN GT LT */
2264
#define COND_COR SET_COND(0x14) /* GT LT EQ */
2265
2266
#define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)
2267
#define FCD(cd) (sljit_ins)(cd & 0x7)
2268
#define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)
2269
#define FCA(ca) (sljit_ins)((ca & 0x7) << 15)
2270
#define F_OTHER_FLAG 1
2271
2272
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
2273
2274
/* Convert to integer, rounding toward zero. */
2275
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2276
sljit_s32 dst, sljit_sw dstw,
2277
sljit_s32 src, sljit_sw srcw)
2278
{
2279
sljit_ins inst;
2280
sljit_u32 word_data = 0;
2281
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2282
2283
switch (GET_OPCODE(op))
2284
{
2285
case SLJIT_CONV_SW_FROM_F64:
2286
word_data = 1;
2287
inst = FINST(FTINTRZ_L, op);
2288
break;
2289
case SLJIT_CONV_S32_FROM_F64:
2290
inst = FINST(FTINTRZ_W, op);
2291
break;
2292
default:
2293
inst = BREAK;
2294
SLJIT_UNREACHABLE();
2295
}
2296
2297
if (src & SLJIT_MEM) {
2298
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2299
src = TMP_FREG1;
2300
}
2301
2302
FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2303
FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2304
2305
if (dst & SLJIT_MEM)
2306
return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2307
return SLJIT_SUCCESS;
2308
}
2309
2310
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2311
sljit_s32 dst, sljit_sw dstw,
2312
sljit_s32 src, sljit_sw srcw)
2313
{
2314
sljit_ins inst;
2315
sljit_u32 word_data = 0;
2316
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2317
2318
switch (GET_OPCODE(op))
2319
{
2320
case SLJIT_CONV_F64_FROM_SW:
2321
word_data = 1;
2322
inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2323
break;
2324
case SLJIT_CONV_F64_FROM_S32:
2325
inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2326
break;
2327
default:
2328
inst = BREAK;
2329
SLJIT_UNREACHABLE();
2330
}
2331
2332
if (src & SLJIT_MEM) {
2333
FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2334
src = TMP_REG1;
2335
} else if (src == SLJIT_IMM) {
2336
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2337
srcw = (sljit_s32)srcw;
2338
2339
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2340
src = TMP_REG1;
2341
}
2342
FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2343
FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2344
2345
if (dst & SLJIT_MEM)
2346
return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2347
return SLJIT_SUCCESS;
2348
}
2349
2350
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2351
sljit_s32 dst, sljit_sw dstw,
2352
sljit_s32 src, sljit_sw srcw)
2353
{
2354
return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2355
}
2356
2357
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2358
sljit_s32 dst, sljit_sw dstw,
2359
sljit_s32 src, sljit_sw srcw)
2360
{
2361
sljit_ins inst;
2362
sljit_u32 word_data = 0;
2363
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2364
2365
switch (GET_OPCODE(op))
2366
{
2367
case SLJIT_CONV_F64_FROM_UW:
2368
word_data = 1;
2369
inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2370
break;
2371
case SLJIT_CONV_F64_FROM_U32:
2372
inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2373
break;
2374
default:
2375
inst = BREAK;
2376
SLJIT_UNREACHABLE();
2377
}
2378
2379
if (src & SLJIT_MEM) {
2380
FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2381
src = TMP_REG1;
2382
} else if (src == SLJIT_IMM) {
2383
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2384
srcw = (sljit_u32)srcw;
2385
2386
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2387
src = TMP_REG1;
2388
}
2389
2390
if (!word_data)
2391
FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2392
2393
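/* Unsigned to float conversion: values whose top bit is set cannot be converted
directly with the signed FFINT instructions. The BLT below branches to the slow
path when the value is negative as a signed integer; there the value is halved
(OR-ing back the dropped low bit to keep the rounding direction), converted, and
doubled again with FADD. The I16(4) and I26(7) immediates are instruction
offsets over the two paths. */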
FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2394
2395
FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2396
FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2397
FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2398
2399
FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2400
FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2401
FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2402
FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2403
FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2404
FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2405
2406
if (dst & SLJIT_MEM)
2407
return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2408
return SLJIT_SUCCESS;
2409
}
2410
2411
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2412
sljit_s32 src1, sljit_sw src1w,
2413
sljit_s32 src2, sljit_sw src2w)
2414
{
2415
if (src1 & SLJIT_MEM) {
2416
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2417
src1 = TMP_FREG1;
2418
}
2419
2420
if (src2 & SLJIT_MEM) {
2421
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2422
src2 = TMP_FREG2;
2423
}
2424
2425
FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2426
2427
switch (GET_FLAG_TYPE(op)) {
2428
case SLJIT_F_EQUAL:
2429
case SLJIT_ORDERED_EQUAL:
2430
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2431
break;
2432
case SLJIT_F_LESS:
2433
case SLJIT_ORDERED_LESS:
2434
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2435
break;
2436
case SLJIT_F_GREATER:
2437
case SLJIT_ORDERED_GREATER:
2438
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2439
break;
2440
case SLJIT_UNORDERED_OR_GREATER:
2441
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2442
break;
2443
case SLJIT_UNORDERED_OR_LESS:
2444
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2445
break;
2446
case SLJIT_UNORDERED_OR_EQUAL:
2447
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2448
break;
2449
default: /* SLJIT_UNORDERED */
2450
FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2451
}
2452
return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2453
}
2454
2455
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2456
sljit_s32 dst, sljit_sw dstw,
2457
sljit_s32 src, sljit_sw srcw)
2458
{
2459
sljit_s32 dst_r;
2460
2461
CHECK_ERROR();
2462
compiler->cache_arg = 0;
2463
compiler->cache_argw = 0;
2464
2465
SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2466
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2467
2468
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2469
op ^= SLJIT_32;
2470
2471
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2472
2473
if (src & SLJIT_MEM) {
2474
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2475
src = dst_r;
2476
}
2477
2478
switch (GET_OPCODE(op)) {
2479
case SLJIT_MOV_F64:
2480
if (src != dst_r) {
2481
if (!(dst & SLJIT_MEM))
2482
FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2483
else
2484
dst_r = src;
2485
}
2486
break;
2487
case SLJIT_NEG_F64:
2488
FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2489
break;
2490
case SLJIT_ABS_F64:
2491
FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2492
break;
2493
case SLJIT_CONV_F64_FROM_F32:
2494
/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
2495
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2496
op ^= SLJIT_32;
2497
break;
2498
}
2499
2500
if (dst & SLJIT_MEM)
2501
return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2502
return SLJIT_SUCCESS;
2503
}
2504
2505
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2506
sljit_s32 dst, sljit_sw dstw,
2507
sljit_s32 src1, sljit_sw src1w,
2508
sljit_s32 src2, sljit_sw src2w)
2509
{
2510
sljit_s32 dst_r, flags = 0;
2511
2512
CHECK_ERROR();
2513
CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2514
ADJUST_LOCAL_OFFSET(dst, dstw);
2515
ADJUST_LOCAL_OFFSET(src1, src1w);
2516
ADJUST_LOCAL_OFFSET(src2, src2w);
2517
2518
compiler->cache_arg = 0;
2519
compiler->cache_argw = 0;
2520
2521
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2522
2523
if (src1 & SLJIT_MEM) {
2524
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2525
FAIL_IF(compiler->error);
2526
src1 = TMP_FREG1;
2527
} else
2528
flags |= SLOW_SRC1;
2529
}
2530
2531
if (src2 & SLJIT_MEM) {
2532
if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2533
FAIL_IF(compiler->error);
2534
src2 = TMP_FREG2;
2535
} else
2536
flags |= SLOW_SRC2;
2537
}
2538
2539
if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2540
if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2541
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2542
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2543
} else {
2544
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2545
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2546
}
2547
}
2548
else if (flags & SLOW_SRC1)
2549
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2550
else if (flags & SLOW_SRC2)
2551
FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2552
2553
if (flags & SLOW_SRC1)
2554
src1 = TMP_FREG1;
2555
if (flags & SLOW_SRC2)
2556
src2 = TMP_FREG2;
2557
2558
switch (GET_OPCODE(op)) {
2559
case SLJIT_ADD_F64:
2560
FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2561
break;
2562
case SLJIT_SUB_F64:
2563
FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2564
break;
2565
case SLJIT_MUL_F64:
2566
FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2567
break;
2568
case SLJIT_DIV_F64:
2569
FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2570
break;
2571
}
2572
2573
if (dst_r != dst)
2574
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2575
return SLJIT_SUCCESS;
2576
}
2577
2578
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2579
sljit_s32 dst_freg,
2580
sljit_s32 src1, sljit_sw src1w,
2581
sljit_s32 src2, sljit_sw src2w)
2582
{
2583
sljit_s32 reg;
2584
2585
CHECK_ERROR();
2586
CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2587
ADJUST_LOCAL_OFFSET(src1, src1w);
2588
ADJUST_LOCAL_OFFSET(src2, src2w);
2589
2590
if (src2 & SLJIT_MEM) {
2591
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2592
src2 = TMP_FREG1;
2593
}
2594
2595
if (src1 & SLJIT_MEM) {
2596
reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2597
FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2598
src1 = reg;
2599
}
2600
2601
return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2602
}
2603
2604
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2605
sljit_s32 freg, sljit_f32 value)
2606
{
2607
union {
2608
sljit_s32 imm;
2609
sljit_f32 value;
2610
} u;
2611
2612
CHECK_ERROR();
2613
CHECK(check_sljit_emit_fset32(compiler, freg, value));
2614
2615
u.value = value;
2616
2617
if (u.imm == 0)
2618
return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2619
2620
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2621
return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2622
}
2623
2624
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2625
sljit_s32 freg, sljit_f64 value)
2626
{
2627
union {
2628
sljit_sw imm;
2629
sljit_f64 value;
2630
} u;
2631
2632
CHECK_ERROR();
2633
CHECK(check_sljit_emit_fset64(compiler, freg, value));
2634
2635
u.value = value;
2636
2637
if (u.imm == 0)
2638
return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2639
2640
FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2641
return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2642
}
2643
2644
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2645
sljit_s32 freg, sljit_s32 reg)
2646
{
2647
sljit_ins inst;
2648
2649
CHECK_ERROR();
2650
CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2651
2652
if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2653
inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2654
else
2655
inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2656
return push_inst(compiler, inst);
2657
}
2658
2659
/* --------------------------------------------------------------------- */
2660
/* Conditional instructions */
2661
/* --------------------------------------------------------------------- */
2662
2663
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2664
{
2665
struct sljit_label *label;
2666
2667
CHECK_ERROR_PTR();
2668
CHECK_PTR(check_sljit_emit_label(compiler));
2669
2670
if (compiler->last_label && compiler->last_label->size == compiler->size)
2671
return compiler->last_label;
2672
2673
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2674
PTR_FAIL_IF(!label);
2675
set_label(label, compiler);
2676
return label;
2677
}
2678
2679
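/* Returns the conditional branch matching the inverse of the requested
condition: the branch skips the JIRL based far jump emitted after it when the
jump must not be taken. Returns 0 for unconditional jump types. */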
static sljit_ins get_jump_instruction(sljit_s32 type)
2680
{
2681
switch (type) {
2682
case SLJIT_EQUAL:
2683
return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2684
case SLJIT_NOT_EQUAL:
2685
return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2686
case SLJIT_LESS:
2687
case SLJIT_GREATER:
2688
case SLJIT_SIG_LESS:
2689
case SLJIT_SIG_GREATER:
2690
case SLJIT_OVERFLOW:
2691
case SLJIT_CARRY:
2692
case SLJIT_ATOMIC_STORED:
2693
return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2694
case SLJIT_GREATER_EQUAL:
2695
case SLJIT_LESS_EQUAL:
2696
case SLJIT_SIG_GREATER_EQUAL:
2697
case SLJIT_SIG_LESS_EQUAL:
2698
case SLJIT_NOT_OVERFLOW:
2699
case SLJIT_NOT_CARRY:
2700
case SLJIT_ATOMIC_NOT_STORED:
2701
return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2702
case SLJIT_F_EQUAL:
2703
case SLJIT_ORDERED_EQUAL:
2704
case SLJIT_F_LESS:
2705
case SLJIT_ORDERED_LESS:
2706
case SLJIT_ORDERED_GREATER:
2707
case SLJIT_UNORDERED_OR_GREATER:
2708
case SLJIT_F_GREATER:
2709
case SLJIT_UNORDERED_OR_LESS:
2710
case SLJIT_UNORDERED_OR_EQUAL:
2711
case SLJIT_UNORDERED:
2712
return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2713
case SLJIT_ORDERED_NOT_EQUAL:
2714
case SLJIT_ORDERED_LESS_EQUAL:
2715
case SLJIT_ORDERED_GREATER_EQUAL:
2716
case SLJIT_F_NOT_EQUAL:
2717
case SLJIT_UNORDERED_OR_NOT_EQUAL:
2718
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2719
case SLJIT_UNORDERED_OR_LESS_EQUAL:
2720
case SLJIT_F_LESS_EQUAL:
2721
case SLJIT_F_GREATER_EQUAL:
2722
case SLJIT_ORDERED:
2723
return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2724
default:
2725
/* Not conditional branch. */
2726
return 0;
2727
}
2728
}
2729
2730
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2731
{
2732
struct sljit_jump *jump;
2733
sljit_ins inst;
2734
2735
CHECK_ERROR_PTR();
2736
CHECK_PTR(check_sljit_emit_jump(compiler, type));
2737
2738
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2739
PTR_FAIL_IF(!jump);
2740
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2741
type &= 0xff;
2742
2743
inst = get_jump_instruction(type);
2744
2745
if (inst != 0) {
2746
PTR_FAIL_IF(push_inst(compiler, inst));
2747
jump->flags |= IS_COND;
2748
}
2749
2750
jump->addr = compiler->size;
2751
inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2752
2753
if (type >= SLJIT_FAST_CALL) {
2754
jump->flags |= IS_CALL;
2755
inst |= RD(RETURN_ADDR_REG);
2756
}
2757
2758
PTR_FAIL_IF(push_inst(compiler, inst));
2759
2760
/* Maximum number of instructions required for generating a constant. */
2761
compiler->size += JUMP_MAX_SIZE - 1;
2762
return jump;
2763
}
2764
2765
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2766
sljit_s32 arg_types)
2767
{
2768
SLJIT_UNUSED_ARG(arg_types);
2769
CHECK_ERROR_PTR();
2770
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2771
2772
if (type & SLJIT_CALL_RETURN) {
2773
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2774
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2775
}
2776
2777
SLJIT_SKIP_CHECKS(compiler);
2778
return sljit_emit_jump(compiler, type);
2779
}
2780
2781
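/* Compare-and-branch jumps fuse the comparison into the branch itself. As in
get_jump_instruction, the branch condition is inverted (e.g. BNE for
SLJIT_EQUAL) so it skips the JIRL far jump when the jump is not taken;
JUMP_MAX_SIZE - 1 extra words are reserved for materializing the target
address later. */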
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2782
sljit_s32 src1, sljit_sw src1w,
2783
sljit_s32 src2, sljit_sw src2w)
2784
{
2785
struct sljit_jump *jump;
2786
sljit_s32 flags;
2787
sljit_ins inst;
2788
sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2789
2790
CHECK_ERROR_PTR();
2791
CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2792
ADJUST_LOCAL_OFFSET(src1, src1w);
2793
ADJUST_LOCAL_OFFSET(src2, src2w);
2794
2795
compiler->cache_arg = 0;
2796
compiler->cache_argw = 0;
2797
2798
flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2799
2800
if (src1 & SLJIT_MEM) {
2801
PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2802
src1 = TMP_REG1;
2803
}
2804
2805
if (src2 & SLJIT_MEM) {
2806
PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
2807
src2 = src2_tmp_reg;
2808
}
2809
2810
if (src1 == SLJIT_IMM) {
2811
if (src1w != 0) {
2812
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2813
src1 = TMP_REG1;
2814
}
2815
else
2816
src1 = TMP_ZERO;
2817
}
2818
2819
if (src2 == SLJIT_IMM) {
2820
if (src2w != 0) {
2821
PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
2822
src2 = src2_tmp_reg;
2823
}
2824
else
2825
src2 = TMP_ZERO;
2826
}
2827
2828
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2829
PTR_FAIL_IF(!jump);
2830
set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2831
type &= 0xff;
2832
2833
switch (type) {
2834
case SLJIT_EQUAL:
2835
inst = BNE | RJ(src1) | RD(src2);
2836
break;
2837
case SLJIT_NOT_EQUAL:
2838
inst = BEQ | RJ(src1) | RD(src2);
2839
break;
2840
case SLJIT_LESS:
2841
inst = BGEU | RJ(src1) | RD(src2);
2842
break;
2843
case SLJIT_GREATER_EQUAL:
2844
inst = BLTU | RJ(src1) | RD(src2);
2845
break;
2846
case SLJIT_GREATER:
2847
inst = BGEU | RJ(src2) | RD(src1);
2848
break;
2849
case SLJIT_LESS_EQUAL:
2850
inst = BLTU | RJ(src2) | RD(src1);
2851
break;
2852
case SLJIT_SIG_LESS:
2853
inst = BGE | RJ(src1) | RD(src2);
2854
break;
2855
case SLJIT_SIG_GREATER_EQUAL:
2856
inst = BLT | RJ(src1) | RD(src2);
2857
break;
2858
case SLJIT_SIG_GREATER:
2859
inst = BGE | RJ(src2) | RD(src1);
2860
break;
2861
case SLJIT_SIG_LESS_EQUAL:
2862
inst = BLT | RJ(src2) | RD(src1);
2863
break;
2864
default:
2865
inst = BREAK;
2866
SLJIT_UNREACHABLE();
2867
}
2868
2869
PTR_FAIL_IF(push_inst(compiler, inst));
2870
2871
jump->addr = compiler->size;
2872
PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2873
2874
/* Maximum number of instructions required for generating a constant. */
2875
compiler->size += JUMP_MAX_SIZE - 1;
2876
2877
return jump;
2878
}
2879
2880
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2881
{
2882
struct sljit_jump *jump;
2883
2884
CHECK_ERROR();
2885
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2886
2887
if (src != SLJIT_IMM) {
2888
if (src & SLJIT_MEM) {
2889
ADJUST_LOCAL_OFFSET(src, srcw);
2890
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2891
src = TMP_REG1;
2892
}
2893
return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2894
}
2895
2896
/* These jumps are converted to jump/call instructions when possible. */
2897
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2898
FAIL_IF(!jump);
2899
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2900
jump->u.target = (sljit_uw)srcw;
2901
2902
jump->addr = compiler->size;
2903
FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2904
2905
/* Maximum number of instructions required for generating a constant. */
2906
compiler->size += JUMP_MAX_SIZE - 1;
2907
2908
return SLJIT_SUCCESS;
2909
}
2910
2911
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2912
sljit_s32 arg_types,
2913
sljit_s32 src, sljit_sw srcw)
2914
{
2915
SLJIT_UNUSED_ARG(arg_types);
2916
CHECK_ERROR();
2917
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2918
2919
if (src & SLJIT_MEM) {
2920
ADJUST_LOCAL_OFFSET(src, srcw);
2921
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2922
src = TMP_REG1;
2923
}
2924
2925
if (type & SLJIT_CALL_RETURN) {
2926
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2927
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2928
src = TMP_REG1;
2929
}
2930
2931
FAIL_IF(emit_stack_frame_release(compiler, 0));
2932
type = SLJIT_JUMP;
2933
}
2934
2935
SLJIT_SKIP_CHECKS(compiler);
2936
return sljit_emit_ijump(compiler, type, src, srcw);
2937
}
2938
2939
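/* Materializes the requested status flag as 0/1 in a register. EQUAL and
NOT_EQUAL test EQUAL_FLAG against zero with SLTUI; most other conditions
already hold 0/1 in OTHER_FLAG, while the atomic-store flag and overflow after
a multiply are first normalized with SLTUI. The invert path flips the value
with XORI for the negated forms, and for arithmetic opcodes the flag value is
combined with dst through emit_op. */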
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2940
sljit_s32 dst, sljit_sw dstw,
2941
sljit_s32 type)
2942
{
2943
sljit_s32 src_r, dst_r, invert;
2944
sljit_s32 saved_op = op;
2945
sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2946
2947
CHECK_ERROR();
2948
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2949
ADJUST_LOCAL_OFFSET(dst, dstw);
2950
2951
op = GET_OPCODE(op);
2952
dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2953
2954
compiler->cache_arg = 0;
2955
compiler->cache_argw = 0;
2956
2957
if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2958
FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2959
2960
if (type < SLJIT_F_EQUAL) {
2961
src_r = OTHER_FLAG;
2962
invert = type & 0x1;
2963
2964
switch (type) {
2965
case SLJIT_EQUAL:
2966
case SLJIT_NOT_EQUAL:
2967
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2968
src_r = dst_r;
2969
break;
2970
case SLJIT_ATOMIC_STORED:
2971
case SLJIT_ATOMIC_NOT_STORED:
2972
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2973
src_r = dst_r;
2974
invert ^= 0x1;
2975
break;
2976
case SLJIT_OVERFLOW:
2977
case SLJIT_NOT_OVERFLOW:
2978
if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2979
src_r = OTHER_FLAG;
2980
break;
2981
}
2982
FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2983
src_r = dst_r;
2984
invert ^= 0x1;
2985
break;
2986
}
2987
} else {
2988
invert = 0;
2989
src_r = OTHER_FLAG;
2990
2991
switch (type) {
2992
case SLJIT_ORDERED_NOT_EQUAL:
2993
case SLJIT_ORDERED_LESS_EQUAL:
2994
case SLJIT_ORDERED_GREATER_EQUAL:
2995
case SLJIT_F_NOT_EQUAL:
2996
case SLJIT_UNORDERED_OR_NOT_EQUAL:
2997
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2998
case SLJIT_UNORDERED_OR_LESS_EQUAL:
2999
case SLJIT_F_LESS_EQUAL:
3000
case SLJIT_F_GREATER_EQUAL:
3001
case SLJIT_ORDERED:
3002
invert = 1;
3003
break;
3004
}
3005
}
3006
3007
if (invert) {
3008
FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
3009
src_r = dst_r;
3010
}
3011
3012
if (op < SLJIT_ADD) {
3013
if (dst & SLJIT_MEM)
3014
return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
3015
3016
if (src_r != dst_r)
3017
return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
3018
return SLJIT_SUCCESS;
3019
}
3020
3021
mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
3022
3023
if (dst & SLJIT_MEM)
3024
return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
3025
return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
3026
}
3027
3028
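/* Conditional select: dst_reg is first set to src2_reg when needed, one
instruction slot is reserved (ptr), dst_reg is then overwritten from src1, and
finally the reserved slot is patched with a conditional branch (inverted
condition) that jumps over the overwrite when src2_reg should be kept. */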
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
3029
sljit_s32 dst_reg,
3030
sljit_s32 src1, sljit_sw src1w,
3031
sljit_s32 src2_reg)
3032
{
3033
sljit_ins *ptr;
3034
sljit_uw size;
3035
sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
3036
3037
CHECK_ERROR();
3038
CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3039
ADJUST_LOCAL_OFFSET(src1, src1w);
3040
3041
if (dst_reg != src2_reg) {
3042
if (dst_reg == src1) {
3043
src1 = src2_reg;
3044
src1w = 0;
3045
type ^= 0x1;
3046
} else {
3047
if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3048
FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));
3049
3050
if ((src1 & REG_MASK) == dst_reg)
3051
src1 = (src1 & ~REG_MASK) | TMP_REG1;
3052
3053
if (OFFS_REG(src1) == dst_reg)
3054
src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
3055
}
3056
3057
FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
3058
}
3059
}
3060
3061
size = compiler->size;
3062
3063
ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
3064
FAIL_IF(!ptr);
3065
compiler->size++;
3066
3067
if (src1 & SLJIT_MEM) {
3068
FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
3069
} else if (src1 == SLJIT_IMM) {
3070
if (type & SLJIT_32)
3071
src1w = (sljit_s32)src1w;
3072
FAIL_IF(load_immediate(compiler, dst_reg, src1w));
3073
} else
3074
FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
3075
3076
*ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
3077
return SLJIT_SUCCESS;
3078
}
3079
3080
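/* Floating point select is done with FSEL: the integer flag is first copied
into the F_OTHER_FLAG condition register with MOVGR2CF, and the invert flag
swaps the FSEL operand order instead of inverting the condition itself. */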
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3081
sljit_s32 dst_freg,
3082
sljit_s32 src1, sljit_sw src1w,
3083
sljit_s32 src2_freg)
3084
{
3085
sljit_s32 invert = 0;
3086
3087
CHECK_ERROR();
3088
CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3089
3090
ADJUST_LOCAL_OFFSET(src1, src1w);
3091
3092
if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
3093
if ((type & ~SLJIT_32) == SLJIT_EQUAL)
3094
invert = 1;
3095
FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
3096
} else {
3097
if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
3098
invert = 1;
3099
FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
3100
}
3101
3102
if (src1 & SLJIT_MEM) {
3103
FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
3104
if (invert)
3105
return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3106
return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
3107
} else {
3108
if (invert)
3109
return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3110
return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
3111
}
3112
}
3113
3114
#undef FLOAT_DATA
3115
3116
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3117
sljit_s32 reg,
3118
sljit_s32 mem, sljit_sw memw)
3119
{
3120
sljit_s32 flags;
3121
3122
CHECK_ERROR();
3123
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3124
3125
if (!(reg & REG_PAIR_MASK))
3126
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3127
3128
if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3129
memw &= 0x3;
3130
3131
if (SLJIT_UNLIKELY(memw != 0)) {
3132
FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
3133
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3134
} else
3135
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
3136
3137
mem = TMP_REG1;
3138
memw = 0;
3139
} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
3140
if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
3141
FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
3142
memw &= 0xfff;
3143
} else {
3144
FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
3145
memw = 0;
3146
}
3147
3148
if (mem & REG_MASK)
3149
FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3150
3151
mem = TMP_REG1;
3152
} else {
3153
mem &= REG_MASK;
3154
memw &= 0xfff;
3155
}
3156
3157
SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
3158
3159
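/* When loading a register pair and the base equals the first register of the
pair, the second register is loaded first so the base is not clobbered before
the second access. */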
if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
3160
FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
3161
return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
3162
}
3163
3164
flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
3165
3166
FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
3167
return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
3168
}
3169
3170
#undef TO_ARGW_HI
3171
3172
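/* Folds any addressing mode into a single base register (TMP_REG3 when an
index register or a non-zero offset has to be added), since the vector memory
forms used below take at most a small immediate displacement. */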
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
3173
{
3174
sljit_s32 mem = *mem_ptr;
3175
3176
if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3177
*mem_ptr = TMP_REG3;
3178
FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3)));
3179
return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK));
3180
}
3181
3182
if (!(mem & REG_MASK)) {
3183
*mem_ptr = TMP_REG3;
3184
return load_immediate(compiler, TMP_REG3, memw);
3185
}
3186
3187
mem &= REG_MASK;
3188
3189
if (memw == 0) {
3190
*mem_ptr = mem;
3191
return SLJIT_SUCCESS;
3192
}
3193
3194
*mem_ptr = TMP_REG3;
3195
3196
FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
3197
return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem));
3198
}
3199
3200
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3201
sljit_s32 vreg,
3202
sljit_s32 srcdst, sljit_sw srcdstw)
3203
{
3204
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3205
sljit_ins ins = 0;
3206
3207
CHECK_ERROR();
3208
CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));
3209
3210
ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3211
3212
if (reg_size != 5 && reg_size != 4)
3213
return SLJIT_ERR_UNSUPPORTED;
3214
3215
if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3216
return SLJIT_ERR_UNSUPPORTED;
3217
3218
if (type & SLJIT_SIMD_TEST)
3219
return SLJIT_SUCCESS;
3220
3221
if (!(srcdst & SLJIT_MEM)) {
3222
if (type & SLJIT_SIMD_STORE)
3223
ins = FRD(srcdst) | FRJ(vreg) | FRK(vreg);
3224
else
3225
ins = FRD(vreg) | FRJ(srcdst) | FRK(srcdst);
3226
3227
if (reg_size == 5)
3228
ins |= VOR_V | (sljit_ins)1 << 26;
3229
else
3230
ins |= VOR_V;
3231
3232
return push_inst(compiler, ins);
3233
}
3234
3235
ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3236
3237
if (reg_size == 5)
3238
ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3239
3240
if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
3241
return push_inst(compiler, ins | FRD(vreg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
3242
else {
3243
FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3244
return push_inst(compiler, ins | FRD(vreg) | RJ(srcdst) | IMM_I12(0));
3245
}
3246
}
3247
3248
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		if (reg_size == 5)
			ins = (sljit_ins)1 << 25;

		return push_inst(compiler, VLDREPL | ins | FRD(vreg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
	}

	if (reg_size == 5)
		ins = (sljit_ins)1 << 26;

	if (type & SLJIT_SIMD_FLOAT) {
		if (src == SLJIT_IMM)
			return push_inst(compiler, VREPLGR2VR | ins | FRD(vreg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);

		FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(vreg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));

		if (reg_size == 5) {
			ins = (sljit_ins)(0x44 << 10);
			return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));
		}

		return SLJIT_SUCCESS;
	}

	ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;

	if (src == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		src = TMP_REG2;
	}

	return push_inst(compiler, ins | FRD(vreg) | RJ(src));
}

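/* Moves a single lane between vreg and a register or memory operand;
   SLJIT_SIMD_LANE_ZERO clears the remaining lanes first. With LASX, lanes in
   the upper 128 bits are reached through XVPERMI shuffles via TMP_FREG1. */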
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;

		if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
			FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
			srcdst = TMP_FREG1;
			srcdstw = 0;
		}

		FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(vreg) | FRJ(vreg) | FRK(vreg)));
	}

	if (srcdst & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		if (reg_size == 5)
			ins = (sljit_ins)1 << 25;

		if (type & SLJIT_SIMD_STORE) {
			ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
			return push_inst(compiler, VSTELM | ins | FRD(vreg) | RJ(srcdst));
		} else {
			FAIL_IF(emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0));
			srcdst = TMP_REG1;
			ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

			if (reg_size == 5) {
				if (elem_size < 2) {
					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
					if (lane_index >= (2 << (3 - elem_size))) {
						FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));
					} else {
						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));
						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));
					}
				} else
					ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
			}

			return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));
		}
	}

	if (type & SLJIT_SIMD_FLOAT) {
		ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		if (type & SLJIT_SIMD_STORE) {
			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(vreg) | IMM_V(lane_index)));
			return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
		} else {
			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
			return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(TMP_REG1) | IMM_V(lane_index));
		}
	}

	if (srcdst == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
		srcdst = TMP_REG1;
	}

	if (type & SLJIT_SIMD_STORE) {
		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		if (type & SLJIT_SIMD_LANE_SIGNED)
			ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
		else
			ins |= VPICKVE2GR_U;

		if (reg_size == 5) {
			if (elem_size < 2) {
				if (lane_index >= (2 << (3 - elem_size))) {
					if (type & SLJIT_SIMD_LANE_SIGNED)
						ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
					else
						ins |= VPICKVE2GR_U;

					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
					return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
				}
			} else {
				ins ^= (sljit_ins)1 << (15 - elem_size);
				ins |= (sljit_ins)1 << 26;
			}
		}

		return push_inst(compiler, ins | RD(srcdst) | FRJ(vreg) | IMM_V(lane_index));
	} else {
		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

		if (reg_size == 5) {
			if (elem_size < 2) {
				FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(vreg) | FRK(vreg)));
				if (lane_index >= (2 << (3 - elem_size))) {
					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(vreg) | IMM_I8(1)));
					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(2));
				} else {
					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index)));
					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(vreg) | FRJ(TMP_FREG1) | IMM_I8(18));
				}
			} else
				ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
		}

		return push_inst(compiler, VINSGR2VR | ins | FRD(vreg) | RJ(srcdst) | IMM_V(lane_index));
	}

	return SLJIT_ERR_UNSUPPORTED;
}

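/* Broadcasts the selected lane of src into every lane of vreg. */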
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;

	if (reg_size == 5) {
		FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));

		ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);

		return push_inst(compiler, XVPERMI | ins | FRD(vreg) | FRJ(vreg));
	}

	return push_inst(compiler, VREPLVEI | ins | FRD(vreg) | FRJ(src) | IMM_V(src_lane_index));
}

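/* Widens packed elements: integer lanes are sign- or zero-extended with
   VSLLWIL, float lanes are converted from single to double with VFCVTL_D_S. */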
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;

		if (reg_size == 5)
			ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;

		if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
			FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(srcw)));
		else {
			FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
			FAIL_IF(push_inst(compiler, ins | FRD(vreg) | RJ(src) | IMM_I12(0)));
		}
		src = vreg;
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size != 2 || elem2_size != 3)
			return SLJIT_ERR_UNSUPPORTED;

		ins = 0;
		if (reg_size == 5) {
			ins = (sljit_ins)1 << 26;
			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
		}

		return push_inst(compiler, VFCVTL_D_S | ins | FRD(vreg) | FRJ(src));
	}

	ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);

	if (reg_size == 5)
		ins |= (sljit_ins)1 << 26;

	do {
		if (reg_size == 5)
			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));

		FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(vreg) | FRJ(src)));
		src = vreg;
	} while (++elem_size < elem2_size);

	return SLJIT_SUCCESS;
}

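/* Collects the sign bit of every lane (VMSKLTZ) into a general purpose
   register; for LASX the mask of the upper half is shifted and ORed in. */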
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (reg_size == 5)
		ins = (sljit_ins)1 << 26;

	FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(vreg)));

	FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));

	if (reg_size == 5) {
		FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
		FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
		FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
	}

	if (dst_r == TMP_REG2)
		return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);

	return SLJIT_SUCCESS;
}

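/* Two-operand vector operations: bitwise AND, OR, XOR and (LSX only) a byte
   shuffle. A memory source is first loaded into TMP_FREG1. */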
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 5 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst(compiler, (reg_size == 4 ? VLD : XVLD) | FRD(TMP_FREG1) | RJ(src2) | IMM_I12(0)));
		src2 = TMP_FREG1;
	}

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND_V;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VOR_V;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VXOR_V;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		if (reg_size != 4)
			return SLJIT_ERR_UNSUPPORTED;

		return push_inst(compiler, VSHUF_B | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src1_vreg) | FRA(src2));
	}

	if (reg_size == 5)
		ins |= (sljit_ins)1 << 26;

	return push_inst(compiler, ins | FRD(dst_vreg) | FRJ(src1_vreg) | FRK(src2));
}

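/* Atomic load: emits LL.W / LL.D when the load-linked/store-conditional path
   is requested or AMCAS is not available, otherwise a plain load that is
   paired with the AMCAS based store below. */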
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {
		if (op & SLJIT_ATOMIC_USE_CAS)
			return SLJIT_ERR_UNSUPPORTED;

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
			ins = LL_D;
			break;
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
			ins = LL_W;
			break;

		default:
			return SLJIT_ERR_UNSUPPORTED;
		}

		if (op & SLJIT_ATOMIC_TEST)
			return SLJIT_SUCCESS;

		return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
		ins = LD_B;
		break;
	case SLJIT_MOV_U8:
		ins = LD_BU;
		break;
	case SLJIT_MOV_S16:
		ins = LD_H;
		break;
	case SLJIT_MOV_U16:
		ins = LD_HU;
		break;
	case SLJIT_MOV32:
	case SLJIT_MOV_S32:
		ins = LD_W;
		break;
	case SLJIT_MOV_U32:
		ins = LD_WU;
		break;
	default:
		ins = LD_D;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
}

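/* Atomic store: emits SC.W / SC.D on the LL/SC path, otherwise an AMCAS
   compare-and-swap; when SLJIT_SET_ATOMIC_STORED is requested the result is
   compared with temp_reg to set OTHER_FLAG. */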
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins = 0;
	sljit_ins unsign = 0;
	sljit_s32 tmp = temp_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if ((op & SLJIT_ATOMIC_USE_LS) || !LOONGARCH_SUPPORT_AMCAS) {
		if (op & SLJIT_ATOMIC_USE_CAS)
			return SLJIT_ERR_UNSUPPORTED;

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
			ins = SC_D;
			break;
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
			ins = SC_W;
			break;

		default:
			return SLJIT_ERR_UNSUPPORTED;
		}

		if (op & SLJIT_ATOMIC_TEST)
			return SLJIT_SUCCESS;

		FAIL_IF(push_inst(compiler, ADD_D | RD(OTHER_FLAG) | RJ(src_reg) | RK(TMP_ZERO)));
		return push_inst(compiler, ins | RD(OTHER_FLAG) | RJ(mem_reg));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
		ins = AMCAS_B;
		break;
	case SLJIT_MOV_U8:
		ins = AMCAS_B;
		unsign = BSTRPICK_D | (7 << 16);
		break;
	case SLJIT_MOV_S16:
		ins = AMCAS_H;
		break;
	case SLJIT_MOV_U16:
		ins = AMCAS_H;
		unsign = BSTRPICK_D | (15 << 16);
		break;
	case SLJIT_MOV32:
	case SLJIT_MOV_S32:
		ins = AMCAS_W;
		break;
	case SLJIT_MOV_U32:
		ins = AMCAS_W;
		unsign = BSTRPICK_D | (31 << 16);
		break;
	default:
		ins = AMCAS_D;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	if (op & SLJIT_SET_ATOMIC_STORED) {
		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG3) | RJ(temp_reg) | RK(TMP_ZERO)));
		tmp = TMP_REG3;
	}
	FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
	if (!(op & SLJIT_SET_ATOMIC_STORED))
		return SLJIT_SUCCESS;

	if (unsign)
		FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));

	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(tmp) | RK(temp_reg)));
	return push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | IMM_I12(1));
}

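/* Emits the fixed four-instruction sequence (LU12I.W, LU32I.D, LU52I.D, ORI)
   that materializes a 64-bit constant and can be patched later. */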
static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
{
	SLJIT_UNUSED_ARG(last_ins);

	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
	FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
	return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
}

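/* Patches the four-instruction constant-load sequence at addr so that it
   produces new_target. */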
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_ins *inst = (sljit_ins*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);

	SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
	inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);

	SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
	inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5);

	SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
	inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);

	SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
	if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
		inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
	else
		inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);

	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));

	compiler->size += JUMP_MAX_SIZE - 1;

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));

	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}