Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeRISCV_common.c
9913 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28
{
29
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
30
return "RISC-V-32" SLJIT_CPUINFO;
31
#else /* !SLJIT_CONFIG_RISCV_32 */
32
return "RISC-V-64" SLJIT_CPUINFO;
33
#endif /* SLJIT_CONFIG_RISCV_32 */
34
}
35
36
/* Length of an instruction word
   Both for riscv-32 and riscv-64 */
typedef sljit_u32 sljit_ins;

/* Temporary registers used internally by the compiler. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
/* Index 0 of reg_map: the hardwired zero register (x0). */
#define TMP_ZERO 0

/* Flags are kept in volatile registers. */
#define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
#define RETURN_ADDR_REG TMP_REG2
#define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)

/* Temporary floating point registers. */
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* Temporary vector registers. */
#define TMP_VREG1 (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 1)
#define TMP_VREG2 (SLJIT_NUMBER_OF_VECTOR_REGISTERS + 2)

/* Maps sljit register indices to machine register numbers
   (entry 0 is the zero register, used by TMP_ZERO). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 10, 11, 12, 13, 14, 15, 16, 17, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 2, 6, 1, 7, 5, 28
};

/* Maps sljit float register indices to machine FP register numbers. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 10, 11, 12, 13, 14, 15, 16, 17, 2, 3, 4, 5, 6, 7, 28, 29, 30, 31, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 9, 8, 0, 1,
};

/* Maps sljit vector register indices to machine vector register numbers. */
static const sljit_u8 vreg_map[SLJIT_NUMBER_OF_VECTOR_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
};
67
68
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Register operand field encodings for the integer, floating point
   and vector register files (rd: bits 11:7, rs1: 19:15, rs2: 24:20). */
#define RD(rd) ((sljit_ins)reg_map[rd] << 7)
#define RS1(rs1) ((sljit_ins)reg_map[rs1] << 15)
#define RS2(rs2) ((sljit_ins)reg_map[rs2] << 20)
#define FRD(rd) ((sljit_ins)freg_map[rd] << 7)
#define FRS1(rs1) ((sljit_ins)freg_map[rs1] << 15)
#define FRS2(rs2) ((sljit_ins)freg_map[rs2] << 20)
#define VRD(rd) ((sljit_ins)vreg_map[rd] << 7)
#define VRS1(rs1) ((sljit_ins)vreg_map[rs1] << 15)
#define VRS2(rs2) ((sljit_ins)vreg_map[rs2] << 20)
/* I-type immediate: imm[11:0] in bits 31:20. */
#define IMM_I(imm) ((sljit_ins)(imm) << 20)
/* S-type immediate: split into imm[11:5] (bits 31:25) and imm[4:0] (bits 11:7). */
#define IMM_S(imm) ((((sljit_ins)(imm) & 0xfe0) << 20) | (((sljit_ins)(imm) & 0x1f) << 7))

/* Represents funct(i) parts of the instructions. */
#define OPC(o) ((sljit_ins)(o))
#define F3(f) ((sljit_ins)(f) << 12)
#define F12(f) ((sljit_ins)(f) << 20)
#define F7(f) ((sljit_ins)(f) << 25)

/* Vector instruction types. */
#define OPFVF (F3(0x5) | OPC(0x57))
#define OPFVV (F3(0x1) | OPC(0x57))
#define OPIVI (F3(0x3) | OPC(0x57))
#define OPIVV (F3(0x0) | OPC(0x57))
#define OPIVX (F3(0x4) | OPC(0x57))
#define OPMVV (F3(0x2) | OPC(0x57))
#define OPMVX (F3(0x6) | OPC(0x57))

/* Instruction encodings (opcode | funct fields), alphabetical order. */
#define ADD (F7(0x0) | F3(0x0) | OPC(0x33))
#define ADDI (F3(0x0) | OPC(0x13))
#define AND (F7(0x0) | F3(0x7) | OPC(0x33))
#define ANDI (F3(0x7) | OPC(0x13))
#define AUIPC (OPC(0x17))
#define BEQ (F3(0x0) | OPC(0x63))
#define BNE (F3(0x1) | OPC(0x63))
#define BLT (F3(0x4) | OPC(0x63))
#define BGE (F3(0x5) | OPC(0x63))
#define BLTU (F3(0x6) | OPC(0x63))
#define BGEU (F3(0x7) | OPC(0x63))
#if defined __riscv_zbb
#define CLZ (F7(0x30) | F3(0x1) | OPC(0x13))
#define CTZ (F7(0x30) | F12(0x1) | F3(0x1) | OPC(0x13))
#endif /* __riscv_zbb */
#define DIV (F7(0x1) | F3(0x4) | OPC(0x33))
#define DIVU (F7(0x1) | F3(0x5) | OPC(0x33))
#define EBREAK (F12(0x1) | F3(0x0) | OPC(0x73))
#define FADD_S (F7(0x0) | F3(0x7) | OPC(0x53))
#define FDIV_S (F7(0xc) | F3(0x7) | OPC(0x53))
#define FENCE (F3(0x0) | OPC(0xf))
#define FEQ_S (F7(0x50) | F3(0x2) | OPC(0x53))
#define FLD (F3(0x3) | OPC(0x7))
#define FLE_S (F7(0x50) | F3(0x0) | OPC(0x53))
#define FLT_S (F7(0x50) | F3(0x1) | OPC(0x53))
/* These conversion opcodes are partly defined. */
#define FCVT_S_D (F7(0x20) | OPC(0x53))
#define FCVT_S_W (F7(0x68) | OPC(0x53))
#define FCVT_S_WU (F7(0x68) | F12(0x1) | OPC(0x53))
#define FCVT_W_S (F7(0x60) | F3(0x1) | OPC(0x53))
#define FMUL_S (F7(0x8) | F3(0x7) | OPC(0x53))
#define FMV_X_W (F7(0x70) | F3(0x0) | OPC(0x53))
#define FMV_W_X (F7(0x78) | F3(0x0) | OPC(0x53))
#define FSD (F3(0x3) | OPC(0x27))
#define FSGNJ_S (F7(0x10) | F3(0x0) | OPC(0x53))
#define FSGNJN_S (F7(0x10) | F3(0x1) | OPC(0x53))
#define FSGNJX_S (F7(0x10) | F3(0x2) | OPC(0x53))
#define FSUB_S (F7(0x4) | F3(0x7) | OPC(0x53))
#define FSW (F3(0x2) | OPC(0x27))
#define JAL (OPC(0x6f))
#define JALR (F3(0x0) | OPC(0x67))
#define LD (F3(0x3) | OPC(0x3))
#define LUI (OPC(0x37))
#define LW (F3(0x2) | OPC(0x3))
#define LR (F7(0x8) | OPC(0x2f))
#define MUL (F7(0x1) | F3(0x0) | OPC(0x33))
#define MULH (F7(0x1) | F3(0x1) | OPC(0x33))
#define MULHU (F7(0x1) | F3(0x3) | OPC(0x33))
#define OR (F7(0x0) | F3(0x6) | OPC(0x33))
#define ORI (F3(0x6) | OPC(0x13))
#define REM (F7(0x1) | F3(0x6) | OPC(0x33))
#define REMU (F7(0x1) | F3(0x7) | OPC(0x33))
#if defined __riscv_zbb
/* REV8 rotates bytes; its immediate differs between XLEN 32 and 64. */
#if defined SLJIT_CONFIG_RISCV_32
#define REV8 (F12(0x698) | F3(0x5) | OPC(0x13))
#elif defined SLJIT_CONFIG_RISCV_64
#define REV8 (F12(0x6b8) | F3(0x5) | OPC(0x13))
#endif /* SLJIT_CONFIG_RISCV_32 */
#define ROL (F7(0x30) | F3(0x1) | OPC(0x33))
#define ROR (F7(0x30) | F3(0x5) | OPC(0x33))
#define RORI (F7(0x30) | F3(0x5) | OPC(0x13))
#endif /* __riscv_zbb */
#define SC (F7(0xc) | OPC(0x2f))
#define SD (F3(0x3) | OPC(0x23))
#if defined __riscv_zbb
#define SEXTB (F7(0x30) | F12(0x4) | F3(0x1) | OPC(0x13))
#define SEXTH (F7(0x30) | F12(0x5) | F3(0x1) | OPC(0x13))
#endif /* __riscv_zbb */
#if defined __riscv_zba
#define SH1ADD (F7(0x10) | F3(0x2) | OPC(0x33))
#define SH2ADD (F7(0x10) | F3(0x4) | OPC(0x33))
#define SH3ADD (F7(0x10) | F3(0x6) | OPC(0x33))
#endif /* __riscv_zba */
#define SLL (F7(0x0) | F3(0x1) | OPC(0x33))
#define SLLI (F3(0x1) | OPC(0x13))
#define SLT (F7(0x0) | F3(0x2) | OPC(0x33))
#define SLTI (F3(0x2) | OPC(0x13))
#define SLTU (F7(0x0) | F3(0x3) | OPC(0x33))
#define SLTUI (F3(0x3) | OPC(0x13))
#define SRL (F7(0x0) | F3(0x5) | OPC(0x33))
#define SRLI (F3(0x5) | OPC(0x13))
#define SRA (F7(0x20) | F3(0x5) | OPC(0x33))
#define SRAI (F7(0x20) | F3(0x5) | OPC(0x13))
#define SUB (F7(0x20) | F3(0x0) | OPC(0x33))
#define SW (F3(0x2) | OPC(0x23))
#define VAND_VV (F7(0x13) | OPIVV)
#define VFMV_FS (F7(0x21) | OPFVV)
#define VFMV_SF (F7(0x21) | OPFVF)
#define VFMV_VF (F7(0x2f) | OPFVF)
#define VFWCVT_FFV (F7(0x25) | (0xc << 15) | OPFVV)
#define VL (F7(0x1) | OPC(0x7))
#define VMSLE_VI (F7(0x3b) | OPIVI)
#define VMV_SX (F7(0x21) | OPMVX)
#define VMV_VI (F7(0x2f) | OPIVI)
#define VMV_VV (F7(0x2f) | OPIVV)
#define VMV_VX (F7(0x2f) | OPIVX)
#define VMV_XS (F7(0x21) | OPMVV)
#define VOR_VV (F7(0x15) | OPIVV)
#define VSETIVLI (F7(0x60) | F3(0x7) | OPC(0x57))
#define VS (F7(0x1) | OPC(0x27))
#define VSLIDEDOWN_VX (F7(0x1f) | OPIVX)
#define VSLIDEDOWN_VI (F7(0x1f) | OPIVI)
#define VSLIDEUP_VX (F7(0x1d) | OPIVX)
#define VSLIDEUP_VI (F7(0x1d) | OPIVI)
#define VRGATHER_VI (F7(0x19) | OPIVI)
#define VRGATHER_VV (F7(0x19) | OPIVV)
#define VXOR_VV (F7(0x17) | OPIVV)
#define VZEXT_VF2 (F7(0x25) | (0x6 << 15) | OPMVV)
#define VZEXT_VF4 (F7(0x25) | (0x4 << 15) | OPMVV)
#define VZEXT_VF8 (F7(0x25) | (0x2 << 15) | OPMVV)
#define XOR (F7(0x0) | F3(0x4) | OPC(0x33))
#define XORI (F3(0x4) | OPC(0x13))
#if defined __riscv_zbb
/* ZEXT.H uses a different opcode on RV32 (PACK) and RV64 (PACKW form). */
#if defined SLJIT_CONFIG_RISCV_32
#define ZEXTH (F7(0x4) | F3(0x4) | OPC(0x33))
#elif defined SLJIT_CONFIG_RISCV_64
#define ZEXTH (F7(0x4) | F3(0x4) | OPC(0x3B))
#endif /* SLJIT_CONFIG_RISCV_32 */
#endif /* __riscv_zbb */

/* 12 bit signed immediate, branch (B-type) and jump (J-type)
   displacement limits (in bytes). */
#define SIMM_MAX (0x7ff)
#define SIMM_MIN (-0x800)
#define BRANCH_MAX (0xfff)
#define BRANCH_MIN (-0x1000)
#define JUMP_MAX (0xfffff)
#define JUMP_MIN (-0x100000)

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
/* Reachable ranges of the multi-instruction address forms. S32_MAX is
   slightly below INT32_MAX: load_addr_to_reg rounds addresses up by
   0x1000 when bit 11 is set, so the top 0x800 values are reserved. */
#define S32_MAX (0x7ffff7ffl)
#define S32_MIN (-0x80000000l)
#define S44_MAX (0x7fffffff7ffl)
#define S52_MAX (0x7ffffffffffffl)
#endif /* SLJIT_CONFIG_RISCV_64 */
232
233
/* Appends a single 32 bit instruction word to the instruction buffer
   and bumps the compiler's instruction count. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *dst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));

	FAIL_IF(!dst);
	*dst = ins;
	compiler->size++;
	return SLJIT_SUCCESS;
}
241
242
/* Emits an S-type (store format) instruction, merging the given
   immediate into its split imm[11:5]/imm[4:0] fields. */
static sljit_s32 push_imm_s_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit_sw imm)
{
	ins |= IMM_S(imm);
	return push_inst(compiler, ins);
}
246
247
/* Selects the shortest instruction sequence that can reach the jump
   target and records the choice as a PATCH_* flag in jump->flags.
   Instructions were emitted assuming the worst case length; the last
   emitted word (the JALR) is moved forward and the caller reclaims the
   unused slots after the returned pointer. */
static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	jump->addr = jump_addr;
	/* Rewritable jumps always keep the longest form so they can be
	   re-targeted later. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		goto exit;

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward reference: measure the distance from the original
		   (pre size-reduction) address. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	if (jump->flags & IS_COND) {
		/* The conditional branch is the instruction before code_ptr;
		   account for its position. */
		diff += SSIZE_OF(ins);

		if (diff >= BRANCH_MIN && diff <= BRANCH_MAX) {
			/* Target fits in a B-type offset: invert the branch
			   condition (bit 12 of funct3) and branch directly. */
			code_ptr--;
			code_ptr[0] = (code_ptr[0] & 0x1fff07f) ^ 0x1000;
			jump->flags |= PATCH_B;
			jump->addr = (sljit_uw)code_ptr;
			return code_ptr;
		}

		diff -= SSIZE_OF(ins);
	}

	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
		if (jump->flags & IS_COND) {
			/* Shrink the offset of the guarding branch which skips
			   the (now shorter) jump sequence. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
			code_ptr[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7;
#else /* !SLJIT_CONFIG_RISCV_32 */
			code_ptr[-1] -= (sljit_ins)(5 * sizeof(sljit_ins)) << 7;
#endif /* SLJIT_CONFIG_RISCV_32 */
		}

		/* Single JAL instruction. */
		jump->flags |= PATCH_J;
		return code_ptr;
	}

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (diff >= S32_MIN && diff <= S32_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7;

		/* PC-relative 32 bit: two instructions. */
		jump->flags |= PATCH_REL32;
		code_ptr[1] = code_ptr[0];
		return code_ptr + 1;
	}

	if (target_addr <= (sljit_uw)S32_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] -= (sljit_ins)(4 * sizeof(sljit_ins)) << 7;

		/* Absolute 32 bit address: two instructions. */
		jump->flags |= PATCH_ABS32;
		code_ptr[1] = code_ptr[0];
		return code_ptr + 1;
	}

	if (target_addr <= S44_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] -= (sljit_ins)(2 * sizeof(sljit_ins)) << 7;

		/* Absolute 44 bit address: four instructions. */
		jump->flags |= PATCH_ABS44;
		code_ptr[3] = code_ptr[0];
		return code_ptr + 3;
	}

	if (target_addr <= S52_MAX) {
		if (jump->flags & IS_COND)
			code_ptr[-1] -= (sljit_ins)(1 * sizeof(sljit_ins)) << 7;

		/* Absolute 52 bit address: five instructions. */
		jump->flags |= PATCH_ABS52;
		code_ptr[4] = code_ptr[0];
		return code_ptr + 4;
	}
#endif /* SLJIT_CONFIG_RISCV_64 */

exit:
	/* Worst case: full absolute address materialization. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	code_ptr[1] = code_ptr[0];
	return code_ptr + 1;
#else /* !SLJIT_CONFIG_RISCV_32 */
	code_ptr[5] = code_ptr[0];
	return code_ptr + 5;
#endif /* SLJIT_CONFIG_RISCV_32 */
}
345
346
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
347
348
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
349
{
350
sljit_uw addr;
351
sljit_uw jump_addr = (sljit_uw)code_ptr;
352
sljit_sw diff;
353
SLJIT_UNUSED_ARG(executable_offset);
354
355
SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
356
if (jump->flags & JUMP_ADDR)
357
addr = jump->u.target;
358
else {
359
addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
360
361
if (jump->u.label->size > jump->addr)
362
jump_addr = (sljit_uw)(code + jump->addr);
363
}
364
365
diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);
366
367
if (diff >= S32_MIN && diff <= S32_MAX) {
368
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
369
jump->flags |= PATCH_REL32;
370
return 1;
371
}
372
373
if (addr <= S32_MAX) {
374
SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
375
jump->flags |= PATCH_ABS32;
376
return 1;
377
}
378
379
if (addr <= S44_MAX) {
380
SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
381
jump->flags |= PATCH_ABS44;
382
return 3;
383
}
384
385
if (addr <= S52_MAX) {
386
SLJIT_ASSERT(jump->flags >= ((sljit_uw)4 << JUMP_SIZE_SHIFT));
387
jump->flags |= PATCH_ABS52;
388
return 4;
389
}
390
391
SLJIT_ASSERT(jump->flags >= ((sljit_uw)5 << JUMP_SIZE_SHIFT));
392
return 5;
393
}
394
395
#endif /* SLJIT_CONFIG_RISCV_64 */
396
397
/* Patches the instruction slots at jump->addr so that they load the
   resolved target address into a register. For jumps the trailing JALR
   immediate is updated in place; for mov_addr sequences a final ADDI
   supplies the low 12 bits. */
static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_uw flags = jump->flags;
	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
	sljit_ins *ins = (sljit_ins*)jump->addr;
	/* For mov_addr the first slot stores the destination register
	   (pre-encoded); jumps always go through TMP_REG1. */
	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	sljit_sw high;
#endif /* SLJIT_CONFIG_RISCV_64 */
	SLJIT_UNUSED_ARG(executable_offset);

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (flags & PATCH_REL32) {
		/* AUIPC + (JALR | ADDI) with a pc-relative 32 bit offset. */
		addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);

		SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);

		/* The low 12 bits are sign extended by the next instruction,
		   so round up when bit 11 is set. */
		if ((addr & 0x800) != 0)
			addr += 0x1000;

		ins[0] = AUIPC | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);

		if (!(flags & JUMP_MOV_ADDR)) {
			SLJIT_ASSERT((ins[1] & 0x707f) == JALR);
			ins[1] = (ins[1] & 0xfffff) | IMM_I(addr);
		} else
			ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr);
		return;
	}
#endif /* SLJIT_CONFIG_RISCV_64 */

	/* Same sign-extension compensation for the absolute forms. */
	if ((addr & 0x800) != 0)
		addr += 0x1000;

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
#else /* !SLJIT_CONFIG_RISCV_32 */

	if (flags & PATCH_ABS32) {
		/* LUI covers bits [31:12]. */
		SLJIT_ASSERT(addr <= S32_MAX);
		ins[0] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
	} else if (flags & PATCH_ABS44) {
		/* Materialize bits [43:12] first, then shift left by 12. */
		high = (sljit_sw)addr >> 12;
		SLJIT_ASSERT((sljit_uw)high <= 0x7fffffff);

		if (high > S32_MAX) {
			SLJIT_ASSERT((high & 0x800) != 0);
			ins[0] = LUI | RD(reg) | (sljit_ins)0x80000000u;
			ins[1] = XORI | RD(reg) | RS1(reg) | IMM_I(high);
		} else {
			if ((high & 0x800) != 0)
				high += 0x1000;

			ins[0] = LUI | RD(reg) | (sljit_ins)(high & ~0xfff);
			ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(high);
		}

		ins[2] = SLLI | RD(reg) | RS1(reg) | IMM_I(12);
		ins += 2;
	} else {
		/* PATCH_ABS52 or full 64 bit: build the upper part in
		   TMP_REG3, shift it into place and combine with XOR. */
		high = (sljit_sw)addr >> 32;

		/* Compensate for the sign extension of the low 32 bits. */
		if ((addr & 0x80000000l) != 0)
			high = ~high;

		if (flags & PATCH_ABS52) {
			SLJIT_ASSERT(addr <= S52_MAX);
			ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high << 12);
		} else {
			if ((high & 0x800) != 0)
				high += 0x1000;
			ins[0] = LUI | RD(TMP_REG3) | (sljit_ins)(high & ~0xfff);
			ins[1] = ADDI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I(high);
			ins++;
		}

		ins[1] = LUI | RD(reg) | (sljit_ins)((sljit_sw)addr & ~0xfff);
		ins[2] = SLLI | RD(TMP_REG3) | RS1(TMP_REG3) | IMM_I((flags & PATCH_ABS52) ? 20 : 32);
		ins[3] = XOR | RD(reg) | RS1(reg) | RS2(TMP_REG3);
		ins += 3;
	}
#endif /* !SLJIT_CONFIG_RISCV_32 */

	/* Final low 12 bits: patch the JALR of a jump, or append an ADDI
	   for mov_addr. */
	if (!(flags & JUMP_MOV_ADDR)) {
		SLJIT_ASSERT((ins[1] & 0x707f) == JALR);
		ins[1] = (ins[1] & 0xfffff) | IMM_I(addr);
	} else
		ins[1] = ADDI | RD(reg) | RS1(reg) | IMM_I(addr);
}
486
487
/* Pre-pass over all jumps, labels and constants: estimates a safe
   (upper bound) encodable size for every jump/mov_addr sequence and
   shrinks compiler->size accordingly. Label sizes and constant
   addresses are corrected by the running size_reduce amount; the
   reserved size of each jump is stored in its flags above
   JUMP_SIZE_SHIFT and is validated later by asserts. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	/* Walk the three address-ordered lists in a single merged pass. */
	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Extra words after the branch/jump instruction. */
			total_size = JUMP_MAX_SIZE;

			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
				if (jump->flags & JUMP_ADDR) {
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
					if (jump->u.target <= S32_MAX)
						total_size = 2;
					else if (jump->u.target <= S44_MAX)
						total_size = 4;
					else if (jump->u.target <= S52_MAX)
						total_size = 5;
#endif /* SLJIT_CONFIG_RISCV_64 */
				} else {
					/* Unit size: instruction. */
					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
					if (jump->u.label->size > jump->addr) {
						SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
						diff -= (sljit_sw)size_reduce;
					}

					/* diff + 1 accounts for the guarding branch. */
					if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH_MIN / SSIZE_OF(ins)))
						total_size = 0;
					else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
						total_size = 1;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
					else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
						total_size = 2;
#endif /* SLJIT_CONFIG_RISCV_64 */
				}
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
			jump->flags |= total_size << JUMP_SIZE_SHIFT;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
		} else {
			total_size = 5;

			if (!(jump->flags & JUMP_ADDR)) {
				/* Real size minus 1. Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
					total_size = 1;
			} else if (jump->u.target < S32_MAX)
				total_size = 1;
			else if (jump->u.target < S44_MAX)
				total_size = 3;
			else if (jump->u.target <= S52_MAX)
				total_size = 4;

			size_reduce += 5 - total_size;
			jump->flags |= total_size << JUMP_SIZE_SHIFT;
#endif /* SLJIT_CONFIG_RISCV_64 */
		}

		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
592
593
/* Generates the final machine code: copies the buffered instruction
   words into executable memory, resolves labels/jumps/constants while
   copying (detect_jump_type shrinks oversized jump sequences), then
   patches branch/jump immediates and absolute address loads in a
   second pass. Returns the executable code pointer or NULL on error. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_sw executable_offset;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));

	/* Shrink the reserved size of each jump before allocation. */
	reduce_code_size(compiler);

	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			if (next_min_addr == word_count) {
				SLJIT_ASSERT(!label || label->size >= word_count);
				SLJIT_ASSERT(!jump || jump->addr >= word_count);
				SLJIT_ASSERT(!const_ || const_->addr >= word_count);

				/* These structures are ordered by their address. */
				if (next_min_addr == next_label_size) {
					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (!(jump->flags & JUMP_MOV_ADDR)) {
						/* Skip the words reserved for this jump in
						   the source buffer. */
						word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
						SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
					} else {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
						word_count += 1;
						jump->addr = (sljit_uw)code_ptr;
						code_ptr += 1;
#else /* !SLJIT_CONFIG_RISCV_32 */
						word_count += jump->flags >> JUMP_SIZE_SHIFT;
						addr = (sljit_uw)code_ptr;
						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
						jump->addr = addr;
#endif /* SLJIT_CONFIG_RISCV_32 */
					}
					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}
			code_ptr++;
			word_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be attached to the very end of the code. */
	if (label && label->size == word_count) {
		label->u.addr = (sljit_uw)code_ptr;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: patch the target of every jump. */
	jump = compiler->jumps;
	while (jump) {
		do {
			if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
				/* Multi-instruction absolute/relative address load. */
				load_addr_to_reg(jump, executable_offset);
				break;
			}

			addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
			buf_ptr = (sljit_ins *)jump->addr;
			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);

			if (jump->flags & PATCH_B) {
				/* Scatter the displacement into the B-type
				   immediate fields. */
				SLJIT_ASSERT((sljit_sw)addr >= BRANCH_MIN && (sljit_sw)addr <= BRANCH_MAX);
				addr = ((addr & 0x800) >> 4) | ((addr & 0x1e) << 7) | ((addr & 0x7e0) << 20) | ((addr & 0x1000) << 19);
				buf_ptr[0] |= (sljit_ins)addr;
				break;
			}

			/* PATCH_J: rebuild the instruction as a JAL with a
			   J-type immediate. */
			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
			addr = (addr & 0xff000) | ((addr & 0x800) << 9) | ((addr & 0x7fe) << 20) | ((addr & 0x100000) << 11);
			buf_ptr[0] = JAL | RD((jump->flags & IS_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | (sljit_ins)addr;
		} while (0);

		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);

	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
729
730
/* Reports whether a given sljit feature is available on this target.
   Optional ISA extensions are detected at compile time via the
   compiler-provided __riscv_* feature macros. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif defined(__riscv_float_abi_soft)
		return 0;
#else /* !SLJIT_IS_FPU_AVAILABLE && !__riscv_float_abi_soft */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */
	case SLJIT_HAS_ZERO_REGISTER:
	case SLJIT_HAS_COPY_F32:
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	case SLJIT_HAS_COPY_F64:
#endif /* SLJIT_CONFIG_RISCV_64 */
	case SLJIT_HAS_ATOMIC:
	case SLJIT_HAS_MEMORY_BARRIER:
#ifdef __riscv_vector
	case SLJIT_HAS_SIMD:
#endif /* __riscv_vector */
		return 1;
#ifdef __riscv_zbb
	/* Bit-manipulation helpers require the Zbb extension. */
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_CTZ:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
		return 1;
#endif /* __riscv_zbb */
	default:
		return 0;
	}
}
763
764
/* Returns the relative cost class of a floating point comparison type:
   2 for unordered-or-equal / ordered-not-equal, 1 for the plain
   (un)ordered checks, 0 otherwise. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	if (type == SLJIT_UNORDERED_OR_EQUAL || type == SLJIT_ORDERED_NOT_EQUAL)
		return 2;

	if (type == SLJIT_UNORDERED || type == SLJIT_ORDERED)
		return 1;

	return 0;
}
778
779
/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA 0x01
#define WORD_DATA 0x00
#define BYTE_DATA 0x02
#define HALF_DATA 0x04
#define INT_DATA 0x06
#define SIGNED_DATA 0x08
/* Separates integer and floating point registers */
#define GPR_REG 0x0f
#define DOUBLE_DATA 0x10
#define SINGLE_DATA 0x12

#define MEM_MASK 0x1f

/* Internal flag bits used by the operation emitters (above MEM_MASK). */
#define ARG_TEST 0x00020
#define ALT_KEEP_CACHE 0x00040
#define CUMULATIVE_OP 0x00080
#define IMM_OP 0x00100
#define MOVE_OP 0x00200
#define SRC2_IMM 0x00400

#define UNUSED_DEST 0x00800
#define REG_DEST 0x01000
#define REG1_SOURCE 0x02000
#define REG2_SOURCE 0x04000
#define SLOW_SRC1 0x08000
#define SLOW_SRC2 0x10000
#define SLOW_DEST 0x20000
#define MEM_USE_TMP2 0x40000

/* Machine-word sized stack store/load used for saving and restoring
   registers in the prologue/epilogue. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
#define STACK_STORE SW
#define STACK_LOAD LW
#else /* !SLJIT_CONFIG_RISCV_32 */
#define STACK_STORE SD
#define STACK_LOAD LD
#endif /* SLJIT_CONFIG_RISCV_32 */

/* Word size specific helpers (load_immediate, etc.). */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
#include "sljitNativeRISCV_32.c"
#else /* !SLJIT_CONFIG_RISCV_32 */
#include "sljitNativeRISCV_64.c"
#endif /* SLJIT_CONFIG_RISCV_32 */

/* Largest stack offset reachable by a 12 bit signed immediate. */
#define STACK_MAX_DISTANCE (-SIMM_MIN)

static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
830
831
/* Emits the function prologue: allocates the (16 byte aligned) stack
   frame, stores the return address and the saved/scratch registers
   that must be preserved, then moves the incoming arguments into the
   saved registers requested by arg_types. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
	sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
	sljit_s32 i, tmp, offset;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	/* Reserve room for the general registers stored below. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Keep the f64 slots 8 byte aligned on the 32 bit target. */
		if ((local_size & SSIZE_OF(sw)) != 0)
			local_size += SSIZE_OF(sw);
		local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
	}
#else /* !SLJIT_CONFIG_RISCV_32 */
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
#endif /* SLJIT_CONFIG_RISCV_32 */
	/* RISC-V ABI requires 16 byte stack alignment. */
	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
	compiler->local_size = local_size;

	if (local_size <= STACK_MAX_DISTANCE) {
		/* Frequent case. */
		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size)));
		offset = local_size - SSIZE_OF(sw);
		local_size = 0;
	} else {
		/* The 12 bit immediate 0x800 sign-extends to -2048, i.e.
		   -STACK_MAX_DISTANCE; the rest is subtracted afterwards. */
		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(STACK_MAX_DISTANCE)));
		local_size -= STACK_MAX_DISTANCE;

		if (local_size > STACK_MAX_DISTANCE)
			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size, TMP_REG3));
		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
	}

	/* Store the return address at the top of the reserved area. */
	FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(RETURN_ADDR_REG), offset));

	/* Store the callee-saved registers (minus the kept ones). */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset));
	}

	/* Store the callee-saved scratch registers. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_imm_s_inst(compiler, STACK_STORE | RS1(SLJIT_SP) | RS2(i), offset));
	}

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	/* This alignment is valid because offset is not used after storing FPU regs. */
	if ((offset & SSIZE_OF(sw)) != 0)
		offset -= SSIZE_OF(sw);
#endif /* SLJIT_CONFIG_RISCV_32 */

	/* Store the callee-saved floating point registers. */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset));
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_imm_s_inst(compiler, FSD | RS1(SLJIT_SP) | FRS2(i), offset));
	}

	/* Apply the remaining frame adjustment, if any. */
	if (local_size > STACK_MAX_DISTANCE)
		FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG1)));
	else if (local_size > 0)
		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(-local_size)));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* Move the word sized arguments into saved registers
	   (ADDI rd, rs, 0 is a register move). */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = SLJIT_R0;

	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_S0 - saved_arg_count) | RS1(tmp) | IMM_I(0)));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#undef STACK_MAX_DISTANCE
931
932
/* Records the context of an already-generated function without emitting any
   code. The local_size computation must mirror sljit_emit_enter exactly so
   that later emit_stack_frame_release produces a matching epilogue. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	/* Float register counts are packed into the upper bits of scratches/saveds. */
	sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
	sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	/* Kept saveds are not stored in the frame, so they are excluded here. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Pad so the f64 save area starts on an 8-byte boundary. */
		if ((local_size & SSIZE_OF(sw)) != 0)
			local_size += SSIZE_OF(sw);
		local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
	}
#else /* !SLJIT_CONFIG_RISCV_32 */
	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
#endif /* SLJIT_CONFIG_RISCV_32 */
	/* Round the frame up to a 16-byte boundary. */
	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;

	return SLJIT_SUCCESS;
}
959
960
#define STACK_MAX_DISTANCE (-SIMM_MIN - 16)
961
962
/* Emits the function epilogue: restores the return address (unless
   is_return_to is set), the saved general and float registers, and
   releases the stack frame by adjusting SLJIT_SP. The restore order
   and offsets mirror the stores done by sljit_emit_enter. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 i, tmp, offset;
	sljit_s32 local_size = compiler->local_size;

	/* If the frame is too large for a 12-bit immediate, release the
	   excess first, leaving STACK_MAX_DISTANCE for the register area. */
	if (local_size > STACK_MAX_DISTANCE) {
		local_size -= STACK_MAX_DISTANCE;

		if (local_size > STACK_MAX_DISTANCE) {
			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size, TMP_REG3));
			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RS1(SLJIT_SP) | RS2(TMP_REG2)));
		} else
			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size)));

		local_size = STACK_MAX_DISTANCE;
	}

	SLJIT_ASSERT(local_size > 0);

	/* Return address is stored at the top of the register save area. */
	offset = local_size - SSIZE_OF(sw);
	if (!is_return_to)
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RS1(SLJIT_SP) | IMM_I(offset)));

	/* Restore saved registers, skipping the kept (argument) saveds. */
	tmp = SLJIT_S0 - compiler->saveds;
	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
	}

	/* Restore callee-saved registers used as extra scratches. */
	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offset -= SSIZE_OF(sw);
		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
	}

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	/* This alignment is valid because offset is not used after storing FPU regs. */
	if ((offset & SSIZE_OF(sw)) != 0)
		offset -= SSIZE_OF(sw);
#endif /* SLJIT_CONFIG_RISCV_32 */

	/* Restore saved float registers. */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		offset -= SSIZE_OF(f64);
		FAIL_IF(push_inst(compiler, FLD | FRD(i) | RS1(SLJIT_SP) | IMM_I(offset)));
	}

	/* Release the remaining part of the frame. */
	return push_inst(compiler, ADDI | RD(SLJIT_SP) | RS1(SLJIT_SP) | IMM_I(local_size));
}
1015
1016
#undef STACK_MAX_DISTANCE
1017
1018
/* Emits a return with no result value: tears down the stack frame
   (restoring the return address) and jumps back to the caller. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	FAIL_IF(emit_stack_frame_release(compiler, 0));
	/* JALR with rd = zero register: a plain indirect jump to the return address. */
	return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0));
}
1026
1027
/* Emits a tail-call style return: releases the stack frame and jumps to
   the address given by src/srcw instead of the saved return address. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* Load the target before the frame (and its locals) is released. */
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* The source register is restored by the epilogue below,
		   so copy it into a temporary first. */
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* is_return_to = 1: do not reload the return address register. */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1049
1050
/* --------------------------------------------------------------------- */
1051
/* Operators */
1052
/* --------------------------------------------------------------------- */
1053
1054
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
1055
#define ARCH_32_64(a, b) a
1056
#else /* !SLJIT_CONFIG_RISCV_32 */
1057
#define ARCH_32_64(a, b) b
1058
#endif /* SLJIT_CONFIG_RISCV_32 */
1059
1060
/* Load/store opcode table, indexed by (flags & MEM_MASK).
   Layout: 16 integer entries (unsigned then signed variants of
   word/byte/half/int, each as store/load pairs), followed by 4 float
   entries (f64 store/load, f32 store/load). Integer width encodings
   differ between RV32 and RV64 via ARCH_32_64. */
static const sljit_ins data_transfer_insts[16 + 4] = {
/* u w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */),
/* u w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */),
/* u b s */ F3(0x0) | OPC(0x23) /* sb */,
/* u b l */ F3(0x4) | OPC(0x3) /* lbu */,
/* u h s */ F3(0x1) | OPC(0x23) /* sh */,
/* u h l */ F3(0x5) | OPC(0x3) /* lhu */,
/* u i s */ F3(0x2) | OPC(0x23) /* sw */,
/* u i l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x6) | OPC(0x3) /* lwu */),

/* s w s */ ARCH_32_64(F3(0x2) | OPC(0x23) /* sw */, F3(0x3) | OPC(0x23) /* sd */),
/* s w l */ ARCH_32_64(F3(0x2) | OPC(0x3) /* lw */, F3(0x3) | OPC(0x3) /* ld */),
/* s b s */ F3(0x0) | OPC(0x23) /* sb */,
/* s b l */ F3(0x0) | OPC(0x3) /* lb */,
/* s h s */ F3(0x1) | OPC(0x23) /* sh */,
/* s h l */ F3(0x1) | OPC(0x3) /* lh */,
/* s i s */ F3(0x2) | OPC(0x23) /* sw */,
/* s i l */ F3(0x2) | OPC(0x3) /* lw */,

/* d s */ F3(0x3) | OPC(0x27) /* fsd */,
/* d l */ F3(0x3) | OPC(0x7) /* fld */,
/* s s */ F3(0x2) | OPC(0x27) /* fsw */,
/* s l */ F3(0x2) | OPC(0x7) /* flw */,
};
1084
1085
#undef ARCH_32_64
1086
1087
static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 base, sljit_sw offset)
1088
{
1089
sljit_ins ins;
1090
1091
SLJIT_ASSERT(FAST_IS_REG(base) && offset <= 0xfff && offset >= SIMM_MIN);
1092
1093
ins = data_transfer_insts[flags & MEM_MASK] | RS1(base);
1094
if (flags & LOAD_DATA)
1095
ins |= ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) | IMM_I(offset);
1096
else
1097
ins |= ((flags & MEM_MASK) <= GPR_REG ? RS2(reg) : FRS2(reg)) | IMM_S(offset);
1098
1099
return push_inst(compiler, ins);
1100
}
1101
1102
/* Can perform an operation using at most 1 instruction. */
1103
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1104
{
1105
SLJIT_ASSERT(arg & SLJIT_MEM);
1106
1107
if (!(arg & OFFS_REG_MASK) && argw <= SIMM_MAX && argw >= SIMM_MIN) {
1108
/* Works for both absoulte and relative addresses. */
1109
if (SLJIT_UNLIKELY(flags & ARG_TEST))
1110
return 1;
1111
1112
FAIL_IF(push_mem_inst(compiler, flags, reg, arg & REG_MASK, argw));
1113
return -1;
1114
}
1115
return 0;
1116
}
1117
1118
#define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
1119
1120
/* See getput_arg below.
1121
Note: can_cache is called only for binary operators. */
1122
/* See getput_arg below.
   Decides whether the address computed for (arg, argw) can be reused
   for (next_arg, next_argw). Note: can_cache is called only for binary
   operators. */
static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_sw diff;

	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));

	if (arg & OFFS_REG_MASK) {
		/* Indexed form: only the (non-zero) shift amount matters,
		   and it must be shared by both accesses. */
		argw &= 0x3;
		next_argw &= 0x3;

		if (argw == 0 || argw != next_argw)
			return 0;

		return (arg == next_arg || (arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK)) ? 1 : 0;
	}

	if (arg != next_arg)
		return 0;

	/* Same base: cacheable when the second offset is reachable by a
	   12-bit displacement or shares the same upper immediate. */
	diff = next_argw - argw;
	return (diff <= SIMM_MAX && diff >= SIMM_MIN) || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw);
}
1144
1145
/* Emit the necessary instructions. See can_cache above. */
1146
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1147
{
1148
sljit_s32 base = arg & REG_MASK;
1149
sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1150
sljit_sw offset, argw_hi;
1151
#if defined __riscv_zba
1152
sljit_ins ins = ADD;
1153
#endif /* __riscv_zba */
1154
1155
SLJIT_ASSERT(arg & SLJIT_MEM);
1156
if (!(next_arg & SLJIT_MEM)) {
1157
next_arg = 0;
1158
next_argw = 0;
1159
}
1160
1161
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1162
argw &= 0x3;
1163
1164
#if defined __riscv_zba
1165
switch (argw) {
1166
case 1:
1167
ins = SH1ADD;
1168
break;
1169
case 2:
1170
ins = SH2ADD;
1171
break;
1172
case 3:
1173
ins = SH3ADD;
1174
break;
1175
}
1176
FAIL_IF(push_inst(compiler, ins | RD(tmp_r) | RS1(OFFS_REG(arg)) | RS2(base)));
1177
#else /* !__riscv_zba */
1178
/* Using the cache. */
1179
if (argw == compiler->cache_argw) {
1180
if (arg == compiler->cache_arg)
1181
return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
1182
1183
if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) {
1184
if (arg == next_arg && argw == (next_argw & 0x3)) {
1185
compiler->cache_arg = arg;
1186
compiler->cache_argw = argw;
1187
FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base)));
1188
return push_mem_inst(compiler, flags, reg, TMP_REG3, 0);
1189
}
1190
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(TMP_REG3)));
1191
return push_mem_inst(compiler, flags, reg, tmp_r, 0);
1192
}
1193
}
1194
1195
if (SLJIT_UNLIKELY(argw)) {
1196
compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
1197
compiler->cache_argw = argw;
1198
FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG3) | RS1(OFFS_REG(arg)) | IMM_I(argw)));
1199
}
1200
1201
if (arg == next_arg && argw == (next_argw & 0x3)) {
1202
compiler->cache_arg = arg;
1203
compiler->cache_argw = argw;
1204
FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3)));
1205
tmp_r = TMP_REG3;
1206
}
1207
else
1208
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(!argw ? OFFS_REG(arg) : TMP_REG3)));
1209
#endif /* __riscv_zba */
1210
1211
return push_mem_inst(compiler, flags, reg, tmp_r, 0);
1212
}
1213
1214
if (compiler->cache_arg == arg && argw - compiler->cache_argw <= SIMM_MAX && argw - compiler->cache_argw >= SIMM_MIN)
1215
return push_mem_inst(compiler, flags, reg, TMP_REG3, argw - compiler->cache_argw);
1216
1217
if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= SIMM_MAX) && (argw - compiler->cache_argw >= SIMM_MIN)) {
1218
offset = argw - compiler->cache_argw;
1219
} else {
1220
compiler->cache_arg = SLJIT_MEM;
1221
1222
argw_hi = TO_ARGW_HI(argw);
1223
1224
if (next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1225
FAIL_IF(load_immediate(compiler, TMP_REG3, argw, tmp_r));
1226
compiler->cache_argw = argw;
1227
offset = 0;
1228
} else {
1229
FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi, tmp_r));
1230
compiler->cache_argw = argw_hi;
1231
offset = argw & 0xfff;
1232
argw = argw_hi;
1233
}
1234
}
1235
1236
if (!base)
1237
return push_mem_inst(compiler, flags, reg, TMP_REG3, offset);
1238
1239
if (arg == next_arg && next_argw - argw <= SIMM_MAX && next_argw - argw >= SIMM_MIN) {
1240
compiler->cache_arg = arg;
1241
FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG3) | RS1(TMP_REG3) | RS2(base)));
1242
return push_mem_inst(compiler, flags, reg, TMP_REG3, offset);
1243
}
1244
1245
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(TMP_REG3) | RS2(base)));
1246
return push_mem_inst(compiler, flags, reg, tmp_r, offset);
1247
}
1248
1249
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1250
{
1251
sljit_s32 base = arg & REG_MASK;
1252
sljit_s32 tmp_r = TMP_REG1;
1253
1254
if (getput_arg_fast(compiler, flags, reg, arg, argw))
1255
return compiler->error;
1256
1257
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1258
tmp_r = reg;
1259
1260
if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1261
argw &= 0x3;
1262
1263
if (SLJIT_UNLIKELY(argw)) {
1264
FAIL_IF(push_inst(compiler, SLLI | RD(tmp_r) | RS1(OFFS_REG(arg)) | IMM_I(argw)));
1265
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base)));
1266
}
1267
else
1268
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(base) | RS2(OFFS_REG(arg))));
1269
1270
argw = 0;
1271
} else {
1272
FAIL_IF(load_immediate(compiler, tmp_r, TO_ARGW_HI(argw), TMP_REG3));
1273
1274
if (base != 0)
1275
FAIL_IF(push_inst(compiler, ADD | RD(tmp_r) | RS1(tmp_r) | RS2(base)));
1276
}
1277
1278
return push_mem_inst(compiler, flags, reg, tmp_r, argw & 0xfff);
1279
}
1280
1281
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1282
{
1283
if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1284
return compiler->error;
1285
return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1286
}
1287
1288
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
1289
#define WORD 0
1290
#define WORD_32 0
1291
#define IMM_EXTEND(v) (IMM_I(v))
1292
#else /* !SLJIT_CONFIG_RISCV_32 */
1293
#define WORD word
1294
#define WORD_32 0x08
1295
#define IMM_EXTEND(v) (IMM_I((op & SLJIT_32) ? (v) : (32 + (v))))
1296
#endif /* SLJIT_CONFIG_RISCV_32 */
1297
#ifndef __riscv_zbb
1298
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
1299
{
1300
sljit_s32 is_clz = (GET_OPCODE(op) == SLJIT_CLZ);
1301
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1302
sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
1303
sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
1304
#else /* !SLJIT_CONFIG_RISCV_64 */
1305
sljit_ins word_size = 32;
1306
#endif /* SLJIT_CONFIG_RISCV_64 */
1307
1308
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
1309
1310
/* The OTHER_FLAG is the counter. */
1311
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(word_size)));
1312
1313
/* The TMP_REG2 is the next value. */
1314
if (src != TMP_REG2)
1315
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src) | IMM_I(0)));
1316
1317
FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)((is_clz ? 4 : 5) * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));
1318
1319
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(OTHER_FLAG) | RS1(TMP_ZERO) | IMM_I(0)));
1320
if (!is_clz) {
1321
FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG2) | IMM_I(1)));
1322
FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG1) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));
1323
} else
1324
FAIL_IF(push_inst(compiler, BLT | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)(2 * SSIZE_OF(ins)) << 7) | ((sljit_ins)(8 * SSIZE_OF(ins)) << 20)));
1325
1326
/* The TMP_REG1 is the next shift. */
1327
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG1) | RS1(TMP_ZERO) | IMM_I(word_size)));
1328
1329
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(TMP_REG2) | IMM_I(0)));
1330
FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(1)));
1331
1332
FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG1)));
1333
FAIL_IF(push_inst(compiler, BNE | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((2 * SSIZE_OF(ins)) << 7))));
1334
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(TMP_REG1) | IMM_I(-1)));
1335
FAIL_IF(push_inst(compiler, (is_clz ? SRL : SLL) | WORD | RD(TMP_REG2) | RS1(EQUAL_FLAG) | RS2(TMP_REG2)));
1336
FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1)));
1337
FAIL_IF(push_inst(compiler, BEQ | RS1(TMP_REG2) | RS2(TMP_ZERO) | ((sljit_ins)0xfe000e80 - ((5 * SSIZE_OF(ins)) << 7))));
1338
1339
return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(OTHER_FLAG) | IMM_I(0));
1340
}
1341
1342
static sljit_s32 emit_rev(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
1343
{
1344
SLJIT_UNUSED_ARG(op);
1345
1346
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1347
if (!(op & SLJIT_32)) {
1348
FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0x10000));
1349
FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(src) | IMM_I(32)));
1350
FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xfff)));
1351
FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src) | IMM_I(32)));
1352
FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(32)));
1353
FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)));
1354
FAIL_IF(push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG)));
1355
1356
FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(16)));
1357
FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
1358
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
1359
FAIL_IF(push_inst(compiler, SLLI | RD(EQUAL_FLAG) | RS1(OTHER_FLAG) | IMM_I(8)));
1360
FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(16)));
1361
FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG)));
1362
FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)));
1363
1364
FAIL_IF(push_inst(compiler, SRLI | RD(TMP_REG1) | RS1(dst) | IMM_I(8)));
1365
FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
1366
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
1367
FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(8)));
1368
return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
1369
}
1370
#endif /* SLJIT_CONFIG_RISCV_64 */
1371
1372
FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(src) | IMM_I(16)));
1373
FAIL_IF(push_inst(compiler, LUI | RD(OTHER_FLAG) | 0xff0000));
1374
FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(src) | IMM_I(16)));
1375
FAIL_IF(push_inst(compiler, ORI | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | IMM_I(0xff)));
1376
FAIL_IF(push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1)));
1377
1378
FAIL_IF(push_inst(compiler, SRLI | WORD_32 | RD(TMP_REG1) | RS1(dst) | IMM_I(8)));
1379
FAIL_IF(push_inst(compiler, AND | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
1380
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(OTHER_FLAG)));
1381
FAIL_IF(push_inst(compiler, SLLI | WORD_32 | RD(dst) | RS1(dst) | IMM_I(8)));
1382
return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
1383
}
1384
1385
static sljit_s32 emit_rev16(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw src)
1386
{
1387
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1388
sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
1389
sljit_ins word_size = (op & SLJIT_32) ? 32 : 64;
1390
#else /* !SLJIT_CONFIG_RISCV_64 */
1391
sljit_ins word_size = 32;
1392
#endif /* SLJIT_CONFIG_RISCV_64 */
1393
1394
FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(src) | IMM_I(8)));
1395
FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src) | IMM_I(word_size - 8)));
1396
FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_I(0xff)));
1397
FAIL_IF(push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI) | WORD | RD(dst) | RS1(dst) | IMM_I(word_size - 16)));
1398
return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(TMP_REG1));
1399
}
1400
#endif /* !__riscv_zbb */
1401
1402
/* Emits a logical operation, choosing the immediate (op_imm) or register
   (op_reg) form based on SRC2_IMM. When SLJIT_SET_Z is requested the
   result is also computed into EQUAL_FLAG; the destination write is
   skipped entirely for UNUSED_DEST. Expects op/flags/dst/src1/src2 in
   the expansion scope (used inside emit_single_op). */
#define EMIT_LOGICAL(op_imm, op_reg) \
	if (flags & SRC2_IMM) { \
		if (op & SLJIT_SET_Z) \
			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2))); \
		if (!(flags & UNUSED_DEST)) \
			FAIL_IF(push_inst(compiler, op_imm | RD(dst) | RS1(src1) | IMM_I(src2))); \
	} \
	else { \
		if (op & SLJIT_SET_Z) \
			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2))); \
		if (!(flags & UNUSED_DEST)) \
			FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RS1(src1) | RS2(src2))); \
	}

/* Selects the immediate/register opcode pair for a shift operation by
   assigning the op_imm/op_reg locals of the enclosing function. */
#define EMIT_SHIFT(imm, reg) \
	op_imm = (imm); \
	op_reg = (reg);
1419
1420
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1421
sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1422
{
1423
sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1424
sljit_ins op_imm, op_reg;
1425
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1426
sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
1427
#endif /* SLJIT_CONFIG_RISCV_64 */
1428
1429
SLJIT_ASSERT(WORD == 0 || WORD == 0x8);
1430
1431
switch (GET_OPCODE(op)) {
1432
case SLJIT_MOV:
1433
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1434
if (dst != src2)
1435
return push_inst(compiler, ADDI | RD(dst) | RS1(src2) | IMM_I(0));
1436
return SLJIT_SUCCESS;
1437
1438
case SLJIT_MOV_U8:
1439
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1440
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1441
return push_inst(compiler, ANDI | RD(dst) | RS1(src2) | IMM_I(0xff));
1442
SLJIT_ASSERT(dst == src2);
1443
return SLJIT_SUCCESS;
1444
1445
case SLJIT_MOV_S8:
1446
#if defined __riscv_zbb
1447
return push_inst(compiler, SEXTB | RD(dst) | RS1(src2));
1448
#else /* !__riscv_zbb */
1449
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1450
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1451
FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(24)));
1452
return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(24));
1453
}
1454
SLJIT_ASSERT(dst == src2);
1455
return SLJIT_SUCCESS;
1456
#endif /* __riscv_zbb */
1457
1458
case SLJIT_MOV_U16:
1459
#if defined __riscv_zbb
1460
return push_inst(compiler, ZEXTH | RD(dst) | RS1(src2));
1461
#else /* !__riscv_zbb */
1462
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1463
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1464
FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16)));
1465
return push_inst(compiler, SRLI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16));
1466
}
1467
SLJIT_ASSERT(dst == src2);
1468
return SLJIT_SUCCESS;
1469
#endif /* __riscv_zbb */
1470
1471
case SLJIT_MOV_S16:
1472
#if defined __riscv_zbb
1473
return push_inst(compiler, SEXTH | RD(dst) | RS1(src2));
1474
#else /* !__riscv_zbb */
1475
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1476
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1477
FAIL_IF(push_inst(compiler, SLLI | WORD | RD(dst) | RS1(src2) | IMM_EXTEND(16)));
1478
return push_inst(compiler, SRAI | WORD | RD(dst) | RS1(dst) | IMM_EXTEND(16));
1479
}
1480
SLJIT_ASSERT(dst == src2);
1481
return SLJIT_SUCCESS;
1482
#endif /* !__riscv_zbb */
1483
1484
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1485
case SLJIT_MOV_U32:
1486
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1487
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE)) {
1488
FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(src2) | IMM_I(32)));
1489
return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32));
1490
}
1491
SLJIT_ASSERT(dst == src2);
1492
return SLJIT_SUCCESS;
1493
1494
case SLJIT_MOV_S32:
1495
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1496
if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1497
return push_inst(compiler, ADDI | 0x8 | RD(dst) | RS1(src2) | IMM_I(0));
1498
SLJIT_ASSERT(dst == src2);
1499
return SLJIT_SUCCESS;
1500
#endif /* SLJIT_CONFIG_RISCV_64 */
1501
1502
case SLJIT_CLZ:
1503
#if defined __riscv_zbb
1504
return push_inst(compiler, CLZ | WORD | RD(dst) | RS1(src2));
1505
#endif /* __riscv_zbb */
1506
case SLJIT_CTZ:
1507
#if defined __riscv_zbb
1508
return push_inst(compiler, CTZ | WORD | RD(dst) | RS1(src2));
1509
#else /* !__riscv_zbb */
1510
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1511
return emit_clz_ctz(compiler, op, dst, src2);
1512
#endif /* __riscv_zbb */
1513
1514
case SLJIT_REV:
1515
#if defined __riscv_zbb
1516
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1517
FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2)));
1518
#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64
1519
if (op & SLJIT_32)
1520
return push_inst(compiler, SRAI | RD(dst) | RS1(dst) | IMM_I(32));
1521
return SLJIT_SUCCESS;
1522
#else /* !SLJIT_CONFIG_RISCV_64 */
1523
return SLJIT_SUCCESS;
1524
#endif /* SLJIT_CONFIG_RISCV_64 */
1525
#endif /* __riscv_zbb */
1526
case SLJIT_REV_S32:
1527
#if ((defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32) || defined __riscv_zbb)
1528
case SLJIT_REV_U32:
1529
#endif /* SLJIT_CONFIG_RISCV_32 || __riscv_zbb */
1530
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1531
#if defined __riscv_zbb
1532
FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2)));
1533
#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64
1534
return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U32 ? SRLI : SRAI )| RD(dst) | RS1(dst) | IMM_I(32));
1535
#else /* !SLJIT_CONFIG_RISCV_64 */
1536
return SLJIT_SUCCESS;
1537
#endif /* SLJIT_CONFIG_RISCV_64 */
1538
#else /* !__riscv_zbb */
1539
return emit_rev(compiler, op, dst, src2);
1540
#endif /* __riscv_zbb */
1541
case SLJIT_REV_U16:
1542
case SLJIT_REV_S16:
1543
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1544
#if defined __riscv_zbb
1545
FAIL_IF(push_inst(compiler, REV8 | RD(dst) | RS1(src2)));
1546
#if defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64
1547
return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI )| RD(dst) | RS1(dst) | IMM_I(48));
1548
#else /* !SLJIT_CONFIG_RISCV_64 */
1549
return push_inst(compiler, (GET_OPCODE(op) == SLJIT_REV_U16 ? SRLI : SRAI) | RD(dst) | RS1(dst) | IMM_I(16));
1550
#endif /* SLJIT_CONFIG_RISCV_64 */
1551
#else /* !__riscv_zbb */
1552
return emit_rev16(compiler, op, dst, src2);
1553
#endif /* __riscv_zbb */
1554
1555
#if ((defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64) && !defined __riscv_zbb)
1556
case SLJIT_REV_U32:
1557
SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1558
FAIL_IF(emit_rev(compiler, op, dst, src2));
1559
if (dst == TMP_REG2)
1560
return SLJIT_SUCCESS;
1561
FAIL_IF(push_inst(compiler, SLLI | RD(dst) | RS1(dst) | IMM_I(32)));
1562
return push_inst(compiler, SRLI | RD(dst) | RS1(dst) | IMM_I(32));
1563
#endif /* SLJIT_CONFIG_RISCV_64 && !__riscv_zbb */
1564
case SLJIT_ADD:
1565
/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1566
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1567
carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1568
1569
if (flags & SRC2_IMM) {
1570
if (is_overflow) {
1571
if (src2 >= 0)
1572
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
1573
else
1574
FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
1575
}
1576
else if (op & SLJIT_SET_Z)
1577
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
1578
1579
/* Only the zero flag is needed. */
1580
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1581
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
1582
}
1583
else {
1584
if (is_overflow)
1585
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1586
else if (op & SLJIT_SET_Z)
1587
FAIL_IF(push_inst(compiler, ADD | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1588
1589
if (is_overflow || carry_src_r != 0) {
1590
if (src1 != dst)
1591
carry_src_r = (sljit_s32)src1;
1592
else if (src2 != dst)
1593
carry_src_r = (sljit_s32)src2;
1594
else {
1595
FAIL_IF(push_inst(compiler, ADDI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(0)));
1596
carry_src_r = OTHER_FLAG;
1597
}
1598
}
1599
1600
/* Only the zero flag is needed. */
1601
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1602
FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2)));
1603
}
1604
1605
/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1606
if (is_overflow || carry_src_r != 0) {
1607
if (flags & SRC2_IMM)
1608
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(src2)));
1609
else
1610
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(carry_src_r)));
1611
}
1612
1613
if (!is_overflow)
1614
return SLJIT_SUCCESS;
1615
1616
FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
1617
if (op & SLJIT_SET_Z)
1618
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0)));
1619
FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31)));
1620
return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG));
1621
1622
case SLJIT_ADDC:
1623
carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1624
1625
if (flags & SRC2_IMM) {
1626
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
1627
} else {
1628
if (carry_src_r != 0) {
1629
if (src1 != dst)
1630
carry_src_r = (sljit_s32)src1;
1631
else if (src2 != dst)
1632
carry_src_r = (sljit_s32)src2;
1633
else {
1634
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
1635
carry_src_r = EQUAL_FLAG;
1636
}
1637
}
1638
1639
FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(src1) | RS2(src2)));
1640
}
1641
1642
/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1643
if (carry_src_r != 0) {
1644
if (flags & SRC2_IMM)
1645
FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(src2)));
1646
else
1647
FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(dst) | RS2(carry_src_r)));
1648
}
1649
1650
FAIL_IF(push_inst(compiler, ADD | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
1651
1652
if (carry_src_r == 0)
1653
return SLJIT_SUCCESS;
1654
1655
/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1656
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG)));
1657
/* Set carry flag. */
1658
return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(EQUAL_FLAG));
1659
1660
case SLJIT_SUB:
1661
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
1662
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2)));
1663
src2 = TMP_REG2;
1664
flags &= ~SRC2_IMM;
1665
}
1666
1667
is_handled = 0;
1668
1669
if (flags & SRC2_IMM) {
1670
if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1671
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
1672
is_handled = 1;
1673
}
1674
else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1675
FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
1676
is_handled = 1;
1677
}
1678
}
1679
1680
if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1681
is_handled = 1;
1682
1683
if (flags & SRC2_IMM) {
1684
reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1685
FAIL_IF(push_inst(compiler, ADDI | RD(reg) | RS1(TMP_ZERO) | IMM_I(src2)));
1686
src2 = reg;
1687
flags &= ~SRC2_IMM;
1688
}
1689
1690
switch (GET_FLAG_TYPE(op)) {
1691
case SLJIT_LESS:
1692
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
1693
break;
1694
case SLJIT_GREATER:
1695
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
1696
break;
1697
case SLJIT_SIG_LESS:
1698
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
1699
break;
1700
case SLJIT_SIG_GREATER:
1701
FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RS1(src2) | RS2(src1)));
1702
break;
1703
}
1704
}
1705
1706
if (is_handled) {
1707
if (flags & SRC2_IMM) {
1708
if (op & SLJIT_SET_Z)
1709
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));
1710
if (!(flags & UNUSED_DEST))
1711
return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2));
1712
}
1713
else {
1714
if (op & SLJIT_SET_Z)
1715
FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1716
if (!(flags & UNUSED_DEST))
1717
return push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2));
1718
}
1719
return SLJIT_SUCCESS;
1720
}
1721
1722
is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1723
is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1724
1725
if (flags & SRC2_IMM) {
1726
if (is_overflow) {
1727
if (src2 >= 0)
1728
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(0)));
1729
else
1730
FAIL_IF(push_inst(compiler, XORI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-1)));
1731
}
1732
else if (op & SLJIT_SET_Z)
1733
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(-src2)));
1734
1735
if (is_overflow || is_carry)
1736
FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
1737
1738
/* Only the zero flag is needed. */
1739
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1740
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)));
1741
}
1742
else {
1743
if (is_overflow)
1744
FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1745
else if (op & SLJIT_SET_Z)
1746
FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1747
1748
if (is_overflow || is_carry)
1749
FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
1750
1751
/* Only the zero flag is needed. */
1752
if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1753
FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)));
1754
}
1755
1756
if (!is_overflow)
1757
return SLJIT_SUCCESS;
1758
1759
FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RS1(dst) | RS2(EQUAL_FLAG)));
1760
if (op & SLJIT_SET_Z)
1761
FAIL_IF(push_inst(compiler, ADDI | RD(EQUAL_FLAG) | RS1(dst) | IMM_I(0)));
1762
FAIL_IF(push_inst(compiler, SRLI | WORD | RD(TMP_REG1) | RS1(TMP_REG1) | IMM_EXTEND(31)));
1763
return push_inst(compiler, XOR | RD(OTHER_FLAG) | RS1(TMP_REG1) | RS2(OTHER_FLAG));
1764
1765
case SLJIT_SUBC:
1766
if ((flags & SRC2_IMM) && src2 == SIMM_MIN) {
1767
FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG2) | RS1(TMP_ZERO) | IMM_I(src2)));
1768
src2 = TMP_REG2;
1769
flags &= ~SRC2_IMM;
1770
}
1771
1772
is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1773
1774
if (flags & SRC2_IMM) {
1775
if (is_carry)
1776
FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
1777
1778
FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(-src2)));
1779
}
1780
else {
1781
if (is_carry)
1782
FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1783
1784
FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(src1) | RS2(src2)));
1785
}
1786
1787
if (is_carry)
1788
FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RS1(dst) | RS2(OTHER_FLAG)));
1789
1790
FAIL_IF(push_inst(compiler, SUB | WORD | RD(dst) | RS1(dst) | RS2(OTHER_FLAG)));
1791
1792
if (!is_carry)
1793
return SLJIT_SUCCESS;
1794
1795
return push_inst(compiler, OR | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(TMP_REG1));
1796
1797
case SLJIT_MUL:
1798
SLJIT_ASSERT(!(flags & SRC2_IMM));
1799
1800
if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1801
return push_inst(compiler, MUL | WORD | RD(dst) | RS1(src1) | RS2(src2));
1802
1803
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1804
if (word) {
1805
FAIL_IF(push_inst(compiler, MUL | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
1806
FAIL_IF(push_inst(compiler, MUL | 0x8 | RD(dst) | RS1(src1) | RS2(src2)));
1807
return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(dst) | RS2(OTHER_FLAG));
1808
}
1809
#endif /* SLJIT_CONFIG_RISCV_64 */
1810
1811
FAIL_IF(push_inst(compiler, MULH | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1812
FAIL_IF(push_inst(compiler, MUL | RD(dst) | RS1(src1) | RS2(src2)));
1813
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
1814
FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(31)));
1815
#else /* !SLJIT_CONFIG_RISCV_32 */
1816
FAIL_IF(push_inst(compiler, SRAI | RD(OTHER_FLAG) | RS1(dst) | IMM_I(63)));
1817
#endif /* SLJIT_CONFIG_RISCV_32 */
1818
return push_inst(compiler, SUB | RD(OTHER_FLAG) | RS1(EQUAL_FLAG) | RS2(OTHER_FLAG));
1819
1820
case SLJIT_AND:
1821
EMIT_LOGICAL(ANDI, AND);
1822
return SLJIT_SUCCESS;
1823
1824
case SLJIT_OR:
1825
EMIT_LOGICAL(ORI, OR);
1826
return SLJIT_SUCCESS;
1827
1828
case SLJIT_XOR:
1829
EMIT_LOGICAL(XORI, XOR);
1830
return SLJIT_SUCCESS;
1831
1832
case SLJIT_SHL:
1833
case SLJIT_MSHL:
1834
EMIT_SHIFT(SLLI, SLL);
1835
break;
1836
1837
case SLJIT_LSHR:
1838
case SLJIT_MLSHR:
1839
EMIT_SHIFT(SRLI, SRL);
1840
break;
1841
1842
case SLJIT_ASHR:
1843
case SLJIT_MASHR:
1844
EMIT_SHIFT(SRAI, SRA);
1845
break;
1846
1847
case SLJIT_ROTL:
1848
case SLJIT_ROTR:
1849
if (flags & SRC2_IMM) {
1850
SLJIT_ASSERT(src2 != 0);
1851
#if defined __riscv_zbb
1852
if (GET_OPCODE(op) == SLJIT_ROTL) {
1853
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1854
src2 = ((op & SLJIT_32) ? 32 : 64) - src2;
1855
#else /* !SLJIT_CONFIG_RISCV_64 */
1856
src2 = 32 - src2;
1857
#endif /* SLJIT_CONFIG_RISCV_64 */
1858
}
1859
return push_inst(compiler, RORI | WORD | RD(dst) | RS1(src1) | IMM_I(src2));
1860
#else /* !__riscv_zbb */
1861
op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SLLI : SRLI;
1862
FAIL_IF(push_inst(compiler, op_imm | WORD | RD(OTHER_FLAG) | RS1(src1) | IMM_I(src2)));
1863
1864
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
1865
src2 = ((op & SLJIT_32) ? 32 : 64) - src2;
1866
#else /* !SLJIT_CONFIG_RISCV_64 */
1867
src2 = 32 - src2;
1868
#endif /* SLJIT_CONFIG_RISCV_64 */
1869
op_imm = (GET_OPCODE(op) == SLJIT_ROTL) ? SRLI : SLLI;
1870
FAIL_IF(push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2)));
1871
return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG));
1872
#endif /* !__riscv_zbb */
1873
}
1874
1875
#if defined __riscv_zbb
1876
return push_inst(compiler, (GET_OPCODE(op) == SLJIT_ROTL ? ROL : ROR) | WORD | RD(dst) | RS1(src1) | RS2(src2));
1877
#else /* !__riscv_zbb */
1878
if (src2 == TMP_ZERO) {
1879
if (dst != src1)
1880
return push_inst(compiler, ADDI | WORD | RD(dst) | RS1(src1) | IMM_I(0));
1881
return SLJIT_SUCCESS;
1882
}
1883
1884
FAIL_IF(push_inst(compiler, SUB | WORD | RD(EQUAL_FLAG) | RS1(TMP_ZERO) | RS2(src2)));
1885
op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SLL : SRL;
1886
FAIL_IF(push_inst(compiler, op_reg | WORD | RD(OTHER_FLAG) | RS1(src1) | RS2(src2)));
1887
op_reg = (GET_OPCODE(op) == SLJIT_ROTL) ? SRL : SLL;
1888
FAIL_IF(push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(EQUAL_FLAG)));
1889
return push_inst(compiler, OR | RD(dst) | RS1(dst) | RS2(OTHER_FLAG));
1890
#endif /* !riscv_zbb */
1891
default:
1892
SLJIT_UNREACHABLE();
1893
return SLJIT_SUCCESS;
1894
}
1895
1896
if (flags & SRC2_IMM) {
1897
if (op & SLJIT_SET_Z)
1898
FAIL_IF(push_inst(compiler, op_imm | WORD | RD(EQUAL_FLAG) | RS1(src1) | IMM_I(src2)));
1899
1900
if (flags & UNUSED_DEST)
1901
return SLJIT_SUCCESS;
1902
return push_inst(compiler, op_imm | WORD | RD(dst) | RS1(src1) | IMM_I(src2));
1903
}
1904
1905
if (op & SLJIT_SET_Z)
1906
FAIL_IF(push_inst(compiler, op_reg | WORD | RD(EQUAL_FLAG) | RS1(src1) | RS2(src2)));
1907
1908
if (flags & UNUSED_DEST)
1909
return SLJIT_SUCCESS;
1910
return push_inst(compiler, op_reg | WORD | RD(dst) | RS1(src1) | RS2(src2));
1911
}
1912
1913
#undef IMM_EXTEND
1914
1915
/* Central operand-resolution helper for one- and two-operand integer ops.
   Loads/normalizes dst, src1 and src2 (register, immediate or memory form),
   calls emit_single_op for the actual computation, then stores the result
   back to memory when dst is a memory operand. Returns an sljit error code. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r = TMP_REG2;
	sljit_s32 src1_r;
	sljit_sw src2_r = 0;
	/* If src1 already sits in a register for a binary op, TMP_REG1 is free
	   to hold src2; otherwise src2 must go to TMP_REG2. */
	sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;

	if (!(flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination. dst == 0 means the result is only needed for flags. */
	if (dst == 0) {
		SLJIT_ASSERT(HAS_FLAGS(op));
		flags |= UNUSED_DEST;
		dst = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		if (flags & MOVE_OP)
			src2_tmp_reg = dst_r;
	}
	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
		flags |= SLOW_DEST;

	/* Try to fold a small immediate directly into the instruction. */
	if (flags & IMM_OP) {
		if (src2 == SLJIT_IMM && src2w != 0 && src2w <= SIMM_MAX && src2w >= SIMM_MIN) {
			flags |= SRC2_IMM;
			src2_r = src2w;
		}
		else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= SIMM_MAX && src1w >= SIMM_MIN) {
			/* Commutative op: the immediate may come from src1 instead. */
			flags |= SRC2_IMM;
			src2_r = src1w;

			/* And swap arguments. */
			src1 = src2;
			src1w = src2w;
			src2 = SLJIT_IMM;
			/* src2w = src2_r unneeded. */
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	} else if (src1 == SLJIT_IMM) {
		if (src1w) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
			src1_r = TMP_REG1;
		}
		else
			src1_r = TMP_ZERO;
	} else {
		/* Memory operand: fast path if the address fits one instruction,
		   otherwise defer to getput_arg below (SLOW_SRC1). */
		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC1;
		src1_r = TMP_REG1;
	}

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* Register-to-register move with non-register dst: reuse src2. */
		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
			dst_r = (sljit_s32)src2_r;
	} else if (src2 == SLJIT_IMM) {
		if (!(flags & SRC2_IMM)) {
			if (src2w) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3));
				src2_r = src2_tmp_reg;
			} else {
				/* Moving zero: use the hardwired zero register. */
				src2_r = TMP_ZERO;
				if (flags & MOVE_OP) {
					if (dst & SLJIT_MEM)
						dst_r = 0;
					else
						op = SLJIT_MOV;
				}
			}
		}
	} else {
		if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
			FAIL_IF(compiler->error);
		else
			flags |= SLOW_SRC2;
		src2_r = src2_tmp_reg;
	}

	/* Resolve slow (multi-instruction) memory loads, ordering the two loads
	   so that address caching (TMP_REG3) helps the destination store. */
	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		SLJIT_ASSERT(src2_r == TMP_REG2);
		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is a memory operand. */
	if (dst & SLJIT_MEM) {
		if (!(flags & SLOW_DEST)) {
			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
			return compiler->error;
		}
		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
	}

	return SLJIT_SUCCESS;
}
2040
2041
/* Zero-operand operations: breakpoint, nop, and the fixed-register
   multiply/divide helpers that operate on SLJIT_R0/SLJIT_R1 in place. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	/* SLJIT_32 selects the *W (32-bit) instruction forms; bit 3 of funct. */
	sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;

	SLJIT_ASSERT(word == 0 || word == 0x8);
#endif /* SLJIT_CONFIG_RISCV_64 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		return push_inst(compiler, EBREAK);
	case SLJIT_NOP:
		/* addi zero, zero, 0 is the canonical RISC-V nop. */
		return push_inst(compiler, ADDI | RD(TMP_ZERO) | RS1(TMP_ZERO) | IMM_I(0));
	case SLJIT_LMUL_UW:
		/* Save R1 first: the high half overwrites R1 before the low-half
		   multiply reads the original value. */
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0)));
		FAIL_IF(push_inst(compiler, MULHU | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
		return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1));
	case SLJIT_LMUL_SW:
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R1) | IMM_I(0)));
		FAIL_IF(push_inst(compiler, MULH | RD(SLJIT_R1) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
		return push_inst(compiler, MUL | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(TMP_REG1));
	case SLJIT_DIVMOD_UW:
		/* Save the dividend: R0 is clobbered by the quotient before the
		   remainder is computed. */
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0)));
		FAIL_IF(push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
		return push_inst(compiler, REMU | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1));
	case SLJIT_DIVMOD_SW:
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(SLJIT_R0) | IMM_I(0)));
		FAIL_IF(push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1)));
		return push_inst(compiler, REM | WORD | RD(SLJIT_R1) | RS1(TMP_REG1) | RS2(SLJIT_R1));
	case SLJIT_DIV_UW:
		return push_inst(compiler, DIVU | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1));
	case SLJIT_DIV_SW:
		return push_inst(compiler, DIV | WORD | RD(SLJIT_R0) | RS1(SLJIT_R0) | RS2(SLJIT_R1));
	case SLJIT_MEMORY_BARRIER:
		/* FENCE with pred/succ fields set (0x0ff00000). */
		return push_inst(compiler, FENCE | 0x0ff00000);
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2086
2087
/* Single-operand operations (moves with width conversion, CLZ/CTZ, byte
   reverses). Normalizes immediates to the requested width, then dispatches
   to emit_op with the matching data-size flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (op & SLJIT_32)
		flags = INT_DATA | SIGNED_DATA;
#endif /* SLJIT_CONFIG_RISCV_64 */

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	/* On RV32 all 32-bit move variants are plain word moves. */
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
#endif /* SLJIT_CONFIG_RISCV_32 */
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	case SLJIT_MOV_U32:
		/* Immediates are truncated to the unsigned 32-bit value. */
		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);

	case SLJIT_MOV_S32:
	/* Logical operators have no W variant, so sign extended input is necessary for them. */
	case SLJIT_MOV32:
		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
#endif /* SLJIT_CONFIG_RISCV_64 */

	case SLJIT_MOV_U8:
		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_CLZ:
	case SLJIT_CTZ:
	case SLJIT_REV:
		return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);

	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		/* Force 32-bit handling regardless of the op's own width bit. */
		return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
2152
2153
/* Two-operand operations. Records the flag-producing op class for later
   conditional branches, masks shift amounts to the operand width, and
   dispatches to emit_op with the appropriate immediate/commutativity flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (op & SLJIT_32) {
		/* 32-bit mode: sign-extend immediates so they behave as s32. */
		flags |= INT_DATA | SIGNED_DATA;
		if (src1 == SLJIT_IMM)
			src1w = (sljit_s32)src1w;
		if (src2 == SLJIT_IMM)
			src2w = (sljit_s32)src2w;
	}
#endif /* SLJIT_CONFIG_RISCV_64 */

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SUB:
	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		/* Subtraction is not commutative: no CUMULATIVE_OP. */
		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		compiler->status_flags_state = 0;
		/* MUL has no immediate form: CUMULATIVE_OP only, no IMM_OP. */
		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
		if (src2 == SLJIT_IMM) {
			/* Mask the shift amount to the operand bit width. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
			src2w &= 0x1f;
#else /* !SLJIT_CONFIG_RISCV_32 */
			if (op & SLJIT_32)
				src2w &= 0x1f;
			else
				src2w &= 0x3f;
#endif /* SLJIT_CONFIG_RISCV_32 */
		}

		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
2221
2222
/* Flags-only variant of op2: performs the operation with no destination
   (dst == 0), used when only the status flags are needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	/* Checks were already done above; skip them in the delegated call. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
}
2232
2233
/* Two-operand op with a register-only destination that is also an implicit
   source (currently only MULADD: dst_reg += src1 * src2). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	/* SLJIT_32 selects the *W (32-bit) instruction forms. */
	sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
#endif /* SLJIT_CONFIG_RISCV_64 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));

	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);

	switch (GET_OPCODE(op)) {
	case SLJIT_MULADD:
		/* Compute the product into TMP_REG2, then accumulate into dst_reg. */
		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
		return push_inst(compiler, ADD | WORD | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG2));
	}

	return SLJIT_SUCCESS;
}
2256
2257
/* Funnel shift: shifts src1_reg by src3 bits, filling the vacated bits from
   src2_reg, and stores the result in dst_reg. Degenerates to a rotate when
   both sources are the same register.

   Fix: the register-copy of src3 (needed when dst_reg aliases src3) called
   push_inst() without FAIL_IF, silently discarding a potential allocation
   failure; every other push_inst call site checks the result. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;
	sljit_ins ins1, ins2, ins3;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	/* SLJIT_32 selects the *W (32-bit) instruction forms. */
	sljit_ins word = (sljit_ins)(op & SLJIT_32) >> 5;
	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
#else /* !SLJIT_CONFIG_RISCV_64 */
	sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
	sljit_sw bit_length = 32;
#endif /* SLJIT_CONFIG_RISCV_64 */

	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);

	/* Same source on both sides: a funnel shift is simply a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Constant shift amount: emit the two complementary shifts directly. */
	if (src3 == SLJIT_IMM) {
		src3w &= bit_length - 1;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (is_left) {
			ins1 = SLLI | WORD | IMM_I(src3w);
			src3w = bit_length - src3w;
			ins2 = SRLI | WORD | IMM_I(src3w);
		} else {
			ins1 = SRLI | WORD | IMM_I(src3w);
			src3w = bit_length - src3w;
			ins2 = SLLI | WORD | IMM_I(src3w);
		}

		FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RS1(src1_reg)));
		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RS1(src2_reg)));
		return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1));
	}

	/* Variable shift amount: materialize src3 in a register, copying it
	   aside if it would be clobbered by the first shift into dst_reg. */
	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
		src3 = TMP_REG2;
	} else if (dst_reg == src3) {
		/* Was an unchecked push_inst: a failed emit must propagate. */
		FAIL_IF(push_inst(compiler, ADDI | WORD | RD(TMP_REG2) | RS1(src3) | IMM_I(0)));
		src3 = TMP_REG2;
	}

	if (is_left) {
		ins1 = SLL;
		ins2 = SRLI;
		ins3 = SRL;
	} else {
		ins1 = SRL;
		ins2 = SLLI;
		ins3 = SLL;
	}

	FAIL_IF(push_inst(compiler, ins1 | WORD | RD(dst_reg) | RS1(src1_reg) | RS2(src3)));

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* src3 may be zero: pre-shift src2 by 1 and use (bitlen-1) ^ src3
		   as the complementary amount so a zero shift stays a no-op. */
		FAIL_IF(push_inst(compiler, ins2 | WORD | RD(TMP_REG1) | RS1(src2_reg) | IMM_I(1)));
		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RS1(src3) | IMM_I((sljit_ins)bit_length - 1)));
		src2_reg = TMP_REG1;
	} else
		FAIL_IF(push_inst(compiler, SUB | WORD | RD(TMP_REG2) | RS1(TMP_ZERO) | RS2(src3)));

	FAIL_IF(push_inst(compiler, ins3 | WORD | RD(TMP_REG1) | RS1(src2_reg) | RS2(TMP_REG2)));
	return push_inst(compiler, OR | RD(dst_reg) | RS1(dst_reg) | RS2(TMP_REG1));
}
2339
2340
/* Source-only operations: fast return through a saved return address, and
   prefetch hints (no-ops on this target). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* Move the saved return address into RETURN_ADDR_REG, then jump
		   to it (jalr with rd = zero discards the link value). */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, ADDI | RD(RETURN_ADDR_REG) | RS1(src) | IMM_I(0)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));

		return push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(RETURN_ADDR_REG) | IMM_I(0));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* Prefetch hints are not emitted on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2366
2367
/* Destination-only operations: capture the current return address
   (FAST_ENTER) or reload the one saved on the stack (GET_RETURN_ADDRESS). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		if (FAST_IS_REG(dst))
			return push_inst(compiler, ADDI | RD(dst) | RS1(RETURN_ADDR_REG) | IMM_I(0));

		/* Memory destination: the value is already in TMP_REG2, which the
		   fall-through store below writes out. */
		SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* The return address is stored in the last word of the frame. */
		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);

	return SLJIT_SUCCESS;
}
2394
2395
/* Translates an abstract sljit register of the given register class into
   the machine register number used in the encodings. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	switch (type) {
	case SLJIT_GP_REGISTER:
		return reg_map[reg];
	case SLJIT_FLOAT_REGISTER:
		return freg_map[reg];
	default:
		/* Any other class maps through the vector register table. */
		return vreg_map[reg];
	}
}
2407
2408
/* Emits a raw, caller-provided machine instruction verbatim into the
   instruction stream. `instruction` must point to one sljit_ins word. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	/* Size is validated by the check macro; unused otherwise. */
	SLJIT_UNUSED_ARG(size);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
2418
2419
/* --------------------------------------------------------------------- */
2420
/* Floating point operators */
2421
/* --------------------------------------------------------------------- */
2422
2423
/* Memory access data-type flags for a float op: DOUBLE_DATA, with the
   single-precision bit derived from SLJIT_32. */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
/* Precision format field of an FP instruction, derived from SLJIT_32. */
#define FMT(op) ((sljit_ins)((op & SLJIT_32) ^ SLJIT_32) << 17)
2425
2426
/* Converts a float/double source to a signed integer destination
   (SLJIT_CONV_SW_FROM_F64 / SLJIT_CONV_S32_FROM_F64) via FCVT.W/L. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	/* RV32 only has the 32-bit conversion form. */
#	define flags (sljit_u32)0
#else /* !SLJIT_CONFIG_RISCV_32 */
	/* Bit 21 selects the 64-bit (L) conversion instead of the 32-bit (W). */
	sljit_u32 flags = ((sljit_u32)(GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)) << 21;
#endif /* SLJIT_CONFIG_RISCV_32 */
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	/* Load a memory source into a temporary FP register first. */
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, FCVT_W_S | FMT(op) | flags | RD(dst_r) | FRS1(src)));

	/* Store the integer value from a VFP register. */
	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
		return emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0);
#else /* !SLJIT_CONFIG_RISCV_32 */
		/* Store width matches the conversion width selected above. */
		return emit_op_mem2(compiler, flags ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
#endif /* SLJIT_CONFIG_RISCV_32 */
	}
	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
#	undef flags
#endif /* SLJIT_CONFIG_RISCV_32 */
}
2458
2459
/* Shared tail for integer-to-float conversions: loads the integer source
   (memory or immediate) into a register, emits the prepared conversion
   instruction, and stores the float result if dst is memory. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
#else /* SLJIT_CONFIG_RISCV_32 */
		/* Bit 21 of ins distinguishes the 64-bit source form (see callers). */
		FAIL_IF(emit_op_mem2(compiler, ((ins & (1 << 21)) ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
#endif /* !SLJIT_CONFIG_RISCV_32 */
		src = TMP_REG1;
	} else if (src == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3));
		src = TMP_REG1;
	}

	FAIL_IF(push_inst(compiler, ins | FRD(dst_r) | RS1(src)));

	if (dst & SLJIT_MEM)
		/* Bit 25 (fmt) of ins selects single precision; the inverted shift
		   recovers the single-precision data flag for the store. */
		return emit_op_mem2(compiler, DOUBLE_DATA | ((sljit_s32)(~ins >> 24) & 0x2), TMP_FREG1, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2483
2484
/* Converts a signed integer (word or s32) to a floating point value. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = FCVT_S_W | FMT(op);

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if (op & SLJIT_32)
		ins |= F3(0x7);
#else /* !SLJIT_CONFIG_RISCV_32 */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		ins |= (1 << 21);
	else if (src == SLJIT_IMM)
		/* Truncate an immediate to its 32-bit source width. */
		srcw = (sljit_s32)srcw;

	/* F3(0x7) selects the dynamic rounding mode where rounding can occur. */
	if (op != SLJIT_CONV_F64_FROM_S32)
		ins |= F3(0x7);
#endif /* SLJIT_CONFIG_RISCV_32 */

	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2505
2506
/* Converts an unsigned integer (word or u32) to a floating point value. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = FCVT_S_WU | FMT(op);

#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if (op & SLJIT_32)
		ins |= F3(0x7);
#else /* !SLJIT_CONFIG_RISCV_32 */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
		ins |= (1 << 21);
	else if (src == SLJIT_IMM)
		/* Zero-extend an immediate to its 32-bit source width. */
		srcw = (sljit_u32)srcw;

	/* NOTE(review): this compares against the signed S32 opcode, mirroring
	   the signed variant above; in the unsigned path it always holds, so
	   the dynamic rounding mode is always selected here — confirm intent. */
	if (op != SLJIT_CONV_F64_FROM_S32)
		ins |= F3(0x7);
#endif /* SLJIT_CONFIG_RISCV_32 */

	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2527
2528
/* Emits a floating point comparison. The boolean result is left in
   OTHER_FLAG (and TMP_REG1 is clobbered by the two-instruction cases);
   unordered variants are synthesized from FEQ/FLT/FLE combinations. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_ins inst;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
		src2 = TMP_FREG2;
	}

	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
		inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
		break;
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
		break;
	case SLJIT_ORDERED_GREATER:
		/* src1 > src2 is computed as src2 < src1. */
		inst = FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1);
		break;
	case SLJIT_F_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Computes src1 <= src2; the consumer inverts the flag. */
		inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2);
		break;
	case SLJIT_UNORDERED_OR_LESS:
		/* Computes src2 <= src1; the consumer inverts the flag. */
		inst = FLE_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src2) | FRS2(src1);
		break;
	case SLJIT_UNORDERED_OR_EQUAL:
		/* (src1 < src2) | (src2 < src1) == ordered-not-equal; inverted later. */
		FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src2)));
		FAIL_IF(push_inst(compiler, FLT_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src1)));
		inst = OR | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1);
		break;
	default: /* SLJIT_UNORDERED */
		/* FEQ with itself is 0 only for NaN; AND of both tests gives
		   "both ordered", which the consumer inverts for SLJIT_UNORDERED. */
		if (src1 == src2) {
			inst = FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1);
			break;
		}
		FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(OTHER_FLAG) | FRS1(src1) | FRS2(src1)));
		FAIL_IF(push_inst(compiler, FEQ_S | FMT(op) | RD(TMP_REG1) | FRS1(src2) | FRS2(src2)));
		inst = AND | RD(OTHER_FLAG) | RS1(OTHER_FLAG) | RS2(TMP_REG1);
		break;
	}

	return push_inst(compiler, inst);
}
2581
2582
/* Emits a single-operand floating point operation (move, negate, abs,
   precision conversion). Sign manipulation is done with FSGNJ variants. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* The SLJIT_32 bit is inverted so the source is loaded with its own
	   (f32) width; it is restored after the conversion below. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
			else
				/* Memory destination: store directly from the source. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FSGNJN_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FSGNJX_S | FMT(op) | FRD(dst_r) | FRS1(src) | FRS2(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
		FAIL_IF(push_inst(compiler, FCVT_S_D | ((op & SLJIT_32) ? (1 << 25) : ((1 << 20) | F3(7))) | FRD(dst_r) | FRS1(src)));
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
	return SLJIT_SUCCESS;
}
2631
2632
/* Emits a two-operand floating point operation (add/sub/mul/div/copysign).
   Memory operands are loaded through TMP_FREG1/TMP_FREG2, using the slow
   (cached) path only when the fast addressing form does not apply. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= SLOW_SRC1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= SLOW_SRC2;
	}

	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
		/* Order the two slow loads so the second can reuse the cache
		   entry most useful for the upcoming store to dst. */
		if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		} else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & SLOW_SRC1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & SLOW_SRC2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & SLOW_SRC1)
		src1 = TMP_FREG1;
	if (flags & SLOW_SRC2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, FADD_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, FSUB_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, FMUL_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, FDIV_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2)));
		break;

	case SLJIT_COPYSIGN_F64:
		/* Copysign never needs the memory store below: dst is a register. */
		return push_inst(compiler, FSGNJ_S | FMT(op) | FRD(dst_r) | FRS1(src1) | FRS2(src2));
	}

	if (dst_r != dst)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
2711
2712
/* Loads a 32-bit float constant into freg. The bit pattern is built in an
   integer register (or taken from the zero register when the pattern is 0)
   and moved across with FMV.W.X. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	/* Type-pun the float through a union to obtain its bit pattern. */
	union {
		sljit_s32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

	if (u.imm == 0)
		return push_inst(compiler, FMV_W_X | RS1(TMP_ZERO) | FRD(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm, TMP_REG3));
	return push_inst(compiler, FMV_W_X | RS1(TMP_REG1) | FRD(freg));
}
2731
2732
/* --------------------------------------------------------------------- */
2733
/* Conditional instructions */
2734
/* --------------------------------------------------------------------- */
2735
2736
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2737
{
2738
struct sljit_label *label;
2739
2740
CHECK_ERROR_PTR();
2741
CHECK_PTR(check_sljit_emit_label(compiler));
2742
2743
if (compiler->last_label && compiler->last_label->size == compiler->size)
2744
return compiler->last_label;
2745
2746
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2747
PTR_FAIL_IF(!label);
2748
set_label(label, compiler);
2749
return label;
2750
}
2751
2752
/* Immediate field (imm[4:1] at bit 8, encoded via << 7) of a conditional
   branch that skips the worst-case constant-loading jump sequence:
   3 instructions on RV32, 7 on RV64. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
#define BRANCH_LENGTH ((sljit_ins)(3 * sizeof(sljit_ins)) << 7)
#else /* !SLJIT_CONFIG_RISCV_32 */
#define BRANCH_LENGTH ((sljit_ins)(7 * sizeof(sljit_ins)) << 7)
#endif /* SLJIT_CONFIG_RISCV_32 */
2757
2758
/* Returns the conditional branch opcode that SKIPS the following jump when
   the condition does not hold (i.e. the branch condition is the inverse of
   the sljit type), or 0 for unconditional jump types. */
static sljit_ins get_jump_instruction(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
		return BNE | RS1(EQUAL_FLAG) | RS2(TMP_ZERO);
	case SLJIT_NOT_EQUAL:
		return BEQ | RS1(EQUAL_FLAG) | RS2(TMP_ZERO);
	/* All conditions below read the boolean stored in OTHER_FLAG. */
	case SLJIT_LESS:
	case SLJIT_GREATER:
	case SLJIT_SIG_LESS:
	case SLJIT_SIG_GREATER:
	case SLJIT_OVERFLOW:
	case SLJIT_CARRY:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_ORDERED:
		return BEQ | RS1(OTHER_FLAG) | RS2(TMP_ZERO);
	case SLJIT_GREATER_EQUAL:
	case SLJIT_LESS_EQUAL:
	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_NOT_OVERFLOW:
	case SLJIT_NOT_CARRY:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
	case SLJIT_F_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED:
		return BNE | RS1(OTHER_FLAG) | RS2(TMP_ZERO);
	default:
		/* Not conditional branch. */
		return 0;
	}
}
2807
2808
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2809
{
2810
struct sljit_jump *jump;
2811
sljit_ins inst;
2812
2813
CHECK_ERROR_PTR();
2814
CHECK_PTR(check_sljit_emit_jump(compiler, type));
2815
2816
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2817
PTR_FAIL_IF(!jump);
2818
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2819
type &= 0xff;
2820
2821
inst = get_jump_instruction(type);
2822
2823
if (inst != 0) {
2824
PTR_FAIL_IF(push_inst(compiler, inst | BRANCH_LENGTH));
2825
jump->flags |= IS_COND;
2826
}
2827
2828
jump->addr = compiler->size;
2829
inst = JALR | RS1(TMP_REG1) | IMM_I(0);
2830
2831
if (type >= SLJIT_FAST_CALL) {
2832
jump->flags |= IS_CALL;
2833
inst |= RD(RETURN_ADDR_REG);
2834
}
2835
2836
PTR_FAIL_IF(push_inst(compiler, inst));
2837
2838
/* Maximum number of instructions required for generating a constant. */
2839
compiler->size += JUMP_MAX_SIZE - 1;
2840
return jump;
2841
}
2842
2843
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2844
sljit_s32 arg_types)
2845
{
2846
SLJIT_UNUSED_ARG(arg_types);
2847
CHECK_ERROR_PTR();
2848
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2849
2850
if (type & SLJIT_CALL_RETURN) {
2851
PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2852
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2853
}
2854
2855
SLJIT_SKIP_CHECKS(compiler);
2856
return sljit_emit_jump(compiler, type);
2857
}
2858
2859
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2860
sljit_s32 src1, sljit_sw src1w,
2861
sljit_s32 src2, sljit_sw src2w)
2862
{
2863
struct sljit_jump *jump;
2864
sljit_s32 flags;
2865
sljit_ins inst;
2866
sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2867
2868
CHECK_ERROR_PTR();
2869
CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2870
ADJUST_LOCAL_OFFSET(src1, src1w);
2871
ADJUST_LOCAL_OFFSET(src2, src2w);
2872
2873
compiler->cache_arg = 0;
2874
compiler->cache_argw = 0;
2875
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
2876
flags = WORD_DATA | LOAD_DATA;
2877
#else /* !SLJIT_CONFIG_RISCV_32 */
2878
flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2879
#endif /* SLJIT_CONFIG_RISCV_32 */
2880
2881
if (src1 & SLJIT_MEM) {
2882
PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2883
src1 = TMP_REG1;
2884
}
2885
2886
if (src2 & SLJIT_MEM) {
2887
PTR_FAIL_IF(emit_op_mem2(compiler, flags | (src1 == TMP_REG1 ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, 0, 0));
2888
src2 = src2_tmp_reg;
2889
}
2890
2891
if (src1 == SLJIT_IMM) {
2892
if (src1w != 0) {
2893
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w, TMP_REG3));
2894
src1 = TMP_REG1;
2895
}
2896
else
2897
src1 = TMP_ZERO;
2898
}
2899
2900
if (src2 == SLJIT_IMM) {
2901
if (src2w != 0) {
2902
PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w, TMP_REG3));
2903
src2 = src2_tmp_reg;
2904
}
2905
else
2906
src2 = TMP_ZERO;
2907
}
2908
2909
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2910
PTR_FAIL_IF(!jump);
2911
set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2912
type &= 0xff;
2913
2914
switch (type) {
2915
case SLJIT_EQUAL:
2916
inst = BNE | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2917
break;
2918
case SLJIT_NOT_EQUAL:
2919
inst = BEQ | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2920
break;
2921
case SLJIT_LESS:
2922
inst = BGEU | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2923
break;
2924
case SLJIT_GREATER_EQUAL:
2925
inst = BLTU | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2926
break;
2927
case SLJIT_GREATER:
2928
inst = BGEU | RS1(src2) | RS2(src1) | BRANCH_LENGTH;
2929
break;
2930
case SLJIT_LESS_EQUAL:
2931
inst = BLTU | RS1(src2) | RS2(src1) | BRANCH_LENGTH;
2932
break;
2933
case SLJIT_SIG_LESS:
2934
inst = BGE | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2935
break;
2936
case SLJIT_SIG_GREATER_EQUAL:
2937
inst = BLT | RS1(src1) | RS2(src2) | BRANCH_LENGTH;
2938
break;
2939
case SLJIT_SIG_GREATER:
2940
inst = BGE | RS1(src2) | RS2(src1) | BRANCH_LENGTH;
2941
break;
2942
case SLJIT_SIG_LESS_EQUAL:
2943
inst = BLT | RS1(src2) | RS2(src1) | BRANCH_LENGTH;
2944
break;
2945
}
2946
2947
PTR_FAIL_IF(push_inst(compiler, inst));
2948
2949
jump->addr = compiler->size;
2950
PTR_FAIL_IF(push_inst(compiler, JALR | RD(TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0)));
2951
2952
/* Maximum number of instructions required for generating a constant. */
2953
compiler->size += JUMP_MAX_SIZE - 1;
2954
return jump;
2955
}
2956
2957
#undef BRANCH_LENGTH
2958
2959
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2960
{
2961
struct sljit_jump *jump;
2962
2963
CHECK_ERROR();
2964
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2965
2966
if (src != SLJIT_IMM) {
2967
if (src & SLJIT_MEM) {
2968
ADJUST_LOCAL_OFFSET(src, srcw);
2969
FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2970
src = TMP_REG1;
2971
}
2972
return push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(src) | IMM_I(0));
2973
}
2974
2975
/* These jumps are converted to jump/call instructions when possible. */
2976
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2977
FAIL_IF(!jump);
2978
set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2979
jump->u.target = (sljit_uw)srcw;
2980
2981
jump->addr = compiler->size;
2982
FAIL_IF(push_inst(compiler, JALR | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RS1(TMP_REG1) | IMM_I(0)));
2983
2984
/* Maximum number of instructions required for generating a constant. */
2985
compiler->size += JUMP_MAX_SIZE - 1;
2986
return SLJIT_SUCCESS;
2987
}
2988
2989
/* Emits an indirect call. For tail calls, a source held in a saved register
   is copied to TMP_REG1 before the frame release restores that register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
		src = TMP_REG1;
	}

	if (type & SLJIT_CALL_RETURN) {
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(src) | IMM_I(0)));
			src = TMP_REG1;
		}

		FAIL_IF(emit_stack_frame_release(compiler, 0));
		type = SLJIT_JUMP;
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3016
3017
/* Materializes a condition flag as a 0/1 value in dst, optionally combining
   it with dst through an arithmetic op (when GET_OPCODE(op) >= SLJIT_ADD). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 src_r, dst_r, invert;
	sljit_s32 saved_op = op;
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	sljit_s32 mem_type = WORD_DATA;
#else /* !SLJIT_CONFIG_RISCV_32 */
	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
#endif /* SLJIT_CONFIG_RISCV_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* Arithmetic combination needs the current value of dst as well. */
	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));

	if (type < SLJIT_F_EQUAL) {
		src_r = OTHER_FLAG;
		/* Odd type codes are the negated forms of the even ones. */
		invert = type & 0x1;

		switch (type) {
		case SLJIT_EQUAL:
		case SLJIT_NOT_EQUAL:
			/* SLTU with immediate 1 converts "flag == 0" into 1. */
			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(EQUAL_FLAG) | IMM_I(1)));
			src_r = dst_r;
			break;
		case SLJIT_OVERFLOW:
		case SLJIT_NOT_OVERFLOW:
			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
				src_r = OTHER_FLAG;
				break;
			}
			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RS1(OTHER_FLAG) | IMM_I(1)));
			src_r = dst_r;
			invert ^= 0x1;
			break;
		case SLJIT_ATOMIC_STORED:
		case SLJIT_ATOMIC_NOT_STORED:
			/* SC leaves 0 on success, so the stored/not-stored sense flips. */
			invert ^= 0x1;
			break;
		}
	} else {
		invert = 0;
		src_r = OTHER_FLAG;

		switch (type) {
		case SLJIT_F_NOT_EQUAL:
		case SLJIT_UNORDERED_OR_NOT_EQUAL:
		case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
		case SLJIT_F_GREATER_EQUAL:
		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		case SLJIT_UNORDERED_OR_LESS_EQUAL:
		case SLJIT_F_GREATER:
		case SLJIT_UNORDERED_OR_GREATER:
		case SLJIT_UNORDERED_OR_LESS:
		case SLJIT_UNORDERED:
			invert = 1;
			break;
		}
	}

	if (invert) {
		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RS1(src_r) | IMM_I(1)));
		src_r = dst_r;
	}

	if (op < SLJIT_ADD) {
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);

		if (src_r != dst_r)
			return push_inst(compiler, ADDI | RD(dst_r) | RS1(src_r) | IMM_I(0));
		return SLJIT_SUCCESS;
	}

	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;

	if (dst & SLJIT_MEM)
		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
}
3107
3108
/* Emits a conditional select: dst_reg = condition ? src1 : src2_reg.
   Implemented by moving src2_reg into dst_reg, then a conditional branch
   (patched after the fact with the move length) skipping the src1 move. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins *ptr;
	sljit_uw size;
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	sljit_ins word = (sljit_ins)(type & SLJIT_32) >> 5;
	sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
#else /* !SLJIT_CONFIG_RISCV_64 */
	sljit_s32 inp_flags = WORD_DATA | LOAD_DATA;
#endif /* SLJIT_CONFIG_RISCV_64 */

	SLJIT_ASSERT(WORD == 0 || WORD == 0x8);

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (dst_reg != src2_reg) {
		if (dst_reg == src1) {
			/* Swap sources and invert the condition instead of moving. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				/* Preserve dst_reg before it is overwritten, and redirect
				   any use of it in src1's address to the saved copy. */
				FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(dst_reg) | IMM_I(0)));

				if ((src1 & REG_MASK) == dst_reg)
					src1 = (src1 & ~REG_MASK) | TMP_REG1;

				if (OFFS_REG(src1) == dst_reg)
					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
			}

			FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src2_reg) | IMM_I(0)));
		}
	}

	size = compiler->size;

	/* Reserve the branch slot; it is patched below once the length of the
	   conditional move sequence is known. */
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
	} else if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
		if (word)
			src1w = (sljit_s32)src1w;
#endif /* SLJIT_CONFIG_RISCV_64 */
		FAIL_IF(load_immediate(compiler, dst_reg, src1w, TMP_REG1));
	} else
		FAIL_IF(push_inst(compiler, ADDI | WORD | RD(dst_reg) | RS1(src1) | IMM_I(0)));

	/* Encode the skip distance into the reserved branch instruction. */
	size = compiler->size - size;
	*ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25);
	return SLJIT_SUCCESS;
}

#undef WORD
3173
/* Emits a floating point conditional select, mirroring sljit_emit_select:
   moves (via FSGNJ with identical sources) and a patched skip branch. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins *ptr;
	sljit_uw size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap sources and invert the condition instead of moving. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src2_freg) | FRS2(src2_freg)));
	}

	size = compiler->size;

	/* Reserve the branch slot, patched with the skip distance below. */
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;

	if (src1 & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w));
	else
		FAIL_IF(push_inst(compiler, FSGNJ_S | FMT(type) | FRD(dst_freg) | FRS1(src1) | FRS2(src1)));

	size = compiler->size - size;
	*ptr = get_jump_instruction(type & ~SLJIT_32) | (sljit_ins)((size & 0x7) << 9) | (sljit_ins)((size >> 3) << 25);
	return SLJIT_SUCCESS;
}

#undef FLOAT_DATA
#undef FMT
3214
/* Emits a (possibly register-pair) memory access. Addresses are normalized
   so that memw fits the signed 12-bit immediate with room for the second
   word of a pair; a load into the base register is reordered to avoid
   clobbering the address before the second load. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Fold base + (index << shift) into TMP_REG1. */
		memw &= 0x3;

		if (SLJIT_UNLIKELY(memw != 0)) {
			FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw)));
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK)));
		} else
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(mem & REG_MASK) | RS2(OFFS_REG(mem))));

		mem = TMP_REG1;
		memw = 0;
	} else if (memw > SIMM_MAX - SSIZE_OF(sw) || memw < SIMM_MIN) {
		/* Offset out of immediate range: load the high part (or all of it)
		   into TMP_REG1 and keep at most the low 12 bits as immediate. */
		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw), TMP_REG3));
			memw &= 0xfff;
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3));
			memw = 0;
		}

		if (mem & REG_MASK)
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(mem & REG_MASK)));

		mem = TMP_REG1;
	} else {
		mem &= REG_MASK;
		memw &= 0xfff;
	}

	SLJIT_ASSERT((memw >= 0 && memw <= SIMM_MAX - SSIZE_OF(sw)) || (memw > SIMM_MAX && memw <= 0xfff));

	/* When loading and the first target register is the base register,
	   load the second word first so the address survives. */
	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff));
		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), mem, memw);
	}

	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);

	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), mem, memw));
	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), mem, (memw + SSIZE_OF(sw)) & 0xfff);
}

#undef TO_ARGW_HI
3270
/* Emits an LR (load-reserved) atomic load. Only word-sized (and on RV64,
   doubleword) accesses are supported; CAS emulation is not available. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
		ins = LR | (3 << 12); /* LR.D */
		break;
#endif /* SLJIT_CONFIG_RISCV_64 */
	/* On RV32, MOV/MOV_P fall through to the 32-bit form. */
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
		ins = LR | (2 << 12); /* LR.W */
		break;

	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RD(dst_reg) | RS1(mem_reg));
}
3303
3304
/* Emits an SC (store-conditional) atomic store. The success/failure code
   is written to OTHER_FLAG (0 on success), matching SLJIT_ATOMIC_STORED. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins;

	/* temp_reg == mem_reg is undefined so use another temp register */
	SLJIT_UNUSED_ARG(temp_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
		ins = SC | (3 << 12); /* SC.D */
		break;
#endif /* SLJIT_CONFIG_RISCV_64 */
	/* On RV32, MOV/MOV_P fall through to the 32-bit form. */
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
		ins = SC | (2 << 12); /* SC.W */
		break;

	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RD(OTHER_FLAG) | RS1(mem_reg) | RS2(src_reg));
}
3341
3342
/*
3343
SEW = Selected element width
3344
LMUL = Vector register group multiplier
3345
3346
VLMUL values (in binary):
3347
100 : reserved
3348
101 : 1/8
3349
110 : 1/4
3350
111 : 1/2
3351
000 : 1
3352
001 : 2
3353
010 : 4
3354
011 : 8
3355
*/
3356
3357
static SLJIT_INLINE sljit_s32 sljit_emit_vsetivli(struct sljit_compiler *compiler, sljit_s32 type, sljit_ins vlmul)
3358
{
3359
sljit_ins elem_size = (sljit_ins)SLJIT_SIMD_GET_ELEM_SIZE(type);
3360
sljit_ins avl = (sljit_ins)1 << (SLJIT_SIMD_GET_REG_SIZE(type) - elem_size);
3361
3362
return push_inst(compiler, VSETIVLI | RD(TMP_REG1) | (elem_size << 23) | (vlmul << 20) | (avl << 15));
3363
}
3364
3365
/* Emits a VSETIVLI from explicit register/element size exponents, with
   VLMUL left at 1 (all zero bits). */
static SLJIT_INLINE sljit_s32 sljit_emit_vsetivli_size(struct sljit_compiler *compiler, sljit_s32 reg_size, sljit_s32 elem_size)
{
	sljit_ins avl = (sljit_ins)1 << (reg_size - elem_size);
	return push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (avl << 15));
}
3370
3371
/* Emits a vector load/store (ins is VL or VS with VRD already set) for the
   sljit memory operand (mem, memw). Vector memory instructions have no
   offset field, so any base+offset or base+index address is first
   materialized into TMP_REG1. */
static sljit_s32 sljit_emit_vmem(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 elem_size, sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 base = mem & REG_MASK;

	/* For non-byte elements set the width field (bit 14 plus the
	   element size in funct3). */
	if (elem_size > 0)
		ins |= (1 << 14) | ((sljit_ins)elem_size << 12);

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Indexed form: only the low two bits of memw are a shift amount. */
		memw &= 0x3;

		if (SLJIT_UNLIKELY(memw)) {
			FAIL_IF(push_inst(compiler, SLLI | RD(TMP_REG1) | RS1(OFFS_REG(mem)) | IMM_I(memw)));
		}

		/* Address = base + (index << shift); shifted index is in TMP_REG1. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(base) | RS2(!memw ? OFFS_REG(mem) : TMP_REG1)));
		return push_inst(compiler, ins | RS1(TMP_REG1));
	}

	if (memw == 0)
		return push_inst(compiler, ins | RS1(base));

	/* Small offsets fit into ADDI's signed 12 bit immediate. */
	if (memw <= SIMM_MAX && memw >= SIMM_MIN) {
		FAIL_IF(push_inst(compiler, ADDI | RD(TMP_REG1) | RS1(base) | IMM_I(memw)));
		return push_inst(compiler, ins | RS1(TMP_REG1));
	}

	/* Large offset: load it as an immediate, then add the base (if any). */
	FAIL_IF(load_immediate(compiler, TMP_REG1, memw, TMP_REG3));

	if (base != 0)
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RS1(TMP_REG1) | RS2(base)));

	return push_inst(compiler, ins | RS1(TMP_REG1));
}
/* Moves a whole 128 bit vector register to/from a register or memory
   operand. Only 16 byte (reg_size == 4) registers are supported. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* A full-register copy only needs element sizes up to 8 bytes. */
	if (elem_size > 3)
		elem_size = 3;

	FAIL_IF(sljit_emit_vsetivli_size(compiler, reg_size, elem_size));

	if (srcdst & SLJIT_MEM) {
		ins = (type & SLJIT_SIMD_STORE) ? VS : VL;
		return sljit_emit_vmem(compiler, ins | VRD(vreg), elem_size, srcdst, srcdstw);
	}

	/* Register to register: vmv.v.v with operands chosen by direction. */
	if (type & SLJIT_SIMD_STORE)
		ins = VRD(srcdst) | VRS1(vreg);
	else
		ins = VRD(vreg) | VRS1(srcdst);

	return push_inst(compiler, VMV_VV | ins);
}
/* Maps a SIMD element size (log2 of byte width) to the matching
   emit_op_mem data-type flag. Sizes at or above the machine word
   width fall back to WORD_DATA. */
static sljit_s32 sljit_simd_get_mem_flags(sljit_s32 elem_size)
{
	if (elem_size == 0)
		return BYTE_DATA;

	if (elem_size == 1)
		return HALF_DATA;

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (elem_size == 2)
		return INT_DATA;
#endif /* SLJIT_CONFIG_RISCV_64 */

	return WORD_DATA;
}
/* Sign extends an immediate from the given element size to a full
   machine word, so narrow lane constants compare and load correctly. */
static sljit_sw sljit_simd_get_imm(sljit_s32 elem_size, sljit_sw imm)
{
	if (elem_size == 0)
		return (sljit_s8)imm;

	if (elem_size == 1)
		return (sljit_s16)imm;

#if (defined SLJIT_CONFIG_RISCV_64 && SLJIT_CONFIG_RISCV_64)
	if (elem_size == 2)
		return (sljit_s32)imm;
#endif /* SLJIT_CONFIG_RISCV_64 */

	return imm;
}
/* Broadcasts a scalar (register, memory, or immediate) into every lane
   of a vector register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float lanes must be 4 or 8 bytes; integer lanes are limited by the
	   machine word size (4 bytes on RV32, 8 on RV64). */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
		return SLJIT_ERR_UNSUPPORTED;
#else /* !SLJIT_CONFIG_RISCV_32 */
	if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3)
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_RISCV_32 */

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	FAIL_IF(sljit_emit_vsetivli(compiler, type, 0));

	if (type & SLJIT_SIMD_FLOAT) {
		/* An immediate "float" source is broadcast as a raw 5 bit
		   integer pattern via vmv.v.i. */
		if (src == SLJIT_IMM)
			return push_inst(compiler, VMV_VI | VRD(vreg) | ((sljit_ins)(srcw & 0x1f) << 15));

		if (src & SLJIT_MEM) {
			flags = (elem_size == 2) ? SINGLE_DATA : DOUBLE_DATA;
			FAIL_IF(emit_op_mem(compiler, flags | LOAD_DATA, TMP_FREG1, src, srcw));
			src = TMP_FREG1;
		}

		return push_inst(compiler, VFMV_VF | VRD(vreg) | FRS1(src));
	}

	if (src == SLJIT_IMM) {
		srcw = sljit_simd_get_imm(elem_size, srcw);

		/* vmv.v.i takes a signed 5 bit immediate: -16..15. */
		if (srcw >= -0x10 && srcw <= 0xf)
			return push_inst(compiler, VMV_VI | VRD(vreg) | ((sljit_ins)(srcw & 0x1f) << 15));

		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw, TMP_REG3));
		src = TMP_REG1;
	} else if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, sljit_simd_get_mem_flags(elem_size) | LOAD_DATA, TMP_REG1, src, srcw));
		src = TMP_REG1;
	}

	return push_inst(compiler, VMV_VX | VRD(vreg) | RS1(src));
}
/* Moves a single lane between a vector register and a scalar register,
   memory location, or immediate. Stores read the lane out (optionally
   sign/zero extended); loads insert into the lane (optionally zeroing
   the rest of the register first). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float lanes must be 4 or 8 bytes; integer lanes are limited by the
	   machine word size. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
		return SLJIT_ERR_UNSUPPORTED;
#else /* !SLJIT_CONFIG_RISCV_32 */
	if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3)
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_RISCV_32 */

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (type & SLJIT_SIMD_STORE) {
		/* Only one element is transferred: AVL = 1. */
		FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (1 << 15)));

		/* Bring the requested lane down to position 0 first. */
		if (lane_index > 0) {
			FAIL_IF(push_inst(compiler, VSLIDEDOWN_VI | VRD(TMP_VREG1) | ((sljit_ins)lane_index << 15) | VRS2(vreg)));
			vreg = TMP_VREG1;
		}

		if (srcdst & SLJIT_MEM)
			return sljit_emit_vmem(compiler, VS | VRD(vreg), elem_size, srcdst, srcdstw);

		if (type & SLJIT_SIMD_FLOAT)
			return push_inst(compiler, VFMV_FS | FRD(srcdst) | VRS2(vreg));

		/* vmv.x.s sign extends lane 0 into the scalar register. */
		FAIL_IF(push_inst(compiler, VMV_XS | RD(srcdst) | VRS2(vreg)));

		/* Done when sign extension is requested or the lane already
		   fills the scalar register. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
		if ((type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 2)
			return SLJIT_SUCCESS;
#else /* !SLJIT_CONFIG_RISCV_32 */
		if ((type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3 || (elem_size == 2 && (type & SLJIT_32)))
			return SLJIT_SUCCESS;
#endif /* SLJIT_CONFIG_RISCV_32 */

		/* Zero extend: mask a byte directly, ... */
		if (elem_size == 0)
			return push_inst(compiler, ANDI | RD(srcdst) | RS1(srcdst) | IMM_I(0xff));

		/* ... otherwise shift the value up and back down logically.
		   flags holds the shift amount. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
		flags = 16;
#else /* !SLJIT_CONFIG_RISCV_32 */
		flags = (elem_size == 1) ? 48 : 32;
#endif /* SLJIT_CONFIG_RISCV_32 */

		FAIL_IF(push_inst(compiler, SLLI | RD(srcdst) | RS1(srcdst) | IMM_I(flags)));
		return push_inst(compiler, SRLI | RD(srcdst) | RS1(srcdst) | IMM_I(flags));
	}

	/* Load path: optionally clear the whole register first. */
	if (type & SLJIT_SIMD_LANE_ZERO) {
		FAIL_IF(sljit_emit_vsetivli(compiler, type, 0));
		FAIL_IF(push_inst(compiler, VMV_VI | VRD(vreg)));
	}

	if (srcdst & SLJIT_MEM) {
		/* Load one element (AVL = 1), into a temp if it must be slid up. */
		FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (1 << 15)));
		FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(lane_index > 0 ? TMP_VREG1 : vreg), elem_size, srcdst, srcdstw));

		if (lane_index == 0)
			return SLJIT_SUCCESS;

		/* vslideup needs AVL to cover the target lane. */
		FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | ((sljit_ins)(lane_index + 1) << 15)));
		return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1));
	}

	/* The LANE_ZERO path above already set AVL when inserting lane 0. */
	if (!(type & SLJIT_SIMD_LANE_ZERO) || lane_index > 0)
		FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | ((sljit_ins)(lane_index + 1) << 15)));

	if (type & SLJIT_SIMD_FLOAT) {
		FAIL_IF(push_inst(compiler, VFMV_SF | VRD(lane_index > 0 ? TMP_VREG1 : vreg) | FRS1(srcdst)));

		if (lane_index == 0)
			return SLJIT_SUCCESS;

		return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1));
	}

	if (srcdst == SLJIT_IMM) {
		srcdstw = sljit_simd_get_imm(elem_size, srcdstw);
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw, TMP_REG3));
		srcdst = TMP_REG1;
	}

	FAIL_IF(push_inst(compiler, VMV_SX | VRD(lane_index > 0 ? TMP_VREG1 : vreg) | RS1(srcdst)));

	if (lane_index == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, VSLIDEUP_VI | VRD(vreg) | ((sljit_ins)lane_index << 15) | VRS2(TMP_VREG1));
}
/* Broadcasts one lane of src into every lane of vreg using vrgather.vi. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	FAIL_IF(sljit_emit_vsetivli(compiler, type, 0));

	/* vrgather destination must not overlap its source, so gather into a
	   temporary and copy back when vreg == src. */
	FAIL_IF(push_inst(compiler, VRGATHER_VI | VRD(vreg != src ? vreg : TMP_VREG1) | ((sljit_ins)src_lane_index << 15) | VRS2(src)));
	if (vreg == src)
		return push_inst(compiler, VMV_VV | VRD(vreg) | VRS1(TMP_VREG1));
	return SLJIT_SUCCESS;
}
/* Widens the low elements of src (elem_size) into vreg (elem2_size),
   with sign/zero extension for integers or vfwcvt for floats. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float lanes must be 4 or 8 bytes; integer lanes are limited by the
	   machine word size. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
		return SLJIT_ERR_UNSUPPORTED;
#else /* !SLJIT_CONFIG_RISCV_32 */
	if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3)
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_RISCV_32 */

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* The widening instructions must not overlap source and destination,
	   and a memory source needs loading first; stage through TMP_VREG1. */
	if ((src & SLJIT_MEM) || vreg == src) {
		/* AVL = number of result elements: 2^(reg_size - elem2_size). */
		ins = (sljit_ins)1 << (reg_size - elem2_size);
		FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem_size << 23) | (ins << 15)));

		if (src & SLJIT_MEM)
			FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(TMP_VREG1), elem_size, src, srcw));
		else
			FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG1) | VRS1(src)));

		src = TMP_VREG1;
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* VLMUL = 1/2 (0x7) so the widened result fits one register. */
		FAIL_IF(sljit_emit_vsetivli(compiler, type, 0x7));
		return push_inst(compiler, VFWCVT_FFV | VRD(vreg) | VRS2(src));
	}

	/* Configure for the destination element size/count. */
	ins = (sljit_ins)1 << (reg_size - elem2_size);
	FAIL_IF(push_inst(compiler, VSETIVLI | RD(TMP_REG1) | ((sljit_ins)elem2_size << 23) | (ins << 15)));

	/* Pick the widening factor from the size difference. */
	switch (elem2_size - elem_size) {
	case 1:
		ins = VZEXT_VF2;
		break;
	case 2:
		ins = VZEXT_VF4;
		break;
	default:
		ins = VZEXT_VF8;
		break;
	}

	/* The sign extending variant differs only in bit 15. */
	if (type & SLJIT_SIMD_EXTEND_SIGNED)
		ins |= 1 << 15;

	return push_inst(compiler, ins | VRD(vreg) | VRS2(src));
}
/* Extracts the sign bits of every lane of vreg into a scalar bitmask
   stored in dst. Implemented with vmsle.vi (lane <= -1, i.e. sign set)
   writing a mask register that is then read out as a 32 bit scalar. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (((type & SLJIT_SIMD_FLOAT) && elem_size < 2) || elem_size > 3)
		return SLJIT_ERR_UNSUPPORTED;

	/* Fix: honor SLJIT_SIMD_TEST like every other simd entry point, so a
	   capability probe does not emit instructions. */
	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* Clear the mask temp, then set a mask bit for each lane whose value
	   is <= -1 (immediate 0 with the inverted compare encodes this). */
	FAIL_IF(sljit_emit_vsetivli(compiler, type, 0));
	FAIL_IF(push_inst(compiler, VMV_VI | VRD(TMP_VREG1) | (0x0 << 15)));
	FAIL_IF(push_inst(compiler, VMSLE_VI | VRD(TMP_VREG1) | (0x0 << 15) | VRS2(vreg)));

	/* Read the mask register back as a single 32 bit element. */
	FAIL_IF(sljit_emit_vsetivli_size(compiler, 2, 2));
	FAIL_IF(push_inst(compiler, VMV_XS | RD(dst_r) | VRS2(TMP_VREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, (type & SLJIT_32) ? INT_DATA : WORD_DATA, dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Two-operand vector operation: dst_vreg = src1_vreg OP src2, where OP is
   and/or/xor or a byte shuffle (vrgather). src2 may be a vector register
   or a memory operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));

	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND_VV;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VOR_VV;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VXOR_VV;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = VRGATHER_VV;
		/* Shuffle indices are bytes. */
		elem_size = 0;
		break;
	}

	if (elem_size > 3)
		elem_size = 3;

	FAIL_IF(sljit_emit_vsetivli_size(compiler, reg_size, elem_size));

	if (src2 & SLJIT_MEM) {
		FAIL_IF(sljit_emit_vmem(compiler, VL | VRD(TMP_VREG1), elem_size, src2, src2w));
		src2 = TMP_VREG1;
	}

	if (SLJIT_SIMD_GET_OPCODE(type) != SLJIT_SIMD_OP2_SHUFFLE)
		return push_inst(compiler, ins | VRD(dst_vreg) | VRS1(src1_vreg) | VRS2(src2));

	/* vrgather destination must not overlap either source; copy any
	   overlapping operand into a temporary first. */
	if (dst_vreg == src2) {
		FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG1) | VRS1(src2)));
		src2 = TMP_VREG1;
	}

	if (dst_vreg == src1_vreg) {
		FAIL_IF(push_inst(compiler, VMV_VV | VRD(TMP_VREG2) | VRS1(src1_vreg)));
		src1_vreg = TMP_VREG2;
	}

	/* For vrgather the index vector goes into vs1, the table into vs2. */
	return push_inst(compiler, ins | VRD(dst_vreg) | VRS1(src2) | VRS2(src1_vreg));
}
/* Emits a patchable constant load: a load-immediate sequence whose value
   can later be rewritten with sljit_set_const. Returns the const record,
   or NULL on allocation/emission failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* The trailing ADDI is the template for the final instruction of the
	   fixed-length constant-load sequence. */
	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, ADDI | RD(dst_r)));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));

	return const_;
}
/* Emits a placeholder that is later patched to load a code address into
   dst. Returns the jump record used for patching, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* The placeholder word records only the target register; the real
	   instructions are produced at link time. */
	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
	/* Reserve space for the rest of the address-load sequence:
	   2 instructions total on RV32, 6 on RV64. */
#if (defined SLJIT_CONFIG_RISCV_32 && SLJIT_CONFIG_RISCV_32)
	compiler->size += 1;
#else /* !SLJIT_CONFIG_RISCV_32 */
	compiler->size += 5;
#endif /* SLJIT_CONFIG_RISCV_32 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));

	return jump;
}
/* Rewrites the constant-load sequence at addr with new_constant; the
   sequence layout matches the one patched by sljit_set_jump_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}