Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_T2_32.c
22541 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28
{
29
#ifdef __SOFTFP__
30
return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31
#else
32
return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33
#endif
34
}
35
36
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Maps SLJIT register indices to ARM core register numbers; the
   trailing entries back the stack pointer and the TMP_* registers
   above, with TMP_PC mapping to r15 (pc). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};
51
52
/* Maps SLJIT float register indices to VFP register numbers. The table
   is doubled: the second half repeats the same base numbers and is
   paired with freg_ebit_map below, which supplies the extra encoding
   bit for those entries.
   NOTE(review): the second half appears to serve the SLJIT_F64_SECOND
   style aliases (odd single precision halves) — confirm against the
   sljit float register definitions. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra bit of the register number used by the V*4 encoding macros:
   0 for the first half of freg_map, 1 for the second half. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};
67
68
/* Moves the bit-field of `src` that starts at bit `from` to bit
   position `to`, keeping only `bits` bits; shifts in either direction
   depending on which position is higher. */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to))

/* Two's complement negation performed on the signed type then cast
   back to unsigned. */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
#define IMM3(imm) ((sljit_ins)imm << 6)
#define IMM8(imm) ((sljit_ins)imm)

/* Thumb16 helpers. */
/* Encodes rd split into low 3 bits plus its high bit (shifted to bit 7)
   together with rn, for 16 bit forms that accept high registers. */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* Most 16 bit instructions can only address the low registers r0-r7. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* Float register fields: base number from freg_map plus the extra
   encoding bit from freg_ebit_map. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* 5 bit immediate split into imm3 (bits 12-14) and imm2 (bits 6-7). */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
/* 12 bit immediate split into i (bit 26), imm3 (bits 12-14) and imm8. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff))
103
104
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits).
   Values without the high half-word set are 16 bit (Thumb16)
   encodings; full 32 bit values are Thumb32 encodings emitted with
   push_inst32. */
#define ADCI 0xf1400000
#define ADCS 0x4140
#define ADC_W 0xeb400000
#define ADD 0x4400
#define ADDS 0x1800
#define ADDSI3 0x1c00
#define ADDSI8 0x3000
#define ADDWI 0xf2000000
#define ADD_SP 0x4485
#define ADD_SP_I 0xb000
#define ADD_W 0xeb000000
#define ADD_WI 0xf1000000
#define ANDI 0xf0000000
#define ANDS 0x4000
#define AND_W 0xea000000
#define ASRS 0x4100
#define ASRSI 0x1000
#define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020
#define BCC 0xd000
#define BICI 0xf0200000
#define BKPT 0xbe00
#define BLX 0x4780
#define BX 0x4700
#define CLZ 0xfab0f080
#define CMNI_W 0xf1100f00
#define CMP 0x4280
#define CMPI 0x2800
#define CMPI_W 0xf1b00f00
#define CMP_X 0x4500
#define CMP_W 0xebb00f00
#define DMB_SY 0xf3bf8f5f
#define EORI 0xf0800000
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
#define LDR 0xf8d00000
#define LDR_SP 0x9800
#define LDRD 0xe9500000
#define LDREX 0xe8500f00
#define LDREXB 0xe8d00f4f
#define LDREXH 0xe8d00f5f
#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
#define LSL_W 0xfa00f000
#define LSL_WI 0xea4f0000
#define LSRS 0x40c0
#define LSRSI 0x0800
#define LSR_W 0xfa20f000
#define LSR_WI 0xea4f0010
#define MLA 0xfb000000
#define MOV 0x4600
#define MOVI 0x2000
#define MOVS 0x0000
#define MOVSI 0x2000
#define MOVT 0xf2c00000
#define MOVW 0xf2400000
#define MOV_W 0xea4f0000
#define MOV_WI 0xf04f0000
#define MUL 0xfb00f000
#define MVNS 0x43c0
#define MVN_W 0xea6f0000
#define MVN_WI 0xf06f0000
#define NOP 0xbf00
#define ORNI 0xf0600000
#define ORRI 0xf0400000
#define ORRS 0x4300
#define ORR_W 0xea400000
#define POP 0xbc00
#define POP_W 0xe8bd0000
#define PUSH 0xb400
#define PUSH_W 0xe92d0000
#define REV 0xba00
#define REV_W 0xfa90f080
#define REV16 0xba40
#define REV16_W 0xfa90f090
#define RBIT 0xfa90f0a0
#define RORS 0x41c0
#define ROR_W 0xfa60f000
#define ROR_WI 0xea4f0030
#define RSB_WI 0xf1c00000
#define RSBSI 0x4240
#define SBCI 0xf1600000
#define SBCS 0x4180
#define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define STRD 0xe9400000
#define STREX 0xe8400000
#define STREXB 0xe8c00f40
#define STREXH 0xe8c00f50
#define SUBS 0x1a00
#define SUBSI3 0x1e00
#define SUBSI8 0x3800
#define SUB_W 0xeba00000
#define SUBWI 0xf2a00000
#define SUB_SP_I 0xb080
#define SUB_WI 0xf1a00000
#define SXTB 0xb240
#define SXTB_W 0xfa4ff080
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
#define TSTI 0xf0000f00
#define TST_W 0xea000f00
#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
#define UXTH 0xb280
#define UXTH_W 0xfa1ff080
/* Floating point (VFP) and vector (NEON) instruction encodings. */
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VAND 0xef000110
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
#define VCVT_F32_U32 0xeeb80a40
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VDUP 0xee800b10
#define VDUP_s 0xffb00c00
#define VEOR 0xff000110
#define VLD1 0xf9200000
#define VLD1_r 0xf9a00c00
#define VLD1_s 0xf9a00000
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
#define VMOV_i 0xef800010
#define VMOV_s 0xee000b10
#define VMOVN 0xffb20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VORR 0xef200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
#define VSHLL 0xef800a10
#define VSHR 0xef800010
#define VSRA 0xef800110
#define VST1 0xf9000000
#define VST1_s 0xf9800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40
#define VTBL 0xffb00800
259
260
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
261
262
/* Argument checking helper: returns non-zero when `fr` denotes a valid
   float register (scratch, saved or temporary). When `is_32` is set,
   an F64-second-half alias is first mapped back to its base index. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches))
		return 1;

	if (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0)
		return 1;

	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
274
275
/* Argument checking helper: returns non-zero when `vr` denotes a valid
   vector register (scratch, saved or temporary) for the given SIMD
   `type`. */
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type)
{
	sljit_s32 pair_low = vr;

	if (compiler->scratches == -1)
		return 0;

	/* For register size 4 the register is a pair: round the index up
	   to the even member and track the odd member separately for the
	   saved-register range check. */
	if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) {
		vr += (vr & 0x1);
		pair_low = vr - 1;
	}

	if (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches))
		return 1;

	if (pair_low > (SLJIT_VS0 - compiler->vsaveds) && pair_low <= SLJIT_VS0)
		return 1;

	return (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS));
}
291
292
#endif /* SLJIT_ARGUMENT_CHECKS */
293
294
/* Appends one 16 bit (Thumb16) instruction to the instruction buffer.
   compiler->size is counted in half-words. */
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_u16 *buf;

	/* A 16 bit opcode must not have bits set in the upper half. */
	SLJIT_ASSERT(!(inst & 0xffff0000));

	buf = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
	FAIL_IF(!buf);

	buf[0] = (sljit_u16)inst;
	compiler->size += 1;
	return SLJIT_SUCCESS;
}
305
306
/* Appends one 32 bit (Thumb32) instruction to the instruction buffer.
   The high half-word is emitted first; size is counted in half-words. */
static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_u16 *buf = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));

	FAIL_IF(!buf);

	buf[0] = (sljit_u16)(inst >> 16);
	buf[1] = (sljit_u16)inst;
	compiler->size += 2;
	return SLJIT_SUCCESS;
}
315
316
/* Loads the full 32 bit constant `imm` into `dst` with a MOVW/MOVT
   pair: MOVW sets the low 16 bits (clearing the high half), MOVT then
   sets the high 16 bits. */
static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
323
324
/* Dst must be in bits[11-8] */
/* Rewrites an already emitted MOVW/MOVT pair (four half-words starting
   at `inst`) so that it loads `new_imm`. `dst` is the pre-shifted
   destination register field placed into the second half-word of each
   instruction. */
static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm)
{
	inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
	inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
	inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
	inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
}
332
333
/* Patches an existing MOVW/MOVT constant load in place with `new_imm`,
   preserving its destination register. */
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	/* Destination register field extracted from the MOVW half-word;
	   the assert verifies the four half-words really form a MOVW/MOVT
	   pair targeting the same register. */
	sljit_ins dst = inst[1] & 0x0f00;
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	set_imm32_const(inst, dst, new_imm);
}
339
340
/* Selects the shortest branch encoding that can reach the jump target,
   records the choice as a PATCH_TYPE* flag in jump->flags, and returns
   the address one past the half-words actually needed. If no short
   form applies (or the jump is rewritable), the full constant-load
   form is kept and the already emitted instruction(s) are moved behind
   the 4 half-word slot that set_imm32_const will later patch. */
static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	jump->addr = jump_addr;
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		goto exit;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			goto exit;
		target_addr = jump->u.target;
	} else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward jump: measure from the not-yet-reduced source
		   position recorded in the instruction stream. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	/* +4: pc reads ahead of the current instruction. */
	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 4, executable_offset);

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Size of the prefix IT instruction. */
		diff += SSIZE_OF(u16);
		if (diff <= 0xff && diff >= -0x100) {
			/* 16 bit conditional branch replaces the IT prefix too. */
			jump->flags |= PATCH_TYPE1;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr - 1;
		}
		if (diff <= 0xfffff && diff >= -0x100000) {
			jump->flags |= PATCH_TYPE2;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr;
		}
		diff -= SSIZE_OF(u16);
	} else if (jump->flags & IS_BL) {
		/* Branch and link. */
		if (diff <= 0xffffff && diff >= -0x1000000) {
			jump->flags |= PATCH_TYPE5;
			return code_ptr + 1;
		}
		goto exit;
	} else if (diff <= 0x7ff && diff >= -0x800) {
		/* 16 bit unconditional branch. */
		jump->flags |= PATCH_TYPE3;
		return code_ptr;
	}

	if (diff <= 0xffffff && diff >= -0x1000000) {
		/* 32 bit unconditional branch. */
		jump->flags |= PATCH_TYPE4;
		return code_ptr + 1;
	}

exit:
	/* Worst case: keep the MOVW/MOVT constant load. Move the emitted
	   branch instruction (and the IT prefix for conditional jumps)
	   after the 4 half-word immediate slot. */
	code_ptr[4] = code_ptr[0];

	if (jump->flags & IS_COND) {
		code_ptr[3] = code_ptr[-1];
		jump->addr = (sljit_uw)(code_ptr - 1);
	}

	return code_ptr + 4;
}
409
410
/* Returns the number of extra half-words (real size minus one) needed
   by a "mov address" sequence: 1 when a pc-relative ADDW/SUBW can
   reach the target (marked PATCH_TYPE6), otherwise 3 for a full
   MOVW/MOVT pair. */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else {
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward reference: measure from the not-yet-reduced
		   source position. */
		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	}

	/* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	/* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */

	if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) {
		jump->flags |= PATCH_TYPE6;
		return 1;
	}

	return 3;
}
438
439
/* Second-pass patcher: writes the final instruction bits for one jump
   or mov_addr record, according to the patch type stored in bits 4-7
   of jump->flags. Type 0 is the full MOVW/MOVT constant load, type 6
   the pc-relative ADDW/SUBW form, types 1-5 the Thumb branch
   encodings. */
static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_u16 *jump_inst = (sljit_u16*)jump->addr;
	sljit_sw diff;
	sljit_ins ins;

	/* `diff` starts as the absolute target address. */
	diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);

	/* Absolute mov_addr: clear the Thumb bit. */
	if ((jump->flags & (JUMP_MOV_ADDR | IS_ABS)) == (JUMP_MOV_ADDR | IS_ABS))
		diff &= ~(sljit_sw)1;

	if (SLJIT_UNLIKELY(type == 0)) {
		/* Full constant load; for plain jumps the destination is
		   always TMP_REG1. */
		ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1);
		set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff);
		return;
	}

	if (SLJIT_UNLIKELY(type == 6)) {
		SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR);
		/* pc-relative, with pc rounded down to a word boundary. */
		diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3;

		SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff);

		ins = ADDWI >> 16;
		if (diff <= 0) {
			diff = -diff;
			ins = SUBWI >> 16;
		}

		/* jump_inst[0] currently holds the destination register
		   field; it becomes the second half-word. */
		jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff));
		jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1));
		return;
	}

	/* Branches: target must have the Thumb bit set; convert to a
	   half-word offset relative to pc (instruction address + 4). */
	SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR));
	diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
		return;
	case 3:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
		return;
	}

	SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000);

	/* Really complex instruction form for branches. Negate with sign bit. */
	diff ^= ((diff >> 2) & 0x600000) ^ 0x600000;

	jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1));
	jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1));

	SLJIT_ASSERT(type == 4 || type == 5);

	/* The others have a common form. */
	if (type == 4) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
}
512
513
/* Handles an aligned label: rounds code_ptr down using the alignment
   mask stored in ext_label->data. */
static SLJIT_INLINE sljit_u16 *process_extended_label(sljit_u16 *code_ptr, struct sljit_extended_label *ext_label)
{
	sljit_uw aligned_addr;

	SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);

	aligned_addr = (sljit_uw)code_ptr & ~(ext_label->data);
	return (sljit_u16*)aligned_addr;
}
518
519
/* Pre-pass over labels, jumps and constants (all ordered by address):
   computes how many half-words can be saved by using short branch and
   mov_addr forms, shifts the recorded label sizes and jump/const
   addresses accordingly, stores each jump's reduced size (in
   instructions) above JUMP_SIZE_SHIFT in its flags, and shrinks
   compiler->size by the total saving. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			/* Everything before this label moved forward by the
			   savings accumulated so far. */
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			total_size = JUMP_MAX_SIZE;

			/* Only label-targeted, non-rewritable jumps can be
			   shortened; the ranges must stay conservative since
			   later reductions can only shrink the distance. */
			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;
				if (jump->u.label->size > jump->addr) {
					/* Forward jump: the label has not been
					   reduced yet. */
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (jump->flags & IS_COND) {
					/* The IT prefix can be dropped for the short
					   conditional forms. */
					diff++;

					if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16)))
						total_size = 0;
					else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16)))
						total_size = 1;
					diff--;
				} else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16)))
					total_size = 1;

				if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16)))
					total_size = 2;
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 3;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				/* Short form: pc-relative ADDW/SUBW. */
				if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16)))
					total_size = 1;
			}

			size_reduce += 3 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
610
611
/* Produces the final machine code: runs the size reduction pass,
   copies the buffered half-words into executable memory while
   resolving labels, jumps and constants, then patches every branch and
   address-load instruction in a second pass. Returns the code address
   with the Thumb bit (bit 0) set, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_sw addr;
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler, options));

	reduce_code_size(compiler);

	code = (sljit_u16*)allocate_executable_memory(compiler->size * sizeof(sljit_u16), options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			/* half_count tracks the position in the source
			   buffers; labels, jumps and constants are keyed by
			   this position. */
			if (next_min_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);

				/* These structures are ordered by their address. */
				if (next_min_addr == next_label_size) {
					if (label->u.index >= SLJIT_LABEL_ALIGNED) {
						/* Re-copy the current half-word after the
						   alignment adjustment. */
						code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
						*code_ptr = buf_ptr[-1];
					}

					/* Thumb bit set in the label address. */
					label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (!(jump->flags & JUMP_MOV_ADDR)) {
						/* Skip the half-words reserved for this
						   jump in the source buffer. */
						half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
						SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <
							((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16));
					} else {
						half_count += jump->flags >> JUMP_SIZE_SHIFT;
						addr = (sljit_sw)code_ptr;
						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
						jump->addr = (sljit_uw)addr;
					}

					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be attached to the very end of the code. */
	if (label && label->size == half_count) {
		if (label->u.index >= SLJIT_LABEL_ALIGNED)
			code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);

		label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: patch all branches and address loads now that
	   every target address is known. */
	jump = compiler->jumps;
	while (jump) {
		generate_jump_or_mov_addr(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);

	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
732
733
/* Reports which optional SLJIT features this backend supports. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	sljit_s32 supported;

	switch (feature_type) {
	case SLJIT_HAS_FPU:
	case SLJIT_HAS_F64_AS_F32_PAIR:
	case SLJIT_HAS_SIMD:
#ifdef SLJIT_IS_FPU_AVAILABLE
		supported = ((SLJIT_IS_FPU_AVAILABLE) != 0);
#else
		/* Available by default. */
		supported = 1;
#endif
		break;

	case SLJIT_SIMD_REGS_ARE_PAIRS:
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_CTZ:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_CMOV:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
	case SLJIT_HAS_MEMORY_BARRIER:
		supported = 1;
		break;

	default:
		supported = 0;
		break;
	}

	return supported;
}
763
764
/* --------------------------------------------------------------------- */
765
/* Core code generator functions. */
766
/* --------------------------------------------------------------------- */
767
768
#define INVALID_IMM 0x80000000
/* Encodes `imm` as a Thumb-2 modified immediate (the 12 bit
   i:imm3:imm8 field of 32 bit data processing instructions). Returns
   the encoding, or INVALID_IMM when the value cannot be represented. */
static sljit_uw get_imm(sljit_uw imm)
{
	/* Thumb immediate form. */
	sljit_s32 counter;

	/* Plain 8 bit value, no rotation. */
	if (imm <= 0xff)
		return imm;

	if ((imm & 0xffff) == (imm >> 16)) {
		/* Some special cases. */
		if (!(imm & 0xff00))
			return (1 << 12) | (imm & 0xff); /* 0x00XY00XY pattern. */
		if (!(imm & 0xff))
			return (2 << 12) | ((imm >> 8) & 0xff); /* 0xXY00XY00 pattern. */
		if ((imm & 0xff00) == ((imm & 0xff) << 8))
			return (3 << 12) | (imm & 0xff); /* 0xXYXYXYXY pattern. */
	}

	/* Rotated 8 bit form: `counter` becomes the number of leading
	   zero bits plus 8 (i.e. the rotation amount). */
	/* Assembly optimization: count leading zeroes? */
	counter = 8;
	if (!(imm & 0xffff0000)) {
		counter += 16;
		imm <<= 16;
	}
	if (!(imm & 0xff000000)) {
		counter += 8;
		imm <<= 8;
	}
	if (!(imm & 0xf0000000)) {
		counter += 4;
		imm <<= 4;
	}
	if (!(imm & 0xc0000000)) {
		counter += 2;
		imm <<= 2;
	}
	if (!(imm & 0x80000000)) {
		counter += 1;
		imm <<= 1;
	}
	/* Since imm >= 128, this must be true. */
	SLJIT_ASSERT(counter <= 31);

	/* All significant bits must fit into the 8 bits following the
	   leading one. */
	if (imm & 0x00ffffff)
		return INVALID_IMM; /* Cannot be encoded. */

	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}
817
818
/* Loads `imm` into `dst` without affecting the status flags:
   prefers a single MOV.W / MVN.W when the value (or its complement)
   has a modified-immediate encoding, otherwise emits MOVW plus MOVT
   when the high 16 bits are non-zero. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw tmp;

	/* MOVS cannot be used since it destroy flags. */

	if (imm >= 0x10000) {
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* set low 16 bits, set hi 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* set hi 16 bit if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst)
			| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
843
844
/* Internal flag bits for emit_op_imm: which argument is an immediate,
   and modifiers for flag setting / unused results. */
#define ARG1_IMM 0x0010000
#define ARG2_IMM 0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS 0x0100000
#define UNUSED_RETURN 0x0200000
#define REGISTER_OP 0x0400000
850
851
/* Emits the machine code of a one or two operand operation whose operands
   are registers or immediates (memory operands are resolved by the caller).
   The low 16 bits of flags select the sljit opcode; ARG1_IMM / ARG2_IMM
   mark immediate operands. For each operation it first tries the shortest
   encodable form (16 bit, then 32 bit with immediate); when the immediate
   cannot be encoded it is loaded into a temporary register and the
   register-register form below is used. */
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
{
	/* dst must be register
	   arg1 must be register, imm
	   arg2 must be register, imm */
	sljit_s32 reg;
	sljit_uw imm, imm2;

	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
		/* Both are immediates, no temporaries are used. */
		flags &= ~ARG1_IMM;
		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
		arg1 = TMP_REG1;
	}

	if (flags & (ARG1_IMM | ARG2_IMM)) {
		/* Exactly one operand is an immediate: try to encode it directly. */
		reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
		imm = (flags & ARG2_IMM) ? arg2 : arg1;

		switch (flags & 0xffff) {
		case SLJIT_CLZ:
		case SLJIT_CTZ:
		case SLJIT_REV:
		case SLJIT_REV_U16:
		case SLJIT_REV_S16:
		case SLJIT_REV_U32:
		case SLJIT_REV_S32:
		case SLJIT_MUL:
		case SLJIT_MULADD:
			/* No form with immediate operand. */
			break;
		case SLJIT_MOV:
			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
			return load_immediate(compiler, dst, imm);
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			imm2 = NEGATE(imm);
			/* Prefer the 16 bit encodings when both registers are low. */
			if (IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (imm2 <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
					if (imm2 <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
				}
			}
			/* ADDW/SUBW accept a plain 12 bit immediate but cannot set flags. */
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (imm2 <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
			}
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* Adding imm is the same as subtracting its negation. */
			imm = get_imm(NEGATE(imm));
			if (imm != INVALID_IMM)
				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			if (flags & ARG2_IMM) {
				/* adc with imm equals sbc with ~imm. */
				imm = get_imm(~imm);
				if (imm != INVALID_IMM)
					return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			}
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			if (flags & ARG1_IMM) {
				/* imm - reg: reverse subtract. */
				if (imm == 0 && IS_2_LO_REGS(reg, dst))
					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
				imm = get_imm(imm);
				if (imm != INVALID_IMM)
					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
				break;
			}
			if (flags & UNUSED_RETURN) {
				/* Result discarded: a compare sets the same flags. */
				if (imm <= 0xff && reg_map[reg] <= 7)
					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
				imm2 = get_imm(imm);
				if (imm2 != INVALID_IMM)
					return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
				imm = get_imm(NEGATE(imm));
				if (imm != INVALID_IMM)
					return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
				break;
			}
			imm2 = NEGATE(imm);
			if (IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (imm2 <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
					if (imm2 <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
				}
			}
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (imm2 <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
			}
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			imm = get_imm(NEGATE(imm));
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			if (flags & ARG1_IMM)
				break;
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* sbc with imm equals adc with ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_AND:
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* and with imm equals bic with ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_OR:
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* orr with imm equals orn with ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_XOR:
			if (imm == (sljit_uw)-1) {
				/* xor with all ones is a bitwise not. */
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
				return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
			}
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SHL:
		case SLJIT_MSHL:
		case SLJIT_LSHR:
		case SLJIT_MLSHR:
		case SLJIT_ASHR:
		case SLJIT_MASHR:
		case SLJIT_ROTL:
		case SLJIT_ROTR:
			if (flags & ARG1_IMM)
				break;
			/* Only the low five bits of the shift amount are used. */
			imm &= 0x1f;

			if (imm == 0) {
				/* Shift by zero is a plain move. */
				if (!(flags & SET_FLAGS))
					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
			}

			switch (flags & 0xffff) {
			case SLJIT_SHL:
			case SLJIT_MSHL:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_LSHR:
			case SLJIT_MLSHR:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_ASHR:
			case SLJIT_MASHR:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_ROTL:
				/* Rotate left by n equals rotate right by 32 - n. */
				imm = (imm ^ 0x1f) + 1;
				SLJIT_FALLTHROUGH
			default: /* SLJIT_ROTR */
				return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
			}
		default:
			SLJIT_UNREACHABLE();
			break;
		}

		/* The immediate could not be encoded: load it into a free
		   temporary register and fall through to the register forms. */
		if (flags & ARG2_IMM) {
			imm = arg2;
			arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
			FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
		} else {
			imm = arg1;
			arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
			FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
		}

		SLJIT_ASSERT(arg1 != arg2);
	}

	/* Both arguments are registers. */
	switch (flags & 0xffff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (dst == (sljit_s32)arg2)
			return SLJIT_SUCCESS;
		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
	case SLJIT_MOV_U8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_U16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_CLZ:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
	case SLJIT_CTZ:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		/* Count trailing zeroes: bit-reverse, then count leading zeroes. */
		FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
		return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(arg1 == TMP_REG2);

		if (IS_2_LO_REGS(dst, arg2))
			FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
		else
			FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));

		if (!(flags & REGISTER_OP))
			return SLJIT_SUCCESS;

		/* Extend the 16 bit result to the full register width. */
		flags &= 0xffff;
		if (reg_map[dst] <= 7)
			return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
		return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
	case SLJIT_ADD:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		if (IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
		if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUB:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		if (flags & UNUSED_RETURN) {
			/* Result discarded: a compare sets the same flags. */
			if (IS_2_LO_REGS(arg1, arg2))
				return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
			return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
		}
		if (IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MUL:
		compiler->status_flags_state = 0;
		if (!(flags & SET_FLAGS))
			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
		/* Overflow check: the high half of the signed 64 bit product
		   must equal the sign extension of the low half. */
		reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2)));
		/* cmp TMP_REG2, dst asr #31. */
		return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst));
	case SLJIT_AND:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
		if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
			return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
		return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_OR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_XOR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MSHL:
		/* Masked shift: keep only the low five bits of the count. */
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		SLJIT_FALLTHROUGH
	case SLJIT_SHL:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MLSHR:
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		SLJIT_FALLTHROUGH
	case SLJIT_LSHR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MASHR:
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		SLJIT_FALLTHROUGH
	case SLJIT_ASHR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ROTL:
		/* Rotate left: negate the count, then rotate right. */
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0));
		arg2 = (sljit_uw)reg;
		SLJIT_FALLTHROUGH
	case SLJIT_ROTR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MULADD:
		compiler->status_flags_state = 0;
		return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst));
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
/* Flag bits for emit_op_mem; they double as indexes into the
   sljit_mem16/sljit_mem16_imm5/sljit_mem32 tables below. */
#define STORE 0x01
#define SIGNED 0x02

#define WORD_SIZE 0x00
#define BYTE_SIZE 0x04
#define HALF_SIZE 0x08
#define PRELOAD 0x0c

#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE)))
/* True when argw is non-negative, aligned to 1 << shift and fits in imm << shift. */
#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift))))

/*
   1st letter:
   w = word
   b = byte
   h = half

   2nd letter:
   s = signed
   u = unsigned

   3rd letter:
   l = load
   s = store
*/

/* 16 bit load/store with register offset. */
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};

/* 16 bit load/store with a scaled 5 bit immediate offset.
   A zero entry means no such encoding exists. */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};

/* Offset-form selectors for the 32 bit encodings below. */
#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
/* 32 bit load/store base opcodes (register, imm8 or imm12 offset). */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
1305
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
1306
{
1307
sljit_uw imm;
1308
1309
if (value >= 0) {
1310
if (value <= 0xfff)
1311
return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
1312
imm = get_imm((sljit_uw)value);
1313
if (imm != INVALID_IMM)
1314
return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm);
1315
}
1316
else {
1317
value = -value;
1318
if (value <= 0xfff)
1319
return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
1320
imm = get_imm((sljit_uw)value);
1321
if (imm != INVALID_IMM)
1322
return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm);
1323
}
1324
return SLJIT_ERR_UNSUPPORTED;
1325
}
1326
1327
/* Emits a load or store between register reg and the memory operand
   arg/argw. flags selects size, signedness and direction (see the
   STORE/SIGNED/*_SIZE defines and the sljit_mem* tables). tmp_reg may
   be clobbered to build unencodable addresses. Prefers 16 bit
   encodings, then 32 bit immediate-offset forms, then falls back to a
   register-offset form with the offset loaded into tmp_reg.

   Fix: the two base-adjustment push_inst32 calls below previously
   ignored their return value; they are now wrapped in FAIL_IF like
   every other emitter call in this file, so an allocation failure is
   propagated immediately. */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_s32 other_r;
	sljit_uw imm, tmp;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: build the base in tmp_reg. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
		}

		FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
		if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
	}

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base register plus (optionally shifted) index register. */
		argw &= 0x3;
		other_r = OFFS_REG(arg);
		arg &= REG_MASK;

		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
			return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
	}

	arg &= REG_MASK;

	if (argw > 0xfff) {
		/* Move the unencodable high part of the offset into the base. */
		imm = get_imm((sljit_uw)(argw & ~0xfff));
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm));
			arg = tmp_reg;
			argw = argw & 0xfff;
		}
	}
	else if (argw < -0xff) {
		/* Round the negative offset down so the remainder is a
		   non-negative 12 bit value. */
		tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
		SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
		imm = get_imm(tmp);

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm));
			arg = tmp_reg;
			argw += (sljit_sw)tmp;

			SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
		}
	}

	/* 16 bit instruction forms. */
	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
		/* tmp becomes the scale shift (2 = word, 1 = half, 0 = byte);
		   3 means the offset does not fit the scaled imm5 field. */
		tmp = 3;
		if (IS_WORD_SIZE(flags)) {
			if (ALIGN_CHECK(argw, 0x1f, 2))
				tmp = 2;
		}
		else if (flags & BYTE_SIZE)
		{
			if (ALIGN_CHECK(argw, 0x1f, 0))
				tmp = 0;
		}
		else {
			SLJIT_ASSERT(flags & HALF_SIZE);
			if (ALIGN_CHECK(argw, 0x1f, 1))
				tmp = 1;
		}

		if (tmp < 3)
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
	}
	else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
		/* SP based immediate. */
		return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
	}

	/* 32 bit immediate-offset forms. */
	if (argw >= 0 && argw <= 0xfff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
	else if (argw < 0 && argw >= -0xff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);

	SLJIT_ASSERT(arg != tmp_reg);

	/* Last resort: load the offset and use the register-offset form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	if (IS_3_LO_REGS(reg, arg, tmp_reg))
		return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
	return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
}
#undef ALIGN_CHECK
1423
#undef IS_WORD_SIZE
1424
1425
/* --------------------------------------------------------------------- */
1426
/* Entry, exit */
1427
/* --------------------------------------------------------------------- */
1428
1429
/* Emits the function prologue: pushes the used saved registers and LR,
   saves floating point registers if needed, allocates the local frame
   (8 byte aligned) and moves the incoming arguments into their sljit
   registers. Argument handling differs between the softfp and hardfp
   ABIs; on Windows, frames of 4096 bytes or more are grown page by page
   with touch loads (stack probing). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_s32 size, i, tmp, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_uw offset;
	sljit_uw imm = 0;
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Hardfp: VMOV_F32 instructions which repack the incoming float
	   arguments are collected here and emitted in reverse order. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	/* Collect the register mask for the push: used saved registers
	   (except the kept ones) and callee-saved scratch registers. */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	/* At least two registers must be set for PUSH_W and one for PUSH instruction. */
	FAIL_IF((imm & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
		: push_inst16(compiler, PUSH | (1 << 8) | imm));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Keep the doubles 8 byte aligned before VPUSH. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the whole frame up to 8 bytes. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* Softfp: arguments arrive in r0-r3, the rest on the stack;
	   offset tracks the argument position in bytes. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8 byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Already in the right register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
			else if (reg_map[tmp] <= 7)
				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			else
				FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp: compact the incoming VFP argument registers into
	   consecutive sljit float registers starting at SLJIT_FR0. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the single-precision half left free earlier. */
				*remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the collected moves in reverse order to avoid overwriting
	   a source register before it is consumed. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
#endif

#ifdef _WIN32
	if (local_size >= 4096) {
		/* Windows requires each stack page to be touched in order
		   (stack probing); grow the frame 4096 bytes at a time. */
		imm = get_imm(4096);
		SLJIT_ASSERT(imm != INVALID_IMM);

		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));

		if (local_size < 4 * 4096) {
			/* Up to three pages: unrolled probes. */
			if (local_size > 2 * 4096) {
				if (local_size > 3 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			}
		} else {
			/* Four or more pages: probe in a counted loop. */
			FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
			FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
			FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
		}

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
		local_size &= 0xfff;
	}

	if (local_size >= 256) {
		SLJIT_ASSERT(local_size < 4096);

		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
	} else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
#else /* !_WIN32 */
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
	}
#endif /* _WIN32 */

	return SLJIT_SUCCESS;
}
/* Records the register/local configuration of an existing function
   without emitting any code; computes the same 8 byte aligned
   local_size as sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	saved_regs_size = GET_SAVED_REGISTERS_SIZE(ENTER_GET_REGS(scratches),
		ENTER_GET_REGS(saveds) - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

	/* Doubles are saved, so alignment is unaffected. */
	if ((saved_regs_size & SSIZE_OF(sw)) != 0
			&& (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
		saved_regs_size += SSIZE_OF(sw);

	compiler->local_size = ((saved_regs_size + local_size + 0x7) & ~0x7) - saved_regs_size;
	return SLJIT_SUCCESS;
}
/* Emits SP = SP + imm using the shortest available encoding.
   TMP_REG2 may be clobbered by the fallback path. */
static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
{
	sljit_uw encoded;

	/* The TMP_REG1 register must keep its value. */
	if (imm <= (127u << 2))
		return push_inst16(compiler, ADD_SP_I | (imm >> 2));

	if (imm <= 0xfff)
		return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));

	encoded = get_imm(imm);
	if (encoded != INVALID_IMM)
		return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | encoded);

	/* No immediate form fits: add via a temporary register. */
	FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
	return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
}
1692
/* Emits the function epilogue: releases the locals, restores the saved
   float and integer registers, and handles the return address.

   frame_size selects the caller's intent:
     < 0 : load lr into TMP_REG2 and leave the frame fully released
       0 : normal return (pop directly into pc)
     > 0 : keep 'frame_size' bytes of the frame allocated on exit
           (used by tail-call style paths; must be 1 or 8-byte aligned). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Float registers were saved above the locals; release the locals
		   first so VPOP can run against the save area. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding (0 or 4 bytes) is left before the
		   integer save area now. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	/* Collect the saved registers (excluding the "kept" ones). */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	/* Collect the callee-saved scratch registers. */
	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Only lr needs restoring; treat it as the single restored reg. */
		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	/* Fast path: at most one register to restore and no pop-into-pc. */
	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						/* Single post-indexed load restores the register and
						   releases its slots in one instruction. */
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				/* NOTE: STR_SP | 0x800 is the 16-bit LDR from SP encoding. */
				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
				else
					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		/* Adjust SP to the requested remaining frame size. */
		if (frame_size > local_size)
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		/* tmp encodes which restore strategy was chosen above:
		   0/1 = done, 2 = load after SP adjust, 3 = post-indexed load. */
		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			if (reg_map[restored_reg] <= 7)
				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));

			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
	}

	/* General path: release the locals, then pop the whole register list. */
	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
		if (lr_dst == TMP_PC)
			reg_list |= 1u << 8; /* PC bit of the 16-bit POP encoding. */

		/* At least one register must be set for POP instruction. */
		SLJIT_ASSERT(reg_list != 0);

		FAIL_IF(push_inst16(compiler, POP | reg_list));
	} else {
		if (lr_dst != 0)
			reg_list |= (sljit_uw)1 << reg_map[lr_dst];

		/* At least two registers must be set for POP_W instruction. */
		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);

		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
	}

	if (frame_size > 0)
		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* frame_size == 1: skip the lr slot left on the stack. */
	return push_inst16(compiler, ADD_SP_I | 1);
}
/* Emits a void return: full epilogue ending in a pop into pc. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
/* Releases the frame and jumps to 'src' (tail-call style return). The
   target is copied to TMP_REG1 first whenever the epilogue would clobber
   it (memory operand or a register that is restored by the epilogue). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* 'src' is restored by the epilogue below; keep a safe copy. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* frame_size == 1: keep the lr slot so the callee can return normally. */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */

#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

/* Targets without hardware integer divide call these toolchain run-time
   helpers from sljit_emit_op0 (divmod emulation). */
#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
/* Emits zero-operand operations: breakpoint, nop, long multiply,
   divide/divmod (hardware SDIV/UDIV when available, otherwise a call to
   the run-time divmod helpers with caller-saved scratches preserved). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	sljit_uw saved_reg_list[3];
	sljit_uw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64-bit product into the R1:R0 pair. */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Hardware divide; the remainder is computed as R1 -= R0 * R1
		   using the saved original dividend in TMP_REG1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Preserve live caller-saved registers (r1-r3) around the helper
		   call; r1 is only clobbered by the DIV (quotient-only) variants. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
				| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_udiv/__rt_sdiv take (denominator, numerator): swap r0/r1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the preserved registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
				| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_MEMORY_BARRIER:
		return push_inst32(compiler, DMB_SY);
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
/* Emits single-operand operations: the MOV family (with size/sign
   variants) and the remaining unary operations handled by emit_op_imm. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Select the memory access size and pre-narrow immediates so the
		   stored value matches the requested signedness/width. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src == SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
		else if (src & SLJIT_MEM)
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		else if (FAST_IS_REG(dst))
			/* Register-to-register move (with extension, if any). */
			return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
		else
			/* Register source, memory destination: store directly. */
			dst_r = src;

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	}

	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
	flags = WORD_SIZE;

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
		if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16))
			op |= REGISTER_OP;
		flags |= HALF_SIZE;
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Any allocation error is sticky in compiler->error, so the return
	   value does not need to be checked here. */
	emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	return SLJIT_SUCCESS;
}
/* Emits two-operand operations. Sources are normalized: immediates are
   flagged as ARG1_IMM/ARG2_IMM, memory operands are loaded into temp
   registers, and the actual encoding is chosen by emit_op_imm. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, src2_tmp_reg, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* TMP_REG1 as destination marks a flags-only operation (see op2u). */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	if (src2 == SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Pick a temp that does not collide with a register src1. */
		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
		emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1);
		src2w = src2_tmp_reg;
	} else
		src2w = src2;

	if (src1 == SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	} else
		src1w = src1;

	/* Errors are sticky in compiler->error; no FAIL_IF needed here. */
	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
}
/* Flags-only variant of op2: delegates with TMP_REG1 as destination,
   which sljit_emit_op2 interprets as UNUSED_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
/* Register-destination two-operand operations. Only SLJIT_MULADD is
   supported on this target; it is forwarded to sljit_emit_op2, and every
   other opcode is accepted as a no-op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));

	if (GET_OPCODE(op) == SLJIT_MULADD) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
/* Shifts src1 and fills the vacated bits from src2 (funnel shift):
   dst = (src1 <</>> n) | (src2 >>/<< (32 - n)). Falls back to a rotate
   when both sources are the same register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Shifting a value into itself is a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (IS_2_LO_REGS(dst_reg, src1_reg))
			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
		else
			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));

		/* Complementary shift amount: 32 - src3w. */
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants, or a shift count aliasing dst, need the count
	   reduced mod 32 in a temporary first. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
	else
		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));

	/* Shift src2 by one, then by (count ^ 31), so a zero count cannot
	   produce an (undefined) shift by 32. */
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
/* Emits dst = src1 + (src2 << shift_arg) using ADD with a shifted
   register operand; degenerates to a plain op2 when the shift is zero or
   can be folded into an immediate src2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	sljit_sw shift_arg)
{
	sljit_s32 dst_r, tmp_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	shift_arg &= 0x1f;

	/* An immediate source can be pre-shifted at compile time. */
	if (src2 == SLJIT_IMM) {
		src2w = src2w << shift_arg;
		shift_arg = 0;
	}

	if (shift_arg == 0) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);
	}

	/* Both sources must be in registers for the shifted-operand ADD. */
	if (src1 == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
	} else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_MEM) {
		tmp_r = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, tmp_r, src2, src2w, tmp_r));
		src2 = tmp_r;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* The 5-bit shift amount is split between imm2 (bits 6-7) and
	   imm3 (bits 12-14) in the Thumb-2 encoding. */
	FAIL_IF(push_inst32(compiler, ADD_W | RD4(dst_r) | RN4(src1) | RM4(src2) | ((sljit_ins)(shift_arg & 0x3) << 6) | ((sljit_ins)(shift_arg & 0x1c) << 10)));

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG1);
	return SLJIT_SUCCESS;
}
/* Emits source-only operations: fast return (branch through lr) and
   prefetch hints. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to lr on this target. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src)) {
			if (src != TMP_REG2)
				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		} else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No-op on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch levels map to the same PLD-style preload. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
/* Emits destination-only operations: fast-call entry (store lr) and
   loading the function's return address from the stack frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is mapped to lr on this target. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst)) {
			if (dst == TMP_REG2)
				return SLJIT_SUCCESS;
			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
		}
		/* Memory destination: fall through to the common store below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Compute the save-area size to locate the stored lr slot. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
/* Translates an abstract sljit register to its hardware index for the
   requested register class; returns -1 for unsupported classes. A 128-bit
   SIMD register is the even-numbered base of its double-register pair. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	switch (type) {
	case SLJIT_GP_REGISTER:
		return reg_map[reg];
	case SLJIT_FLOAT_REGISTER:
	case SLJIT_SIMD_REG_64:
		return freg_map[reg];
	case SLJIT_SIMD_REG_128:
		return freg_map[reg] & ~0x1;
	default:
		return -1;
	}
}
/* Copies a raw machine instruction into the code stream: a 2-byte buffer
   is a 16-bit Thumb instruction, anything else a 32-bit Thumb-2 one. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return (size == 2)
		? push_inst16(compiler, *(sljit_u16*)instruction)
		: push_inst32(compiler, *(sljit_ins*)instruction);
}
/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

/* Direction bit shared with the VSTR/VLDR encodings below. */
#define FPU_LOAD (1 << 20)

/* Emits a VFP load or store (direction and precision taken from 'flags')
   between float register 'reg' and memory operand arg/argw. VSTR/VLDR only
   accepts a word-aligned 10-bit scaled offset, so larger or unaligned
   offsets are materialized into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Fold base + (index << shift) into TMP_REG1. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	/* Word-aligned offsets within +/-1020 fit directly (0x800000 = add). */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
	}

	if (arg & REG_MASK) {
		/* Try forming base + argw in TMP_REG1 with a single instruction. */
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
		}

		/* Split argw: high part added to the base, low 10 bits encoded. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}

		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: compute the full address in TMP_REG1. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
/* Converts a float/double to a signed 32-bit integer (VCVT), then moves
   the result to a general register or stores it to memory. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Invert SLJIT_32: the instruction bit selects double precision. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
/* Common integer-to-float conversion: moves the (register, memory or
   immediate) integer source into TMP_FREG1, applies the VCVT variant in
   'ins', and writes the result to 'dst'. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate source: materialize it in TMP_REG1 first. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Signed 32-bit integer to float conversion; the inverted SLJIT_32 bit
   selects the single/double precision VCVT variant. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = VCVT_F32_S32 | (sljit_ins)(~op & SLJIT_32);
	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
/* Unsigned 32-bit integer to float conversion; the inverted SLJIT_32 bit
   selects the single/double precision VCVT variant. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = VCVT_F32_U32 | (sljit_ins)(~op & SLJIT_32);
	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
/* Compares two float operands with VCMP and transfers the FP status
   flags to the integer flags (VMRS to APSR). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* Invert SLJIT_32: the instruction bit selects double precision. */
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
	FAIL_IF(push_inst32(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* UNORDERED_OR_EQUAL needs an extra conditional compare to fold the
	   unordered case into the equality flags. */
	FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
	return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
}
/* Emits single-operand float operations: move, negate, absolute value and
   single<->double conversion. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Invert SLJIT_32 (instruction bit means double precision), except for
	   F64<-F32 where the operand precision is the source's. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
			else
				/* Memory destination: store straight from the source. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		/* The result's precision is the opposite of the source's. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Emits two-operand float operations: add, sub, mul, div and copysign. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Invert SLJIT_32: the instruction bit selects double precision. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Copy src2's sign word into TMP_REG1, take |src1|, then negate
		   the result conditionally when the sign word is negative. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
/* Loads a 32-bit float constant into 'freg', preferring the single VMOV
   immediate encoding where the value fits, and falling back to building
   the bit pattern in TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun through a union to read the float's bit pattern. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* VMOV (immediate) accepts values of the form +/-m * 2^e with a 4-bit
	   mantissa and a small exponent range; check that shape here. */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		if (exp == 0x20 || exp == 0x1f) {
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
}
/* Loads the 64-bit float constant 'value' into the VFP register 'freg'.
   Mirrors sljit_emit_fset32: tries the vmov.f64 imm8 encoding first,
   then falls back to building the two 32-bit halves in core registers
   and transferring them with a single vmov (two-register form). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the double to its IEEE-754 bit pattern.
	   NOTE(review): imm[0] is treated as the low word and imm[1] as the
	   word holding sign/exponent — assumes little-endian word order. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Low 48 mantissa bits must be zero for the imm8 encoding. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		/* Exponent must be of the form 0b100000000 or 0b011111111. */
		if (exp == 0x100 || exp == 0xff) {
			/* imm8 = sign bit + low exponent bits + top 4 mantissa bits;
			   (1 << 8) selects the double precision form. */
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* If both halves are identical, one core register suffices. */
	if (u.imm[0] == u.imm[1])
		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
}
2653
2654
/* Bit-exact copy between a floating point register and one core register
   (or a core register pair for doubles). The SLJIT_COPY_FROM_F64 opcode
   reverses the transfer direction via bit 20 of the encoding. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_ins ins;
	sljit_s32 second_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (!(reg & REG_PAIR_MASK)) {
		/* Single core register form; bit 7 selects the double variant. */
		ins = VMOV | VN4(freg) | RT4(reg);
		if (!(op & SLJIT_32))
			ins |= 1 << 7;
	} else {
		/* Register pair form: both 32-bit halves move at once. */
		second_reg = REG_PAIR_SECOND(reg);
		ins = VMOV2 | RN4(REG_PAIR_FIRST(reg)) | RT4(second_reg) | VM4(freg);
	}

	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		ins |= 1 << 20;

	return push_inst32(compiler, ins);
}
2680
2681
/* --------------------------------------------------------------------- */
2682
/* Conditional instructions */
2683
/* --------------------------------------------------------------------- */
2684
2685
/* Maps an sljit condition type to the 4-bit ARM condition code used in
   IT blocks and conditional branches. CARRY/OVERFLOW variants depend on
   whether the flags were produced by an ADD- or SUB-style instruction,
   tracked in compiler->status_flags_state; note the deliberate
   fallthroughs below. */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x0;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x1;

	case SLJIT_CARRY:
		/* After ADD, carry set means CS (0x2); otherwise same as LESS. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x2;
		SLJIT_FALLTHROUGH

	case SLJIT_LESS:
		return 0x3;

	case SLJIT_NOT_CARRY:
		/* After ADD, carry clear means CC (0x3); otherwise GREATER_EQUAL. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x3;
		SLJIT_FALLTHROUGH

	case SLJIT_GREATER_EQUAL:
		return 0x2;

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x8;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x9;

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb;

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa;

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc;

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd;

	case SLJIT_OVERFLOW:
		/* Without ADD/SUB flags, overflow state is mapped to NE. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x1;
		SLJIT_FALLTHROUGH

	case SLJIT_UNORDERED:
		return 0x6;

	case SLJIT_NOT_OVERFLOW:
		/* Without ADD/SUB flags, not-overflow state is mapped to EQ. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x0;
		SLJIT_FALLTHROUGH

	case SLJIT_ORDERED:
		return 0x7;

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x4;

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x5;

	default: /* SLJIT_JUMP */
		SLJIT_UNREACHABLE();
		return 0xe;
	}
}
2773
2774
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2775
{
2776
struct sljit_label *label;
2777
2778
CHECK_ERROR_PTR();
2779
CHECK_PTR(check_sljit_emit_label(compiler));
2780
2781
if (compiler->last_label && compiler->last_label->size == compiler->size)
2782
return compiler->last_label;
2783
2784
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2785
PTR_FAIL_IF(!label);
2786
set_label(label, compiler);
2787
return label;
2788
}
2789
2790
/* Emits a label aligned to 1 << alignment bytes, padding with 16-bit NOPs,
   and reserves space for the optional chain of read-only buffers, each of
   which also receives a label. Returns the (first) label or NULL on error. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
	sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
{
	sljit_uw mask, i;
	struct sljit_label *label;
	struct sljit_label *next_label;
	struct sljit_extended_label *ext_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));

	sljit_reset_read_only_buffers(buffers);

	if (alignment <= SLJIT_LABEL_ALIGN_2) {
		/* Instructions are already 2-byte aligned; a plain label suffices. */
		SLJIT_SKIP_CHECKS(compiler);
		label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!label);
	} else {
		/* The used space is filled with NOPs. */
		mask = ((sljit_uw)1 << alignment) - sizeof(sljit_u16);

		/* Worst-case padding; the final layout pass trims it using 'mask'. */
		for (i = (mask >> 1); i != 0; i--)
			PTR_FAIL_IF(push_inst16(compiler, NOP));

		ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
		PTR_FAIL_IF(!ext_label);
		set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
		label = &ext_label->label;
	}

	if (buffers == NULL)
		return label;

	next_label = label;

	/* Reserve each buffer's space (rounded up to 16-bit units) with NOPs
	   and attach a label marking where the buffer starts. */
	while (1) {
		buffers->u.label = next_label;

		for (i = (buffers->size + 1) >> 1; i > 0; i--)
			PTR_FAIL_IF(push_inst16(compiler, NOP));

		buffers = buffers->next;

		if (buffers == NULL)
			break;

		SLJIT_SKIP_CHECKS(compiler);
		next_label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!next_label);
	}

	return label;
}
2843
2844
/* Emits a (possibly conditional) jump or call whose target is patched
   later. Conditional jumps are realized as an IT block guarding an
   indirect branch through TMP_REG1; the linker pass later rewrites this
   into a direct branch when the target is in range. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		/* The condition code is stored in the flags for the patcher. */
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return jump;
}
2876
2877
#ifdef __SOFTFP__
2878
2879
/* Marshals the call arguments for the softfp calling convention: the
   first four argument words go to r0-r3, the rest to the stack (doubles
   8-byte aligned). Pass 1 records each argument's target offset; pass 2
   moves arguments (in reverse order) from their sljit locations to those
   offsets. *extra_space receives the stack space reserved (0 if none);
   on entry it carries the SLJIT_CALL_RETURN flag for tail calls. *src,
   if register-based, is relocated when its register gets clobbered. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	/* Track where the (register) call target lives so we notice clobbers. */
	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* Pass 1: compute each argument's byte offset in the outgoing area
	   while reversing arg_types into 'types'. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Reserve stack space for arguments that do not fit in r0-r3. */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Save the call target before its register pair is overwritten. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			/* Only move when source and destination slots differ. */
			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The call target register was moved; track it. */
						*src = (sljit_s32)(1 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3003
3004
/* After a softfp call: copies a floating point return value from the
   core return registers into the VFP return register, if the call's
   return type (lowest arg_types field) is a float type. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		return push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		return push_inst32(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
3013
3014
#else
3015
3016
/* Marshals floating point arguments for the hardfp calling convention:
   compacts the sljit FR registers (assigned one per argument) into the
   consecutive VFP argument registers, back-filling single precision
   values into the unused half of a previously skipped register. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	/* Holds a half-used register available for the next f32, or 0. */
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the free half of an earlier register. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3052
3053
#endif
3054
3055
/* Emits a call with argument marshalling for the active float ABI.
   Handles tail calls (SLJIT_CALL_RETURN) by releasing the stack frame
   and converting the call into a jump; on softfp, extra stack space for
   stack-passed arguments is released after the call returns. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call without extra stack space: frame already released. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* The saved return address is reloaded before the stack is freed. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3111
3112
/* Emits an indirect jump or fast call through a register, memory operand,
   or immediate target. Immediate targets get a jump record so the linker
   pass can rewrite the constant load into a direct branch when possible. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* For a plain jump, load the target straight into pc;
		   a call needs the BLX below, so load into TMP_REG1. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

	jump->addr = compiler->size;
	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
3144
3145
/* Indirect call counterpart of sljit_emit_call: marshals arguments for
   the active float ABI, protects the call target from being clobbered
   by argument moves, and supports tail calls via SLJIT_CALL_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call releases the frame; saved registers must be copied out first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call without extra stack space: frame already released. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* The saved return address is reloaded before the stack is freed. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN)
				return push_inst16(compiler, BX | RN3(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3207
3208
#ifdef __SOFTFP__
3209
3210
/* Moves a floating point return value into the softfp return location
   before returning: r0 (f32) or r0:r1 (f64), unless the function was
   entered with register arguments (SLJIT_ENTER_REG_ARG), in which case
   the value goes to the FP return register instead. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* Transfer the FP register's bit pattern to the core registers. */
		if (op & SLJIT_32)
			return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
		return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory/immediate source: load the raw words into r0 (and r1 for f64). */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3232
3233
#endif /* __SOFTFP__ */
3234
3235
/* Materializes the condition 'type' as 0/1 into dst (op == MOV family),
   or combines it into dst with AND/OR/XOR. Uses ITE/IT blocks so both
   the true and false values (or only the true value) are written
   conditionally. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* ITE block: write 1 on the condition, 0 otherwise. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	/* Load the current dst value before combining it with the flag. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* ITE block: and with 1 on the condition, with 0 otherwise. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 1 only when the condition holds. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
3287
3288
/* Conditional select: dst_reg = condition(type) ? src1 : src2_reg.
   Normalizes the operands so that src2 ends up in dst_reg first, then
   conditionally overwrites dst_reg with src1 inside an IT block; the
   condition is inverted whenever the operands are swapped. Immediates
   try several Thumb-2 encodings (8-bit, modified immediate, inverted
   immediate, movw/movw+movt) from cheapest to most expensive. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_uw cc, tmp, tmp2;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* If src1 already occupies dst_reg, swap the operands and invert. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		if (!(type & SLJIT_COMPARE_SELECT))
			type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			/* src1 was loaded into dst_reg; select src2 conditionally. */
			src1 = src2_reg;
			src1w = 0;
			if (!(type & SLJIT_COMPARE_SELECT))
				type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));

	if ((type & SLJIT_COMPARE_SELECT))
		type ^= 0x1;
	cc = get_cc(compiler, type & ~(SLJIT_32 | SLJIT_COMPARE_SELECT));

	if (src1 == SLJIT_IMM && (type & SLJIT_COMPARE_SELECT)) {
		tmp = (sljit_uw)src1w;
		/* 8-bit immediate with a low register: 16-bit cmp/mov. */
		if (tmp <= 0xff && reg_map[dst_reg] <= 7) {
			if (type & SLJIT_COMPARE_SELECT)
				FAIL_IF(push_inst16(compiler, CMPI | IMM8(tmp) | RDN3(dst_reg)));
			FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
			return push_inst16(compiler, MOVI | IMM8(tmp) | RDN3(dst_reg));
		}

		/* Thumb-2 modified immediate encoding. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp != INVALID_IMM) {
			if (type & SLJIT_COMPARE_SELECT)
				FAIL_IF(push_inst32(compiler, CMPI_W | RN4(dst_reg) | tmp));
			FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
			return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
		}

		/* Inverted immediate (mvn); compare via cmn with the negated value. */
		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp != INVALID_IMM && (type & SLJIT_COMPARE_SELECT)) {
			tmp2 = get_imm(NEGATE(src1w));
			if (tmp2 != INVALID_IMM)
				FAIL_IF(push_inst32(compiler, CMNI_W | RN4(dst_reg) | tmp2));
			else
				tmp = INVALID_IMM;
		}

		if (tmp != INVALID_IMM) {
			FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
			return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
		}

		/* No encoding fits; build the immediate in TMP_REG1. */
		if (type & SLJIT_COMPARE_SELECT) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1 = TMP_REG1;
		}
	}

	if (src1 != SLJIT_IMM) {
		if (type & SLJIT_COMPARE_SELECT) {
			if (IS_2_LO_REGS(dst_reg, src1))
				FAIL_IF(push_inst16(compiler, CMP | RD3(dst_reg) | RN3(src1)));
			else
				FAIL_IF(push_inst16(compiler, CMP_X | SET_REGS44(dst_reg, src1)));
		}

		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
	}

	tmp = (sljit_uw)src1w;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	/* ITT block covering both halves of the movw/movt sequence. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw)src1w;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
3393
3394
/* Floating point conditional select:
   dst_freg = condition(type) ? src1 : src2_freg.
   src2 is placed in dst_freg first, then src1 is moved in conditionally
   under an IT block; the condition is inverted when the operands swap. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* The SLJIT_32 flag is inverted for the instruction encodings below. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap operands and invert the condition. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
}
3423
3424
/* Loads or stores a register pair. The unaligned path uses two word
   loads/stores via emit_op_mem; the aligned path uses a single LDRD/STRD,
   whose offset field only covers +/-1020 in word multiples, so large or
   misaligned offsets are first folded into TMP_REG1. Single registers
   are delegated to sljit_emit_mem_unaligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw imm, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
		/* Two word accesses: normalize the address so both words are
		   reachable with a 12-bit positive / 8-bit negative offset. */
		if ((mem & REG_MASK) == 0) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm == INVALID_IMM) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				memw = 0;
			} else
				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (mem & OFFS_REG_MASK) {
			/* Fold base + (index << shift) into TMP_REG1. */
			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
			memw = 0;
			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw < -0xff) {
			/* Zero value can be included in the first case. */
			if ((-memw & 0xfff) <= SSIZE_OF(sw))
				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
			else
				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);

			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
			imm = get_imm(tmp);

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw += (sljit_sw)tmp;
				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm != INVALID_IMM) {
				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}

		flags = WORD_SIZE;

		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);

		if (type & SLJIT_MEM_STORE) {
			flags |= STORE;
		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Load the second word first so the base is not clobbered. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
		}

		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
	}

	/* LDRD/STRD path; 1 << 23 is the add-offset form (cleared when the
	   negated offset form is used below). */
	flags = 1 << 23;

	if ((mem & REG_MASK) == 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm == INVALID_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			memw = 0;
		} else {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}

			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else if (mem & OFFS_REG_MASK) {
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
		memw = 0;
		mem = SLJIT_MEM1(TMP_REG1);
	} else if (memw < 0) {
		/* Small negative word-aligned offsets fit the subtract form directly. */
		if ((-memw & ~0x3fc) == 0) {
			flags = 0;
			memw = -memw >> 2;
		} else {
			tmp = (sljit_uw)(-memw & 0x7fc);
			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw = (-memw & 0x3fc) >> 2;

				if (tmp <= 0x400)
					flags = 0;
				else
					memw = 0x100 - memw;
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}
	} else if ((memw & ~0x3fc) != 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
			memw = 0;
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else
		memw >>= 2;

	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
}
3598
3599
/* Emits a load/store with base register update (pre- or post-indexed).
   Only simple base+8-bit-immediate forms are supported; anything else
   returns SLJIT_ERR_UNSUPPORTED so the caller can fall back. With
   SLJIT_MEM_SUPP only the capability is queried, no code is emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Indexed addressing or an offset outside +/-255 is not encodable. */
	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	/* Select the access size/signedness from the move opcode. */
	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (type & SLJIT_MEM_STORE)
		flags |= STORE;

	/* 0x900: writeback form; 0x400: pre-index; 0x200: add offset. */
	inst = sljit_mem32[flags] | 0x900;

	if (!(type & SLJIT_MEM_POST))
		inst |= 0x400;

	if (memw >= 0)
		inst |= 0x200;
	else
		memw = -memw;

	return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
}
3656
3657
/* Rewrite the address *mem / *memw so that the remaining offset fits in
   [-0xff, max_offset]. When the original form is not directly encodable, the
   (partial) address is materialized into TMP_REG1 and *mem is set to it.
   max_offset is the largest positive immediate the caller's instruction accepts. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	/* Base + shifted index: compute the full address, offset becomes 0. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Offset already fits: keep the base register unchanged. */
		if (argw <= max_offset && argw >= -0xff) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw < 0) {
			/* Subtract the high part, leave the low 8 bits as a negative offset. */
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			/* Add the high part, leave the low 12 bits as the offset. */
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			/* Round up to the next 4K page and use a negative remainder. */
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fallback: load the aligned address as a full immediate. */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3717
3718
/* Emit a floating point load/store for possibly unaligned memory. Aligned
   accesses go through the VFP load/store path; unaligned ones are split into
   one or two word-sized integer transfers moved through VMOV. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	/* At least 4 byte aligned: the FPU can access it directly. */
	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the low word of the FP register into TMP_REG2. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);

		/* Doubles need two word stores; reserve room for the +4 offset. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80 selects the high word of the double register. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	/* Load both words, then transfer the pair with a single VMOV2. */
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
}
3754
3755
/* Reduce a SIMD memory operand to a single base register with zero offset.
   NEON structure load/store instructions take no immediate offset, so any
   non-zero displacement is folded into TMP_REG1 and *mem_ptr is updated. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_uw imm;
	sljit_s32 mem = *mem_ptr;

	/* Base + shifted index register. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	/* Absolute address: load it as an immediate. */
	if (SLJIT_UNLIKELY(!(mem & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	mem &= REG_MASK;

	if (memw == 0) {
		*mem_ptr = mem;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;
	/* Try to add/subtract the displacement with a single immediate form. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (imm != INVALID_IMM)
		return push_inst32(compiler, ((memw < 0) ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(mem) | imm);

	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem));
}
3786
3787
/* Map a float register index to the even-indexed half of its quad (Q)
   register, so quad operations always start on an even doubleword register. */
static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
{
	freg += freg & 0x1;

	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));

	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
		freg--;

	return freg;
}
3798
3799
/* Offset (+1 or -1) from one half of a quad register to its other half. */
#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3801
/* Move a SIMD register to/from another register or memory. Register-register
   moves are emitted as VORR (vreg | vreg); memory transfers use VLD1/VST1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 8 byte (D) and 16 byte (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (!(srcdst & SLJIT_MEM)) {
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		/* VORR with identical sources copies the register. */
		if (type & SLJIT_SIMD_STORE)
			ins = VD4(srcdst) | VN4(vreg) | VM4(vreg);
		else
			ins = VD4(vreg) | VN4(srcdst) | VM4(srcdst);

		/* Bit 6 is the Q (quad) bit. */
		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* 0x7 << 8: one D register; 0xa << 8: two D registers (a Q register). */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(vreg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Encode the alignment hint (64-bit or 128-bit). */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	/* 0xf in the Rm field: no base register writeback. */
	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3859
3860
/* Try to encode value as a NEON VMOV/VMVN modified immediate for the given
   element size. Returns the instruction bits (cmode/op plus the scattered
   8-bit immediate), or ~0 if the value cannot be encoded. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* A 32-bit value whose halves match can be encoded as a 16-bit pattern. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* A 16-bit value whose bytes match can be encoded as an 8-bit pattern. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		/* First pass tries VMOV forms; second pass (bit 5 set) tries
		   the inverted value for VMVN. */
		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			/* Byte in one of the four byte positions. */
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			/* Byte followed by ones: 0x0000XXff / 0x00XXffff forms. */
			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8-bit immediate into the imm4/imm3/a fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
}
3953
3954
/* Broadcast a scalar (register, memory, or immediate) into every lane of a
   SIMD register, using VDUP / VLD1 (replicate) / VMOV (immediate) as fits. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	/* Zero is a single VMOV immediate regardless of element size. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(vreg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		/* 64-bit floats: copy the double into both halves of the quad. */
		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw));
			src = vreg;
		} else if (vreg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= 1 << 5;

		/* VLD1 single element to all lanes; 0xf: no writeback. */
		return push_inst32(compiler, VLD1_r | ins | VD4(vreg) | RN4(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the element width. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst32(compiler, VMOV_i | imm | VD4(vreg));
		}

		/* Not encodable as a modified immediate; go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from a core register: element size selects the b/e bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst32(compiler, VDUP | ins | VN4(vreg) | RT4(src));
}
4053
4054
/* Move a single lane between a SIMD register and a register, memory location
   or immediate, optionally zeroing the other lanes (SLJIT_SIMD_LANE_ZERO). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* Double from a float register: copy then zero the other half. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					vreg += SLJIT_QUAD_OTHER_HALF(vreg);

				if (srcdst != vreg)
					FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(srcdst) | VM4(srcdst)));

				vreg += SLJIT_QUAD_OTHER_HALF(vreg);
				return push_inst32(compiler, VMOV_i | VD4(vreg));
			}

			/* Source overlaps the destination: save it first. */
			if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) {
				FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(vreg) | VM4(vreg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Clear the whole destination register. */
		FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(vreg)));
	}

	/* Lanes in the upper half of a quad live in the other D register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		/* VLD1/VST1 single element to/from one lane; 0xf: no writeback. */
		return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(vreg) | RN4(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst32(compiler, VORR | VD4(srcdst) | VN4(vreg) | VM4(vreg));
			return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(vreg) | VM4(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* Even-indexed float registers can be copied directly. */
			if (freg_ebit_map[vreg] == 0) {
				if (lane_index == 1)
					vreg = SLJIT_F64_SECOND(vreg);

				return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(vreg));
			}

			/* Otherwise move through a core register. */
			FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1)));
			return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
		}

		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
		return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* Element size bits of the VMOV (scalar) encoding. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20);

		/* Zero extend unless a signed lane read was requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst32(compiler, VMOV_s | ins | VN4(vreg) | RT4(srcdst));
}
4170
4171
/* Broadcast one lane of src into every lane of vreg (VDUP scalar form).
   64-bit elements are handled by copying the doubleword register(s). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of the quad live in the other D register. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64-bit lanes: plain register copies via VORR. */
		if (vreg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	/* imm4 field: lane index shifted past a size-marker bit. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | VM4(src));
}
4219
4220
/* Widen packed elements from elem_size to elem2_size. Integers use one or
   more VSHLL (shift-left-long by 0) steps; floats use VCVT.F64.F32. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* A single widening step on a quad consumes a whole D register;
		   otherwise only part of the register needs to be loaded. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(vreg) | RN4(src) | 0xf));
		else
			FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(vreg) | RN4(src) | 0xf));
		src = vreg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? vreg : TMP_FREG2;

		/* Double the element size once per iteration until elem2_size. */
		do {
			FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
				| ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (vreg == src) {
		/* Convert the upper float pair first so src is not overwritten. */
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20));
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src)));
	vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20);
}
4284
4285
/* Collect the sign bit of every element into an integer bitmask (movemask).
   Implemented as VSHR to isolate signs, a chain of VSRA accumulating shifts
   (imms holds the shift amounts, one per byte), then a lane extract. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 28) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 28) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 28) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	/* Unsigned shift right: isolate the sign bit of each element. */
	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(vreg)));

	/* Narrow a quad intermediate back to a doubleword. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Fold neighbouring sign bits together, one VSRA per byte of imms. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* Extract the accumulated mask into a core register. */
	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));

	/* 16 byte-sized elements: combine the mask of the second half. */
	if (reg_size == 4 && elem_size == 0) {
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4361
4362
/* Two-operand SIMD operation: AND / OR / XOR / table shuffle (VTBL).
   A memory second operand is first loaded into TMP_FREG2 with VLD1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment;
	sljit_ins ins = 0, load_ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VORR;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VEOR;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = VTBL;
		break;
	}

	if (src2 & SLJIT_MEM) {
		if (elem_size > 3)
			elem_size = 3;

		/* 0x7 << 8: one D register; 0xa << 8: two (a Q register). */
		load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
		alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);

		SLJIT_ASSERT(reg_size >= alignment);

		if (alignment == 3)
			load_ins |= 0x10;
		else if (alignment >= 4)
			load_ins |= 0x20;

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst32(compiler, load_ins | VD4(TMP_FREG2) | RN4(src2) | ((sljit_ins)elem_size) << 6 | 0xf));
		src2 = TMP_FREG2;
	}

	if (reg_size == 4) {
		dst_vreg = simd_get_quad_reg_index(dst_vreg);
		src1_vreg = simd_get_quad_reg_index(src1_vreg);
		src2 = simd_get_quad_reg_index(src2);

		if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
			/* VTBL with a 16 byte table (bit 8); done as two half ops.
			   Use TMP_FREG2 when dst aliases the table register. */
			ins |= (sljit_ins)1 << 8;

			FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN4(src1_vreg) | VM4(src2)));
			src2 += SLJIT_QUAD_OTHER_HALF(src2);
			FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN4(src1_vreg) | VM4(src2)));

			if (dst_vreg == src1_vreg)
				return push_inst32(compiler, VORR | VD4(dst_vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
			return SLJIT_SUCCESS;
		}

		ins |= (sljit_ins)1 << 6;
	}

	return push_inst32(compiler, ins | VD4(dst_vreg) | VN4(src1_vreg) | VM4(src2));
}
4439
4440
#undef FPU_LOAD
4442
/* Emit the load-exclusive half of an atomic read-modify-write sequence
   (LDREX / LDREXB / LDREXH). Signed and CAS-based variants are unsupported. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	/* Only the exclusive load/store scheme is implemented. */
	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
	case SLJIT_MOV_S16:
	case SLJIT_MOV_S32:
		return SLJIT_ERR_UNSUPPORTED;

	case SLJIT_MOV_U8:
		ins = LDREXB;
		break;
	case SLJIT_MOV_U16:
		ins = LDREXH;
		break;
	default:
		ins = LDREX;
		break;
	}

	/* Support-only query: the operation is encodable, emit nothing. */
	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
}
4476
4477
/* Emit the store-exclusive half of an atomic sequence (STREX / STREXB /
   STREXH). The status result lands in TMP_REG1; when SLJIT_SET_ATOMIC_STORED
   is requested it is compared against zero to set the flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins;

	/* temp_reg == mem_reg is undefined so use another temp register */
	SLJIT_UNUSED_ARG(temp_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
	case SLJIT_MOV_S16:
	case SLJIT_MOV_S32:
		return SLJIT_ERR_UNSUPPORTED;

	case SLJIT_MOV_U8:
		ins = STREXB | RM4(TMP_REG1);
		break;
	case SLJIT_MOV_U16:
		ins = STREXH | RM4(TMP_REG1);
		break;
	default:
		ins = STREX | RD4(TMP_REG1);
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
	if (op & SLJIT_SET_ATOMIC_STORED)
		return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));

	return SLJIT_SUCCESS;
}
4519
4520
/* Emit a patchable constant. Byte constants use a single MOV/MVN immediate
   (patchable by sljit_set_const); everything else is a full movw/movt pair. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;
	sljit_s32 mem_flags = WORD_SIZE | STORE;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (GET_OPCODE(op) == SLJIT_MOV_U8) {
		/* Bit 8 selects MVN (inverted immediate) over MOV. */
		PTR_FAIL_IF(push_inst32(compiler,
			((init_value & 0x100) != 0 ? (MVN_WI | (~init_value & 0xff)) : (MOV_WI | (init_value & 0xff))) | RD4(dst_r)));
		mem_flags = BYTE_SIZE | STORE;
	} else
		PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, dst, dstw, TMP_REG2));
	return const_;
}
4549
4550
/* Emit a patchable address load (SLJIT_MOV_ADDR or the absolute variants).
   For SLJIT_ADD_ABS_ADDR, the address is loaded into TMP_REG1 and added to
   the current destination value. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r, target_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op != SLJIT_ADD_ABS_ADDR)
		target_r = dst_r;
	else {
		target_r = TMP_REG1;

		/* Preload the addend from memory before the address is produced. */
		if (dst & SLJIT_MEM)
			PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, dst, dstw, TMP_REG1));
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	if (op != SLJIT_MOV_ADDR)
		jump->flags |= IS_ABS;

	/* Placeholder: the destination register now, the remaining three
	   half-words of the movw/movt pair are patched at link time. */
	PTR_FAIL_IF(push_inst16(compiler, RDN3(target_r)));
	compiler->size += 3;

	if (op == SLJIT_ADD_ABS_ADDR)
		PTR_FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(dst_r, TMP_REG1)));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG1));
	return jump;
}
4588
4589
/* Patch a previously emitted movw/movt pair (4 half-words) to load
   new_target, updating W^X protection and flushing the icache. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
	modify_imm32_const(inst, new_target);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	/* Flush using the executable view of the patched instructions. */
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
4600
4601
/* Patch a constant emitted by sljit_emit_const. Byte constants rewrite the
   single MOV/MVN immediate in place; larger ones delegate to
   sljit_set_jump_addr which patches the full movw/movt pair. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_u16 *inst;

	if (GET_OPCODE(op) != SLJIT_MOV_U8) {
		sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
		return;
	}

	inst = (sljit_u16*)addr;
	SLJIT_ASSERT(inst[0] == (MOV_WI >> 16) || inst[0] == (MVN_WI >> 16));

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);

	/* Bit 8 set: value must be stored inverted through MVN. */
	if ((new_constant & 0x100) != 0) {
		inst[0] = (sljit_u16)(MVN_WI >> 16);
		new_constant = ~new_constant;
	} else
		inst[0] = (sljit_u16)(MOV_WI >> 16);

	/* Replace the 8-bit immediate, keep the destination register field. */
	inst[1] = (sljit_u16)((new_constant & 0xff) | (inst[1] & 0xf00));
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst + 1, inst + 2);
}
4626
4627