Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_T2_32.c
9913 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28
{
29
#ifdef __SOFTFP__
30
return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31
#else
32
return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33
#endif
34
}
35
36
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Maps sljit register indices to ARM core register numbers.
   Index 0 is unused; the last entries cover the temporaries above
   (TMP_PC maps to r15). NOTE(review): assumes index 13 is SLJIT_SP
   mapping to r13 — confirm against SLJIT_NUMBER_OF_REGISTERS. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Maps sljit float register indices to VFP register numbers. The table
   has two mirrored halves: the second half names the same registers but
   is used together with freg_ebit_map below. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra low bit of the register number, merged into the encodings by
   VM4/VD4/VN4: zero for the first half of freg_map, one for the
   mirrored second half (selects the odd single precision register). */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};
67
68
/* Moves a 'bits' wide field of 'src' from bit position 'from' to bit
   position 'to', masking away everything else. */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to))

/* Two's complement negation returned as an unsigned value. */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings. */
/* 3-bit register fields at their respective bit positions. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
#define IMM3(imm) ((sljit_ins)imm << 6)
#define IMM8(imm) ((sljit_ins)imm)

/* Thumb16 helpers. */
/* 4-bit register pair encoding: rd is split into a low 3-bit field and
   a high bit at position 7. */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* Many Thumb16 forms only accept r0-r7. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
/* 4-bit register fields at their respective bit positions. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* VFP register fields: the main number plus the extra low bit (see
   freg_ebit_map) at its encoding-specific position. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* 5-bit immediate split into a 3-bit and a 2-bit field. */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
/* 12-bit immediate split into 1+3+8 bit fields (Thumb32 form). */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff))
103
104
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits).
   Four hex digit values are Thumb16 (16 bit) encodings, eight digit
   values are Thumb32 (32 bit) encodings. */
#define ADCI		0xf1400000
#define ADCS		0x4140
#define ADC_W		0xeb400000
#define ADD		0x4400
#define ADDS		0x1800
#define ADDSI3		0x1c00
#define ADDSI8		0x3000
#define ADDWI		0xf2000000
#define ADD_SP		0x4485
#define ADD_SP_I	0xb000
#define ADD_W		0xeb000000
#define ADD_WI		0xf1000000
#define ANDI		0xf0000000
#define ANDS		0x4000
#define AND_W		0xea000000
#define ASRS		0x4100
#define ASRSI		0x1000
#define ASR_W		0xfa40f000
#define ASR_WI		0xea4f0020
#define BCC		0xd000
#define BICI		0xf0200000
#define BKPT		0xbe00
#define BLX		0x4780
#define BX		0x4700
#define CLZ		0xfab0f080
#define CMNI_W		0xf1100f00
#define CMP		0x4280
#define CMPI		0x2800
#define CMPI_W		0xf1b00f00
#define CMP_X		0x4500
#define CMP_W		0xebb00f00
#define DMB_SY		0xf3bf8f5f
#define EORI		0xf0800000
#define EORS		0x4040
#define EOR_W		0xea800000
#define IT		0xbf00
#define LDR		0xf8d00000
#define LDR_SP		0x9800
#define LDRD		0xe9500000
#define LDREX		0xe8500f00
#define LDREXB		0xe8d00f4f
#define LDREXH		0xe8d00f5f
#define LDRI		0xf8500800
#define LSLS		0x4080
#define LSLSI		0x0000
#define LSL_W		0xfa00f000
#define LSL_WI		0xea4f0000
#define LSRS		0x40c0
#define LSRSI		0x0800
#define LSR_W		0xfa20f000
#define LSR_WI		0xea4f0010
#define MLA		0xfb000000
#define MOV		0x4600
#define MOVS		0x0000
#define MOVSI		0x2000
#define MOVT		0xf2c00000
#define MOVW		0xf2400000
#define MOV_W		0xea4f0000
#define MOV_WI		0xf04f0000
#define MUL		0xfb00f000
#define MVNS		0x43c0
#define MVN_W		0xea6f0000
#define MVN_WI		0xf06f0000
#define NOP		0xbf00
#define ORNI		0xf0600000
#define ORRI		0xf0400000
#define ORRS		0x4300
#define ORR_W		0xea400000
#define POP		0xbc00
#define POP_W		0xe8bd0000
#define PUSH		0xb400
#define PUSH_W		0xe92d0000
#define REV		0xba00
#define REV_W		0xfa90f080
#define REV16		0xba40
#define REV16_W		0xfa90f090
#define RBIT		0xfa90f0a0
#define RORS		0x41c0
#define ROR_W		0xfa60f000
#define ROR_WI		0xea4f0030
#define RSB_WI		0xf1c00000
#define RSBSI		0x4240
#define SBCI		0xf1600000
#define SBCS		0x4180
#define SBC_W		0xeb600000
#define SDIV		0xfb90f0f0
#define SMULL		0xfb800000
#define STR_SP		0x9000
#define STRD		0xe9400000
#define STREX		0xe8400000
#define STREXB		0xe8c00f40
#define STREXH		0xe8c00f50
#define SUBS		0x1a00
#define SUBSI3		0x1e00
#define SUBSI8		0x3800
#define SUB_W		0xeba00000
#define SUBWI		0xf2a00000
#define SUB_SP_I	0xb080
#define SUB_WI		0xf1a00000
#define SXTB		0xb240
#define SXTB_W		0xfa4ff080
#define SXTH		0xb200
#define SXTH_W		0xfa0ff080
#define TST		0x4200
#define TSTI		0xf0000f00
#define TST_W		0xea000f00
#define UDIV		0xfbb0f0f0
#define UMULL		0xfba00000
#define UXTB		0xb2c0
#define UXTB_W		0xfa5ff080
#define UXTH		0xb280
#define UXTH_W		0xfa1ff080
/* VFP / NEON (Advanced SIMD) encodings. */
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VAND		0xef000110
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F32_U32	0xeeb80a40
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VDUP		0xee800b10
#define VDUP_s		0xffb00c00
#define VEOR		0xff000110
#define VLD1		0xf9200000
#define VLD1_r		0xf9a00c00
#define VLD1_s		0xf9a00000
#define VLDR_F32	0xed100a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMOV2		0xec400a10
#define VMOV_i		0xef800010
#define VMOV_s		0xee000b10
#define VMOVN		0xffb20200
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VORR		0xef200110
#define VPOP		0xecbd0b00
#define VPUSH		0xed2d0b00
#define VSHLL		0xef800a10
#define VSHR		0xef800010
#define VSRA		0xef800110
#define VST1		0xf9000000
#define VST1_s		0xf9800000
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40
#define VTBL		0xffb00800
258
259
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
260
261
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	/* Argument checking helper: nonzero when fr names a usable float
	   register (scratch, saved, or temporary). */
	if (compiler->scratches == -1)
		return 0;

	/* Fold the "second half of an f64 pair" aliases back onto the base
	   register when checking a 32 bit float operand. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < SLJIT_FR0 + compiler->real_fscratches)
		return 1;
	if (fr <= SLJIT_FS0 && fr > SLJIT_FS0 - compiler->real_fsaveds)
		return 1;
	return fr >= SLJIT_TMP_FREGISTER_BASE && fr < SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS;
}
273
274
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type)
{
	/* Argument checking helper: nonzero when vr names a usable vector
	   register for the given SIMD operation type. */
	sljit_s32 low_half = vr;

	if (compiler->scratches == -1)
		return 0;

	/* 16 byte registers occupy a register pair on this target (see
	   SLJIT_SIMD_REGS_ARE_PAIRS); check both members of the pair. */
	if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) {
		vr += vr & 0x1;
		low_half = vr - 1;
	}

	if (vr >= SLJIT_VR0 && vr < SLJIT_VR0 + compiler->vscratches)
		return 1;
	if (low_half <= SLJIT_VS0 && low_half > SLJIT_VS0 - compiler->vsaveds)
		return 1;
	return vr >= SLJIT_TMP_VREGISTER_BASE && vr < SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS;
}
290
291
#endif /* SLJIT_ARGUMENT_CHECKS */
292
293
static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
{
	/* Append a single Thumb16 (16 bit) instruction to the code buffer.
	   compiler->size counts half words. */
	sljit_u16 *target;

	SLJIT_ASSERT(!(inst & 0xffff0000));

	target = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
	FAIL_IF(!target);

	*target = (sljit_u16)inst;
	compiler->size += 1;
	return SLJIT_SUCCESS;
}
304
305
static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
{
	/* Append a Thumb32 (32 bit) instruction as two half words,
	   high half first, and account for two half words of size. */
	sljit_u16 *target = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));

	FAIL_IF(!target);
	target[0] = (sljit_u16)(inst >> 16);
	target[1] = (sljit_u16)inst;
	compiler->size += 2;
	return SLJIT_SUCCESS;
}
314
315
static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	/* Load an arbitrary 32 bit constant into dst with a MOVW (low half,
	   clears the high half) / MOVT (high half) pair. */
	sljit_uw lo = imm & 0xffff;
	sljit_uw hi = imm >> 16;

	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(lo, 12, 16, 4) | COPY_BITS(lo, 11, 26, 1) | COPY_BITS(lo, 8, 12, 3) | (lo & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(hi, 12, 16, 4) | COPY_BITS(hi, 11, 26, 1) | COPY_BITS(hi, 8, 12, 3) | (hi & 0xff));
}
322
323
/* Dst must be in bits[11-8] */
324
static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm)
325
{
326
inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
327
inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
328
inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
329
inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
330
}
331
332
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	/* Patch an existing MOVW/MOVT pair with a new constant, reusing the
	   destination register field already present in the instructions. */
	sljit_ins reg_field = inst[1] & 0x0f00;

	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && reg_field == (inst[3] & 0x0f00));
	set_imm32_const(inst, reg_field, new_imm);
}
338
339
/* Selects the shortest branch encoding that can reach the jump target
   and records the chosen patch type in jump->flags; the actual bits are
   written later by generate_jump_or_mov_addr. Returns the address of
   the last half word used, so the caller skips the unused part of the
   worst-case reservation. */
static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

	jump->addr = jump_addr;
	/* Rewritable jumps always keep the full constant load sequence. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		goto exit;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			goto exit;
		target_addr = jump->u.target;
	} else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward jump: measure from the original, pre-reduction
		   location, which gives a conservative distance. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	/* Thumb pc relative offsets are relative to instruction address + 4. */
	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 4, executable_offset);

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Size of the prefix IT instruction. */
		diff += SSIZE_OF(u16);
		/* 16 bit conditional branch (B.T1), reuses the IT slot. */
		if (diff <= 0xff && diff >= -0x100) {
			jump->flags |= PATCH_TYPE1;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr - 1;
		}
		/* 32 bit conditional branch (B.T3), also starts at the IT slot. */
		if (diff <= 0xfffff && diff >= -0x100000) {
			jump->flags |= PATCH_TYPE2;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr;
		}
		diff -= SSIZE_OF(u16);
	} else if (jump->flags & IS_BL) {
		/* Branch and link. */
		if (diff <= 0xffffff && diff >= -0x1000000) {
			jump->flags |= PATCH_TYPE5;
			return code_ptr + 1;
		}
		goto exit;
	} else if (diff <= 0x7ff && diff >= -0x800) {
		/* 16 bit unconditional branch (B.T2). */
		jump->flags |= PATCH_TYPE3;
		return code_ptr;
	}

	/* 32 bit unconditional branch (B.T4). */
	if (diff <= 0xffffff && diff >= -0x1000000) {
		jump->flags |= PATCH_TYPE4;
		return code_ptr + 1;
	}

exit:
	/* No short form: keep the full 4 half word imm32 load and move the
	   instruction currently at code_ptr[0] behind it. NOTE(review):
	   presumably the BX/BLX emitted with the jump — confirm against the
	   jump emitters (not visible in this chunk). */
	code_ptr[4] = code_ptr[0];

	if (jump->flags & IS_COND) {
		/* The IT instruction preceding the sequence moves as well. */
		code_ptr[3] = code_ptr[-1];
		jump->addr = (sljit_uw)(code_ptr - 1);
	}

	return code_ptr + 4;
}
408
409
/* Computes the size (real size minus one, in half words) needed by a
   mov_addr: 1 for the short pc relative ADDW/SUBW form (recorded as
   PATCH_TYPE6), 3 for the full MOVW/MOVT pair. */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else {
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward reference: use the original, pre-reduction location
		   for a conservative distance. */
		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	}

	/* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	/* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */

	if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) {
		jump->flags |= PATCH_TYPE6;
		return 1;
	}

	return 3;
}
437
438
/* Writes the final instruction bits of a jump or mov_addr into its
   reserved code area, according to the patch type selected earlier
   (stored in bits 4-7 of jump->flags). Type 0 means no short form was
   found: a full MOVW/MOVT constant load is emitted. */
static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_u16 *jump_inst = (sljit_u16*)jump->addr;
	sljit_sw diff;
	sljit_ins ins;

	/* Absolute target address (label or explicit target). */
	diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);

	if (SLJIT_UNLIKELY(type == 0)) {
		/* Full constant load: a mov_addr keeps the destination register
		   field found in the existing instruction, jumps use TMP_REG1. */
		ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1);
		set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff);
		return;
	}

	if (SLJIT_UNLIKELY(type == 6)) {
		/* Short mov_addr: pc relative ADDW/SUBW (ADR-like) form. */
		SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR);
		/* pc reads as address + 4, aligned down to a word boundary. */
		diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3;

		SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff);

		ins = ADDWI >> 16;
		if (diff <= 0) {
			diff = -diff;
			ins = SUBWI >> 16;
		}

		/* jump_inst[0] still holds the destination register field; it is
		   consumed into jump_inst[1] before being overwritten. */
		jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff));
		jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1));
		return;
	}

	/* Branch forms: target must have the Thumb bit set. */
	SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR));
	/* Convert to a half word offset relative to pc (address + 4). */
	diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
		return;
	case 3:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
		return;
	}

	SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000);

	/* Really complex instruction form for branches. Negate with sign bit. */
	diff ^= ((diff >> 2) & 0x600000) ^ 0x600000;

	jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1));
	jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1));

	SLJIT_ASSERT(type == 4 || type == 5);

	/* The others have a common form. */
	if (type == 4) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
}
508
509
/* Pre-pass over labels, jumps and constants (all kept in address
   order): estimates how many half words each jump can save by using a
   shorter encoding, stores that size estimate in the jump's flags
   (above JUMP_SIZE_SHIFT), and shifts all recorded addresses/sizes down
   by the accumulated saving. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		/* Visit the next label/const/jump in address order. */
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			total_size = JUMP_MAX_SIZE;

			/* Only plain label jumps can be shortened here; rewritable
			   jumps and absolute targets keep the maximum size. */
			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;
				/* Forward jump: the label size is not reduced yet, so
				   subtract the reduction applied so far; later reductions
				   can only shrink the distance further (conservative). */
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (jump->flags & IS_COND) {
					/* The IT prefix slot can be reused by the branch. */
					diff++;

					if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16)))
						total_size = 0;
					else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16)))
						total_size = 1;
					diff--;
				} else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16)))
					total_size = 1;

				if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16)))
					total_size = 2;
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 3;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				/* Short pc relative ADDW/SUBW form fits. */
				if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16)))
					total_size = 1;
			}

			size_reduce += 3 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
600
601
/* Generates the final machine code: shrinks jump reservations
   (reduce_code_size), copies the instruction stream half word by half
   word into executable memory while resolving labels, jumps and
   constants in address order, then patches all jump/mov_addr sites.
   Returns the code address with the Thumb bit (bit 0) set. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_sw addr;
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));

	reduce_code_size(compiler);

	code = (sljit_u16*)allocate_executable_memory(compiler->size * sizeof(sljit_u16), options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		/* Copy each half word; half_count tracks the source offset,
		   code_ptr the (possibly shorter) destination. */
		do {
			*code_ptr = *buf_ptr++;
			if (next_min_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);

				/* These structures are ordered by their address. */
				if (next_min_addr == next_label_size) {
					/* Labels get the executable address with the Thumb
					   bit set; size becomes the output offset. */
					label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (!(jump->flags & JUMP_MOV_ADDR)) {
						/* Skip the remaining reserved half words of the
						   jump in the source stream. */
						half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
						SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <
							((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16));
					} else {
						half_count += jump->flags >> JUMP_SIZE_SHIFT;
						addr = (sljit_sw)code_ptr;
						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
						jump->addr = (sljit_uw)addr;
					}

					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label at the very end of the code. */
	if (label && label->size == half_count) {
		label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: write the final bits of every jump/mov_addr. */
	jump = compiler->jumps;
	while (jump) {
		generate_jump_or_mov_addr(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);

	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
714
715
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	/* Reports which optional sljit features this Thumb-2 backend supports. */
	switch (feature_type) {
	case SLJIT_HAS_FPU:
	case SLJIT_HAS_F64_AS_F32_PAIR:
	case SLJIT_HAS_SIMD:
		/* FPU/SIMD support may be overridden at compile time. */
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif

	case SLJIT_SIMD_REGS_ARE_PAIRS:
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_CTZ:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_CMOV:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
	case SLJIT_HAS_MEMORY_BARRIER:
		return 1;

	default:
		break;
	}

	return 0;
}
745
746
/* --------------------------------------------------------------------- */
747
/* Core code generator functions. */
748
/* --------------------------------------------------------------------- */
749
750
#define INVALID_IMM 0x80000000
751
static sljit_uw get_imm(sljit_uw imm)
752
{
753
/* Thumb immediate form. */
754
sljit_s32 counter;
755
756
if (imm <= 0xff)
757
return imm;
758
759
if ((imm & 0xffff) == (imm >> 16)) {
760
/* Some special cases. */
761
if (!(imm & 0xff00))
762
return (1 << 12) | (imm & 0xff);
763
if (!(imm & 0xff))
764
return (2 << 12) | ((imm >> 8) & 0xff);
765
if ((imm & 0xff00) == ((imm & 0xff) << 8))
766
return (3 << 12) | (imm & 0xff);
767
}
768
769
/* Assembly optimization: count leading zeroes? */
770
counter = 8;
771
if (!(imm & 0xffff0000)) {
772
counter += 16;
773
imm <<= 16;
774
}
775
if (!(imm & 0xff000000)) {
776
counter += 8;
777
imm <<= 8;
778
}
779
if (!(imm & 0xf0000000)) {
780
counter += 4;
781
imm <<= 4;
782
}
783
if (!(imm & 0xc0000000)) {
784
counter += 2;
785
imm <<= 2;
786
}
787
if (!(imm & 0x80000000)) {
788
counter += 1;
789
imm <<= 1;
790
}
791
/* Since imm >= 128, this must be true. */
792
SLJIT_ASSERT(counter <= 31);
793
794
if (imm & 0x00ffffff)
795
return INVALID_IMM; /* Cannot be encoded. */
796
797
return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
798
}
799
800
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	/* Loads imm into dst using the shortest sequence that does not
	   affect the status flags (MOVS would destroy them). */
	sljit_uw encoded;

	if (imm >= 0x10000) {
		/* Try the single instruction modified-immediate forms first. */
		encoded = get_imm(imm);
		if (encoded != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | encoded);
		encoded = get_imm(~imm);
		if (encoded != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | encoded);
	}

	/* MOVW sets the low 16 bits and clears the high 16 bits. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	if (imm < 0x10000)
		return SLJIT_SUCCESS;

	/* MOVT fills in the high 16 bits. */
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
825
826
/* Operand description flags for emit_op_imm (stored above the opcode
   in the low 16 bits of 'flags'). */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
#define UNUSED_RETURN	0x0200000
#define REGISTER_OP	0x0400000
832
833
/* Emits a unary/binary operation. dst must be a register; arg1/arg2 are
   registers, or raw immediate values when the corresponding ARG1_IMM /
   ARG2_IMM flag is set. Immediate forms are tried first, preferring the
   shortest encodings (16 bit Thumb forms, then 32 bit immediate forms);
   if no immediate form applies, the immediate is materialized into a
   temporary register and the register-register path below is used. */
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
{
	/* dst must be register
	   arg1 must be register, imm
	   arg2 must be register, imm */
	sljit_s32 reg;
	sljit_uw imm, imm2;

	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
		/* Both are immediates, no temporaries are used. */
		flags &= ~ARG1_IMM;
		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
		arg1 = TMP_REG1;
	}

	if (flags & (ARG1_IMM | ARG2_IMM)) {
		/* reg is the register operand, imm the immediate one. */
		reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
		imm = (flags & ARG2_IMM) ? arg2 : arg1;

		switch (flags & 0xffff) {
		case SLJIT_CLZ:
		case SLJIT_CTZ:
		case SLJIT_REV:
		case SLJIT_REV_U16:
		case SLJIT_REV_S16:
		case SLJIT_REV_U32:
		case SLJIT_REV_S32:
		case SLJIT_MUL:
		case SLJIT_MULADD:
			/* No form with immediate operand. */
			break;
		case SLJIT_MOV:
			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
			return load_immediate(compiler, dst, imm);
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			/* The negated value allows encoding ADD as SUB (and vice versa). */
			imm2 = NEGATE(imm);
			if (IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (imm2 <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
					if (imm2 <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
				}
			}
			/* ADDW/SUBW (12 bit immediates) never set flags. */
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (imm2 <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
			}
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			imm = get_imm(NEGATE(imm));
			if (imm != INVALID_IMM)
				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			if (flags & ARG2_IMM) {
				/* ADC reg, imm == SBC reg, ~imm (carry semantics match). */
				imm = get_imm(~imm);
				if (imm != INVALID_IMM)
					return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			}
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			if (flags & ARG1_IMM) {
				/* imm - reg: reverse subtract. */
				if (imm == 0 && IS_2_LO_REGS(reg, dst))
					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
				imm = get_imm(imm);
				if (imm != INVALID_IMM)
					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
				break;
			}
			if (flags & UNUSED_RETURN) {
				/* Only the flags are needed: use compare instructions. */
				if (imm <= 0xff && reg_map[reg] <= 7)
					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
				imm2 = get_imm(imm);
				if (imm2 != INVALID_IMM)
					return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
				imm = get_imm(NEGATE(imm));
				if (imm != INVALID_IMM)
					return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
				break;
			}
			imm2 = NEGATE(imm);
			if (IS_2_LO_REGS(reg, dst)) {
				if (imm <= 0x7)
					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
				if (imm2 <= 0x7)
					return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
				if (reg == dst) {
					if (imm <= 0xff)
						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
					if (imm2 <= 0xff)
						return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
				}
			}
			if (!(flags & SET_FLAGS)) {
				if (imm <= 0xfff)
					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
				if (imm2 <= 0xfff)
					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
			}
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			imm = get_imm(NEGATE(imm));
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			if (flags & ARG1_IMM)
				break;
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* SBC reg, imm == ADC reg, ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_AND:
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* AND with imm == BIC with ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_OR:
			imm2 = get_imm(imm);
			if (imm2 != INVALID_IMM)
				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
			/* ORR with imm == ORN with ~imm. */
			imm = get_imm(~imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_XOR:
			if (imm == (sljit_uw)-1) {
				/* XOR with all-ones is bitwise NOT. */
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
				return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
			}
			imm = get_imm(imm);
			if (imm != INVALID_IMM)
				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
			break;
		case SLJIT_SHL:
		case SLJIT_MSHL:
		case SLJIT_LSHR:
		case SLJIT_MLSHR:
		case SLJIT_ASHR:
		case SLJIT_MASHR:
		case SLJIT_ROTL:
		case SLJIT_ROTR:
			if (flags & ARG1_IMM)
				break;
			/* Shift amounts are taken modulo 32. */
			imm &= 0x1f;

			if (imm == 0) {
				/* Zero shift: a plain move (flag-setting if requested). */
				if (!(flags & SET_FLAGS))
					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
			}

			switch (flags & 0xffff) {
			case SLJIT_SHL:
			case SLJIT_MSHL:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_LSHR:
			case SLJIT_MLSHR:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_ASHR:
			case SLJIT_MASHR:
				if (IS_2_LO_REGS(dst, reg))
					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
			case SLJIT_ROTL:
				/* Rotate left by n == rotate right by 32 - n. */
				imm = (imm ^ 0x1f) + 1;
				/* fallthrough */
			default: /* SLJIT_ROTR */
				return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
			}
		default:
			SLJIT_UNREACHABLE();
			break;
		}

		/* No immediate form matched: load the immediate into a free
		   temporary register and fall through to the register path. */
		if (flags & ARG2_IMM) {
			imm = arg2;
			arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
			FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
		} else {
			imm = arg1;
			arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
			FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
		}

		SLJIT_ASSERT(arg1 != arg2);
	}

	/* Both arguments are registers. */
	switch (flags & 0xffff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (dst == (sljit_s32)arg2)
			return SLJIT_SUCCESS;
		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
	case SLJIT_MOV_U8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_U16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
	case SLJIT_CLZ:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
	case SLJIT_CTZ:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		/* Count trailing zeros as CLZ of the bit-reversed value. */
		FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
		return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(arg1 == TMP_REG2);
		if (IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(arg1 == TMP_REG2);

		if (IS_2_LO_REGS(dst, arg2))
			FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
		else
			FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));

		if (!(flags & REGISTER_OP))
			return SLJIT_SUCCESS;

		/* Zero- or sign-extend the 16 bit result to the full register. */
		flags &= 0xffff;
		if (reg_map[dst] <= 7)
			return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
		return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
	case SLJIT_ADD:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		if (IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
		if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUB:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		if (flags & UNUSED_RETURN) {
			if (IS_2_LO_REGS(arg1, arg2))
				return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
			return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
		}
		if (IS_3_LO_REGS(dst, arg1, arg2))
			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MUL:
		compiler->status_flags_state = 0;
		if (!(flags & SET_FLAGS))
			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
		/* Overflow check: the high word of the 64 bit product must equal
		   the sign extension of the low word. */
		reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2)));
		/* cmp TMP_REG2, dst asr #31. */
		return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst));
	case SLJIT_AND:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
		if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
			return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
		return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_OR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_XOR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MSHL:
		/* Masked shift: reduce the count modulo 32 first. */
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		/* fallthrough */
	case SLJIT_SHL:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MLSHR:
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		/* fallthrough */
	case SLJIT_LSHR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MASHR:
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
		arg2 = (sljit_uw)reg;
		/* fallthrough */
	case SLJIT_ASHR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_ROTL:
		/* Rotate left by negating the count: ROR by (0 - count). */
		reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
		FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0));
		arg2 = (sljit_uw)reg;
		/* fallthrough */
	case SLJIT_ROTR:
		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
			return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
		return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
	case SLJIT_MULADD:
		compiler->status_flags_state = 0;
		return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst));
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}
1204
1205
/* Flag bits selecting the memory access kind for emit_op_mem
   and the sljit_mem16/sljit_mem16_imm5/sljit_mem32 opcode tables. */
#define STORE 0x01
#define SIGNED 0x02

#define WORD_SIZE 0x00
#define BYTE_SIZE 0x04
#define HALF_SIZE 0x08
#define PRELOAD 0x0c

#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE)))
/* True when argw fits in (imm << shift) and is aligned to 1 << shift. */
#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift))))
1215
1216
/*
  Index legend for the opcode tables below:

  1st letter:
  w = word
  b = byte
  h = half

  2nd letter:
  s = signed
  u = unsigned

  3rd letter:
  l = load
  s = store
*/
1230
1231
/* 16 bit load/store opcodes with a register offset, indexed by the
   (WORD/BYTE/HALF_SIZE | SIGNED | STORE) flag combination. */
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
1247
1248
/* 16 bit load/store opcodes with a 5 bit immediate offset, indexed like
   sljit_mem16. Zero entries mark forms that do not exist in Thumb
   (signed loads have no 16 bit immediate-offset encoding). */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
1264
1265
/* Addressing-mode selector bits for the 32 bit forms below. */
#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
/* 32 bit (.w) load/store opcodes, indexed like sljit_mem16, with an
   extra PRELOAD entry at the end. */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};
1285
1286
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
1287
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
1288
{
1289
sljit_uw imm;
1290
1291
if (value >= 0) {
1292
if (value <= 0xfff)
1293
return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
1294
imm = get_imm((sljit_uw)value);
1295
if (imm != INVALID_IMM)
1296
return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm);
1297
}
1298
else {
1299
value = -value;
1300
if (value <= 0xfff)
1301
return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
1302
imm = get_imm((sljit_uw)value);
1303
if (imm != INVALID_IMM)
1304
return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm);
1305
}
1306
return SLJIT_ERR_UNSUPPORTED;
1307
}
1308
1309
/* Emits a load or store (kind selected by flags) between register reg and
   the memory operand arg/argw. tmp_reg may be clobbered to build the
   address. Shortest encodings are preferred: 16 bit immediate/register
   forms, SP-relative 16 bit forms, then the 32 bit imm12/imm8 forms,
   falling back to a register-offset access with a materialized offset. */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_s32 other_r;
	sljit_uw imm, tmp;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: build the base in tmp_reg. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
		}

		FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
		if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
	}

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base register + (index register << shift). */
		argw &= 0x3;
		other_r = OFFS_REG(arg);
		arg &= REG_MASK;

		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
			return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
	}

	arg &= REG_MASK;

	/* Fold an out-of-range displacement into tmp_reg when the excess
	   part has a modified-immediate encoding, leaving a small offset. */
	if (argw > 0xfff) {
		imm = get_imm((sljit_uw)(argw & ~0xfff));
		if (imm != INVALID_IMM) {
			/* BUGFIX: the error return was silently discarded here. */
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm));
			arg = tmp_reg;
			argw = argw & 0xfff;
		}
	}
	else if (argw < -0xff) {
		tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
		SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
		imm = get_imm(tmp);

		if (imm != INVALID_IMM) {
			/* BUGFIX: the error return was silently discarded here. */
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm));
			arg = tmp_reg;
			argw += (sljit_sw)tmp;

			SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
		}
	}

	/* 16 bit instruction forms. */
	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
		/* tmp is the scale (log2 of access size); 3 means "no 16 bit form". */
		tmp = 3;
		if (IS_WORD_SIZE(flags)) {
			if (ALIGN_CHECK(argw, 0x1f, 2))
				tmp = 2;
		}
		else if (flags & BYTE_SIZE)
		{
			if (ALIGN_CHECK(argw, 0x1f, 0))
				tmp = 0;
		}
		else {
			SLJIT_ASSERT(flags & HALF_SIZE);
			if (ALIGN_CHECK(argw, 0x1f, 1))
				tmp = 1;
		}

		if (tmp < 3)
			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
	}
	else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
		/* SP based immediate. */
		return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
	}

	/* 32 bit immediate-offset forms: positive imm12 or negative imm8. */
	if (argw >= 0 && argw <= 0xfff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
	else if (argw < 0 && argw >= -0xff)
		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);

	SLJIT_ASSERT(arg != tmp_reg);

	/* Last resort: load the full offset and use a register-offset access. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	if (IS_3_LO_REGS(reg, arg, tmp_reg))
		return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
	return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
}
1403
1404
/* The helpers above are local to emit_op_mem. */
#undef ALIGN_CHECK
#undef IS_WORD_SIZE
1406
1407
/* --------------------------------------------------------------------- */
1408
/* Entry, exit */
1409
/* --------------------------------------------------------------------- */
1410
1411
/* Emits the function prologue: pushes the used saved registers and LR,
   saves the required VFP registers, allocates the local area, and moves
   the incoming arguments into their assigned registers (stack/softfp
   argument passing under __SOFTFP__, VFP register remapping otherwise). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_s32 size, i, tmp, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_uw offset;
	sljit_uw imm = 0;
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Queue of at most 3 VFP register moves, emitted in reverse order. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	/* Build the register mask for the push: saved registers (except the
	   kept ones) plus the saved scratch registers. */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	/* At least two registers must be set for PUSH_W and one for PUSH instruction. */
	FAIL_IF((imm & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
		: push_inst16(compiler, PUSH | (1 << 8) | imm));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Insert a padding word so the VPUSH below stays 8-byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the local area up so size + local_size is 8-byte aligned. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* offset tracks the softfp argument slot: the first four words live
	   in r0-r3, the rest on the caller's stack above the saved area. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument sequence. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				break; /* Already in the right register. */

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
			else if (reg_map[tmp] <= 7)
				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			else
				FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp: compact incoming f32/f64 arguments into consecutive
	   registers starting at SLJIT_FR0; moves are queued and emitted in
	   reverse order to avoid overwriting still-needed sources. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the second half of a previously split register. */
				*remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	while (remap_ptr > remap)
		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
#endif

#ifdef _WIN32
	/* Windows requires touching every page of a large stack allocation
	   (stack probing), one 4096 byte page at a time, so the guard page
	   mechanism can commit the stack. */
	if (local_size >= 4096) {
		imm = get_imm(4096);
		SLJIT_ASSERT(imm != INVALID_IMM);

		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));

		if (local_size < 4 * 4096) {
			/* Unrolled probes for up to three additional pages. */
			if (local_size > 2 * 4096) {
				if (local_size > 3 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			}
		} else {
			/* Probe loop: TMP_REG2 counts remaining pages. */
			FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
			FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
			FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
		}

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
		local_size &= 0xfff;
	}

	if (local_size >= 256) {
		SLJIT_ASSERT(local_size < 4096);

		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
	} else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
#else /* !_WIN32 */
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
	}
#endif /* _WIN32 */

	return SLJIT_SUCCESS;
}
1627
1628
/* Records the register/stack context without emitting any code. Computes
   the same local_size value as sljit_emit_enter would. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 float_scratches, float_saveds, frame_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	float_scratches = compiler->fscratches;
	float_saveds = compiler->fsaveds;

	/* Size of the integer register save area, including LR. */
	frame_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

	/* When float registers are saved, a padding word keeps the double
	   save area 8-byte aligned (matches sljit_emit_enter). */
	if ((float_saveds > 0 || float_scratches >= SLJIT_FIRST_SAVED_FLOAT_REG) && (frame_size & SSIZE_OF(sw)) != 0)
		frame_size += SSIZE_OF(sw);

	/* Round the local area so frame_size + locals is 8-byte aligned. */
	compiler->local_size = ((frame_size + local_size + 0x7) & ~0x7) - frame_size;
	return SLJIT_SUCCESS;
}
1653
1654
/* Adds imm to SP with the shortest available encoding.
   The TMP_REG1 register must keep its value; TMP_REG2 may be clobbered. */
static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
{
	sljit_uw encoded;

	/* 16 bit ADD sp, #imm (word-aligned, up to 127 words). */
	if (imm <= (127u << 2))
		return push_inst16(compiler, ADD_SP_I | (imm >> 2));

	/* 32 bit ADDW with a plain 12 bit immediate. */
	if (imm <= 0xfff)
		return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));

	/* 32 bit ADD with a Thumb-2 modified immediate, when encodable. */
	encoded = get_imm(imm);
	if (encoded != INVALID_IMM)
		return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | encoded);

	/* Fall back: materialize the value in TMP_REG2 and add the register. */
	FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
	return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
}
1673
1674
/* Emits the function epilogue: frees the local area, restores saved float
   and GP registers, and handles lr. frame_size selects the caller's intent:
   < 0 keeps the frame partially alive and loads lr into TMP_REG2 (used by
   tail-call style exits), 0 is a full return (pop into pc), > 0 leaves
   'frame_size' bytes of the frame allocated on exit.
   NOTE(review): the single-register fast path below mirrors upstream sljit;
   its encodings (LDRI post/pre-index bits 0x304/0x308, STR_SP | 0x800 being
   the 16-bit LDR-from-SP form) are presumed correct per the Thumb-2 ISA —
   do not reorder statements here. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Float registers were pushed after the locals, so the locals must
		   be released first. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			/* Everything was saved with a single VPUSH; undo with one VPOP. */
			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding (0 or 4 bytes) remains before the GP
		   register area. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	/* Collect the saved registers to pop; kept saveds are skipped. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Only lr needs restoring; treat it like a single saved register. */
		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register needs restoring: avoid POP entirely and use
		   a single load plus SP adjustment. */
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
				else
					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			if (reg_map[restored_reg] <= 7)
				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));

			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
	}

	/* General path: release locals then pop the whole register list. */
	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
		/* All registers are low (r0-r7): the 16-bit POP encoding fits;
		   bit 8 of the list encodes pc. */
		if (lr_dst == TMP_PC)
			reg_list |= 1u << 8;

		/* At least one register must be set for POP instruction. */
		SLJIT_ASSERT(reg_list != 0);

		FAIL_IF(push_inst16(compiler, POP | reg_list));
	} else {
		if (lr_dst != 0)
			reg_list |= (sljit_uw)1 << reg_map[lr_dst];

		/* At least two registers must be set for POP_W instruction. */
		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);

		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
	}

	if (frame_size > 0)
		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* lr was popped together with the other registers; re-reserve its slot. */
	return push_inst16(compiler, ADD_SP_I | 1);
}
1819
1820
/* Emits a return with no result value: a full epilogue that restores all
   saved registers and pops the return address directly into pc. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	/* frame_size == 0 requests a complete frame teardown. */
	return emit_stack_frame_release(compiler, 0);
}
1827
1828
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1829
sljit_s32 src, sljit_sw srcw)
1830
{
1831
CHECK_ERROR();
1832
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1833
1834
if (src & SLJIT_MEM) {
1835
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
1836
src = TMP_REG1;
1837
srcw = 0;
1838
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1839
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
1840
src = TMP_REG1;
1841
srcw = 0;
1842
}
1843
1844
FAIL_IF(emit_stack_frame_release(compiler, 1));
1845
1846
SLJIT_SKIP_CHECKS(compiler);
1847
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1848
}
1849
1850
/* --------------------------------------------------------------------- */
1851
/* Operators */
1852
/* --------------------------------------------------------------------- */
1853
1854
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
1855
1856
#ifdef __cplusplus
1857
extern "C" {
1858
#endif
1859
1860
#ifdef _WIN32
1861
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
1862
extern long long __rt_sdiv(int denominator, int numerator);
1863
#elif defined(__GNUC__)
1864
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
1865
extern int __aeabi_idivmod(int numerator, int denominator);
1866
#else
1867
#error "Software divmod functions are needed"
1868
#endif
1869
1870
#ifdef __cplusplus
1871
}
1872
#endif
1873
1874
#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1875
1876
/* Emits a zero-operand operation. Divisions use hardware UDIV/SDIV when the
   target has integer divide; otherwise the EABI/Windows runtime divmod
   helpers are called, with any live scratch registers (r1-r3) spilled
   around the call since those helpers may clobber them. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	sljit_uw saved_reg_list[3];
	sljit_uw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64-bit multiply: result low in R0, high in R1. */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Hardware divide has no remainder: rem = num - (num / den) * den. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Spill r1-r3 as needed; for DIV the remainder register (r1) is
		   also caller-visible state and must survive the helper call. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_udiv/__rt_sdiv take (denominator, numerator): swap R0/R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the spilled registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_MEMORY_BARRIER:
		return push_inst32(compiler, DMB_SY);
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
1971
1972
/* Emits a single-operand operation (moves with optional sign/zero extension,
   and unary ops such as byte-swaps). Memory operands are loaded into or
   stored from temporaries; immediates are pre-truncated to the move width. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Select the access size and pre-narrow immediates so the stored
		   value matches the extension semantics of the move. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src == SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
		else if (src & SLJIT_MEM)
			/* The load itself performs the extension. */
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		else if (FAST_IS_REG(dst))
			return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
		else
			/* reg -> mem: store directly from the source register. */
			dst_r = src;

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	}

	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
	flags = WORD_SIZE;

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
		/* REGISTER_OP selects the register-form 16-bit reverse; loads
		   already narrow the value, so memory sources can skip it for the
		   unsigned variant. */
		if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16))
			op |= REGISTER_OP;
		flags |= HALF_SIZE;
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Errors are sticky in compiler->error; checked by the next FAIL_IF /
	   final push. */
	emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	return SLJIT_SUCCESS;
}
2056
2057
/* Emits a two-operand operation. Operands are normalized first: memory
   operands are loaded into temporaries, immediates are flagged so
   emit_op_imm can pick an immediate-form encoding. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, src2_tmp_reg, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* dst == TMP_REG1 is the convention used by sljit_emit_op2u: only the
	   status flags matter, the result value is discarded. */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	if (src2 == SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Pick a temporary that does not alias src1's load below. */
		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
		emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1);
		src2w = src2_tmp_reg;
	} else
		src2w = src2;

	if (src1 == SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	} else
		src1w = src1;

	/* Errors are sticky in compiler->error; checked by the final emit. */
	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
}
2099
2100
/* Emits a two-operand operation whose result value is discarded: only the
   status flags are of interest. Delegates to sljit_emit_op2 with TMP_REG1
   as destination, which that function recognizes as "unused return". */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	/* Arguments were verified above; skip the nested check. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2110
2111
/* Emits a two-operand operation whose destination must be a register.
   Only SLJIT_MULADD is implemented here; every other opcode is accepted
   as a no-op (matching the generic fallback behavior). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));

	if (GET_OPCODE(op) == SLJIT_MULADD) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
2127
2128
/* Emits a funnel shift: dst = src1 shifted by src3, with the vacated bits
   filled from src2. Implemented as shift + complementary shift + OR; the
   src1 == src2 case degenerates to a rotate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Shifting a value into itself is a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (IS_2_LO_REGS(dst_reg, src1_reg))
			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
		else
			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));

		/* Complementary amount: 32 - src3w, computed without branching. */
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants (and the dst == src3 alias) need the count reduced
	   modulo 32 in a temporary first. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
	else
		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));

	/* Pre-shift src2 by 1 so that shifting by (count ^ 31) yields the
	   complementary (32 - count) shift even when count == 0. */
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
2184
2185
/* Emits a source-only operation: fast return (jump through lr) and cache
   prefetch hints. Unrecognized opcodes are accepted as no-ops. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to lr on this target. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No-op on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch levels map to the same PLD-style hint. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2213
2214
/* Emits a destination-only operation: capturing lr on fast entry, or
   loading the saved return address from its slot in the stack frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is lr; a register destination is a plain MOV, a memory
		   destination falls through to the store at the bottom. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Recompute the prologue layout to find the offset of the pushed
		   return address relative to the current SP. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	/* Both ops leave the result in TMP_REG2 when dst is memory. */
	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2253
2254
/* Translates an abstract sljit register to its hardware index for the
   requested register class; returns -1 for unsupported classes. A 128-bit
   SIMD register is the even-aligned double register containing the float. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	switch (type) {
	case SLJIT_GP_REGISTER:
		return reg_map[reg];
	case SLJIT_FLOAT_REGISTER:
	case SLJIT_SIMD_REG_64:
		return freg_map[reg];
	case SLJIT_SIMD_REG_128:
		/* Clear bit 0: quad registers start at even double indices. */
		return freg_map[reg] & ~0x1;
	default:
		return -1;
	}
}
2269
2270
/* Emits a raw, caller-supplied instruction: a 16-bit Thumb instruction when
   size == 2, otherwise a full 32-bit Thumb-2 instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return (size == 2)
		? push_inst16(compiler, *(sljit_u16*)instruction)
		: push_inst32(compiler, *(sljit_ins*)instruction);
}
2280
2281
/* --------------------------------------------------------------------- */
2282
/* Floating point operators */
2283
/* --------------------------------------------------------------------- */
2284
2285
#define FPU_LOAD (1 << 20)
2286
2287
/* Emits a VFP load or store (VLDR/VSTR, selected by FPU_LOAD in 'flags')
   for the memory operand (arg, argw). VLDR/VSTR only accepts a word-aligned
   8-bit offset (scaled by 4), so unsupported addresses are materialized in
   TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* base + (index << shift) has no VFP addressing mode: compute the
		   address into TMP_REG1. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	/* Word-aligned offsets in [-1020, 1020] fit the VLDR/VSTR encoding;
	   0x800000 is the U (add-offset) bit. */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
	}

	if (arg & REG_MASK) {
		/* Try base + delta in one ADD/SUB. */
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
		}

		/* Split argw: the high part goes into an ADD immediate, the low
		   0x3fc part stays in the VLDR/VSTR offset field. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}

		/* Same idea for negative offsets, using SUB and the U bit clear. */
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}
	}

	/* Last resort: build the absolute address in TMP_REG1. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
2333
2334
/* Converts a float/double to a signed 32-bit integer via VCVT, placing the
   result in a GP register or storing it to memory. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* SLJIT_32 is inverted so the bit directly selects the F64 encoding. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* The integer result lands in TMP_FREG1's single-precision slot. */
	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));

	if (FAST_IS_REG(dst))
		/* (1 << 20) selects the VFP -> ARM transfer direction of VMOV. */
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2353
2354
/* Common helper converting a word (register, memory, or immediate) to a
   float/double. 'ins' is the fully-formed VCVT opcode supplied by the
   signed/unsigned wrappers below. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* First move the integer into TMP_FREG1, whatever its source. */
	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		/* ins carries the (inverted) SLJIT_32 bit selecting the store width. */
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2377
2378
/* Converts a signed word to a float/double. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Invert SLJIT_32 so the bit selects the double-precision VCVT form. */
	sljit_ins ins = VCVT_F32_S32 | (~op & SLJIT_32);
	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2384
2385
/* Converts an unsigned word to a float/double. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Invert SLJIT_32 so the bit selects the double-precision VCVT form. */
	sljit_ins ins = VCVT_F32_U32 | (~op & SLJIT_32);
	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2391
2392
/* Compares two float operands with VCMP and copies the FP status flags to
   the ARM flags via VMRS. For UNORDERED_OR_EQUAL an extra conditional
   compare collapses the unordered case into the Z flag. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* SLJIT_32 is inverted so the bit directly selects the F64 encoding. */
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
	FAIL_IF(push_inst32(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* IT VS; CMP r0, r0 — forces Z set when the comparison was unordered. */
	FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
	return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
}
2417
2418
/* Emits a single-operand floating point operation: move, negate, absolute
   value, and the float<->double conversions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	/* Dispatches the compare/convert sub-operations to their helpers. */
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Invert SLJIT_32 so the bit selects the F64 encodings; F64_FROM_F32
	   keeps the source precision for the load below. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
			else
				/* mem destination: store straight from the source register. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		/* The result width is the opposite of the source width. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2464
2465
/* Emits a two-operand floating point operation (add/sub/mul/div/copysign).
   Memory operands are loaded into the temporary float registers first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* SLJIT_32 is inverted so the bit directly selects the F64 encoding. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read src2's sign-carrying word into TMP_REG1 ((1 << 7) selects
		   the upper half of a double), compute |src1|, then negate the
		   result when the sign word is negative (IT MI). */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2515
2516
/* Loads a 32-bit float constant into a float register. When NEON is
   available and the value fits VMOV.F32's 8-bit immediate form (sign,
   4-bit restricted exponent, 4-bit mantissa), a single instruction is
   emitted; otherwise the bit pattern goes through a GP register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-pun: obtain the IEEE-754 bit pattern without aliasing UB. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable iff the low 19 mantissa bits are zero... */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* ...and the exponent is in the narrow range VMOV.F32 supports. */
		if (exp == 0x20 || exp == 0x1f) {
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
}
2547
2548
/* Loads a 64-bit double constant into a float register. Uses the VMOV.F64
   8-bit immediate form when NEON is available and the value qualifies;
   otherwise transfers both words through GP registers with VMOV2,
   reusing one register when the halves are identical. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-pun: obtain the IEEE-754 bit pattern without aliasing UB.
	   imm[0] is the low word, imm[1] the sign/exponent word — assumes a
	   little-endian double layout. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable iff all mantissa bits below bit 48 are zero... */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		/* ...and the exponent is in the narrow range VMOV.F64 supports. */
		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	if (u.imm[0] == u.imm[1])
		/* Both halves equal: one GP register feeds both VMOV2 slots. */
		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
}
2583
2584
/* Raw bit copy between a float register and core register(s), without
   conversion. A register pair is used for the two words of a double;
   otherwise a single-word VMOV moves the low (or only) word. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		/* Two core registers <-> one double register. */
		inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg);
	} else {
		inst = VMOV | VN4(freg) | RT4(reg);

		/* For doubles with a single core register, bit 7 selects the
		   high half of the fp register. */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	/* Bit 20 flips the transfer direction: fp -> core. */
	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst32(compiler, inst);
}
2610
2611
/* --------------------------------------------------------------------- */
2612
/* Conditional instructions */
2613
/* --------------------------------------------------------------------- */
2614
2615
/* Maps an sljit condition 'type' to the 4 bit ARM condition code used by
   IT blocks and conditional branches. For SLJIT_(NOT_)CARRY and
   SLJIT_(NOT_)OVERFLOW the mapping depends on whether the flags were
   produced by an ADD or a SUB (recorded in status_flags_state), because
   the carry flag has inverted meaning for subtraction on ARM. */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x0; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x1; /* NE */

	case SLJIT_CARRY:
		/* After an ADD, carry-set is CS. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x2; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x3; /* CC/LO */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x3; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x2; /* CS/HS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x8; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x9; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd; /* LE */

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow tracking uses Z (NE). */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x1; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x6; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x0; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x7; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x4; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x5; /* PL */

	default: /* SLJIT_JUMP */
		SLJIT_UNREACHABLE();
		return 0xe; /* AL */
	}
}
2703
2704
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2705
{
2706
struct sljit_label *label;
2707
2708
CHECK_ERROR_PTR();
2709
CHECK_PTR(check_sljit_emit_label(compiler));
2710
2711
if (compiler->last_label && compiler->last_label->size == compiler->size)
2712
return compiler->last_label;
2713
2714
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2715
PTR_FAIL_IF(!label);
2716
set_label(label, compiler);
2717
return label;
2718
}
2719
2720
/* Emits a (possibly conditional) jump or call whose target is patched
   later. Conditional jumps are realized as an IT instruction guarding an
   indirect BX/BLX through TMP_REG1; space for the worst-case constant
   load is reserved in compiler->size. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		/* Condition code is stored in the flags for later patching. */
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return jump;
}
2752
2753
#ifdef __SOFTFP__
2754
2755
/* Softfp ABI: lays out call arguments into r0-r3 and the stack according
   to the AAPCS. The first pass computes each argument's offset (doubles
   are 8-byte aligned); the second pass moves arguments into place in
   reverse order so later moves do not clobber earlier sources. On return,
   *extra_space holds the stack adjustment the caller must undo, and *src
   may be redirected to TMP_REG1 if the call target register was needed
   for an argument. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	/* Assume the call target is not in r0-r3 until proven otherwise. */
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: compute argument offsets and collect types (reversed
	   into 'types' so the second pass can walk them backwards). */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles must be 8-byte aligned per AAPCS. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Allocate stack for arguments beyond r0-r3 (8-byte aligned). */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Save the call target before its register is overwritten. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The target register becomes the moved argument's
						   destination register, track it. */
						*src = (sljit_s32)(1 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2879
2880
/* Softfp ABI: after the call returns, moves a float return value from the
   core registers (r0/r1) back into fp register 0. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
		FAIL_IF(push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32)
		FAIL_IF(push_inst32(compiler, VMOV | (0 << 16) | (0 << 12)));

	return SLJIT_SUCCESS;
}
2889
2890
#else
2891
2892
/* Hardfp ABI: compacts float arguments into consecutive fp argument
   registers. 'offset' walks sljit's register assignment while
   'new_offset' tracks the ABI slot; 'f32_offset' remembers a half-used
   double slot so a following f32 can back-fill its upper single. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Back-fill the pending upper single slot. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2928
2929
#endif
2930
2931
/* Emits a function call with ABI-conforming argument setup. Under softfp,
   arguments may require extra stack space which is released after the
   call; tail calls (SLJIT_CALL_RETURN) release the frame before jumping. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Frame already released: a tail call degenerates to a jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Restore lr (saved above the argument area) before the
			   stack adjustment is undone. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
2987
2988
/* Emits an indirect jump or fast call to a register, memory operand, or
   immediate address. Immediate targets are recorded as patchable jumps
   with worst-case constant space reserved. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* Plain jumps can load straight into pc; calls need BLX. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

	jump->addr = compiler->size;
	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
3020
3021
/* Indirect function call: the target comes from a register or memory.
   Mirrors sljit_emit_call but protects the target register from being
   clobbered by argument setup (moving it to TMP_REG1 when it lives in a
   saved register that the frame release would restore). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call restores saved registers; keep the target safe. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved lr before releasing the argument area. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN)
				return push_inst16(compiler, BX | RN3(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3083
3084
#ifdef __SOFTFP__
3085
3086
/* Softfp ABI: moves a float return value into the ABI return location
   before returning. With SLJIT_ENTER_REG_ARG, fp registers are used
   directly; otherwise r0 (f32) or r0:r1 (f64) receive the raw bits. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		/* Already in the return register. */
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		if (op & SLJIT_32)
			return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
		return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory source: load the words directly into the core registers. */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3108
3109
#endif /* __SOFTFP__ */
3110
3111
/* Materializes the condition 'type' as 0/1 into 'dst' (for mov-type ops)
   or combines it with 'dst' (for and/or/xor). Uses IT blocks so both the
   1 and 0 assignments are emitted and exactly one executes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* IT with a then/else pair: first instruction on cc true,
		   second on cc false. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* AND needs both branches: and-with-1 (keep) or and-with-0 (clear). */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 is a no-op, so only the cc-true case is emitted. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
3163
3164
/* Conditional select: dst_reg = cc(type) ? src1 : src2_reg. Operand roles
   may be swapped (inverting the condition's low bit) to avoid extra
   moves; immediates try MOVW, then the Thumb-2 modified immediate, then
   its complement, and finally a full MOVW+MOVT pair inside an IT block. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* Swap sources (and invert the condition) when src1 already equals
	   the destination. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (src1 != SLJIT_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
	}

	tmp = (sljit_uw)src1w;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	tmp = get_imm((sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
	}

	/* Try the bitwise-inverted value with MVN. */
	tmp = get_imm(~(sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
	}

	/* Two-instruction IT block: MOVW then MOVT, both conditional. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw)src1w;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
3233
3234
/* Floating point conditional select: dst_freg = cc(type) ? src1 : src2_freg,
   implemented as an unconditional copy of src2 followed by a conditional
   VMOV of src1 inside an IT block. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* SLJIT_32 convention is inverted for the VMOV_F32 encoding below. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap sources and invert the condition instead of copying. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
}
3263
3264
/* Loads/stores a register pair. Unaligned accesses fall back to two word
   transfers; aligned pairs use LDRD/STRD, whose immediate offset is a
   multiple of 4 within +-1020, so the address is normalized into
   TMP_REG1 plus a small offset first. 'flags' bit 23 is LDRD/STRD's
   add/subtract (U) bit. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw imm, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
		/* Word-at-a-time path: normalize the address so both words fit
		   the LDR/STR immediate range [-0xff, 0xfff - sizeof(sw)]. */
		if ((mem & REG_MASK) == 0) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm == INVALID_IMM) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				memw = 0;
			} else
				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (mem & OFFS_REG_MASK) {
			/* base + (index << shift): precompute into TMP_REG1. */
			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
			memw = 0;
			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw < -0xff) {
			/* Zero value can be included in the first case. */
			if ((-memw & 0xfff) <= SSIZE_OF(sw))
				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
			else
				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);

			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
			imm = get_imm(tmp);

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw += (sljit_sw)tmp;
				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm != INVALID_IMM) {
				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}

		flags = WORD_SIZE;

		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);

		if (type & SLJIT_MEM_STORE) {
			flags |= STORE;
		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Load the second word first so the base is not clobbered. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
		}

		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
	}

	/* LDRD/STRD path; bit 23 = U (offset is added). */
	flags = 1 << 23;

	if ((mem & REG_MASK) == 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm == INVALID_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			memw = 0;
		} else {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				/* Base was rounded above the target: subtract offset. */
				memw = 0x100 - memw;
				flags = 0;
			}

			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else if (mem & OFFS_REG_MASK) {
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
		memw = 0;
		mem = SLJIT_MEM1(TMP_REG1);
	} else if (memw < 0) {
		if ((-memw & ~0x3fc) == 0) {
			/* Fits the subtract-offset form directly. */
			flags = 0;
			memw = -memw >> 2;
		} else {
			tmp = (sljit_uw)(-memw & 0x7fc);
			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw = (-memw & 0x3fc) >> 2;

				if (tmp <= 0x400)
					flags = 0;
				else
					memw = 0x100 - memw;
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}
	} else if ((memw & ~0x3fc) != 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
			memw = 0;
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else
		memw >>= 2;

	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
}
3438
3439
/* Load/store with base register update (pre- or post-indexed). Only an
   immediate offset in [-255, 255] is supported; otherwise the caller is
   told to fall back via SLJIT_ERR_UNSUPPORTED. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
		return SLJIT_ERR_UNSUPPORTED;

	/* SLJIT_MEM_SUPP only asks whether the form is supported. */
	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (type & SLJIT_MEM_STORE)
		flags |= STORE;

	/* 0x900: immediate form with writeback (W bit). */
	inst = sljit_mem32[flags] | 0x900;

	/* 0x400: P bit, pre-index unless SLJIT_MEM_POST was requested. */
	if (!(type & SLJIT_MEM_POST))
		inst |= 0x400;

	/* 0x200: U bit, add (vs. subtract) the offset. */
	if (memw >= 0)
		inst |= 0x200;
	else
		memw = -memw;

	return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
}
3496
3497
/* Rewrites (*mem, *memw) so the remaining offset fits the range
   [-0xff, max_offset], emitting address arithmetic into TMP_REG1 when
   needed. On return *mem is either the original base (offset already in
   range) or TMP_REG1. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	/* base + (index << shift) has no displacement form: fold it now. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		if (argw <= max_offset && argw >= -0xff) {
			/* Already in range; keep the original base. */
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw < 0) {
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			/* Round the base up so the residue becomes negative but
			   stays within [-0xff, max_offset]. */
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fallback: load the (rounded) absolute address into TMP_REG1. */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3557
3558
/* Unaligned floating point load/store. Aligned accesses use VLDR/VSTR
   directly; unaligned ones go through core registers word by word
   (doubles need two transfers; bit 0x80 in VMOV selects the upper half
   of the double register). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);

		/* Ensure memw and memw + 4 both stay in range. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80: move the upper word of the double register. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
}
3594
3595
/* Materializes the address of a SIMD memory operand into a single base
   register with no displacement, since the NEON load/store forms used
   here only accept a plain base. On return *mem_ptr holds the register
   to use; TMP_REG1 is clobbered whenever address arithmetic is needed. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_uw imm;
	sljit_s32 mem = *mem_ptr;

	/* Base + (index << shift) form: fold it with a shifted ADD. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	/* Absolute address: load it as an immediate. */
	if (SLJIT_UNLIKELY(!(mem & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	mem &= REG_MASK;

	/* No displacement: the base register can be used directly. */
	if (memw == 0) {
		*mem_ptr = mem;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;
	/* Try to encode |memw| as a Thumb-2 ADD/SUB modified immediate. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (imm != INVALID_IMM)
		return push_inst32(compiler, ((memw < 0) ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(mem) | imm);

	/* Fall back to a full immediate load plus a register add. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem));
}
3626
3627
/* Converts a float register index into the index of the even-numbered
   half of the quad (128 bit) register that contains it. */
static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
{
	/* Round an odd index up to the next even one. */
	if (freg & 0x1)
		freg++;

	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));

	/* Scratch registers map to odd hardware numbers at this point,
	   so step back one index to reach the even half. */
	return (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) ? freg - 1 : freg;
}
3638
3639
/* Offset (+1 for an even index, -1 for an odd one) from one half of a
   quad register pair to the other half. */
#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3640
3641
/* Moves a whole SIMD register to/from another register or memory.
   Register-to-register copies are emitted as VORR (vreg | vreg form);
   memory transfers use VLD1/VST1 with an optional alignment hint. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 64 bit (reg_size == 3) and 128 bit (reg_size == 4)
	   registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register to register copy via VORR with identical operands. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD4(srcdst) | VN4(vreg) | VM4(vreg);
		else
			ins = VD4(vreg) | VN4(srcdst) | VM4(srcdst);

		/* Bit 6 (Q) selects the quad register form. */
		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* 0x7 << 8: one doubleword; 0xa << 8: two doublewords (quad). */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(vreg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Alignment hint bits of VLD1/VST1. */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	/* 0xf in the Rm field: no address writeback. */
	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3699
3700
/* Tries to encode 'value', replicated across lanes of (1 << elem_size)
   bytes, as a NEON VMOV/VMVN modified immediate. Returns the encoded
   cmode/op and imm8 bits already shifted into instruction position, or
   ~(sljit_ins)0 when the value cannot be represented. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* Narrow the element size when the value is a repeating pattern
	   of the next smaller element. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		/* 8 bit lanes: any byte value can be encoded. */
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		/* 16 bit lanes: the byte may sit in either half; the loop runs
		   at most twice, the second pass tries the inverted (VMVN)
		   form flagged by result = (1 << 5). */
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Inverted form already tried: not encodable. */
			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		/* 32 bit lanes: the byte may occupy any of the four positions,
		   or use one of the "byte followed by ones" forms; the second
		   pass tries the bitwise-inverted (VMVN) encoding. */
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the remaining imm8 into the split a:bcdefgh immediate
	   fields of the Thumb-2 encoding and merge the cmode/op bits. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
}
3793
3794
/* Broadcasts a scalar (register, memory, or immediate) into every lane
   of a SIMD register, choosing between VMOV_i (encodable immediates),
   VLD1_r (memory), VDUP_s (float register lane), and VDUP (core reg). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Integer broadcasts only support up to 32 bit elements. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	/* Zero immediate: VMOV_i with a zero pattern covers every case. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(vreg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64 bit (double) elements: fill each half separately. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw));
			src = vreg;
		} else if (vreg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1_r loads one element and replicates it to all lanes. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= 1 << 5;

		return push_inst32(compiler, VLD1_r | ins | VD4(vreg) | RN4(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Duplicate a 32 bit lane of a float register with VDUP_s. */
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate down to the element width. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst32(compiler, VMOV_i | imm | VD4(vreg));
		}

		/* Not encodable as a modified immediate: go through a core
		   register instead. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP element size selector bits (b:e). */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst32(compiler, VDUP | ins | VN4(vreg) | RT4(src));
}
3893
3894
/* Moves a single lane of a SIMD register to/from a register, memory, or
   (for loads) an immediate. With SLJIT_SIMD_LANE_ZERO the rest of the
   register is cleared first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* Double lane from a register: copy into one half and
			   clear the other; no separate zeroing pass needed. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					vreg += SLJIT_QUAD_OTHER_HALF(vreg);

				if (srcdst != vreg)
					FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(srcdst) | VM4(srcdst)));

				vreg += SLJIT_QUAD_OTHER_HALF(vreg);
				return push_inst32(compiler, VMOV_i | VD4(vreg));
			}

			/* Zeroing would destroy the source when it aliases the
			   destination: save it into TMP_FREG2 first. */
			if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) {
				FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(vreg) | VM4(vreg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Clear the whole destination register. */
		FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(vreg)));
	}

	/* Lanes in the upper half of a quad register are reached through
	   the other doubleword register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* VLD1_s/VST1_s index_align field: size and lane index. */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(vreg) | RN4(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst32(compiler, VORR | VD4(srcdst) | VN4(vreg) | VM4(vreg));
			return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(vreg) | VM4(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* An even-mapped register can be copied with a plain
			   single precision VMOV. */
			if (freg_ebit_map[vreg] == 0) {
				if (lane_index == 1)
					vreg = SLJIT_F64_SECOND(vreg);

				return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(vreg));
			}

			/* Otherwise bounce the lane through TMP_REG1. */
			FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1)));
			return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
		}

		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
		return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(vreg) | RT4(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate down to the element width. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV_s element size selector. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		/* (1 << 20): transfer to core register; (1 << 23): zero
		   extend instead of sign extend for narrow elements. */
		ins |= (1 << 20);

		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst32(compiler, VMOV_s | ins | VN4(vreg) | RT4(srcdst));
}
4010
4011
/* Broadcasts one lane of a SIMD register into every lane of another
   (or the same) SIMD register using VDUP_s; 64 bit lanes are copied
   half-by-half with VORR. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of a quad register are addressed
		   through the other doubleword register. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64 bit lanes: plain register copies, one per half. */
		if (vreg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP_s imm4 field: lane index followed by a size marker bit. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst32(compiler, VDUP_s | ins | VD4(vreg) | VM4(src));
}
4059
4060
/* Widens each element of a SIMD operand from elem_size to elem2_size,
   using VSHLL (shift-left-long by the element width) for integers and
   VCVT for the float32 -> float64 case. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float path only supports f32 -> f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* One widening step from a full doubleword: plain VLD1;
		   otherwise load just the lanes needed with VLD1_s. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(vreg) | RN4(src) | 0xf));
		else
			FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(vreg) | RN4(src) | 0xf));
		src = vreg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? vreg : TMP_FREG2;

		/* Repeat VSHLL (shift by element width, signed or unsigned)
		   until the target element size is reached. */
		do {
			FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
				| ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst32(compiler, VORR | VD4(vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (vreg == src) {
		/* Convert the upper lane first so the lower source lane is
		   still intact for the second conversion. */
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20));
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src)));
	vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	return push_inst32(compiler, VCVT_F64_F32 | VD4(vreg) | VM4(src) | 0x20);
}
4124
4125
/* Extracts the sign bits of every element into a bitmask stored in a
   general purpose destination. Strategy: shift each element's sign bit
   to the bottom (VSHR), then fold lanes together with a sequence of
   shift-and-accumulate steps (VSRA) whose shift amounts are packed,
   one byte each, into 'imms'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 'imms' packs the VSRA shift amounts (last one in the low byte);
	   'ins' is the initial unsigned VSHR moving sign bits down. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 28) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 28) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 28) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(vreg)));

	/* Narrow a quad result down to a doubleword before accumulating. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Consume the packed shift list, one VSRA per byte. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));

	/* 16 byte elements in a quad register: the other doubleword's
	   bits are in TMP_FREG1; merge them in shifted left by 8. */
	if (reg_size == 4 && elem_size == 0) {
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4201
4202
/* Emits a two-operand SIMD operation (AND / OR / XOR / table shuffle).
   A memory second operand is first loaded into TMP_FREG2 with VLD1.
   Quad-register VTBL is synthesized from two doubleword VTBLs. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment;
	sljit_ins ins = 0, load_ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VORR;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VEOR;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = VTBL;
		break;
	}

	if (src2 & SLJIT_MEM) {
		if (elem_size > 3)
			elem_size = 3;

		/* 0x7 << 8: one doubleword; 0xa << 8: two doublewords. */
		load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
		alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);

		SLJIT_ASSERT(reg_size >= alignment);

		/* Alignment hint bits of VLD1. */
		if (alignment == 3)
			load_ins |= 0x10;
		else if (alignment >= 4)
			load_ins |= 0x20;

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst32(compiler, load_ins | VD4(TMP_FREG2) | RN4(src2) | ((sljit_ins)elem_size) << 6 | 0xf));
		src2 = TMP_FREG2;
	}

	if (reg_size == 4) {
		dst_vreg = simd_get_quad_reg_index(dst_vreg);
		src1_vreg = simd_get_quad_reg_index(src1_vreg);
		src2 = simd_get_quad_reg_index(src2);

		if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
			/* (1 << 8): VTBL with a two-register table. Compute into
			   TMP_FREG2 when dst aliases src1 to keep the table
			   intact for the second half. */
			ins |= (sljit_ins)1 << 8;

			FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN4(src1_vreg) | VM4(src2)));
			src2 += SLJIT_QUAD_OTHER_HALF(src2);
			FAIL_IF(push_inst32(compiler, ins | VD4(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN4(src1_vreg) | VM4(src2)));

			if (dst_vreg == src1_vreg)
				return push_inst32(compiler, VORR | VD4(dst_vreg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
			return SLJIT_SUCCESS;
		}

		/* Q bit for the bitwise operations. */
		ins |= (sljit_ins)1 << 6;
	}

	return push_inst32(compiler, ins | VD4(dst_vreg) | VN4(src1_vreg) | VM4(src2));
}
4279
4280
#undef FPU_LOAD
4281
4282
/* Emits the load-exclusive half of an atomic read-modify-write:
   LDREXB / LDREXH / LDREX depending on the operand size. Only the
   LL/SC style is available; CAS and sign-extending loads are rejected
   with SLJIT_ERR_UNSUPPORTED. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_ins ldrex_ins;
	sljit_s32 opcode;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	/* Only load-exclusive/store-exclusive is implemented. */
	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	opcode = GET_OPCODE(op);

	/* No sign-extending exclusive loads exist. */
	if (opcode == SLJIT_MOV_S8 || opcode == SLJIT_MOV_S16 || opcode == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (opcode == SLJIT_MOV_U8)
		ldrex_ins = LDREXB;
	else if (opcode == SLJIT_MOV_U16)
		ldrex_ins = LDREXH;
	else
		ldrex_ins = LDREX;

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst32(compiler, ldrex_ins | RN4(mem_reg) | RT4(dst_reg));
}
4316
4317
/* Emits the store-exclusive half of an atomic read-modify-write:
   STREXB / STREXH / STREX with the success flag written to TMP_REG1.
   With SLJIT_SET_ATOMIC_STORED a compare against zero of TMP_REG1 is
   appended so the caller can branch on success. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins;

	/* temp_reg == mem_reg is undefined so use another temp register */
	SLJIT_UNUSED_ARG(temp_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	/* Only load-exclusive/store-exclusive is implemented. */
	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
	case SLJIT_MOV_S16:
	case SLJIT_MOV_S32:
		/* Sign-extending variants do not exist. */
		return SLJIT_ERR_UNSUPPORTED;

	case SLJIT_MOV_U8:
		ins = STREXB | RM4(TMP_REG1);
		break;
	case SLJIT_MOV_U16:
		ins = STREXH | RM4(TMP_REG1);
		break;
	default:
		/* Note: STREX places the status register in a different
		   field (Rd) than the byte/halfword forms (Rm). */
		ins = STREX | RD4(TMP_REG1);
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
	if (op & SLJIT_SET_ATOMIC_STORED)
		return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));

	return SLJIT_SUCCESS;
}
4359
4360
/* Emits a patchable 32 bit constant (a MOVW/MOVT style immediate pair,
   see emit_imm32_const) whose value can be changed later through
   sljit_set_const. Returns the constant record, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));

	/* Spill to memory when the destination is not a register. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return const_;
}
4380
4381
/* Emits a placeholder that will receive a code address during code
   generation (used for computed jumps / address constants). Returns
   the jump record, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* One halfword recording the destination register, plus room for
	   the remaining halfwords of the address-loading sequence, which
	   is filled in when the final addresses are known. */
	PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r)));
	compiler->size += 3;

	/* Spill to memory when the destination is not a register. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return jump;
}
4402
4403
/* Patches the 32 bit immediate of an already generated jump/constant
   sequence at 'addr' (four halfwords) to 'new_target', handling W^X
   protection toggling and instruction cache flushing. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	/* Make the code writable, rewrite the immediate, then restore. */
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
	modify_imm32_const(inst, new_target);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	/* Flush through the executable mapping of the same memory. */
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
4414
4415
/* Rewrites the value of a constant created by sljit_emit_const; the
   encoding is identical to a jump target, so delegate to it. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
4419
4420