Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_32.c
22129 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#ifdef __SOFTFP__
28
#define ARM_ABI_INFO " ABI:softfp"
29
#else
30
#define ARM_ABI_INFO " ABI:hardfp"
31
#endif
32
33
/* Returns a static, human-readable name of the target, built at compile time
   from the configured architecture plus CPU info and float ABI suffixes. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43
44
/* Length of an instruction word. */
45
typedef sljit_u32 sljit_ins;
46
47
/* Last register + 1. */
48
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
49
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
50
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
51
52
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
53
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
54
55
/* In ARM instruction words.
56
Cache lines are usually 32 byte aligned. */
57
#define CONST_POOL_ALIGNMENT 8
58
#define CONST_POOL_EMPTY 0xffffffff
59
60
#define ALIGN_INSTRUCTION(ptr) \
61
(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
62
#define MAX_DIFFERENCE(max_diff) \
63
(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))
64
65
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
66
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
67
0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
68
};
69
70
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
71
0,
72
0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
73
7, 6,
74
0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
75
7, 6
76
};
77
78
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
79
0,
80
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81
0, 0,
82
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83
1, 1
84
};
85
86
#define RM(rm) ((sljit_ins)reg_map[rm])
87
#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
88
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
89
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)
90
91
#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
92
#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
93
#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94
95
/* --------------------------------------------------------------------- */
96
/* Instruction forms */
97
/* --------------------------------------------------------------------- */
98
99
/* The instruction includes the AL condition.
100
INST_NAME - CONDITIONAL remove this flag. */
101
#define COND_MASK 0xf0000000
102
#define CONDITIONAL 0xe0000000
103
#define PUSH_POOL 0xff000000
104
105
#define ADC 0xe0a00000
106
#define ADD 0xe0800000
107
#define AND 0xe0000000
108
#define B 0xea000000
109
#define BIC 0xe1c00000
110
#define BKPT 0xe1200070
111
#define BL 0xeb000000
112
#define BLX 0xe12fff30
113
#define BX 0xe12fff10
114
#define CLZ 0xe16f0f10
115
#define CMN 0xe1600000
116
#define CMP 0xe1400000
117
#define DMB_SY 0xf57ff05f
118
#define EOR 0xe0200000
119
#define LDR 0xe5100000
120
#define LDR_POST 0xe4100000
121
#define LDREX 0xe1900f9f
122
#define LDREXB 0xe1d00f9f
123
#define LDREXH 0xe1f00f9f
124
#define MLA 0xe0200090
125
#define MOV 0xe1a00000
126
#define MUL 0xe0000090
127
#define MVN 0xe1e00000
128
#define NOP 0xe1a00000
129
#define ORR 0xe1800000
130
#define PUSH 0xe92d0000
131
#define POP 0xe8bd0000
132
#define REV 0xe6bf0f30
133
#define REV16 0xe6bf0fb0
134
#define RSB 0xe0600000
135
#define RSC 0xe0e00000
136
#define SBC 0xe0c00000
137
#define SMULL 0xe0c00090
138
#define STR 0xe5000000
139
#define STREX 0xe1800f90
140
#define STREXB 0xe1c00f90
141
#define STREXH 0xe1e00f90
142
#define SUB 0xe0400000
143
#define SXTB 0xe6af0070
144
#define SXTH 0xe6bf0070
145
#define TST 0xe1000000
146
#define UMULL 0xe0800090
147
#define UXTB 0xe6ef0070
148
#define UXTH 0xe6ff0070
149
#define VABS_F32 0xeeb00ac0
150
#define VADD_F32 0xee300a00
151
#define VAND 0xf2000110
152
#define VCMP_F32 0xeeb40a40
153
#define VCVT_F32_S32 0xeeb80ac0
154
#define VCVT_F32_U32 0xeeb80a40
155
#define VCVT_F64_F32 0xeeb70ac0
156
#define VCVT_S32_F32 0xeebd0ac0
157
#define VDIV_F32 0xee800a00
158
#define VDUP 0xee800b10
159
#define VDUP_s 0xf3b00c00
160
#define VEOR 0xf3000110
161
#define VLD1 0xf4200000
162
#define VLD1_r 0xf4a00c00
163
#define VLD1_s 0xf4a00000
164
#define VLDR_F32 0xed100a00
165
#define VMOV_F32 0xeeb00a40
166
#define VMOV 0xee000a10
167
#define VMOV2 0xec400a10
168
#define VMOV_i 0xf2800010
169
#define VMOV_s 0xee000b10
170
#define VMOVN 0xf3b20200
171
#define VMRS 0xeef1fa10
172
#define VMUL_F32 0xee200a00
173
#define VNEG_F32 0xeeb10a40
174
#define VORR 0xf2200110
175
#define VPOP 0xecbd0b00
176
#define VPUSH 0xed2d0b00
177
#define VSHLL 0xf2800a10
178
#define VSHR 0xf2800010
179
#define VSRA 0xf2800110
180
#define VST1 0xf4000000
181
#define VST1_s 0xf4800000
182
#define VSTR_F32 0xed000a00
183
#define VSUB_F32 0xee300a40
184
#define VTBL 0xf3b00800
185
186
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
187
/* Arm v7 specific instructions. */
188
#define MOVT 0xe3400000
189
#define MOVW 0xe3000000
190
#define RBIT 0xe6ff0f30
191
#endif
192
193
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
194
195
/* Argument-check helper: reports whether 'fr' denotes a valid float register
   (scratch, saved, or temporary) for the current compiler state.
   'is_32' maps the upper half of a float pair back to its base register. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	/* Register layout is unknown until sljit_emit_enter has run. */
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	/* Scratch float registers. */
	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches))
		return 1;

	/* Saved float registers (allocated downward from SLJIT_FS0). */
	if (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0)
		return 1;

	/* Temporary float registers. */
	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
207
208
/* Argument-check helper: reports whether 'vr' denotes a valid vector register
   for the current compiler state. For 4-byte (quad-pair) register sizes the
   register index is rounded up to even and the low half is checked as well. */
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type)
{
	sljit_s32 low_half = vr;

	/* Register layout is unknown until sljit_emit_enter has run. */
	if (compiler->scratches == -1)
		return 0;

	if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) {
		/* Round up to the even (high) register of the pair. */
		vr += (vr & 0x1);
		low_half = vr - 1;
	}

	/* Scratch vector registers. */
	if (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches))
		return 1;

	/* Saved vector registers (the whole pair must lie in the saved range). */
	if (low_half > (SLJIT_VS0 - compiler->vsaveds) && low_half <= SLJIT_VS0)
		return 1;

	/* Temporary vector registers. */
	return (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS));
}
224
225
#endif /* SLJIT_ARGUMENT_CHECKS */
226
227
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
228
229
/* Flushes the pending constant pool into the instruction stream (ARMv6 path).
   Emits a PUSH_POOL marker word, alignment padding, then every pending literal,
   incrementing compiler->size for each emitted word. Resets the pool state.
   Returns SLJIT_SUCCESS or an allocation failure from ensure_buf. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_ins* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point the address after the constant pool.
	   Move it past the marker, worst-case padding and the literals. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);

	/* Marker word: PUSH_POOL (0xff000000) tag plus the literal count,
	   consumed later by the second code-generation pass. */
	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Zero padding so the pool can be aligned to CONST_POOL_ALIGNMENT. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy every pending literal into the stream. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}

	/* Pool is empty again; no instruction references it any more. */
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
266
267
/* Appends a single instruction word to the code buffer (ARMv6 path),
   flushing the constant pool first when the oldest pending literal would
   otherwise drift out of the 4092-byte pc-relative load range. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins *slot;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));

	slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!slot);
	*slot = inst;
	compiler->size++;
	return SLJIT_SUCCESS;
}
280
281
/* Appends an instruction that references a constant-pool literal (ARMv6 path).
   Reuses an existing non-unique pool entry holding the same value when
   possible, otherwise allocates a new slot (flushing the pool if full).
   The pool slot index is stored in the low 12 bits of the instruction and is
   rewritten to a real pc-relative offset by patch_pc_relative_loads. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;	/* CPOOL_SIZE == "no entry found yet". */
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search the pending pool for a shareable entry with the same value.
		   Unique entries (future consts) must not be shared. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool is full: flush it and start a fresh one with this literal. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The low 12 bits must be free to hold the pool index. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember the position of the first instruction referencing the pool,
	   so range overflow can be detected later. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
330
331
/* Appends an instruction referencing a literal that must get its own
   (never shared) constant-pool slot — used for patchable constants.
   ARMv6 path; flushes the pool first when it is full or out of range. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins *slot;
	sljit_uw index;

	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	/* Low 12 bits carry the pool index until the second pass rewrites them. */
	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);

	slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!slot);
	compiler->size++;

	index = compiler->cpool_fill;
	*slot = inst | index;
	compiler->cpool[index] = literal;
	compiler->cpool_unique[index] = 1;
	compiler->cpool_fill = index + 1;

	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
351
352
/* Makes room for at least two consecutive instructions (it does not matter
   whether the first one carries a literal) by pre-flushing the constant pool
   if it would otherwise overflow its range. Paired with emit_blx. */
static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
{
	if (compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4088))
		return SLJIT_SUCCESS;

	return push_cpool(compiler);
}
359
360
/* Emits BLX to TMP_REG1. prepare_blx must have run first so no pool flush can
   be inserted here; the jump-patching code relies on this instruction sitting
   immediately after its predecessor. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* BLX through lr (r14) would clobber the return address it stores. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
368
369
/* Second-pass helper (ARMv6): rewrites the 12-bit pool-index placeholders of
   pc-relative loads in [last_pc_patch, code_ptr) into real offsets relative
   to the pool at 'const_pool', compacting the pool by dropping entries no
   instruction references. Returns the number of literals actually used. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e4f0000) == 0x040f0000) {
			diff = (sljit_uw)(const_pool - last_pc_patch);
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				/* First reference: assign the next compacted slot. */
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			/* diff is in instruction words; pc reads 2 words ahead of the
			   load, hence the -2 below. */
			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Slot 0 directly behind pc: clear the U (add) bit (1 << 23)
				   and use a small negative offset instead. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
413
414
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
	/* Next node of the singly linked patch list. */
	struct future_patch* next;
	/* Constant pool index whose original value was displaced. */
	sljit_s32 index;
	/* The displaced value, to be restored when the index is reached. */
	sljit_s32 value;
};
420
421
/* Second-pass helper (ARMv6): places the literal *buf_ptr into its compacted
   pool slot, using the index map built by patch_pc_relative_loads. When a slot
   still holds a value that belongs to a later index, that value is saved in a
   future_patch node first. Returns SLJIT_SUCCESS or SLJIT_ERR_ALLOC_FAILED
   (after releasing the whole patch list). */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* The compacted index may have been saved in the patch list;
		   search it, unlink and free the node if found. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* Negative value means the pool entry is unused (see
	   patch_pc_relative_loads); nothing to store then. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* The target slot still holds data for a future index:
			   stash it in a new patch node before overwriting. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Allocation failed: release the entire patch list. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
473
474
#else
475
476
/* Appends a single instruction word to the code buffer (ARMv7 path;
   no constant pool handling is needed here). */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins *slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));

	FAIL_IF(!slot);
	*slot = inst;
	compiler->size++;
	return SLJIT_SUCCESS;
}
486
487
/* Loads a 32-bit immediate into 'reg' with a MOVW/MOVT pair (ARMv7 path).
   MOVW sets bits [15:0], MOVT bits [31:16]; each instruction encodes its
   16-bit payload as imm4 (inst bits 19:16) : imm12 (inst bits 11:0).
   All shifting is done on an unsigned copy of 'imm': the original
   '(imm << 4)' left-shifted a signed sljit_sw, which is undefined behavior
   for negative immediates (C11 6.5.7), and '(imm >> 12)' relied on an
   implementation-defined signed right shift. The emitted encodings are
   bit-identical for every input value. */
static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u32 uimm = (sljit_u32)imm;

	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((uimm << 4) & 0xf0000) | (uimm & 0xfff)));
	return push_inst(compiler, MOVT | RD(reg) | ((uimm >> 12) & 0xf0000) | ((uimm >> 16) & 0xfff));
}
492
493
#endif
494
495
/* Second-pass helper: decides whether a jump can be shortened to a single
   B/BL instruction. Returns 1 when one instruction word was saved (the caller
   then backs code_ptr up by one), 0 otherwise. On ARMv6 the plain-branch case
   rewrites in place without saving a word, hence no 'return 1' there. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	jump->addr = jump_addr;
#endif

	/* Rewritable jumps must keep their long (patchable) form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* On ARMv6 a BL jump occupies two words; point at the first one. */
	if (jump->flags & IS_BL)
		code_ptr--;
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Backward-pointing sizes mean the label was not reached yet;
		   use the jump's original (uncompacted) position instead. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	/* +8: ARM pc reads two instructions ahead. */
	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 8, executable_offset);

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (jump->flags & IS_BL) {
		/* +-32MB signed 24-bit branch range. */
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	} else if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
551
552
/* Redirects an already-generated (rewritable) jump to 'new_addr'.
   ARMv6: jump_ptr points at a two-word descriptor — [0] the address of the
   load instruction, [1] the original 'mov pc'/'ldr' word; the jump becomes a
   direct B/BL when the target is in range, otherwise the constant-pool slot
   is rewritten. ARMv7: patches the MOVW/MOVT pair in place.
   'flush_cache' selects whether WX flags are toggled and icache is flushed
   (false while the buffer is still being generated). */
static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* The destination register distinguishes a call (bl) from a jump. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	/* Word offset from pc (which reads 2 instructions ahead). */
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			/* BL replaces both the load and the following blx word. */
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* If a previous call shortened this jump to B/BL, restore the
		   original load form first. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		/* Store the new target into the constant-pool slot. */
		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the Rd field (bits 15:12); re-encode the 16-bit halves as
	   imm4:imm12 (see emit_imm). */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
646
647
static sljit_uw get_imm(sljit_uw imm);
648
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
649
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
650
651
/* Rewrites a patchable constant (created via a unique literal) to
   'new_constant'. ARMv6: tries MOV/MVN with an encodable immediate first,
   otherwise restores the original ldr and updates the constant-pool slot.
   ARMv7: patches the MOVW/MOVT pair in place. 'flush_cache' as in
   set_jump_addr. */
static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Prefer a single MOV when the value fits an ARM immediate. */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3a00000 = MOV Rd, #imm; keep the Rd field of the load. */
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Otherwise try MVN with the complemented value. */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3e00000 = MVN Rd, #imm. */
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Fall back to the pc-relative load: locate the pool slot. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the ldr if a previous call replaced it with MOV/MVN. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the Rd field (bits 15:12); re-encode the halves as imm4:imm12. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
742
743
/* Second-pass helper for JUMP_MOV_ADDR: decides how many extra instruction
   words a "load this address into a register" sequence needs beyond the one
   already emitted. Returns 0 when the target is close enough for a single
   pc-relative form (PATCH_B is set), otherwise 1 on ARMv7 (MOVW+MOVT pair)
   and 0 on ARMv6 (the literal goes into the constant pool instead). */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else {
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Label not reached yet: use the jump's uncompacted position. */
		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	}

	/* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	/* 0x3fc: the 8-bit, word-scaled immediate range of the short form. */
	if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) {
		jump->flags |= PATCH_B;
		return 0;
	}

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return 0;
#else /* !SLJIT_CONFIG_ARM_V6 */
	return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
}
773
774
/* Handles an aligned label by rounding the current code pointer down to the
   alignment boundary stored in ext_label->data (a mask of low bits). */
static SLJIT_INLINE sljit_ins *process_extended_label(sljit_ins *code_ptr, struct sljit_extended_label *ext_label)
{
	sljit_uw aligned_addr;

	SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);

	aligned_addr = (sljit_uw)code_ptr & ~(ext_label->data);
	return (sljit_ins*)aligned_addr;
}
779
780
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
781
782
/* First shrinking pass (ARMv7 only): walks labels, constants and jumps in
   address order, computing for each jump how many of its reserved worst-case
   instruction words can be dropped, and shifting every later address by the
   accumulated reduction. Stores each jump's final size (minus one, in
   instruction units) in its flags above JUMP_SIZE_SHIFT and shrinks
   compiler->size accordingly. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			/* Shift the label by everything saved so far. */
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Regular jump: assume the worst case first. */
			total_size = JUMP_MAX_SIZE - 1;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;
				/* Forward jumps: the label's size has not been reduced yet. */
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				/* Fits the +-32MB branch range: a single B/BL suffices. */
				if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins)))
					total_size = 1 - 1;
			}

			size_reduce += JUMP_MAX_SIZE - 1 - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 1;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				/* Close enough for a single pc-relative instruction. */
				if (diff <= 0xff + 2 && diff >= -0xff + 2)
					total_size = 0;
			}

			size_reduce += 1 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
862
863
#endif /* SLJIT_CONFIG_ARM_V7 */
864
865
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
866
{
867
struct sljit_memory_fragment *buf;
868
sljit_ins *code;
869
sljit_ins *code_ptr;
870
sljit_ins *buf_ptr;
871
sljit_ins *buf_end;
872
sljit_uw word_count;
873
SLJIT_NEXT_DEFINE_TYPES;
874
sljit_sw executable_offset;
875
sljit_uw addr;
876
sljit_sw diff;
877
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
878
sljit_uw cpool_size;
879
sljit_uw cpool_skip_alignment;
880
sljit_uw cpool_current_index;
881
sljit_ins *cpool_start_address;
882
sljit_ins *last_pc_patch;
883
struct future_patch *first_patch;
884
#endif
885
886
struct sljit_label *label;
887
struct sljit_jump *jump;
888
struct sljit_const *const_;
889
890
CHECK_ERROR_PTR();
891
CHECK_PTR(check_sljit_generate_code(compiler, options));
892
893
/* Second code generation pass. */
894
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
895
compiler->size += (compiler->patches << 1);
896
if (compiler->cpool_fill > 0)
897
compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
898
#else /* !SLJIT_CONFIG_ARM_V6 */
899
reduce_code_size(compiler);
900
#endif /* SLJIT_CONFIG_ARM_V6 */
901
code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
902
PTR_FAIL_WITH_EXEC_IF(code);
903
904
reverse_buf(compiler);
905
buf = compiler->buf;
906
907
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
908
cpool_size = 0;
909
cpool_skip_alignment = 0;
910
cpool_current_index = 0;
911
cpool_start_address = NULL;
912
first_patch = NULL;
913
last_pc_patch = code;
914
#endif /* SLJIT_CONFIG_ARM_V6 */
915
916
code_ptr = code;
917
word_count = 0;
918
label = compiler->labels;
919
jump = compiler->jumps;
920
const_ = compiler->consts;
921
SLJIT_NEXT_INIT_TYPES();
922
SLJIT_GET_NEXT_MIN();
923
924
do {
925
buf_ptr = (sljit_ins*)buf->memory;
926
buf_end = buf_ptr + (buf->used_size >> 2);
927
do {
928
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
929
if (cpool_size > 0) {
930
if (cpool_skip_alignment > 0) {
931
buf_ptr++;
932
cpool_skip_alignment--;
933
} else {
934
if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
935
SLJIT_FREE_EXEC(code, exec_allocator_data);
936
compiler->error = SLJIT_ERR_ALLOC_FAILED;
937
return NULL;
938
}
939
buf_ptr++;
940
if (++cpool_current_index >= cpool_size) {
941
SLJIT_ASSERT(!first_patch);
942
cpool_size = 0;
943
}
944
}
945
} else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
946
#endif /* SLJIT_CONFIG_ARM_V6 */
947
*code_ptr = *buf_ptr++;
948
if (next_min_addr == word_count) {
949
SLJIT_ASSERT(!label || label->size >= word_count);
950
SLJIT_ASSERT(!jump || jump->addr >= word_count);
951
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
952
953
if (next_min_addr == next_label_size) {
954
if (label->u.index >= SLJIT_LABEL_ALIGNED) {
955
code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
956
*code_ptr = buf_ptr[-1];
957
}
958
959
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
960
label->size = (sljit_uw)(code_ptr - code);
961
label = label->next;
962
next_label_size = SLJIT_GET_NEXT_SIZE(label);
963
}
964
965
/* These structures are ordered by their address. */
966
if (next_min_addr == next_jump_addr) {
967
if (!(jump->flags & JUMP_MOV_ADDR)) {
968
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
969
if (detect_jump_type(jump, code_ptr, code, executable_offset))
970
code_ptr--;
971
jump->addr = (sljit_uw)code_ptr;
972
#else /* !SLJIT_CONFIG_ARM_V6 */
973
word_count += jump->flags >> JUMP_SIZE_SHIFT;
974
if (!detect_jump_type(jump, code_ptr, code, executable_offset)) {
975
code_ptr[2] = code_ptr[0];
976
addr = ((code_ptr[0] & 0xf) << 12);
977
code_ptr[0] = MOVW | addr;
978
code_ptr[1] = MOVT | addr;
979
code_ptr += 2;
980
}
981
SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins));
982
#endif /* SLJIT_CONFIG_ARM_V6 */
983
} else {
984
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
985
word_count += jump->flags >> JUMP_SIZE_SHIFT;
986
#endif /* SLJIT_CONFIG_ARM_V7 */
987
addr = (sljit_uw)code_ptr;
988
code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
989
jump->addr = addr;
990
}
991
jump = jump->next;
992
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
993
} else if (next_min_addr == next_const_addr) {
994
const_->addr = (sljit_uw)code_ptr;
995
const_ = const_->next;
996
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
997
}
998
999
SLJIT_GET_NEXT_MIN();
1000
}
1001
code_ptr++;
1002
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1003
} else {
1004
/* Fortunately, no need to shift. */
1005
cpool_size = *buf_ptr++ & ~PUSH_POOL;
1006
SLJIT_ASSERT(cpool_size > 0);
1007
cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
1008
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
1009
if (cpool_current_index > 0) {
1010
/* Unconditional branch. */
1011
*code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
1012
code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
1013
}
1014
cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
1015
cpool_current_index = 0;
1016
last_pc_patch = code_ptr;
1017
}
1018
#endif /* SLJIT_CONFIG_ARM_V6 */
1019
word_count++;
1020
} while (buf_ptr < buf_end);
1021
buf = buf->next;
1022
} while (buf);
1023
1024
if (label && label->size == word_count) {
1025
if (label->u.index >= SLJIT_LABEL_ALIGNED)
1026
code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
1027
1028
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1029
label->size = (sljit_uw)(code_ptr - code);
1030
label = label->next;
1031
}
1032
1033
SLJIT_ASSERT(!label);
1034
SLJIT_ASSERT(!jump);
1035
SLJIT_ASSERT(!const_);
1036
1037
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1038
SLJIT_ASSERT(cpool_size == 0);
1039
if (compiler->cpool_fill > 0) {
1040
cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
1041
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
1042
if (cpool_current_index > 0)
1043
code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
1044
1045
buf_ptr = compiler->cpool;
1046
buf_end = buf_ptr + compiler->cpool_fill;
1047
cpool_current_index = 0;
1048
while (buf_ptr < buf_end) {
1049
if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
1050
SLJIT_FREE_EXEC(code, exec_allocator_data);
1051
compiler->error = SLJIT_ERR_ALLOC_FAILED;
1052
return NULL;
1053
}
1054
buf_ptr++;
1055
cpool_current_index++;
1056
}
1057
SLJIT_ASSERT(!first_patch);
1058
}
1059
#endif
1060
1061
jump = compiler->jumps;
1062
while (jump) {
1063
addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
1064
buf_ptr = (sljit_ins*)jump->addr;
1065
1066
if (jump->flags & JUMP_MOV_ADDR) {
1067
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1068
SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000);
1069
#else /* !SLJIT_CONFIG_ARM_V6 */
1070
SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0);
1071
#endif /* SLJIT_CONFIG_ARM_V6 */
1072
1073
if (jump->flags & PATCH_B) {
1074
SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0);
1075
diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2;
1076
1077
SLJIT_ASSERT(diff <= 0xff && diff >= -0xff);
1078
1079
addr = ADD;
1080
if (diff < 0) {
1081
diff = -diff;
1082
addr = SUB;
1083
}
1084
1085
buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff);
1086
} else {
1087
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1088
buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
1089
#else /* !SLJIT_CONFIG_ARM_V6 */
1090
buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
1091
buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff);
1092
#endif /* SLJIT_CONFIG_ARM_V6 */
1093
}
1094
} else if (jump->flags & PATCH_B) {
1095
diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
1096
SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000);
1097
*buf_ptr |= (diff >> 2) & 0x00ffffff;
1098
} else {
1099
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1100
if (jump->flags & IS_BL)
1101
buf_ptr--;
1102
1103
if (jump->flags & SLJIT_REWRITABLE_JUMP) {
1104
jump->addr = (sljit_uw)code_ptr;
1105
code_ptr[0] = (sljit_ins)buf_ptr;
1106
code_ptr[1] = *buf_ptr;
1107
set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0);
1108
code_ptr += 2;
1109
} else {
1110
if (*buf_ptr & (1 << 23))
1111
buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1112
else
1113
buf_ptr += 1;
1114
*buf_ptr = addr;
1115
}
1116
#else /* !SLJIT_CONFIG_ARM_V6 */
1117
set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0);
1118
#endif /* SLJIT_CONFIG_ARM_V6 */
1119
}
1120
1121
jump = jump->next;
1122
}
1123
1124
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1125
const_ = compiler->consts;
1126
while (const_) {
1127
buf_ptr = (sljit_ins*)const_->addr;
1128
1129
/* Note: MVN = (MOV ^ 0x400000) */
1130
SLJIT_ASSERT((*buf_ptr & 0xfdb00000) == MOV || (*buf_ptr & 0xfd100000) == LDR);
1131
1132
if ((*buf_ptr & 0x4000000) != 0) {
1133
const_->addr = (sljit_uw)code_ptr;
1134
1135
code_ptr[0] = (sljit_ins)buf_ptr;
1136
code_ptr[1] = *buf_ptr;
1137
if (*buf_ptr & (1 << 23))
1138
buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1139
else
1140
buf_ptr += 1;
1141
/* Set the value again (can be a simple constant). */
1142
set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
1143
code_ptr += 2;
1144
}
1145
1146
const_ = const_->next;
1147
}
1148
#endif /* SLJIT_CONFIG_ARM_V6 */
1149
1150
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
1151
1152
compiler->error = SLJIT_ERR_COMPILED;
1153
compiler->executable_offset = executable_offset;
1154
compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
1155
1156
code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1157
code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1158
1159
SLJIT_CACHE_FLUSH(code, code_ptr);
1160
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1161
return code;
1162
}
1163
1164
/* Reports whether the given sljit feature is available on this target.
   Returns non-zero when available (the SLJIT_HAS_CTZ case returns 2 on
   ARMv6, meaning "emulated" — see the five-instruction sequence in
   emit_single_op's SLJIT_CTZ case), and 0 when unsupported or unknown. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
	case SLJIT_HAS_F64_AS_F32_PAIR:
#ifdef SLJIT_IS_FPU_AVAILABLE
		/* Build-time override: the embedder decides FPU availability. */
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */
	case SLJIT_HAS_SIMD:
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* No NEON support on the ARMv6 code path. */
		return 0;
#else
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Features present on every supported ARM32 configuration. */
	case SLJIT_SIMD_REGS_ARE_PAIRS:
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_CMOV:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	case SLJIT_HAS_MEMORY_BARRIER:
#endif /* SLJIT_CONFIG_ARM_V7 */
		return 1;

	case SLJIT_HAS_CTZ:
#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
		/* 2 == supported, but emulated (no RBIT on ARMv6). */
		return 2;
#else
		return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */

	default:
		return 0;
	}
}
1212
1213
/* --------------------------------------------------------------------- */
1214
/* Entry, exit */
1215
/* --------------------------------------------------------------------- */
1216
1217
/* Creates an index in data_transfer_insts array.
   The low three bits (size | SIGNED | LOAD_DATA) select one of the
   sixteen opcode templates below. */
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x01
#define HALF_SIZE 0x02
#define PRELOAD 0x03
#define SIGNED 0x04
#define LOAD_DATA 0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM 0x10
#define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM 0x40
#define ALLOW_DOUBLE_IMM 0x80

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

/* Base ARM opcode templates, indexed by the flag combination above. */
static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Builds a complete load/store instruction; bit 23 ('add') selects the
   offset direction (add/subtract). */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Splits an 8-bit immediate into the two 4-bit halves used by the
   type-2 (halfword / signed byte) transfer encoding. */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
/* Arguments are swapped. */
#define ARGS_SWAPPED 0x01
/* Inverted immediate. */
#define INV_IMM 0x02
/* Source and destination is register. */
#define REGISTER_OP 0x04
/* Unused return value. */
#define UNUSED_RETURN 0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS (1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM (1 << 25)

/* Forward declaration: the generic operand-resolving code generator
   defined later in this file. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
1291
1292
/* Emits the function prologue: pushes the used saved/scratch registers
   (plus lr), saves the needed VFP registers, allocates the 8-byte
   aligned local area, and moves incoming arguments into their sljit
   register homes.  The argument-moving code differs by ABI: under
   __SOFTFP__ all arguments arrive in r0-r3/stack, otherwise float
   arguments arrive in VFP registers and may need remapping. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Deferred VMOV.F32 instructions used to shuffle incoming float
	   arguments; emitted in reverse so moves do not clobber sources. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	/* Build the register mask for the push: non-kept saved registers
	   plus the saved-range scratch registers. */
	imm = 0;
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Odd number of pushed words: insert one padding word so the
		   VFP double pushes below stay 8-byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the whole frame up to 8 bytes; local_size keeps only the
	   part beyond the pushed registers. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* softfp: arguments arrive in r0-r3, then on the caller's stack.
	   'offset' tracks the argument's position in that layout. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument layout. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Argument is already in its target register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* hardfp: float arguments already sit in VFP registers; record the
	   moves needed to pack them into consecutive sljit FP registers. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the back-filled single-precision slot. */
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the recorded moves in reverse order so later sources are
	   consumed before they are overwritten. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
1463
1464
/* Records the frame layout (without emitting any code) so that later
   return/call emission knows the local area size.  Mirrors the frame
   size computation performed by sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 frame_size;
	sljit_s32 fsaveds;
	sljit_s32 fscratches;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;
	frame_size = GET_SAVED_REGISTERS_SIZE(ENTER_GET_REGS(scratches),
		ENTER_GET_REGS(saveds) - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

	/* Saved doubles keep the stack 8-byte aligned, so only an odd
	   number of pushed words requires one extra padding word. */
	if ((fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) && (frame_size & SSIZE_OF(sw)) != 0)
		frame_size += SSIZE_OF(sw);

	/* Store only the locals portion of the 8-byte rounded frame. */
	compiler->local_size = ((frame_size + local_size + 0x7) & ~0x7) - frame_size;
	return SLJIT_SUCCESS;
}
1489
1490
/* Adds 'imm' to sp.  Emits a single ADD when the value fits an ARM
   modified immediate; otherwise falls back to the generic emit_op path
   (which may need multiple instructions). */
static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
{
	sljit_uw encoded = get_imm(imm);

	if (encoded != 0)
		return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | encoded);

	return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
}
1499
1500
/* Emits the function epilogue: restores VFP registers, frees the local
   area, pops the saved registers, and loads lr into either pc (direct
   return), TMP_REG2 (frame_size < 0, caller branches itself), or leaves
   it on the stack (frame_size > 0, tail-call keeps part of the frame).
   NOTE(review): 'tmp' below is reused as a small dispatch code
   (0..3) selecting how the single restored register is reloaded. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Free the locals first so the VPOPs address the FP save area. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
		}

		/* Only the alignment padding word (if any) remains above the
		   integer register save area now. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	/* Collect the pushed saved registers (excluding kept ones). */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	/* Collect the pushed saved-range scratch registers. */
	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Only lr was pushed; restore it alone via the fast path. */
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* Fast path: at most one register to reload (no ldm needed).
		   The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					/* Post-indexed load pops the register and frees
					   the remaining frame in one instruction. */
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		/* Move sp to exactly frame_size bytes below the frame top. */
		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			/* Register still on the stack: reload from its offset. */
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
		}

		/* tmp == 3: pop with post-increment write-back. */
		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* lr stays on the stack: skip over its slot. */
	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
1635
1636
/* Emits a plain return: tears down the whole frame and loads the saved
   lr straight into pc (frame_size == 0 path of the release helper). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));
	return emit_stack_frame_release(compiler, 0);
}
1643
1644
/* Emits a "return to" (tail transfer): releases the stack frame, then
   jumps to 'src'.  The target must be moved out of any register or
   memory location that the frame release would destroy, hence the
   copies into TMP_REG1 below, performed before the release. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* Load now: the address may be sp-relative. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* Target register is restored by the frame release; copy it. */
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* frame_size == 1: release everything except the return branch. */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1665
1666
/* --------------------------------------------------------------------- */
1667
/* Operators */
1668
/* --------------------------------------------------------------------- */
1669
1670
/* Emits the machine instruction(s) for one already-resolved sljit
   operation.  'dst'/'src1'/'src2' are registers, except that src2 may
   carry SRC2_IMM with an encoded immediate.  'flags' is a combination
   of ARGS_SWAPPED/INV_IMM/REGISTER_OP/UNUSED_RETURN/SET_FLAGS.
   Shift amounts for the shift/rotate ops come from
   compiler->shift_imm (0x20 means "amount is in a register"). */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_uw dst, sljit_uw src1, sljit_uw src2)
{
	sljit_s32 reg, is_masked;
	sljit_uw shift_type;

	switch (op) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				/* INV_IMM: the immediate was encodable only inverted. */
				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
			}
			return push_inst(compiler, MOV | RD(dst) | RM(src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		/* REGISTER_OP: a real extension is required. */
		if (flags & REGISTER_OP)
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & REGISTER_OP)
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		return SLJIT_SUCCESS;

	case SLJIT_CTZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED));
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* No RBIT on ARMv6: isolate lowest set bit (x & -x), count its
		   leading zeros, then map 32 -> 32 and n -> 31 - n via a
		   conditional EOR (the EOR ^ 0xf0000000 flips its condition). */
		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2)));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
#else /* !SLJIT_CONFIG_ARM_V6 */
		/* ctz(x) == clz(bit-reverse(x)). */
		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
#endif /* SLJIT_CONFIG_ARM_V6 */

	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		return push_inst(compiler, REV | RD(dst) | RM(src2));

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
		if (!(flags & REGISTER_OP))
			return SLJIT_SUCCESS;
		/* Extend the 16-bit result to the full register. */
		return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));

		/* Result unused: CMN sets the same flags without a dest. */
		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));

		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

		/* ARGS_SWAPPED: compute src2 - src1 via RSB. */
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		compiler->status_flags_state = 0;

		if (!(flags & SET_FLAGS))
			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));

		/* Overflow check: 64-bit multiply, then compare the high word
		   with the sign-extension of the low word. */
		reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1;
		FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1)));

		/* cmp TMP_REG1, dst asr #31. */
		return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0);

	case SLJIT_AND:
		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		/* INV_IMM: and with inverted immediate becomes BIC. */
		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_XOR:
		if (flags & INV_IMM) {
			/* xor with all-ones immediate is a bitwise NOT. */
			SLJIT_ASSERT(src2 == SRC2_IMM);
			return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
		}
		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	/* Shift/rotate cases only record the ARM shift type; the shared
	   emitter after the switch builds the instruction. */
	case SLJIT_SHL:
	case SLJIT_MSHL:
		shift_type = 0;
		is_masked = op == SLJIT_MSHL;
		break;

	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		shift_type = 1;
		is_masked = op == SLJIT_MLSHR;
		break;

	case SLJIT_ASHR:
	case SLJIT_MASHR:
		shift_type = 2;
		is_masked = op == SLJIT_MASHR;
		break;

	case SLJIT_ROTL:
		/* rotl(x, n) == rotr(x, -n); negate the amount. */
		if (compiler->shift_imm == 0x20) {
			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
			src2 = TMP_REG2;
		} else
			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
		SLJIT_FALLTHROUGH

	case SLJIT_ROTR:
		shift_type = 3;
		is_masked = 0;
		break;

	case SLJIT_MULADD:
		return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1));

	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

	/* Shared shift/rotate emitter. */
	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));

	if (compiler->shift_imm != 0x20) {
		/* Immediate shift amount (0 emits a plain MOV). */
		SLJIT_ASSERT(src1 == TMP_REG1);

		if (compiler->shift_imm != 0)
			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
	}

	SLJIT_ASSERT(src1 != TMP_REG2);

	/* Masked (M*) variants: reduce the amount modulo 32 first. */
	if (is_masked) {
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
		src2 = TMP_REG2;
	}

	/* Register-shifted MOV: bit 4 (0x10) selects register shift. */
	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
		| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
}
1859
1860
#undef EMIT_SHIFT_INS_AND_RETURN
1861
1862
/* Tests whether the immediate can be stored in the 12 bit imm field.
1863
Returns with 0 if not possible. */
1864
static sljit_uw get_imm(sljit_uw imm)
1865
{
1866
sljit_u32 rol;
1867
1868
if (imm <= 0xff)
1869
return SRC2_IMM | imm;
1870
1871
if (!(imm & 0xff000000)) {
1872
imm <<= 8;
1873
rol = 8;
1874
} else {
1875
imm = (imm << 24) | (imm >> 8);
1876
rol = 0;
1877
}
1878
1879
if (!(imm & 0xff000000)) {
1880
imm <<= 8;
1881
rol += 4;
1882
}
1883
1884
if (!(imm & 0xf0000000)) {
1885
imm <<= 4;
1886
rol += 2;
1887
}
1888
1889
if (!(imm & 0xc0000000)) {
1890
imm <<= 2;
1891
rol += 1;
1892
}
1893
1894
if (!(imm & 0x00ffffff))
1895
return SRC2_IMM | (imm >> 24) | (rol << 8);
1896
return 0;
1897
}
1898
1899
/* Splits a 32 bit constant into two ARM rotated-immediate operands:
   the returned value is the first operand and *imm2 the second one.
   Returns 0 if the constant cannot be represented by two immediates. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* No zero byte found in the first pass: rotate and
			   retry with the remaining bit positions. */
			/* rol by 8. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* At most 16 significant bits remain: encode them as two
		   adjacent bytes. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* Top byte is the first operand; normalize the rest for the
		   second one. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Align the first byte to the top before extracting it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
2004
2005
/* Loads a 32 bit constant into reg using the shortest available
   sequence: one data-processing instruction, two instructions, or a
   literal pool load (ARMv6) / movw+movt pair (other configs). */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* A 16 bit constant fits into a single movw. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	/* The inverted constant may be encodable: use mvn instead. */
	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	/* Same with the inverted constant: mvn then bic. */
	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* movw loads the low half; movt is only needed for the high half. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
2047
2048
/* Emits a single load/store between reg and [arg + argw], materializing
   part of the address into tmp_reg when the offset does not fit the
   instruction's immediate field (12 bit for type1, 8 bit for type2
   transfers). */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: load a rounded base into tmp_reg and keep
		   the remainder as an in-range immediate offset. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1;

		if (argw < 0) {
			/* Use the subtract (U=0) form for a negative remainder. */
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3;

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers have no shifted-register form; compute
			   the address separately. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
	}

	arg &= REG_MASK;

	if (argw > mask) {
		/* Fold the out-of-range part of the offset into the base with
		   a single add, when it is an encodable immediate. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		/* Mirror case for large negative offsets: subtract. */
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	if (argw <= mask && argw >= -mask) {
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		/* Negative offset: encode magnitude with the U=0 (subtract) form. */
		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Fallback: load the full offset and use a register offset form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
2138
2139
/* Central operand-selection helper: resolves immediates, memory operands
   and register assignments for one- and two-operand integer operations,
   then emits the operation through emit_single_op. inp_flags (ALLOW_*)
   controls which immediate encodings may be attempted. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefers register and simple consts. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg = 0;
	sljit_s32 src2_reg = 0;
	sljit_s32 src2_tmp_reg = 0;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	sljit_s32 neg_op = 0;
	sljit_u32 imm2;

	op = GET_OPCODE(op);

	/* Two-instruction immediates change intermediate values, which is
	   not allowed when status flags must be set. */
	if (flags & SET_FLAGS)
		inp_flags &= ~ALLOW_DOUBLE_IMM;

	/* TMP_REG1 as destination marks the result as unused. */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	/* neg_op is the opcode usable when negating an immediate operand
	   (e.g. add x, -imm becomes sub x, imm). */
	if (inp_flags & ALLOW_NEG_IMM) {
		switch (op) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to encode one source as a single-instruction immediate
	   (possibly inverted or negated); break as soon as one fits. */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}

			if (neg_op != 0) {
				/* Carry variants negate by inverting instead. */
				src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 == SLJIT_IMM) {
			/* Immediate in src1: swap the arguments. */
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}

			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);

				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		/* Move operations: a register-to-memory move needs no
		   intermediate operation at all. */
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= REGISTER_OP;

		src2_tmp_reg = dst_reg;
	} else {
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
			if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16))
				flags |= REGISTER_OP;
		}

		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
	}

	if (src2_reg == 0 && (src2 & SLJIT_MEM)) {
		src2_reg = src2_tmp_reg;
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1));
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	} else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
		/* Immediate src1 that cannot (or need not) use the
		   two-instruction form: materialize it now. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = src2_tmp_reg;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (!(inp_flags & ALLOW_DOUBLE_IMM))
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
		else {
			SLJIT_ASSERT(!(flags & SET_FLAGS));

			if (src1_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
				src1_reg = TMP_REG1;
			}

			/* Try to apply the immediate with two instructions:
			   the first half is emitted here, the second half by
			   the final emit_single_op below. */
			src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);

			if (src2_reg == 0 && neg_op != 0) {
				src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
				if (src2_reg != 0)
					op = neg_op;
			}

			if (src2_reg == 0) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w));
				src2_reg = src2_tmp_reg;
			} else {
				FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
				src1_reg = dst_reg;
				src2_reg = (sljit_s32)imm2;

				/* The carry is consumed by the first half. */
				if (op == SLJIT_ADDC)
					op = SLJIT_ADD;
				else if (op == SLJIT_SUBC)
					op = SLJIT_SUB;
			}
		}
	}

	if (src1_reg == 0) {
		/* src1 is still an immediate: same two-instruction scheme,
		   with the operand order reversed. */
		SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));

		src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);

		if (src1_reg == 0 && neg_op != 0) {
			src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
			if (src1_reg != 0)
				op = neg_op;
		}

		if (src1_reg == 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1_reg = TMP_REG1;
		} else {
			FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
			src1_reg = dst_reg;
			src2_reg = (sljit_s32)imm2;

			if (op == SLJIT_ADDC)
				op = SLJIT_ADD;
		}
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2360
2361
#ifdef __cplusplus
2362
extern "C" {
2363
#endif
2364
2365
#if defined(__GNUC__)
2366
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2367
extern int __aeabi_idivmod(int numerator, int denominator);
2368
#else
2369
#error "Software divmod functions are needed"
2370
#endif
2371
2372
#ifdef __cplusplus
2373
}
2374
#endif
2375
2376
/* Emits zero-operand operations. The div/mod opcodes are implemented by
   calling the compiler run-time helpers (__aeabi_*divmod), preserving
   the caller-visible scratch registers around the call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 32x32 -> 64 multiply: low result in R0, high in R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect the in-use scratch registers (machine regs r1-r3)
		   that the helper call may clobber. For the DIV-only ops, r1
		   (the remainder) must be preserved as well. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* Keep the stack 8-byte aligned (8 or 16 byte frame). */
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the saved registers in reverse order; the last load
		   also pops the frame (post-indexed form). */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_MEMORY_BARRIER:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		return push_inst(compiler, DMB_SY);
#else /* !SLJIT_CONFIG_ARM_V7 */
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_ARM_V7 */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2457
2458
/* Emits a single-operand operation (moves, clz/ctz, byte reversals).
   Immediate move sources are narrowed to the operation's data size
   before being handed to emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 opcode;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	opcode = GET_OPCODE(op);

	switch (opcode) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		/* All full-width moves share one implementation. */
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_U8:
		if (src == SLJIT_IMM)
			srcw = (sljit_u8)srcw;
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_S8:
		if (src == SLJIT_IMM)
			srcw = (sljit_s8)srcw;
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_U16:
		if (src == SLJIT_IMM)
			srcw = (sljit_u16)srcw;
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_S16:
		if (src == SLJIT_IMM)
			srcw = (sljit_s16)srcw;
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
	case SLJIT_CTZ:
	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		/* The original op is passed through to preserve its flags. */
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}
2501
2502
/* Emits a two-operand operation, selecting which immediate encodings
   emit_op may attempt for each opcode. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 inp_flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
	case SLJIT_SUB:
	case SLJIT_SUBC:
		/* Negated immediates may turn an add into a sub and back. */
		return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_OR:
		return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_XOR:
		/* xor with -1 is a bitwise not: allow the inverted form then. */
		inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
		if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1))
			inp_flags |= ALLOW_INV_IMM;
		return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		/* Multiply has no immediate operand form. */
		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
		/* The shift amount travels in compiler->shift_imm;
		   0x20 requests the register-shift form. */
		if (src2 != SLJIT_IMM) {
			compiler->shift_imm = 0x20;
			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
		}

		compiler->shift_imm = src2w & 0x1f;
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
	}

	return SLJIT_SUCCESS;
}
2557
2558
/* Two-operand operation whose result is discarded (only the status
   flags matter). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	/* TMP_REG1 as destination marks the result as unused (see emit_op). */
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2568
2569
/* Two-operand operation with a register-only destination. Currently
   only SLJIT_MULADD is supported; anything else is a no-op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (GET_OPCODE(op) == SLJIT_MULADD)
		return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w);

	return SLJIT_SUCCESS;
}
2586
2587
/* Shifts src1_reg and fills the vacated bits from src2_reg (funnel
   shift); when both sources are the same register this degenerates
   into a rotate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Shift type of ROR is 3. */
	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Constant amount: shift src1, then or-in the opposite shift
		   of src2 by (32 - amount). */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Mask the amount to 5 bits when requested, or when src3 would be
	   clobbered by the first move below. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	/* Variable amount: combine a register shift of src1 by src3 with
	   the opposite shift of src2 by (31 - src3) plus one extra bit. */
	FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
	return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
}
2635
2636
/* Emits dst = src1 OP (src2 << shift_arg). On this back-end the shifted
   operand is folded into a single ADD when a real shift remains. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	sljit_sw shift_arg)
{
	sljit_s32 dst_r, tmp_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	shift_arg &= 0x1f;

	/* An immediate operand can be pre-shifted at compile time. */
	if (src2 == SLJIT_IMM) {
		src2w = src2w << shift_arg;
		shift_arg = 0;
	}

	/* No shift left: fall back to the plain two-operand emitter. */
	if (shift_arg == 0) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);
	}

	if (src1 == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
	} else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_MEM) {
		/* Pick a temporary that does not clash with src1. */
		tmp_r = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, tmp_r, src2, src2w, tmp_r));
		src2 = tmp_r;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	/* add dst, src1, src2 lsl #shift_arg */
	FAIL_IF(push_inst(compiler, ADD | RD(dst_r) | RN(src1) | RM(src2) | ((sljit_ins)shift_arg << 7)));

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, dst_r, dst, dstw, TMP_REG1);
	return SLJIT_SUCCESS;
}
2683
2684
/* Emits source-only operations: fast return and prefetch hints. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to lr (r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src)) {
			if (src != TMP_REG2)
				FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		} else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No-op on this architecture. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch variants are emitted through the same PRELOAD
		   transfer form. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2714
2715
/* Emits destination-only operations: fast-call entry (capture lr) and
   reading the return address from the stack frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is mapped to lr (r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst)) {
			if (dst == TMP_REG2)
				return SLJIT_SUCCESS;
			return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
		}
		/* Memory destination is handled by the common store below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Recompute the offset of the saved return address within the
		   frame laid out by the prologue (see sljit_emit_enter). */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2757
2758
/* Returns the machine register index for an abstract register of the
   given register class, or -1 for an unknown class. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	switch (type) {
	case SLJIT_GP_REGISTER:
		return reg_map[reg];
	case SLJIT_FLOAT_REGISTER:
	case SLJIT_SIMD_REG_64:
		return freg_map[reg];
	case SLJIT_SIMD_REG_128:
		/* A quad register occupies an even/odd double register pair;
		   report the even half. */
		return freg_map[reg] & ~0x1;
	default:
		return -1;
	}
}
2773
2774
/* Emits one raw, caller-provided 32 bit instruction word verbatim. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	/* size is fixed on this architecture; it is only checked. */
	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
2783
2784
/* --------------------------------------------------------------------- */
2785
/* Floating point operators */
2786
/* --------------------------------------------------------------------- */
2787
2788
#define FPU_LOAD (1 << 20)
2789
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
2790
((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2791
2792
/* Emits a VFP load/store (vldr/vstr) between freg reg and [arg + argw].
   The immediate offset field covers word-aligned offsets up to 0x3fc,
   so larger offsets are folded into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Register offset: precompute the address, since vldr/vstr
		   only take an immediate offset. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
		arg = TMP_REG1;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Offsets up to +/-0x3fc fit directly (scaled by 4). */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Fold the high part of the offset into TMP_REG1 with a
		   single add/sub when it is an encodable immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: build the full address in TMP_REG1. */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
}
2835
2836
/* Converts a float/double value to a signed 32 bit integer and stores
   the result in dst. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Invert SLJIT_32 so the flag matches the precision selection used
	   by EMIT_FPU_OPERATION below. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* vcvt into TMP_FREG1 (integer result stays in a VFP register). */
	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	if (FAST_IS_REG(dst))
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2855
2856
/* Shared helper: converts a 32 bit integer (register, memory or
   immediate) to a float/double using the given vcvt instruction. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		/* Move the integer into a VFP register first. */
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate source: materialize it in TMP_REG1 first. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2879
2880
/* Signed word -> floating point conversion. The SLJIT_32 bit of op is
   inverted (~op & SLJIT_32) before merging into the opcode, matching
   the encoding convention used by the helper above. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2886
2887
/* Unsigned word -> floating point conversion; see the signed variant
   above for the SLJIT_32 bit inversion. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2893
2894
/* Compares two floating point operands (VCMP) and transfers the VFP
   status flags to the CPSR (VMRS). For SLJIT_UNORDERED_OR_EQUAL an
   extra conditional CMP folds the overflow (unordered) case into the
   integer flags. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	FAIL_IF(push_inst(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* Executed only when the V (unordered) flag is set: a self-compare
	   of TMP_REG1 forces the Z flag on, merging "unordered" into "equal". */
	return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
2918
2919
/* Emits a single-operand floating point operation (move, negate, abs,
   or f32 <-> f64 conversion). The SLJIT_32 bit of op is flipped up
   front (except for CONV_F64_FROM_F32, whose source precision differs
   from its result) so that (op & SLJIT_32) selects the correct
   precision for loads, the operation, and the final store. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Memory destination: store directly from src below. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* Result precision is the opposite of the source precision. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2965
2966
/* Emits a two-operand floating point operation (add, sub, mul, div,
   copysign). Memory operands are loaded into the scratch VFP
   registers; a memory destination is written back at the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Invert the precision bit; see sljit_emit_fop1. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the sign-carrying (high) word of src2 into TMP_REG1,
		   compute |src1| in dst_r, then negate it only when the sign
		   bit of src2 was set (MI condition, 0xb0000000). */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
		return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
	}

	if (dst_r != dst)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
3018
3019
#undef EMIT_FPU_DATA_TRANSFER
3020
3021
/* Loads a 32-bit float constant into a VFP register. When NEON is
   available and the value fits the 8-bit VMOV-immediate form (the
   low 19 mantissa bits are zero and the exponent is in range), a
   single VMOV immediate is emitted; otherwise the raw bits are
   materialized in TMP_REG1 and transferred. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-puns the float to its bit pattern (no strict-aliasing UB). */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		if (exp == 0x20 || exp == 0x1f) {
			/* Pack sign + top exponent/mantissa bits into the 8-bit immediate. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
3052
3053
/* Loads a 64-bit float constant into a VFP register. Mirrors
   sljit_emit_fset32: tries the NEON VMOV-immediate encoding first,
   then falls back to moving the two 32-bit halves from core
   registers with VMOV2 (the halves share TMP_REG1 when equal). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-puns the double to its two 32-bit words.
	   NOTE(review): imm[0]/imm[1] order follows the target's
	   little-endian word layout. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			/* (1 << 8) selects the double precision immediate form. */
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	if (u.imm[0] == u.imm[1])
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
3088
3089
/* Bitwise copy between a floating point register and core register(s).
   A register pair moves both 32-bit halves with VMOV2; a single
   register uses VMOV (bit 7 selects the high half for doubles).
   Bit 20 flips the transfer direction for SLJIT_COPY_FROM_F64. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
	} else {
		inst = VMOV | VN(freg) | RD(reg);

		/* Double precision with a single core register: access the
		   upper half of the VFP register. */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst(compiler, inst);
}
3115
3116
/* --------------------------------------------------------------------- */
3117
/* Conditional instructions */
3118
/* --------------------------------------------------------------------- */
3119
3120
/* Maps an sljit condition type to the ARM condition code field
   (bits 31:28 of the instruction). CARRY/OVERFLOW variants depend on
   whether the flags were produced by an ADD or SUB, recorded in
   compiler->status_flags_state. */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		/* ADD sets C on carry; SUB sets C on no-borrow (inverted). */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		SLJIT_FALLTHROUGH

	case SLJIT_LESS:
		return 0x30000000; /* CC */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC */
		SLJIT_FALLTHROUGH

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* Without ADD/SUB flags, overflow is tracked via Z (see op emitters). */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		SLJIT_FALLTHROUGH

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		SLJIT_FALLTHROUGH

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL (always) */
	}
}
3208
3209
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3210
{
3211
struct sljit_label *label;
3212
3213
CHECK_ERROR_PTR();
3214
CHECK_PTR(check_sljit_emit_label(compiler));
3215
3216
if (compiler->last_label && compiler->last_label->size == compiler->size)
3217
return compiler->last_label;
3218
3219
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3220
PTR_FAIL_IF(!label);
3221
set_label(label, compiler);
3222
return label;
3223
}
3224
3225
/* Creates a label whose address is aligned to 1 << alignment bytes,
   padding with NOPs as needed, and reserves space for the optional
   chain of read-only data buffers that follows the label. On ARMv6
   any pending constant pool is flushed first. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
	sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
{
	sljit_uw mask, i;
	struct sljit_label *label;
	struct sljit_label *next_label;
	struct sljit_extended_label *ext_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));

	sljit_reset_read_only_buffers(buffers);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY))
		PTR_FAIL_IF(push_cpool(compiler));
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (alignment <= SLJIT_LABEL_ALIGN_4) {
		/* Instructions are already 4-byte aligned; a plain label suffices. */
		SLJIT_SKIP_CHECKS(compiler);
		label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!label);
	} else {
		/* The used space is filled with NOPs. */
		mask = ((sljit_uw)1 << alignment) - sizeof(sljit_ins);

		for (i = (mask >> 2); i != 0; i--)
			PTR_FAIL_IF(push_inst(compiler, NOP));

		ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
		PTR_FAIL_IF(!ext_label);
		set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
		label = &ext_label->label;
	}

	if (buffers == NULL)
		return label;

	next_label = label;

	/* Reserve NOP-filled space for each buffer and attach a label to it. */
	while (1) {
		buffers->u.label = next_label;

		for (i = (buffers->size + 3) >> 2; i > 0; i--)
			PTR_FAIL_IF(push_inst(compiler, NOP));

		buffers = buffers->next;

		if (buffers == NULL)
			break;

		SLJIT_SKIP_CHECKS(compiler);
		next_label = sljit_emit_label(compiler);
		PTR_FAIL_IF(!next_label);
	}

	return label;
}
3283
3284
/* Emits a (possibly conditional) jump or call whose target is patched
   later. ARMv6 loads the target from the constant pool into PC (or
   uses a BLX sequence for calls); ARMv7+ emits BX/BLX through
   TMP_REG1 and reserves JUMP_MAX_SIZE words for the final encoding. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));

	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));
	jump->addr = compiler->size - 1;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		compiler->patches++;

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		jump->addr = compiler->size;
		PTR_FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	/* Reserve room for the worst-case jump sequence. */
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
3323
3324
#ifdef __SOFTFP__
3325
3326
/* Marshals call arguments for the softfp ABI: all arguments travel in
   r0-r3 and on the stack. The first pass computes each argument's
   byte offset (doubles 8-byte aligned); the second pass moves the
   values into place in reverse order so earlier moves are not
   clobbered. '*src' (the call target register, if any) is relocated
   to TMP_REG1 when an argument move would overwrite it. On return,
   '*extra_space' holds the stack space the caller must release. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	/* Skip the return type. */
	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign offsets; 'types' accumulates the argument
	   list reversed for the second pass. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles require 8-byte alignment. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack space beyond r0-r3, rounded up to 8 bytes. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Save the call target before its register is overwritten. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			/* Only move when the argument is not already in place. */
			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3448
3449
/* After a softfp call: moves a floating point return value from the
   core registers (where the softfp ABI places it) into the VFP
   return register. Word/void return types need no action. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		/* r0/r1 -> d0. */
		return push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		/* r0 -> s0. */
		return push_inst(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
3458
3459
#else /* !__SOFTFP__ */
3460
3461
/* Marshals floating point arguments for the hardfp ABI: compacts the
   sljit floating point argument registers into consecutive VFP
   argument slots. A freed single precision half ('f32_offset') left
   behind by a double is back-filled by a later f32 argument. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the pending upper single precision half. */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3500
3501
#endif /* __SOFTFP__ */
3502
3503
/* Emits a function call with sljit-typed arguments. Softfp builds
   marshal arguments through core registers/stack and, for tail
   calls that needed extra stack, restore SP and return via the
   saved lr (TMP_REG2) after the call. Hardfp builds only compact
   the VFP argument registers. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* A tail call with no extra stack degenerates to a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Reload the saved return address before releasing the stack. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the frame and jump instead of calling. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3558
3559
/* Emits an indirect jump or fast call. Register and memory targets
   become an immediate BX/BLX; immediate targets create a patchable
   sljit_jump record (constant-pool load on ARMv6, reserved
   JUMP_MAX_SIZE sequence otherwise). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	jump->addr = compiler->size;
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL) {
		jump->addr = compiler->size;
		FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
	/* Reserve room for the worst-case target-loading sequence. */
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return SLJIT_SUCCESS;
}
3602
3603
/* Emits an indirect call with sljit-typed arguments. The target is
   moved to TMP_REG1 when it lives in memory or in a saved register
   about to be restored by a tail call. Softfp builds mirror
   sljit_emit_call's argument marshalling and stack cleanup. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call restores saved registers, so a target held in one
	   of them must be copied out first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address before releasing the stack. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the frame and jump instead of calling. */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3664
3665
#ifdef __SOFTFP__
3666
3667
/* Softfp only: moves the floating point return value into the ABI
   return location before returning. Register-argument functions keep
   it in SLJIT_RETURN_FREG; otherwise it goes to r0 (f32) or the
   r0/r1 pair (f64). */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* Bit 20 selects the VFP -> core register transfer direction. */
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory source: load the raw bits directly into r0 (and r1 for f64). */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3689
3690
#endif /* __SOFTFP__ */
3691
3692
/* Materializes a condition flag as 0/1 in dst (for op < SLJIT_ADD),
   or combines it into dst with AND/OR/XOR using conditionally
   executed instructions. For AND the inverse condition clears the
   bit so both outcomes are covered. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain flag -> 0/1: unconditional clear, conditional set. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* AND with 0 under the inverted condition (cc ^ NE-bit) handles
	   the flag-clear case. */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Re-derive the Z flag from the final value when requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3732
3733
/* Conditional select: dst_reg = condition ? src1 : src2_reg, built
   from conditionally executed moves. Operands are shuffled (and the
   condition inverted via type ^= 0x1) so src2 ends up in dst_reg and
   only one conditional move remains. Immediates try MOV/MVN encodings
   (with CMP/CMN re-emitted for compare-select), then MOVW/MOVT on
   ARMv7, then a full load into TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp, tmp2;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* If src1 already occupies dst_reg, swap the operands and invert
	   the condition (cannot invert a compare-select's condition, but
	   here the compare operand ordering is unaffected). */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		if (!(type & SLJIT_COMPARE_SELECT))
			type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			if (!(type & SLJIT_COMPARE_SELECT))
				type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	if (type & SLJIT_COMPARE_SELECT)
		type ^= 0x1;

	cc = get_cc(compiler, type & ~(SLJIT_32 | SLJIT_COMPARE_SELECT));

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Try a direct MOV-immediate encoding. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp) {
			if (type & SLJIT_COMPARE_SELECT)
				FAIL_IF(push_inst(compiler, (CMP | SET_FLAGS | RN(dst_reg) | tmp)));
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);
		}

		/* Try MVN of the bitwise complement; compare-select then needs
		   CMN with the negated value to reproduce the CMP flags. */
		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp && (type & SLJIT_COMPARE_SELECT)) {
			tmp2 = get_imm((sljit_uw)-src1w);
			if (tmp2)
				FAIL_IF(push_inst(compiler, (CMN | SET_FLAGS | RN(dst_reg) | tmp2)));
			else
				tmp = 0;
		}

		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		if (!(type & SLJIT_COMPARE_SELECT)) {
			/* Conditional MOVW (+ MOVT for the high half if needed). */
			tmp = (sljit_ins)src1w;
			FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
			if (tmp <= 0xffff)
				return SLJIT_SUCCESS;
			return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
		}
#endif /* SLJIT_CONFIG_ARM_V7 */

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
	}

	if (type & SLJIT_COMPARE_SELECT)
		FAIL_IF(push_inst(compiler, (CMP | SET_FLAGS | RN(dst_reg) | RM(src1))));

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
3812
3813
/* Floating point conditional select: dst_freg = condition ? src1 :
   src2_freg. src2 is moved into dst unconditionally first (or the
   operands are swapped with the condition inverted when dst already
   holds src1), then one conditional VMOV finishes the select. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* Invert the precision bit; see sljit_emit_fop1. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* dst already holds src1: swap operands, invert condition. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
3844
3845
#undef EMIT_FPU_OPERATION
3846
3847
/* Rewrites *mem / *memw so the remaining displacement fits into
   [-0xfff, max_offset]. The (possibly adjusted) base address is computed
   into TMP_REG1 unless the original base already satisfies the range. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);

	*mem = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base + (index << shift): fold everything into TMP_REG1, offset becomes 0. */
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		if (argw <= max_offset && argw >= -mask) {
			/* Displacement already encodable; keep the original base. */
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Round the offset so the large part is an encodable ADD immediate. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Negative offset: subtract the encodable large part instead. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: load the rounded offset as a full immediate, then add the base. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3908
3909
/* Emits a (possibly register-pair) memory load or store. Single registers
   are forwarded to the unaligned helper; pairs are split into two word
   transfers at memw and memw + sizeof(sljit_sw). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	/* Leave headroom for the second word's +4 displacement. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* First target register is also the base: load the second word
			   first so the base is not clobbered before it is used. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3939
3940
/* Emits a load/store with base register update (pre/post indexing).
   Type-1 transfers (word/byte) allow a 12-bit displacement; type-2
   transfers (halfword, signed byte load) only an 8-bit one. Returns
   SLJIT_ERR_UNSUPPORTED when the addressing form cannot be encoded. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte loads use the type-2 (LDRSB) encoding. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Validate the displacement range before any instruction is emitted. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		if (is_type1_transfer)
			inst |= (1 << 25);

		/* Bit 24 selects pre-indexing; bit 21 enables write-back for pre-index. */
		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);
		else
			inst |= (1 << 21);

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);
	else
		inst |= (1 << 21);

	if (is_type1_transfer) {
		/* Bit 23 = add offset; negative offsets are encoded as subtract. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
4040
4041
/* Floating point load/store for possibly unaligned addresses. Aligned
   accesses go straight to the FPU transfer; unaligned ones are routed
   through the integer pipeline word by word via TMP_REG2 / TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the low word of the FP register into TMP_REG2. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80 selects the high word of the double register. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	/* VMOV2 transfers both words into the double register at once. */
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
4077
4078
/* Computes the address of a SIMD memory operand into a single base
   register (written back through mem_ptr), since NEON loads/stores
   take no displacement. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 mem = *mem_ptr;
	sljit_uw imm;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Base + (index << shift). */
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	if (SLJIT_UNLIKELY(!(mem & REG_MASK))) {
		/* Absolute address. */
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	mem &= REG_MASK;

	if (memw == 0) {
		*mem_ptr = mem;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;
	/* Try to fold the displacement into a single ADD/SUB immediate. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (imm != 0)
		return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(mem) | imm);

	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
}
4109
4110
/* Maps a float register to the even-mapped half of its quad (128-bit)
   register pair, asserting the register map keeps the expected parity. */
static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
{
	freg += freg & 0x1;

	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));

	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
		freg--;

	return freg;
}
4121
4122
/* Offset (+1 or -1) from a register index to the other half of its quad pair. */
#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
4123
4124
/* Moves a whole SIMD register to/from a register or memory.
   Register-register moves use VORR; memory transfers use VLD1/VST1 with
   the requested alignment hint. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (!(srcdst & SLJIT_MEM)) {
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		/* VORR with identical sources acts as a register move. */
		if (type & SLJIT_SIMD_STORE)
			ins = VD(srcdst) | VN(vreg) | VM(vreg);
		else
			ins = VD(vreg) | VN(srcdst) | VM(srcdst);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(vreg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Alignment hint: 0x10 = 64-bit, 0x20 = 128-bit. The original second
	   branch tested 'alignment >= 3', which is unreachable after the
	   '== 3' case; '>= 4' matches the encoding used in sljit_emit_simd_op2. */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
4182
4183
/* Tries to encode 'value' as a VMOV immediate (cmode/op fields) for the
   given element size. Returns the partial instruction encoding, or
   ~(sljit_ins)0 when the value cannot be represented. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* Shrink the element size when the value is a repeated pattern. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Second pass (inverted value) also failed. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverse (VMVN form). */
			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			/* "byte, ones" forms: 0x??ff and 0x??ffff. */
			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverse (VMVN form). */
			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8-bit immediate into the instruction's imm fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
4276
4277
/* Broadcasts a scalar (register, memory, or immediate) into every lane
   of a SIMD register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	/* Zero is encodable directly as a VMOV immediate. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(vreg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64-bit float lanes: copy the double into both halves with VORR. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw));
			src = vreg;
		} else if (vreg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 single-element-to-all-lanes form. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5;

		return push_inst(compiler, VLD1_r | ins | VD(vreg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(vreg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(vreg));
		}

		/* Not encodable: go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from core register: element size selects the b/e bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(vreg) | RD(src));
}
4376
4377
/* Moves a single lane between a SIMD register and a register, memory
   location, or immediate. SLJIT_SIMD_LANE_ZERO clears the other lanes
   first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				/* Double lane from register: copy then zero the other half. */
				if (lane_index == 1)
					vreg += SLJIT_QUAD_OTHER_HALF(vreg);

				if (srcdst != vreg)
					FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(srcdst) | VM(srcdst)));

				vreg += SLJIT_QUAD_OTHER_HALF(vreg);
				return push_inst(compiler, VMOV_i | VD(vreg));
			}

			if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) {
				/* Source overlaps the destination: save it to TMP_FREG2 first. */
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(vreg) | VM(vreg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Zero the whole register before inserting the lane. */
		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(vreg)));
	}

	/* Lanes in the upper half of a quad register live in the odd D register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(vreg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(vreg) | VM(vreg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(vreg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			if (freg_ebit_map[vreg] == 0) {
				if (lane_index == 1)
					vreg = SLJIT_F64_SECOND(vreg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(vreg));
			}

			/* Lane not addressable as an S register: bounce through TMP_REG1. */
			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* Element size bits of the VMOV (scalar <-> core) encoding. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20);

		/* Unsigned extract unless a signed lane load was requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(vreg) | RD(srcdst));
}
4493
4494
/* Broadcasts one lane of 'src' into every lane of 'vreg' (VDUP scalar form;
   64-bit lanes are copied with VORR instead). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		src = simd_get_quad_reg_index(src);

		/* Upper-half lanes live in the other D register of the pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64-bit lanes: plain register copies to both halves. */
		if (vreg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst(compiler, VDUP_s | ins | VD(vreg) | VM(src));
}
4542
4543
/* Widens packed elements from elem_size to elem2_size. Integer lanes use
   repeated VSHLL by 0; float lanes (f32 -> f64) fall back to VCVT. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(vreg) | RN(src) | 0xf));
		else
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(vreg) | RN(src) | 0xf));
		src = vreg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? vreg : TMP_FREG2;

		/* One VSHLL per doubling of the element size. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(vreg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (vreg == src) {
		/* Convert the high element first so the source is not overwritten. */
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20));
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src)));
	vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20);
}
4607
4608
/* Extracts the sign bit of every lane into an integer bitmask stored in
   'dst'. Sign bits are isolated with VSHR, then funnelled together by a
   chain of VSRA shift-accumulate steps encoded in 'imms'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 'imms' packs the per-step VSRA shift amounts, one byte each. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(vreg)));

	/* Narrow the lanes so the accumulate chain fits into one D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 byte lanes: combine the mask of the second D register too. */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4684
4685
/* Two-operand SIMD operation: AND / OR / XOR / SHUFFLE (VTBL).
   src2 may be a memory operand, loaded into TMP_FREG2 first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment;
	sljit_ins ins = 0, load_ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VORR;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VEOR;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = VTBL;
		break;
	}

	if (src2 & SLJIT_MEM) {
		if (elem_size > 3)
			elem_size = 3;

		load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
		alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);

		SLJIT_ASSERT(reg_size >= alignment);

		/* Alignment hint: 0x10 = 64-bit, 0x20 = 128-bit. */
		if (alignment == 3)
			load_ins |= 0x10;
		else if (alignment >= 4)
			load_ins |= 0x20;

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst(compiler, load_ins | VD(TMP_FREG2) | RN(src2) | ((sljit_ins)elem_size) << 6 | 0xf));
		src2 = TMP_FREG2;
	}

	if (reg_size == 4) {
		dst_vreg = simd_get_quad_reg_index(dst_vreg);
		src1_vreg = simd_get_quad_reg_index(src1_vreg);
		src2 = simd_get_quad_reg_index(src2);

		if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
			ins |= (sljit_ins)1 << 8;

			/* When dst aliases src1, build the result in TMP_FREG2 first. */
			FAIL_IF(push_inst(compiler, ins | VD(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN(src1_vreg) | VM(src2)));
			src2 += SLJIT_QUAD_OTHER_HALF(src2);
			FAIL_IF(push_inst(compiler, ins | VD(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN(src1_vreg) | VM(src2)));

			if (dst_vreg == src1_vreg)
				return push_inst(compiler, VORR | VD(dst_vreg) | VN(TMP_FREG2) | VM(TMP_FREG2));
			return SLJIT_SUCCESS;
		}

		ins |= (sljit_ins)1 << 6;
	}

	return push_inst(compiler, ins | VD(dst_vreg) | VN(src1_vreg) | VM(src2));
}
4762
4763
#undef FPU_LOAD
4764
4765
/* Emits an exclusive load (LDREX/LDREXB/LDREXH) from [mem_reg] into
   dst_reg. Sign-extending and CAS-based variants are not supported on
   this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_u32 ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
	case SLJIT_MOV_S16:
	case SLJIT_MOV_S32:
		return SLJIT_ERR_UNSUPPORTED;

	case SLJIT_MOV_U8:
		ins = LDREXB;
		break;
	case SLJIT_MOV_U16:
		ins = LDREXH;
		break;
	default:
		ins = LDREX;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
}
4799
4800
/* Emits an exclusive store (STREX/STREXB/STREXH) of src_reg to [mem_reg].
   The success flag returned by STREX lands in TMP_REG1; with
   SLJIT_SET_ATOMIC_STORED it is compared against zero to set the flags. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_u32 ins;

	/* temp_reg == mem_reg is undefined so use another temp register */
	SLJIT_UNUSED_ARG(temp_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_S8:
	case SLJIT_MOV_S16:
	case SLJIT_MOV_S32:
		return SLJIT_ERR_UNSUPPORTED;

	case SLJIT_MOV_U8:
		ins = STREXB;
		break;
	case SLJIT_MOV_U16:
		ins = STREXH;
		break;
	default:
		ins = STREX;
		break;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
	if (op & SLJIT_SET_ATOMIC_STORED)
		return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));

	return SLJIT_SUCCESS;
}
4842
4843
/* Encodes an 8-bit constant as a MOV immediate, or as MVN of the
   complemented value when bit 8 of 'c' is set. */
#define SLJIT_EMIT_CONST_U8(c) \
	(((c) & 0x100) != 0 ? (MVN | SRC2_IMM | (~(c) & 0xff)) : (MOV | SRC2_IMM | ((c) & 0xff)))
4845
4846
/* Emits a patchable constant load and records it as an sljit_const so
   the value can be rewritten later via sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;
	sljit_s32 mem_flags = WORD_SIZE;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (GET_OPCODE(op) == SLJIT_MOV_U8) {
		/* Single MOV/MVN immediate; patched in place by sljit_set_const. */
		PTR_FAIL_IF(push_inst(compiler, SLJIT_EMIT_CONST_U8(init_value) | RD(dst_r)));
		mem_flags = BYTE_SIZE;
	} else {
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* ARMv6: PC-relative literal load; the literal itself is patched. */
		PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
			EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
		compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
		PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, dst, dstw, TMP_REG1));

	return const_;
}
4882
4883
/* Emits a patchable address computation (mov_addr); for SLJIT_ADD_ABS_ADDR
   the patched address is added to the current value of dst. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r, target_r;
	SLJIT_UNUSED_ARG(op);

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op != SLJIT_ADD_ABS_ADDR)
		target_r = dst_r;
	else {
		/* Load the address into TMP_REG1 so it can be added to dst. */
		target_r = TMP_REG1;

		if (dst & SLJIT_MEM)
			PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
	}

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, target_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* Placeholder carrying only the destination; patched during generation. */
	PTR_FAIL_IF(push_inst(compiler, RD(target_r)));
#endif /* SLJIT_CONFIG_ARM_V6 */

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 1);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	/* MOVW/MOVT pair needs a second instruction slot. */
	compiler->size += 1;
#endif /* SLJIT_CONFIG_ARM_V7 */

	if (op == SLJIT_ADD_ABS_ADDR)
		PTR_FAIL_IF(push_inst(compiler, ADD | RD(dst_r) | RN(dst_r) | RM(TMP_REG1)));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return jump;
}
4927
4928
/* Repoints a previously emitted jump at 'addr' to 'new_target'. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	set_jump_addr(addr, executable_offset, new_target, 1);
}
4932
4933
/* Rewrites the value of a constant emitted by sljit_emit_const.
   U8 constants patch the MOV/MVN instruction directly; everything else
   goes through set_const_value. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_ins *inst;

	if (GET_OPCODE(op) != SLJIT_MOV_U8) {
		set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1);
		return;
	}

	inst = (sljit_ins*)addr;
	SLJIT_ASSERT((inst[0] & 0xfff00000) == (MOV | SRC2_IMM) || (inst[0] & 0xfff00000) == (MVN | SRC2_IMM));

	/* Keep the destination register field (bits 12-15) intact. */
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
	*inst = SLJIT_EMIT_CONST_U8(new_constant) | (*inst & 0xf000);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
	inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 1);
}
4951
4952