Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeARM_32.c
9913 views
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#ifdef __SOFTFP__
28
#define ARM_ABI_INFO " ABI:softfp"
29
#else
30
#define ARM_ABI_INFO " ABI:hardfp"
31
#endif
32
33
/* Returns a static human-readable string naming the target architecture,
   CPU info and floating point ABI (see ARM_ABI_INFO above). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43
44
/* Length of an instruction word. */
45
typedef sljit_u32 sljit_ins;
46
47
/* Last register + 1. */
48
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
49
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
50
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)
51
52
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
53
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
54
55
/* In ARM instruction words.
56
Cache lines are usually 32 byte aligned. */
57
#define CONST_POOL_ALIGNMENT 8
58
#define CONST_POOL_EMPTY 0xffffffff
59
60
#define ALIGN_INSTRUCTION(ptr) \
61
(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
62
#define MAX_DIFFERENCE(max_diff) \
63
(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))
64
65
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
66
/* Integer register mapping. Index 0 is unused; the low sljit registers map
   to r0..r3, the rest map downwards from r11 to r4, and the last four
   entries are r13 (sp), TMP_REG1 (r12), TMP_REG2 (r14/lr) and TMP_PC
   (r15/pc) — matching the TMP_REG* indices defined above. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Float register mapping. The table is doubled: the second half repeats the
   same machine register numbers but is paired with a set extra encoding bit
   in freg_ebit_map below (used by the VM/VD/VN macros at bits 5/22/7),
   presumably selecting the other half of a register pair — the two tables
   must stay in sync. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra encoding bit for each entry of freg_map: clear for the first half
   of the table, set for the second half. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};
85
86
#define RM(rm) ((sljit_ins)reg_map[rm])
87
#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
88
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
89
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)
90
91
#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
92
#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
93
#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94
95
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */
98
99
/* The instruction includes the AL condition.
100
INST_NAME - CONDITIONAL remove this flag. */
101
#define COND_MASK 0xf0000000
102
#define CONDITIONAL 0xe0000000
103
#define PUSH_POOL 0xff000000
104
105
#define ADC 0xe0a00000
106
#define ADD 0xe0800000
107
#define AND 0xe0000000
108
#define B 0xea000000
109
#define BIC 0xe1c00000
110
#define BKPT 0xe1200070
111
#define BL 0xeb000000
112
#define BLX 0xe12fff30
113
#define BX 0xe12fff10
114
#define CLZ 0xe16f0f10
115
#define CMN 0xe1600000
116
#define CMP 0xe1400000
117
#define DMB_SY 0xf57ff05f
118
#define EOR 0xe0200000
119
#define LDR 0xe5100000
120
#define LDR_POST 0xe4100000
121
#define LDREX 0xe1900f9f
122
#define LDREXB 0xe1d00f9f
123
#define LDREXH 0xe1f00f9f
124
#define MLA 0xe0200090
125
#define MOV 0xe1a00000
126
#define MUL 0xe0000090
127
#define MVN 0xe1e00000
128
#define NOP 0xe1a00000
129
#define ORR 0xe1800000
130
#define PUSH 0xe92d0000
131
#define POP 0xe8bd0000
132
#define REV 0xe6bf0f30
133
#define REV16 0xe6bf0fb0
134
#define RSB 0xe0600000
135
#define RSC 0xe0e00000
136
#define SBC 0xe0c00000
137
#define SMULL 0xe0c00090
138
#define STR 0xe5000000
139
#define STREX 0xe1800f90
140
#define STREXB 0xe1c00f90
141
#define STREXH 0xe1e00f90
142
#define SUB 0xe0400000
143
#define SXTB 0xe6af0070
144
#define SXTH 0xe6bf0070
145
#define TST 0xe1000000
146
#define UMULL 0xe0800090
147
#define UXTB 0xe6ef0070
148
#define UXTH 0xe6ff0070
149
#define VABS_F32 0xeeb00ac0
150
#define VADD_F32 0xee300a00
151
#define VAND 0xf2000110
152
#define VCMP_F32 0xeeb40a40
153
#define VCVT_F32_S32 0xeeb80ac0
154
#define VCVT_F32_U32 0xeeb80a40
155
#define VCVT_F64_F32 0xeeb70ac0
156
#define VCVT_S32_F32 0xeebd0ac0
157
#define VDIV_F32 0xee800a00
158
#define VDUP 0xee800b10
159
#define VDUP_s 0xf3b00c00
160
#define VEOR 0xf3000110
161
#define VLD1 0xf4200000
162
#define VLD1_r 0xf4a00c00
163
#define VLD1_s 0xf4a00000
164
#define VLDR_F32 0xed100a00
165
#define VMOV_F32 0xeeb00a40
166
#define VMOV 0xee000a10
167
#define VMOV2 0xec400a10
168
#define VMOV_i 0xf2800010
169
#define VMOV_s 0xee000b10
170
#define VMOVN 0xf3b20200
171
#define VMRS 0xeef1fa10
172
#define VMUL_F32 0xee200a00
173
#define VNEG_F32 0xeeb10a40
174
#define VORR 0xf2200110
175
#define VPOP 0xecbd0b00
176
#define VPUSH 0xed2d0b00
177
#define VSHLL 0xf2800a10
178
#define VSHR 0xf2800010
179
#define VSRA 0xf2800110
180
#define VST1 0xf4000000
181
#define VST1_s 0xf4800000
182
#define VSTR_F32 0xed000a00
183
#define VSUB_F32 0xee300a40
184
#define VTBL 0xf3b00800
185
186
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
187
/* Arm v7 specific instructions. */
188
#define MOVT 0xe3400000
189
#define MOVW 0xe3000000
190
#define RBIT 0xe6ff0f30
191
#endif
192
193
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
194
195
/* Argument-checking helper: returns nonzero when fr denotes a usable float
   register given the compiler's configured register counts (scratches,
   saveds, or the temporary register range). When the counts are not
   available (scratches == -1) every register is rejected. For 32 bit
   operations the SLJIT_F64_SECOND() alias is first mapped back to its
   base register before the range checks. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->real_fscratches))
		|| (fr > (SLJIT_FS0 - compiler->real_fsaveds) && fr <= SLJIT_FS0)
		|| (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
207
208
/* Argument-checking helper: returns nonzero when vr denotes a usable vector
   register for the given SIMD operation type. For register size 4
   (presumably log2 of the byte size, i.e. a 16 byte register — confirm
   against SLJIT_SIMD_GET_REG_SIZE), the register occupies an aligned pair:
   vr is rounded up to the even member and the pair's low half is range
   checked separately through vr_low. */
static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 type)
{
	sljit_s32 vr_low = vr;

	if (compiler->scratches == -1)
		return 0;

	if (SLJIT_SIMD_GET_REG_SIZE(type) == 4) {
		vr += (vr & 0x1);
		vr_low = vr - 1;
	}

	return (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches))
		|| (vr_low > (SLJIT_VS0 - compiler->vsaveds) && vr_low <= SLJIT_VS0)
		|| (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS));
}
224
225
#endif /* SLJIT_ARGUMENT_CHECKS */
226
227
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
228
229
/* Flushes the pending constant pool into the instruction stream:
   a marker word (0xff000000 | fill count, recognized via PUSH_POOL during
   the second code generation pass), CONST_POOL_ALIGNMENT-1 padding words,
   then the literal values themselves. Resets the pool state afterwards. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_ins* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point to the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Alignment padding; skipped by the second pass. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the accumulated literal values. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
266
267
/* Appends one instruction word to the code buffer (ARMv6 path), first
   flushing the constant pool when the oldest pending literal would move
   out of range of the 12 bit pc-relative load offset (4092 is the largest
   word-aligned value below 4096). */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins* ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));

	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst;
	return SLJIT_SUCCESS;
}
280
281
/* Emits a pc-relative load whose value is placed in the constant pool.
   An existing, non-unique pool entry with the same value is reused when
   possible; otherwise a new slot is allocated, flushing the pool first if
   it is full or the oldest literal would go out of load range. The low
   12 bits of the emitted word temporarily store the pool index (hence the
   (inst & 0xfff) == 0 assert); the real offset is patched in later by
   patch_pc_relative_loads. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search for a shareable entry with the same value. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
330
331
/* Like push_inst_with_literal, but the pool entry is marked unique so it is
   never shared with later literals (needed for patchable constants whose
   value may be rewritten at runtime). Always allocates a fresh slot. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;

	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
351
352
/* Guarantees that the next two instruction words can be emitted without an
   intervening constant pool flush, so emit_blx below stays adjacent to the
   instruction it follows. */
static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
{
	/* Place for at least two instructions (doesn't matter whether the first has a literal). */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
		return push_cpool(compiler);
	return SLJIT_SUCCESS;
}
359
360
/* Emits BLX through TMP_REG1. Must be paired with a prior prepare_blx. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
368
369
/* Second-pass helper (ARMv6): walks the instructions between last_pc_patch
   and code_ptr, finds every pc-relative load, and rewrites its 12 bit
   offset field (which until now held a pool index, see
   push_inst_with_literal) into the real byte offset of its literal.
   Literals that are never referenced (they belonged to jumps converted to
   direct B/BL) are dropped, compressing the pool; const_pool is reused as
   the old-index -> new-index renumbering table. Returns the number of
   literals actually kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e4f0000) == 0x040f0000) {
			diff = (sljit_uw)(const_pool - last_pc_patch);
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				/* First reference: assign the next compressed slot. */
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				/* The -2 compensates for the pc+8 read offset (in words). */
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Target is before pc+8: clear the U (add) bit and use offset 4. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
413
414
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
/* Records a constant pool slot whose original value had to be displaced
   before it was copied (see resolve_const_pool_index). */
struct future_patch {
	struct future_patch* next;  /* singly linked list */
	sljit_s32 index;            /* pool slot the saved value belongs to */
	sljit_s32 value;            /* the displaced original value */
};
420
421
/* Second-pass helper (ARMv6): copies one literal (*buf_ptr) into its
   compressed position inside cpool_start_address, using the renumbering
   written by patch_pc_relative_loads. When the destination slot still
   holds a renumbering entry that has not been consumed yet (value moves
   forward), its content is saved in the future_patch list so it can be
   resolved later. On allocation failure the patch list is freed and
   SLJIT_ERR_ALLOC_FAILED is returned. */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* A pending patch may hold the real value for this slot. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* Negative value: literal is unused, nothing to copy. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* Destination slot not yet processed: save its content. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
473
474
#else
475
476
/* Appends a single instruction word to the code buffer (ARMv7 path:
   no constant pool to maintain). */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins *slot = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));

	FAIL_IF(!slot);
	compiler->size++;
	*slot = inst;
	return SLJIT_SUCCESS;
}
486
487
/* Loads a 32 bit immediate into reg with a MOVW/MOVT pair (ARMv7).
   MOVW sets the low 16 bits, MOVT the high 16 bits; each instruction
   encodes its imm16 as imm4:imm12 (instruction bits 19:16 and 11:0). */
static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	/* Do the bit manipulation on an unsigned copy: left-shifting a negative
	   sljit_sw is undefined behavior and right-shifting it is
	   implementation-defined (C11 6.5.7). The masked results are identical
	   to the previous signed arithmetic on two's complement targets. */
	sljit_u32 value = (sljit_u32)imm;

	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((value << 4) & 0xf0000) | (value & 0xfff)));
	return push_inst(compiler, MOVT | RD(reg) | ((value >> 12) & 0xf0000) | ((value >> 16) & 0xfff));
}
492
493
#endif
494
495
/* Second-pass helper: decides whether a jump can be encoded as a direct
   B/BL (signed 24 bit word offset, i.e. +/-32MB from pc+8) instead of the
   longer constant-load form, and patches code_ptr accordingly. Rewritable
   jumps and Thumb (non word-aligned) targets keep the long form. Returns
   nonzero when the short form was used and the caller may drop the extra
   instruction word(s). */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_uw orig_addr = jump->addr;
	SLJIT_UNUSED_ARG(executable_offset);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	jump->addr = jump_addr;
#endif

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* For BL the branch is written over the word before the mov pc. */
	if (jump->flags & IS_BL)
		code_ptr--;
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		target_addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward jump: the label has not been relocated yet. */
		if (jump->u.label->size > orig_addr)
			jump_addr = (sljit_uw)(code + orig_addr);
	}

	/* Branch offsets are relative to pc + 8. */
	diff = (sljit_sw)target_addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr + 8, executable_offset);

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	} else if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
551
552
/* Runtime patching: retargets an already generated (rewritable) jump to
   new_addr. On ARMv6, jump_ptr refers to a two-word descriptor (instruction
   address + the original "mov pc" word): the jump is rewritten as a direct
   B/BL when the target is in range, otherwise the constant pool slot is
   updated. On ARMv7 the MOVW/MOVT pair is rewritten in place. When
   flush_cache is set, the W^X flags are toggled around the write and the
   instruction cache is flushed. */
static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* If the stored word does not write pc, this is a BL-style jump. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the long (load + jump) form if it had been converted
		   to a direct branch earlier. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the destination register bits, replace the imm4:imm12 fields. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
646
647
static sljit_uw get_imm(sljit_uw imm);
648
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
649
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
650
651
/* Runtime patching: replaces the value produced by a patchable constant.
   On ARMv6, addr refers to a two-word descriptor (instruction address +
   the original ldr literal word): if the new value (or its complement)
   fits an ARM data-processing immediate, the load is rewritten as
   mov/mvn imm (encodings 0xe3a00000 / 0xe3e00000); otherwise the original
   ldr is restored and its pool slot updated. On ARMv7 the MOVW/MOVT pair
   is rewritten. flush_cache behaves as in set_jump_addr. */
static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* mov rd, #imm — keep the destination register of the ldr. */
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* mvn rd, #imm */
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Locate the literal pool slot of the original ldr. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the ldr if it had been rewritten to mov/mvn earlier. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the destination register bits, replace the imm4:imm12 fields. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
742
743
/* Second-pass helper: returns how many EXTRA instruction words a
   "mov address" sequence needs beyond the first one. When the target lies
   within the short pc-relative range (+/-0x3fc bytes around pc+8,
   word-aligned), PATCH_B is set and 0 is returned; otherwise ARMv7 needs
   one more word (for the MOVT of the MOVW/MOVT pair), while ARMv6 always
   stays at one word (pool load). */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_uw jump_addr = (sljit_uw)code_ptr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else {
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

		/* Forward reference: the label has not been relocated yet. */
		if (jump->u.label->size > jump->addr)
			jump_addr = (sljit_uw)(code + jump->addr);
	}

	/* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_addr, executable_offset);

	if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) {
		jump->flags |= PATCH_B;
		return 0;
	}

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return 0;
#else /* !SLJIT_CONFIG_ARM_V6 */
	return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
}
773
774
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
775
776
/* Pre-generation sizing pass (ARMv7 only): walks labels, jumps and consts
   in address order, computes the real encoded size of every jump /
   mov_addr (shrinking the conservative JUMP_MAX_SIZE reservation when a
   short B/BL or single-instruction form will fit), shifts all subsequent
   label sizes and jump/const addresses down by the accumulated saving,
   stores each jump's size (minus one, in instructions) above
   JUMP_SIZE_SHIFT in its flags, and finally shrinks compiler->size. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Worst case unless a short branch is proven to fit. */
			total_size = JUMP_MAX_SIZE - 1;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;
				if (jump->u.label->size > jump->addr) {
					/* Forward jump: the label will also move down. */
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins)))
					total_size = 1 - 1;
			}

			size_reduce += JUMP_MAX_SIZE - 1 - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 1;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				if (jump->u.label->size > jump->addr) {
					SLJIT_ASSERT(jump->u.label->size - size_reduce >= jump->addr);
					diff -= (sljit_sw)size_reduce;
				}

				if (diff <= 0xff + 2 && diff >= -0xff + 2)
					total_size = 0;
			}

			size_reduce += 1 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
856
857
#endif /* SLJIT_CONFIG_ARM_V7 */
858
859
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
860
{
861
struct sljit_memory_fragment *buf;
862
sljit_ins *code;
863
sljit_ins *code_ptr;
864
sljit_ins *buf_ptr;
865
sljit_ins *buf_end;
866
sljit_uw word_count;
867
SLJIT_NEXT_DEFINE_TYPES;
868
sljit_sw executable_offset;
869
sljit_uw addr;
870
sljit_sw diff;
871
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
872
sljit_uw cpool_size;
873
sljit_uw cpool_skip_alignment;
874
sljit_uw cpool_current_index;
875
sljit_ins *cpool_start_address;
876
sljit_ins *last_pc_patch;
877
struct future_patch *first_patch;
878
#endif
879
880
struct sljit_label *label;
881
struct sljit_jump *jump;
882
struct sljit_const *const_;
883
884
CHECK_ERROR_PTR();
885
CHECK_PTR(check_sljit_generate_code(compiler));
886
887
/* Second code generation pass. */
888
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
889
compiler->size += (compiler->patches << 1);
890
if (compiler->cpool_fill > 0)
891
compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
892
#else /* !SLJIT_CONFIG_ARM_V6 */
893
reduce_code_size(compiler);
894
#endif /* SLJIT_CONFIG_ARM_V6 */
895
code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
896
PTR_FAIL_WITH_EXEC_IF(code);
897
898
reverse_buf(compiler);
899
buf = compiler->buf;
900
901
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
902
cpool_size = 0;
903
cpool_skip_alignment = 0;
904
cpool_current_index = 0;
905
cpool_start_address = NULL;
906
first_patch = NULL;
907
last_pc_patch = code;
908
#endif /* SLJIT_CONFIG_ARM_V6 */
909
910
code_ptr = code;
911
word_count = 0;
912
label = compiler->labels;
913
jump = compiler->jumps;
914
const_ = compiler->consts;
915
SLJIT_NEXT_INIT_TYPES();
916
SLJIT_GET_NEXT_MIN();
917
918
do {
919
buf_ptr = (sljit_ins*)buf->memory;
920
buf_end = buf_ptr + (buf->used_size >> 2);
921
do {
922
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
923
if (cpool_size > 0) {
924
if (cpool_skip_alignment > 0) {
925
buf_ptr++;
926
cpool_skip_alignment--;
927
} else {
928
if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
929
SLJIT_FREE_EXEC(code, exec_allocator_data);
930
compiler->error = SLJIT_ERR_ALLOC_FAILED;
931
return NULL;
932
}
933
buf_ptr++;
934
if (++cpool_current_index >= cpool_size) {
935
SLJIT_ASSERT(!first_patch);
936
cpool_size = 0;
937
}
938
}
939
} else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
940
#endif /* SLJIT_CONFIG_ARM_V6 */
941
*code_ptr = *buf_ptr++;
942
if (next_min_addr == word_count) {
943
SLJIT_ASSERT(!label || label->size >= word_count);
944
SLJIT_ASSERT(!jump || jump->addr >= word_count);
945
SLJIT_ASSERT(!const_ || const_->addr >= word_count);
946
947
if (next_min_addr == next_label_size) {
948
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
949
label->size = (sljit_uw)(code_ptr - code);
950
label = label->next;
951
next_label_size = SLJIT_GET_NEXT_SIZE(label);
952
}
953
954
/* These structures are ordered by their address. */
955
if (next_min_addr == next_jump_addr) {
956
if (!(jump->flags & JUMP_MOV_ADDR)) {
957
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
958
if (detect_jump_type(jump, code_ptr, code, executable_offset))
959
code_ptr--;
960
jump->addr = (sljit_uw)code_ptr;
961
#else /* !SLJIT_CONFIG_ARM_V6 */
962
word_count += jump->flags >> JUMP_SIZE_SHIFT;
963
if (!detect_jump_type(jump, code_ptr, code, executable_offset)) {
964
code_ptr[2] = code_ptr[0];
965
addr = ((code_ptr[0] & 0xf) << 12);
966
code_ptr[0] = MOVW | addr;
967
code_ptr[1] = MOVT | addr;
968
code_ptr += 2;
969
}
970
SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins));
971
#endif /* SLJIT_CONFIG_ARM_V6 */
972
} else {
973
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
974
word_count += jump->flags >> JUMP_SIZE_SHIFT;
975
#endif /* SLJIT_CONFIG_ARM_V7 */
976
addr = (sljit_uw)code_ptr;
977
code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
978
jump->addr = addr;
979
}
980
jump = jump->next;
981
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
982
} else if (next_min_addr == next_const_addr) {
983
const_->addr = (sljit_uw)code_ptr;
984
const_ = const_->next;
985
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
986
}
987
988
SLJIT_GET_NEXT_MIN();
989
}
990
code_ptr++;
991
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
992
} else {
993
/* Fortunately, no need to shift. */
994
cpool_size = *buf_ptr++ & ~PUSH_POOL;
995
SLJIT_ASSERT(cpool_size > 0);
996
cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
997
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
998
if (cpool_current_index > 0) {
999
/* Unconditional branch. */
1000
*code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
1001
code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
1002
}
1003
cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
1004
cpool_current_index = 0;
1005
last_pc_patch = code_ptr;
1006
}
1007
#endif /* SLJIT_CONFIG_ARM_V6 */
1008
word_count++;
1009
} while (buf_ptr < buf_end);
1010
buf = buf->next;
1011
} while (buf);
1012
1013
if (label && label->size == word_count) {
1014
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1015
label->size = (sljit_uw)(code_ptr - code);
1016
label = label->next;
1017
}
1018
1019
SLJIT_ASSERT(!label);
1020
SLJIT_ASSERT(!jump);
1021
SLJIT_ASSERT(!const_);
1022
1023
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1024
SLJIT_ASSERT(cpool_size == 0);
1025
if (compiler->cpool_fill > 0) {
1026
cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
1027
cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
1028
if (cpool_current_index > 0)
1029
code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
1030
1031
buf_ptr = compiler->cpool;
1032
buf_end = buf_ptr + compiler->cpool_fill;
1033
cpool_current_index = 0;
1034
while (buf_ptr < buf_end) {
1035
if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
1036
SLJIT_FREE_EXEC(code, exec_allocator_data);
1037
compiler->error = SLJIT_ERR_ALLOC_FAILED;
1038
return NULL;
1039
}
1040
buf_ptr++;
1041
cpool_current_index++;
1042
}
1043
SLJIT_ASSERT(!first_patch);
1044
}
1045
#endif
1046
1047
jump = compiler->jumps;
1048
while (jump) {
1049
addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
1050
buf_ptr = (sljit_ins*)jump->addr;
1051
1052
if (jump->flags & JUMP_MOV_ADDR) {
1053
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1054
SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000);
1055
#else /* !SLJIT_CONFIG_ARM_V6 */
1056
SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0);
1057
#endif /* SLJIT_CONFIG_ARM_V6 */
1058
1059
if (jump->flags & PATCH_B) {
1060
SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0);
1061
diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2;
1062
1063
SLJIT_ASSERT(diff <= 0xff && diff >= -0xff);
1064
1065
addr = ADD;
1066
if (diff < 0) {
1067
diff = -diff;
1068
addr = SUB;
1069
}
1070
1071
buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff);
1072
} else {
1073
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1074
buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
1075
#else /* !SLJIT_CONFIG_ARM_V6 */
1076
buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
1077
buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff);
1078
#endif /* SLJIT_CONFIG_ARM_V6 */
1079
}
1080
} else if (jump->flags & PATCH_B) {
1081
diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
1082
SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000);
1083
*buf_ptr |= (diff >> 2) & 0x00ffffff;
1084
} else {
1085
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1086
if (jump->flags & IS_BL)
1087
buf_ptr--;
1088
1089
if (jump->flags & SLJIT_REWRITABLE_JUMP) {
1090
jump->addr = (sljit_uw)code_ptr;
1091
code_ptr[0] = (sljit_ins)buf_ptr;
1092
code_ptr[1] = *buf_ptr;
1093
set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0);
1094
code_ptr += 2;
1095
} else {
1096
if (*buf_ptr & (1 << 23))
1097
buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1098
else
1099
buf_ptr += 1;
1100
*buf_ptr = addr;
1101
}
1102
#else /* !SLJIT_CONFIG_ARM_V6 */
1103
set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0);
1104
#endif /* SLJIT_CONFIG_ARM_V6 */
1105
}
1106
1107
jump = jump->next;
1108
}
1109
1110
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1111
const_ = compiler->consts;
1112
while (const_) {
1113
buf_ptr = (sljit_ins*)const_->addr;
1114
const_->addr = (sljit_uw)code_ptr;
1115
1116
code_ptr[0] = (sljit_ins)buf_ptr;
1117
code_ptr[1] = *buf_ptr;
1118
if (*buf_ptr & (1 << 23))
1119
buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1120
else
1121
buf_ptr += 1;
1122
/* Set the value again (can be a simple constant). */
1123
set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
1124
code_ptr += 2;
1125
1126
const_ = const_->next;
1127
}
1128
#endif /* SLJIT_CONFIG_ARM_V6 */
1129
1130
SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
1131
1132
compiler->error = SLJIT_ERR_COMPILED;
1133
compiler->executable_offset = executable_offset;
1134
compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
1135
1136
code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1137
code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1138
1139
SLJIT_CACHE_FLUSH(code, code_ptr);
1140
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1141
return code;
1142
}
1143
1144
/* Reports whether the given CPU feature is available on this target.
   Returns 0 if unavailable, 1 if available; for SLJIT_HAS_CTZ on ARMv6
   it returns 2 — presumably marking an emulated (multi-instruction)
   implementation, see the ARMv6 CTZ sequence in emit_single_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
	case SLJIT_HAS_F64_AS_F32_PAIR:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */

	case SLJIT_HAS_SIMD:
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* No SIMD support on the ARMv6 code path. */
		return 0;
#else
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* Available by default. */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */
#endif /* SLJIT_CONFIG_ARM_V6 */

	case SLJIT_SIMD_REGS_ARE_PAIRS:
	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_CMOV:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	case SLJIT_HAS_MEMORY_BARRIER:
#endif /* SLJIT_CONFIG_ARM_V7 */
		return 1;

	case SLJIT_HAS_CTZ:
#if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
		return 2;
#else
		return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */

	default:
		return 0;
	}
}
/* --------------------------------------------------------------------- */
1194
/* Entry, exit */
1195
/* --------------------------------------------------------------------- */
1196
1197
/* Creates an index in data_transfer_insts array. */
1198
#define WORD_SIZE 0x00
1199
#define BYTE_SIZE 0x01
1200
#define HALF_SIZE 0x02
1201
#define PRELOAD 0x03
1202
#define SIGNED 0x04
1203
#define LOAD_DATA 0x08
1204
1205
/* Flag bits for emit_op. */
1206
#define ALLOW_IMM 0x10
1207
#define ALLOW_INV_IMM 0x20
1208
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
1209
#define ALLOW_NEG_IMM 0x40
1210
#define ALLOW_DOUBLE_IMM 0x80
1211
1212
/* s/l - store/load (1 bit)
1213
u/s - signed/unsigned (1 bit)
1214
w/b/h/N - word/byte/half/NOT allowed (2 bit)
1215
Storing signed and unsigned values are the same operations. */
1216
1217
static const sljit_ins data_transfer_insts[16] = {
1218
/* s u w */ 0xe5000000 /* str */,
1219
/* s u b */ 0xe5400000 /* strb */,
1220
/* s u h */ 0xe10000b0 /* strh */,
1221
/* s u N */ 0x00000000 /* not allowed */,
1222
/* s s w */ 0xe5000000 /* str */,
1223
/* s s b */ 0xe5400000 /* strb */,
1224
/* s s h */ 0xe10000b0 /* strh */,
1225
/* s s N */ 0x00000000 /* not allowed */,
1226
1227
/* l u w */ 0xe5100000 /* ldr */,
1228
/* l u b */ 0xe5500000 /* ldrb */,
1229
/* l u h */ 0xe11000b0 /* ldrh */,
1230
/* l u p */ 0xf5500000 /* preload */,
1231
/* l s w */ 0xe5100000 /* ldr */,
1232
/* l s b */ 0xe11000d0 /* ldrsb */,
1233
/* l s h */ 0xe11000f0 /* ldrsh */,
1234
/* l s N */ 0x00000000 /* not allowed */,
1235
};
1236
1237
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
1238
(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))
1239
1240
/* Normal ldr/str instruction.
1241
Type2: ldrsb, ldrh, ldrsh */
1242
#define IS_TYPE1_TRANSFER(type) \
1243
(data_transfer_insts[(type) & 0xf] & 0x04000000)
1244
#define TYPE2_TRANSFER_IMM(imm) \
1245
(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))
1246
1247
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
1248
((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))
1249
1250
/* Flags for emit_op: */
1251
/* Arguments are swapped. */
1252
#define ARGS_SWAPPED 0x01
1253
/* Inverted immediate. */
1254
#define INV_IMM 0x02
1255
/* Source and destination is register. */
1256
#define REGISTER_OP 0x04
1257
/* Unused return value. */
1258
#define UNUSED_RETURN 0x08
1259
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
1260
#define SET_FLAGS (1 << 20)
1261
/* dst: reg
1262
src1: reg
1263
src2: reg or imm (if allowed)
1264
SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
1265
#define SRC2_IMM (1 << 25)
1266
1267
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
1268
sljit_s32 dst, sljit_sw dstw,
1269
sljit_s32 src1, sljit_sw src1w,
1270
sljit_s32 src2, sljit_sw src2w);
1271
1272
/* Emits the function prologue: pushes the callee-saved GPRs (and lr),
   pushes the required VFP registers with 8-byte stack alignment, moves
   the incoming arguments into their assigned registers (softfp: from
   r0-r3/stack, hardfp: by remapping VFP registers), then allocates the
   local area on the stack. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	/* Build the register mask for the push: non-kept saved registers
	   plus the saved scratch registers. */
	imm = 0;
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		if ((size & SSIZE_OF(sw)) != 0) {
			/* Insert one word of padding so vpush stays 8-byte aligned. */
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the frame up to 8 bytes; local_size keeps only the local area. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* Softfp: arguments arrive in r0-r3 and on the stack; offset tracks
	   the current argument slot in bytes. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Already in the right register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp: float arguments already sit in VFP registers; record the
	   moves needed to compact them, then emit the moves in reverse order
	   so sources are not clobbered before being read. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
/* Records the function context (register usage and local area size)
   without emitting any code; computes local_size exactly as
   sljit_emit_enter does so later frame releases agree with the
   prologue. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types,
	sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
	sljit_s32 fscratches;
	sljit_s32 fsaveds;
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);

	scratches = ENTER_GET_REGS(scratches);
	saveds = ENTER_GET_REGS(saveds);
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

	/* Doubles are saved, so alignment is unaffected. */
	if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
		size += SSIZE_OF(sw);

	compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
	return SLJIT_SUCCESS;
}
/* Adds imm to the stack pointer. Uses a single ADD when imm can be
   encoded as an ARM rotated immediate, otherwise falls back to the
   generic emit_op path (which may need multiple instructions). */
static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
{
	sljit_uw imm2 = get_imm(imm);

	if (imm2 == 0)
		return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);

	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
}
/* Emits the function epilogue: frees the local area, restores the saved
   VFP registers, then reloads the saved GPRs and lr. frame_size < 0
   restores lr into TMP_REG2 instead of returning; frame_size > 0 leaves
   that many bytes of the frame in place for the caller (tail-call
   style). Single-register cases are optimized to one post-indexed ldr. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
		}

		/* Remaining alignment padding (0 or 4 bytes) before the GPRs. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	/* Collect the saved registers that were pushed by the prologue. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore besides lr; use single-ldr
		   forms. The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
/* Emits a return with no result: releases the whole stack frame and
   (inside emit_stack_frame_release) loads the return address into pc. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
/* Releases the stack frame and jumps to the given target (tail call).
   The target is first copied into TMP_REG1 when it lives in memory or
   in a saved register that the frame release is about to restore. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
/* --------------------------------------------------------------------- */
1647
/* Operators */
1648
/* --------------------------------------------------------------------- */
1649
1650
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1651
sljit_uw dst, sljit_uw src1, sljit_uw src2)
1652
{
1653
sljit_s32 reg, is_masked;
1654
sljit_uw shift_type;
1655
1656
switch (op) {
1657
case SLJIT_MOV:
1658
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1659
if (dst != src2) {
1660
if (src2 & SRC2_IMM) {
1661
return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1662
}
1663
return push_inst(compiler, MOV | RD(dst) | RM(src2));
1664
}
1665
return SLJIT_SUCCESS;
1666
1667
case SLJIT_MOV_U8:
1668
case SLJIT_MOV_S8:
1669
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1670
if (flags & REGISTER_OP)
1671
return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
1672
1673
if (dst != src2) {
1674
SLJIT_ASSERT(src2 & SRC2_IMM);
1675
return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1676
}
1677
return SLJIT_SUCCESS;
1678
1679
case SLJIT_MOV_U16:
1680
case SLJIT_MOV_S16:
1681
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1682
if (flags & REGISTER_OP)
1683
return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
1684
1685
if (dst != src2) {
1686
SLJIT_ASSERT(src2 & SRC2_IMM);
1687
return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1688
}
1689
return SLJIT_SUCCESS;
1690
1691
case SLJIT_CLZ:
1692
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1693
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
1694
return SLJIT_SUCCESS;
1695
1696
case SLJIT_CTZ:
1697
SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1698
SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED));
1699
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1700
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1701
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2)));
1702
FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1)));
1703
FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
1704
return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
1705
#else /* !SLJIT_CONFIG_ARM_V6 */
1706
FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
1707
return push_inst(compiler, CLZ | RD(dst) | RM(dst));
1708
#endif /* SLJIT_CONFIG_ARM_V6 */
1709
1710
case SLJIT_REV:
1711
case SLJIT_REV_U32:
1712
case SLJIT_REV_S32:
1713
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1714
return push_inst(compiler, REV | RD(dst) | RM(src2));
1715
1716
case SLJIT_REV_U16:
1717
case SLJIT_REV_S16:
1718
SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1719
FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
1720
if (!(flags & REGISTER_OP))
1721
return SLJIT_SUCCESS;
1722
return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
1723
case SLJIT_ADD:
1724
SLJIT_ASSERT(!(flags & INV_IMM));
1725
1726
if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1727
return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1728
return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1729
1730
case SLJIT_ADDC:
1731
SLJIT_ASSERT(!(flags & INV_IMM));
1732
return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1733
1734
case SLJIT_SUB:
1735
SLJIT_ASSERT(!(flags & INV_IMM));
1736
1737
if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1738
return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1739
1740
return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
1741
| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1742
1743
case SLJIT_SUBC:
1744
SLJIT_ASSERT(!(flags & INV_IMM));
1745
return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
1746
| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1747
1748
case SLJIT_MUL:
1749
SLJIT_ASSERT(!(flags & INV_IMM));
1750
SLJIT_ASSERT(!(src2 & SRC2_IMM));
1751
compiler->status_flags_state = 0;
1752
1753
if (!(flags & SET_FLAGS))
1754
return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));
1755
1756
reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1;
1757
FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1)));
1758
1759
/* cmp TMP_REG1, dst asr #31. */
1760
return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0);
1761
1762
case SLJIT_AND:
1763
if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
1764
return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1765
return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
1766
| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1767
1768
case SLJIT_OR:
1769
SLJIT_ASSERT(!(flags & INV_IMM));
1770
return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1771
1772
case SLJIT_XOR:
1773
if (flags & INV_IMM) {
1774
SLJIT_ASSERT(src2 == SRC2_IMM);
1775
return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
1776
}
1777
return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1778
1779
case SLJIT_SHL:
1780
case SLJIT_MSHL:
1781
shift_type = 0;
1782
is_masked = op == SLJIT_MSHL;
1783
break;
1784
1785
case SLJIT_LSHR:
1786
case SLJIT_MLSHR:
1787
shift_type = 1;
1788
is_masked = op == SLJIT_MLSHR;
1789
break;
1790
1791
case SLJIT_ASHR:
1792
case SLJIT_MASHR:
1793
shift_type = 2;
1794
is_masked = op == SLJIT_MASHR;
1795
break;
1796
1797
case SLJIT_ROTL:
1798
if (compiler->shift_imm == 0x20) {
1799
FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1800
src2 = TMP_REG2;
1801
} else
1802
compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
1803
/* fallthrough */
1804
1805
case SLJIT_ROTR:
1806
shift_type = 3;
1807
is_masked = 0;
1808
break;
1809
1810
case SLJIT_MULADD:
1811
return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1));
1812
1813
default:
1814
SLJIT_UNREACHABLE();
1815
return SLJIT_SUCCESS;
1816
}
1817
1818
SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));
1819
1820
if (compiler->shift_imm != 0x20) {
1821
SLJIT_ASSERT(src1 == TMP_REG1);
1822
1823
if (compiler->shift_imm != 0)
1824
return push_inst(compiler, MOV | (flags & SET_FLAGS) |
1825
RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
1826
return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
1827
}
1828
1829
SLJIT_ASSERT(src1 != TMP_REG2);
1830
1831
if (is_masked) {
1832
FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
1833
src2 = TMP_REG2;
1834
}
1835
1836
return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
1837
| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
1838
}
1839
1840
#undef EMIT_SHIFT_INS_AND_RETURN
1841
1842
/* Tests whether the immediate can be stored in the 12 bit imm field.
1843
Returns with 0 if not possible. */
1844
static sljit_uw get_imm(sljit_uw imm)
1845
{
1846
sljit_u32 rol;
1847
1848
if (imm <= 0xff)
1849
return SRC2_IMM | imm;
1850
1851
if (!(imm & 0xff000000)) {
1852
imm <<= 8;
1853
rol = 8;
1854
} else {
1855
imm = (imm << 24) | (imm >> 8);
1856
rol = 0;
1857
}
1858
1859
if (!(imm & 0xff000000)) {
1860
imm <<= 8;
1861
rol += 4;
1862
}
1863
1864
if (!(imm & 0xf0000000)) {
1865
imm <<= 4;
1866
rol += 2;
1867
}
1868
1869
if (!(imm & 0xc0000000)) {
1870
imm <<= 2;
1871
rol += 1;
1872
}
1873
1874
if (!(imm & 0x00ffffff))
1875
return SRC2_IMM | (imm >> 24) | (rol << 8);
1876
return 0;
1877
}
1878
1879
/* Decomposes imm into two data-processing immediates so that callers can
   build it with two instructions (e.g. MOV+ORR or MVN+BIC, see
   load_immediate). Returns the first encoded immediate, or 0 if no such
   decomposition exists; the second encoded immediate is stored in *imm2. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* No aligned zero byte found in the first pass:
			   rol by 8 and retry on the rotated value. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					return 0; /* No zero byte anywhere: not encodable in two immediates. */
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* Both significant bytes already sit in bits 8-23. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* Top byte is the first immediate; normalize the rest for the second. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Align the first byte to the top before encoding it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
1984
1985
/* Emits the shortest instruction sequence that loads the 32 bit constant
   imm into reg. Tries a single MOV/MVN with an encodable immediate first;
   on ARMv6 it falls back to a two-instruction pair (MOV+ORR or MVN+BIC via
   compute_imm) and finally a PC-relative literal load, while on newer
   targets it uses MOVW, optionally followed by MOVT. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* 16 bit constants are a single MOVW. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	/* The inverted value may be encodable even when imm is not. */
	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer. Last resort: PC-relative load from the literal pool. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* MOVW sets the low 16 bits; MOVT fills the high 16 bits if needed. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
2027
2028
/* Emits a single load or store for the memory operand (arg, argw).
   flags selects transfer size/direction; tmp_reg may be clobbered to build
   the address. Type1 transfers (word/byte) take a 12 bit offset, type2
   transfers (half/signed byte) only 8 bits, hence the mask/sign pair. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	/* Absolute address: materialize a nearby base, use the rest as offset. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1; /* tmp now holds the up/down (add/sub offset) bit. */

		if (argw < 0) {
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3; /* Only a 0-3 bit shift of the index register is supported. */

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers have no shifted register form: precompute the address. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
	}

	arg &= REG_MASK;

	/* Offset out of range: try to fold the large part into one ADD/SUB. */
	if (argw > mask) {
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	/* In-range immediate offset. */
	if (argw <= mask && argw >= -mask) {
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Fallback: load the full offset and use register-offset addressing. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
2118
2119
/* Central operand resolver and emitter for one and two operand integer ops.
   Loads memory/immediate operands into registers (or encodable immediates),
   optionally rewrites an op to its negated counterpart (ADD<->SUB) when the
   negated immediate is encodable, can split one constant across two
   instructions (ALLOW_DOUBLE_IMM), then emits via emit_single_op and stores
   the result if dst is memory. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefers register and simple consts. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg = 0;
	sljit_s32 src2_reg = 0;
	sljit_s32 src2_tmp_reg = 0;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	sljit_s32 neg_op = 0;
	sljit_u32 imm2;

	op = GET_OPCODE(op);

	/* Two-instruction immediates would corrupt the flags of the first step. */
	if (flags & SET_FLAGS)
		inp_flags &= ~ALLOW_DOUBLE_IMM;

	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	/* Pick the opposite opcode used when negating an immediate operand. */
	if (inp_flags & ALLOW_NEG_IMM) {
		switch (op) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to encode one of the immediates directly into the instruction. */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}

			if (neg_op != 0) {
				/* Negate arithmetic ops, invert carry-using ops. */
				src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}

			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);

				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		/* Move operations. */
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			/* Register-to-memory move is a plain store. */
			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= REGISTER_OP;

		src2_tmp_reg = dst_reg;
	} else {
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
			if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16))
				flags |= REGISTER_OP;
		}

		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
	}

	if (src2_reg == 0 && (src2 & SLJIT_MEM)) {
		src2_reg = src2_tmp_reg;
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1));
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	} else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
		/* src1_reg stays 0 only when a double-immediate may absorb it later. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = src2_tmp_reg;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (!(inp_flags & ALLOW_DOUBLE_IMM))
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
		else {
			SLJIT_ASSERT(!(flags & SET_FLAGS));

			if (src1_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
				src1_reg = TMP_REG1;
			}

			/* Split the constant into two encodable immediates if possible. */
			src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);

			if (src2_reg == 0 && neg_op != 0) {
				src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
				if (src2_reg != 0)
					op = neg_op;
			}

			if (src2_reg == 0) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w));
				src2_reg = src2_tmp_reg;
			} else {
				/* Emit the first half now; the second immediate follows below. */
				FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
				src1_reg = dst_reg;
				src2_reg = (sljit_s32)imm2;

				/* Carry was already consumed by the first instruction. */
				if (op == SLJIT_ADDC)
					op = SLJIT_ADD;
				else if (op == SLJIT_SUBC)
					op = SLJIT_SUB;
			}
		}
	}

	if (src1_reg == 0) {
		SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));

		src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);

		if (src1_reg == 0 && neg_op != 0) {
			src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
			if (src1_reg != 0)
				op = neg_op;
		}

		if (src1_reg == 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1_reg = TMP_REG1;
		} else {
			/* Commutative ops only reach here, so the operand order can swap. */
			FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
			src1_reg = dst_reg;
			src2_reg = (sljit_s32)imm2;

			if (op == SLJIT_ADDC)
				op = SLJIT_ADD;
		}
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2340
2341
#ifdef __cplusplus
2342
extern "C" {
2343
#endif
2344
2345
#if defined(__GNUC__)
2346
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2347
extern int __aeabi_idivmod(int numerator, int denominator);
2348
#else
2349
#error "Software divmod functions are needed"
2350
#endif
2351
2352
#ifdef __cplusplus
2353
}
2354
#endif
2355
2356
/* Emits a zero-operand operation. Division is implemented by calling the
   EABI runtime helpers (__aeabi_uidivmod / __aeabi_idivmod); live scratch
   registers that the call would clobber are spilled around it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64 bit result: low word to R0, high word to R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect the machine registers (r1-r3) that must survive the call. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* Keep the stack 8 byte aligned (8 or 16 byte frame). */
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			/* Restore in reverse order; the last load also pops the frame. */
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_MEMORY_BARRIER:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		return push_inst(compiler, DMB_SY);
#else /* !SLJIT_CONFIG_ARM_V7 */
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_ARM_V7 */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2437
2438
/* Dispatches one-operand operations to emit_op with the proper transfer
   size/sign flags. Immediates for the narrow moves are pre-truncated so
   the later code never sees an out-of-range constant. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		/* All 32 bit moves are the same instruction on this target. */
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_CLZ:
	case SLJIT_CTZ:
	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}
2481
2482
/* Dispatches two-operand operations, choosing which immediate encodings
   emit_op may exploit for each opcode (negated, inverted, two-instruction).
   Shift amounts are passed through compiler->shift_imm: 0-31 for constant
   shifts, 0x20 as the register-shift marker. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 inp_flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
	case SLJIT_SUB:
	case SLJIT_SUBC:
		/* ADD/SUB can negate an unencodable immediate. */
		return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_OR:
		return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_XOR:
		inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
		/* XOR with -1 is MVN, so the inverted immediate form is usable. */
		if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
			inp_flags |= ALLOW_INV_IMM;
		}
		return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		/* MUL has no immediate form at all. */
		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
		if (src2 == SLJIT_IMM) {
			compiler->shift_imm = src2w & 0x1f;
			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
		} else {
			compiler->shift_imm = 0x20;
			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
		}
	}

	return SLJIT_SUCCESS;
}
2537
2538
/* Two-operand operation whose result is discarded (only the status flags
   matter). TMP_REG1 as destination marks the unused-return case in emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2548
2549
/* Two-operand operation where dst_reg is also an input (e.g. MULADD
   accumulates into dst_reg). Unsupported opcodes are silently accepted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (GET_OPCODE(op) == SLJIT_MULADD)
		return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w);

	return SLJIT_SUCCESS;
}
2566
2567
/* Emits a double-register shift: dst = src1 shifted by src3 with the bits
   shifted out of src2 entering from the opposite side. When both sources
   are the same register this degenerates into a rotate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Shift type of ROR is 3. */
	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		/* Shift by zero leaves src1 unchanged; nothing to emit. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Shift src1, then OR in the complementary shift of src2. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
		src3w = (src3w ^ 0x1f) + 1; /* 32 - src3w */
		return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Mask the shift amount to 0-31 where the op requires it, or when
	   src3 would be clobbered by the first MOV below. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
	/* Pre-shift src2 by one so the (32 - n) shift below cannot reach 32. */
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
	return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
}
2615
2616
/* Emits operations that only consume a source operand: fast (lr based)
   return and cache prefetch hints. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to lr on this target. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		SLJIT_ASSERT(src & SLJIT_MEM);
		/* PLD encodes like a load with the PC as destination. */
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2645
2646
/* Emits operations that only produce a destination operand: fast-enter
   (capture lr) and fetching the saved return address from the frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is mapped to lr on this target. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Compute the offset of the saved return address in the frame. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	/* Common tail: spill the value when dst is a memory operand. */
	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2685
2686
/* Maps an abstract sljit register to its hardware register index for the
   requested register class; returns -1 for unsupported classes. A 128 bit
   SIMD register uses an even/odd doubleword pair, hence the cleared low bit. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
	sljit_s32 index = -1;

	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));

	if (type == SLJIT_GP_REGISTER)
		index = reg_map[reg];
	else if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
		index = freg_map[reg];
	else if (type == SLJIT_SIMD_REG_128)
		index = freg_map[reg] & ~0x1;

	return index;
}
2701
2702
/* Emits a raw, caller-supplied machine instruction verbatim.
   On this target an instruction is a single 32 bit word; size is only
   used by the debug checks. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_ins ins;

	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	ins = *(sljit_ins*)instruction;
	return push_inst(compiler, ins);
}
2711
2712
/* --------------------------------------------------------------------- */
2713
/* Floating point operators */
2714
/* --------------------------------------------------------------------- */
2715
2716
#define FPU_LOAD (1 << 20)
2717
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
2718
((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2719
2720
/* Emits a VFP load or store for the memory operand (arg, argw). VFP
   transfers only accept a word-aligned 8 bit offset (offset/4 in the
   encoding), so larger offsets are folded into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	/* Indexed form: no VFP register-offset mode, precompute the address. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
		arg = TMP_REG1;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Offset must be a multiple of 4 within +/-1020. */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Fold the out-of-range part into the base with one ADD/SUB. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* General case: build the full address in TMP_REG1. */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
}
2763
2764
/* Converts a float/double to a signed word: VCVT into TMP_FREG1, then
   move the integer result to a core register or store it to memory. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* The SLJIT_32 bit is inverted for the FPU operation helpers. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	if (FAST_IS_REG(dst))
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2783
2784
/* Common path for integer-to-float conversions: moves the source word
   into TMP_FREG1, applies the supplied VCVT encoding (ins), and stores the
   result if dst is a memory operand. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate source: build it in TMP_REG1 first. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2807
2808
/* Signed word to float conversion; the inverted SLJIT_32 bit selects the
   single/double variant expected by the common helper. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = VCVT_F32_S32 | (sljit_ins)(~op & SLJIT_32);

	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2814
2815
/* Unsigned word to float conversion; the inverted SLJIT_32 bit selects the
   single/double variant expected by the common helper. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins = VCVT_F32_U32 | (sljit_ins)(~op & SLJIT_32);

	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
2821
2822
/* Emits a floating point compare (VCMP) and copies the FPU status flags
   into the core CPSR with VMRS so integer conditional codes can test them. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* SLJIT_32 is inverted: cleared bit selects single precision below. */
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	FAIL_IF(push_inst(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* Extra conditional compare (executed on the VS/overflow condition) to
	   fold the unordered case into the equal flag state. */
	return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
/* Single-operand floating point operations: move, negate, absolute value,
   and the float <-> double / integer conversions dispatched by
   SELECT_FOP1_OPERATION_WITH_CHECKS. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	/* The instruction emitters rely on SLJIT_32 being exactly bit 8. */
	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* SLJIT_32 is inverted for most ops; F64_FROM_F32 needs the source
	   precision first and flips it after the conversion below. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Store directly from the source register; skip the copy. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* Result precision differs from the source; flip for the store. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
/* Two-operand floating point operations (add, sub, mul, div, copysign).
   Note the operand order in EMIT_FPU_OPERATION: src2 is passed as the
   first source and src1 as the second. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* SLJIT_32 is inverted: cleared bit selects single precision. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the word holding src2's sign bit into TMP_REG1 (bit 7 of the
		   VMOV selects the upper half for doubles). */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		/* dst = |src1|, then conditionally negate when the sign word
		   compares signed-less-than zero (0xb0000000 is the LT condition). */
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
		return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
	}

	if (dst_r != dst)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
#undef EMIT_FPU_DATA_TRANSFER
2948
2949
/* Loads a 32-bit float constant into a VFP register. When the value fits
   the 8-bit VMOV.F32 immediate form (NEON builds only), a single
   instruction is emitted; otherwise the raw bits go through TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type punning to access the IEEE-754 bit pattern. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable immediates have all low 19 mantissa bits zero and an
	   exponent in a narrow range around the bias. */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		if (exp == 0x20 || exp == 0x1f) {
			/* Pack sign + 7 payload bits into the VMOV immediate fields. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: materialize the bits in a core register and transfer. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
/* Loads a 64-bit float constant into a VFP register. Uses the VMOV.F64
   immediate form when encodable (NEON builds); otherwise transfers both
   32-bit halves from core registers with VMOV2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type punning; imm[0] is the low word, imm[1] the high word
	   holding sign, exponent and upper mantissa bits. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable immediates: low word and low 16 bits of the high word zero,
	   exponent restricted to a narrow range around the bias. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			/* (1 << 8) selects the double-precision immediate form. */
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	if (u.imm[0] == u.imm[1])
		/* Both halves identical: reuse TMP_REG1 for both transfers. */
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
/* Raw bit copy between core register(s) and a floating point register.
   A register pair moves both 32-bit halves of a double with one VMOV2;
   a single register uses VMOV (bit 7 selects the high half of a double).
   Bit 20 reverses the transfer direction for SLJIT_COPY_FROM_F64. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		inst = VMOV2 | RN(REG_PAIR_FIRST(reg)) | RD(REG_PAIR_SECOND(reg)) | VM(freg);
	} else {
		inst = VMOV | VN(freg) | RD(reg);

		/* For a double, a lone core register maps to the upper half. */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst(compiler, inst);
}
/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */
/* Maps an sljit condition type to the ARM condition code placed in the
   top four bits (31-28) of an instruction word. The hexadecimal values
   follow the standard ARM condition field encoding. */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		/* Carry semantics depend on whether the flags came from an add
		   (carry set) or a subtract (borrow clear). */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x30000000; /* CC */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow was computed into Z. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL (unconditional) */
	}
}
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3138
{
3139
struct sljit_label *label;
3140
3141
CHECK_ERROR_PTR();
3142
CHECK_PTR(check_sljit_emit_label(compiler));
3143
3144
if (compiler->last_label && compiler->last_label->size == compiler->size)
3145
return compiler->last_label;
3146
3147
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3148
PTR_FAIL_IF(!label);
3149
set_label(label, compiler);
3150
return label;
3151
}
3152
3153
/* Emits a (conditional) jump or call whose target is patched later.
   ARMv6 loads the target from a literal pool; later cores emit a
   BX/BLX placeholder and reserve JUMP_MAX_SIZE words for patching. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));

	/* Load the target address from the literal pool; plain jumps load
	   straight into pc, calls load into TMP_REG1 for the BLX below. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));
	jump->addr = compiler->size - 1;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		compiler->patches++;

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		jump->addr = compiler->size;
		PTR_FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	/* Reserve room so the jump can be expanded during final linking. */
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
#ifdef __SOFTFP__
3194
3195
/* Marshals call arguments for the soft-float ABI: the first four words go
   in r0-r3, the rest on the stack, with doubles 8-byte aligned. A first
   pass computes each argument's word offset; a second pass (in reverse)
   moves values into place without clobbering still-needed registers.
   '*src' (optional) is the call target register and is relocated if an
   argument would overwrite it. '*extra_space' returns the stack bytes
   allocated here (0 if none); on entry its SLJIT_CALL_RETURN bit marks a
   tail call. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	/* Skip the return type. */
	arg_types >>= SLJIT_ARG_SHIFT;

	/* Pass 1: assign offsets (reversing the type list into 'types'). */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles require 8-byte alignment. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack portion only, rounded up to 8-byte alignment. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Save the call target first if either half clobbers it. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The target follows its value to the new register. */
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
/* After a soft-float call, transfers a floating point return value from
   the core register(s) back into the first VFP register. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		return push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		return push_inst(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
#else /* !__SOFTFP__ */
3329
3330
/* Compacts floating point arguments into consecutive VFP argument slots
   for the hard-float ABI. 'offset' walks sljit's register assignment,
   'new_offset' the ABI slot; 'f32_offset' remembers a half-filled slot so
   a second float can back-fill its upper single register. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the upper single of the remembered slot
				   (0x400000 selects the odd single register). */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
#endif /* __SOFTFP__ */
3371
3372
/* Emits a function call with ABI argument marshalling. Soft-float builds
   may need extra stack for arguments, which is released after the call
   returns (or before a tail call's final BX). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	/* Carries the SLJIT_CALL_RETURN bit in; receives stack size out. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Frame already released with no extra stack: plain jump suffices. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Recover the saved return address before freeing the space. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the frame, then jump instead of call. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
/* Indirect jump/call. Register and memory targets are emitted directly
   as BX/BLX; immediate targets create a patchable jump record like
   sljit_emit_jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	jump->addr = compiler->size;
	/* Target address is loaded from the literal pool (pc for plain jumps,
	   TMP_REG1 when a BLX follows). */
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL) {
		jump->addr = compiler->size;
		FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
	/* Reserve room so the jump can be expanded during final linking. */
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return SLJIT_SUCCESS;
}
/* Indirect function call with ABI argument marshalling; the indirect
   counterpart of sljit_emit_call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	/* Carries the SLJIT_CALL_RETURN bit in; receives stack size out. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Saved registers are restored by the frame release below, so a target
	   kept in one must first be moved to a temporary. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Recover the saved return address before freeing the space. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the frame, then jump instead of call. */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
#ifdef __SOFTFP__
3535
3536
/* Soft-float builds: moves a floating point return value into the core
   return registers (r0 for f32, r0/r1 for f64) before returning. With
   SLJIT_ENTER_REG_ARG the value stays in the FPU return register instead. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory source: load the raw bits straight into the core registers. */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
#endif /* __SOFTFP__ */
3560
3561
/* Materializes the condition 'type' as 0/1 into dst, or combines it into
   dst with AND/OR/XOR using conditionally executed instructions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain move: dst = 0, then conditionally dst = 1. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* Conditionally apply 'op' with immediate 1. */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* AND also needs the opposite condition to clear the result
	   (cc ^ 0x10000000 inverts an ARM condition code). */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Recompute the zero flag from the final value when requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
/* Conditional select: dst_reg = condition(type) ? src1 : src2_reg,
   implemented with conditionally executed MOV/MVN/MOVW instructions.
   Inverting the low bit of 'type' (type ^= 0x1) swaps the condition,
   which lets the operands be reordered to avoid extra copies. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Try the rotated-immediate forms of MOV and MVN first. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* ARMv7: conditional MOVW (+ MOVT for the upper half). */
		tmp = (sljit_ins)src1w;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else /* !SLJIT_CONFIG_ARM_V7 */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
/* Floating point conditional select: dst_freg = condition(type) ? src1
   : src2_freg, using a conditionally executed VMOV. As in
   sljit_emit_select, flipping type's low bit swaps the condition. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* SLJIT_32 is inverted: cleared bit selects single precision. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap operands and invert the condition instead of copying. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
#undef EMIT_FPU_OPERATION
3694
3695
/* Rewrites *mem / *memw so that the remaining offset fits into a single
   load/store instruction: on success *memw is within [-0xfff, max_offset]
   and *mem is either the original base register or TMP_REG1 holding the
   base plus the excess part of the offset. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);

	*mem = TMP_REG1;

	/* Base + (index << shift) form: fold it into TMP_REG1, the
	   immediate offset becomes zero. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Offset already fits into the instruction. */
		if (argw <= max_offset && argw >= -mask) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Round the upper part of the offset (optionally bumping it
			   by 'sign' so the remainder can be negative) and try to add
			   it with a single encodable ADD immediate. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Negative offset: subtract the rounded upper part instead. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: materialize the rounded offset with a (possibly multi
	   instruction) immediate load, then add the base register if any. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3756
3757
/* Emits a (possibly register pair) memory transfer. Pairs are emitted as
   two word sized transfers at memw and memw + sizeof(sljit_sw). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	/* Single register transfers go through the generic unaligned path. */
	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	/* Reserve room for the second word: memw + 4 must still fit. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		/* When the first target register is also the base register,
		   load the second word first so the base is not clobbered
		   before the second access. */
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3787
3788
/* Emits a load/store that also updates the base register (pre- or
   post-indexed addressing). Returns SLJIT_ERR_UNSUPPORTED when the
   requested form cannot be encoded in a single instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type1: word / unsigned byte transfers (LDR/STR form); the other
	   sizes use the type2 encoding (LDRH/LDRSB/...) which has a smaller
	   immediate range and cannot scale an index register. */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte load uses the type2 encoding; stores do not. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Type2 transfers cannot shift the index register. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			/* 12 bit immediate plus a separate sign (U) bit. */
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			/* Type2: 8 bit split immediate. */
			return SLJIT_ERR_UNSUPPORTED;
	}

	/* Caller only probed whether the form is supported. */
	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		/* Bit 25 selects the register offset form for type1 transfers. */
		if (is_type1_transfer)
			inst |= (1 << 25);

		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);	/* Clear P: post-indexed. */
		else
			inst |= (1 << 21);	/* Set W: pre-indexed with writeback. */

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);	/* Clear P: post-indexed. */
	else
		inst |= (1 << 21);	/* Set W: pre-indexed with writeback. */

	if (is_type1_transfer) {
		/* Bit 23 (U): add the offset; negative offsets clear U and
		   encode the magnitude. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
3888
3889
/* Emits a floating point load/store. Aligned accesses use the FPU
   transfer directly; unaligned ones are split into word sized integer
   transfers through VMOV. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	/* 4 byte aligned access: a single FPU transfer handles it. */
	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the low word to an integer register first. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		/* Doubles need two word stores; reserve room for memw + 4. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80 selects the high word half of the double register. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		/* Single precision load: one word through an integer register. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	/* Double precision load: two word loads then VMOV2 into the
	   double register. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
3925
3926
/* Reduces a SIMD memory operand to a plain base register: *mem_ptr is
   replaced by a register (possibly TMP_REG1) that holds the complete
   effective address, since NEON structure loads take no offset. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 arg = *mem_ptr;
	sljit_uw abs_offset;
	sljit_uw encoded;

	/* Base + (index << shift) form: fold into TMP_REG1. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK)
			| RM(OFFS_REG(arg)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	/* Absolute address: materialize it in TMP_REG1. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	arg &= REG_MASK;

	/* No offset: the base register can be used as-is. */
	if (memw == 0) {
		*mem_ptr = arg;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Prefer a single ADD/SUB with an encodable immediate. */
	abs_offset = (sljit_uw)(memw < 0 ? -memw : memw);
	encoded = get_imm(abs_offset);

	if (encoded != 0) {
		if (memw < 0)
			return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | encoded);
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | encoded);
	}

	/* Fallback: load the offset, then add the base register. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3957
3958
/* Maps a float register index to the even-indexed first half of the
   quad register pair it belongs to. */
static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
{
	/* Round an odd index up to the following even index first. */
	sljit_s32 aligned = freg + (freg & 0x1);

	SLJIT_ASSERT((freg_map[aligned] & 0x1) == (aligned <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));

	/* Scratch registers map in the opposite direction: step back one. */
	return (aligned <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS) ? aligned - 1 : aligned;
}
3969
3970
/* Signed step to the other d register half of a quad register pair:
   evaluates to +1 when freg is odd and -1 when freg is even. */
#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3971
3972
/* Moves a SIMD register to/from another register or memory.
   reg_size: 3 for 8 byte (d) vectors, 4 for 16 byte (q) vectors.
   Returns SLJIT_ERR_UNSUPPORTED for unsupported size combinations. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 8 and 16 byte vectors are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register to register copy: VORR vd, vn, vn. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD(srcdst) | VN(vreg) | VM(vreg);
		else
			ins = VD(vreg) | VN(srcdst) | VM(srcdst);

		/* Bit 6 (Q) selects the quad register form. */
		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(vreg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Alignment hint encoding, kept consistent with sljit_emit_simd_op2:
	   the previous `>= 3` comparison was only reachable for values above
	   3 (the first branch consumes == 3), so spell it out as >= 4. */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
4030
4031
/* Tries to encode 'value' as a NEON modified immediate (VMOV/VMVN form).
   Returns the cmode/op selector bits combined with the 8 bit immediate
   split into the instruction's fields, or ~0 when not encodable. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* A 32 bit value whose halves are equal can be encoded as a
	   replicated 16 bit immediate. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* Likewise a 16 bit value with equal bytes becomes an 8 bit one. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		/* First pass tries the value directly; on failure the second
		   pass tries the bitwise inverse (bit 5 switches to MVN form). */
		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		/* Same two pass scheme for 32 bit values; each branch matches
		   one of the shifted / filled immediate placements. */
		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8 bit immediate into the instruction's split fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
4124
4125
/* Broadcasts a scalar (register, memory or immediate) into every lane
   of a SIMD register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Integer lanes up to 32 bit; float lanes must be 32 or 64 bit. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	/* Zero is a single VMOV immediate regardless of lane size. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(vreg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		/* 64 bit lanes: copy the double into both halves separately. */
		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, vreg, src, srcw));
			src = vreg;
		} else if (vreg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 single-element-to-all-lanes form. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5;

		return push_inst(compiler, VLD1_r | ins | VD(vreg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		/* VDUP (scalar) from a single precision source lane. */
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(vreg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the lane width. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(vreg));
		}

		/* Not encodable as a VMOV immediate: go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP (core register) lane size selector bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(vreg) | RD(src));
}
4224
4225
/* Moves a single lane of a SIMD register to/from a register, memory
   location or immediate. SLJIT_SIMD_LANE_ZERO clears the other lanes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* Double lane from a register: copy into one half, zero
			   the other half. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					vreg += SLJIT_QUAD_OTHER_HALF(vreg);

				if (srcdst != vreg)
					FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(srcdst) | VM(srcdst)));

				vreg += SLJIT_QUAD_OTHER_HALF(vreg);
				return push_inst(compiler, VMOV_i | VD(vreg));
			}

			/* Source overlaps the destination: save it to TMP_FREG2
			   before the destination is zeroed below. */
			if (srcdst == vreg || (elem_size == 3 && srcdst == (vreg + SLJIT_QUAD_OTHER_HALF(vreg)))) {
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(vreg) | VM(vreg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Zero the whole destination register. */
		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(vreg)));
	}

	/* Lanes in the upper half of a quad register are accessed through
	   the other d register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, vreg, srcdst, srcdstw);

		/* Single lane VLD1/VST1. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(vreg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(vreg) | VM(vreg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(vreg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* When the source lane maps to an s register directly, a
			   single VMOV.F32 is enough. */
			if (freg_ebit_map[vreg] == 0) {
				if (lane_index == 1)
					vreg = SLJIT_F64_SECOND(vreg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(vreg));
			}

			/* Otherwise transfer through a core register. */
			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(vreg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate to the lane width before loading it. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV (scalar <-> core) lane size selector bits. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20);

		/* Zero extend sub-word lanes unless a signed store is requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(vreg) | RD(srcdst));
}
4341
4342
/* Broadcasts a single lane of 'src' into every lane of 'vreg'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of the quad register are reached
		   through the other d register. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	/* 64 bit lanes: plain register copies (VDUP has no 64 bit form). */
	if (elem_size == 3) {
		if (vreg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src)));

		vreg += SLJIT_QUAD_OTHER_HALF(vreg);

		if (vreg != src)
			return push_inst(compiler, VORR | VD(vreg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP (scalar): lane index and size share the imm4 field. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst(compiler, VDUP_s | ins | VD(vreg) | VM(src));
}
4390
4391
/* Widens each lane of the source from elem_size to elem2_size
   (sign/zero extended integers, or float to double). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float widening is only float32 -> float64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		vreg = simd_get_quad_reg_index(vreg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load only the part of the vector that is actually extended. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(vreg) | RN(src) | 0xf));
		else
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(vreg) | RN(src) | 0xf));
		src = vreg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? vreg : TMP_FREG2;

		/* Repeated VSHLL #0 doubles the lane width each step until
		   elem2_size is reached. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(vreg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	/* When the destination overlaps the source, convert the upper half
	   first so the source is not overwritten (0x20 selects the upper
	   source half). */
	if (vreg == src) {
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20));
		vreg += SLJIT_QUAD_OTHER_HALF(vreg);
		return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src)));
	vreg += SLJIT_QUAD_OTHER_HALF(vreg);
	return push_inst(compiler, VCVT_F64_F32 | VD(vreg) | VM(src) | 0x20);
}
4455
4456
/* Extracts the sign bit of every lane into an integer bitmask stored
   in 'dst' (movemask style operation). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 'ins' shifts the sign bit to the lane bottom; 'imms' packs a list
	   of VSRA shift amounts (one byte each) used to fold the per-lane
	   bits together. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		vreg = simd_get_quad_reg_index(vreg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(vreg)));

	/* Narrow 16 byte vectors so the accumulation fits into a d register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Accumulate the lane bits with the queued VSRA shifts. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	/* Byte lanes of a 16 byte vector need the second half merged in. */
	if (reg_size == 4 && elem_size == 0) {
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4532
4533
/* Emits a two operand SIMD operation (AND/OR/XOR/shuffle):
   dst_vreg = src1_vreg OP src2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment;
	sljit_ins ins = 0, load_ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VORR;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VEOR;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = VTBL;
		break;
	}

	if (src2 & SLJIT_MEM) {
		/* Load the second operand into TMP_FREG2 with VLD1. */
		if (elem_size > 3)
			elem_size = 3;

		load_ins = VLD1 | (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
		alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);

		SLJIT_ASSERT(reg_size >= alignment);

		/* Alignment hint bits of the VLD1 encoding. */
		if (alignment == 3)
			load_ins |= 0x10;
		else if (alignment >= 4)
			load_ins |= 0x20;

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src2, src2w));
		FAIL_IF(push_inst(compiler, load_ins | VD(TMP_FREG2) | RN(src2) | ((sljit_ins)elem_size) << 6 | 0xf));
		src2 = TMP_FREG2;
	}

	if (reg_size == 4) {
		dst_vreg = simd_get_quad_reg_index(dst_vreg);
		src1_vreg = simd_get_quad_reg_index(src1_vreg);
		src2 = simd_get_quad_reg_index(src2);

		if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE) {
			/* VTBL with a two register table; go through TMP_FREG2
			   when the destination aliases the table. */
			ins |= (sljit_ins)1 << 8;

			FAIL_IF(push_inst(compiler, ins | VD(dst_vreg != src1_vreg ? dst_vreg : TMP_FREG2) | VN(src1_vreg) | VM(src2)));
			src2 += SLJIT_QUAD_OTHER_HALF(src2);
			FAIL_IF(push_inst(compiler, ins | VD(dst_vreg + SLJIT_QUAD_OTHER_HALF(dst_vreg)) | VN(src1_vreg) | VM(src2)));

			if (dst_vreg == src1_vreg)
				return push_inst(compiler, VORR | VD(dst_vreg) | VN(TMP_FREG2) | VM(TMP_FREG2));
			return SLJIT_SUCCESS;
		}

		ins |= (sljit_ins)1 << 6;
	}

	return push_inst(compiler, ins | VD(dst_vreg) | VN(src1_vreg) | VM(src2));
}
4610
4611
#undef FPU_LOAD
4612
4613
/* Emits an exclusive load (LDREX family) for an atomic sequence. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	sljit_u32 ins;
	sljit_s32 opcode;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	opcode = GET_OPCODE(op);

	/* No sign extending exclusive loads exist. */
	if (opcode == SLJIT_MOV_S8 || opcode == SLJIT_MOV_S16 || opcode == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (opcode == SLJIT_MOV_U8)
		ins = LDREXB;
	else if (opcode == SLJIT_MOV_U16)
		ins = LDREXH;
	else
		ins = LDREX;

	/* Caller only probed for support. */
	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
}
4647
4648
/* Emits an exclusive store (STREX family) for an atomic sequence;
   optionally compares the status result to set the stored flag. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_u32 ins;
	sljit_s32 opcode;

	/* temp_reg == mem_reg is undefined so use another temp register */
	SLJIT_UNUSED_ARG(temp_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if (op & SLJIT_ATOMIC_USE_CAS)
		return SLJIT_ERR_UNSUPPORTED;

	opcode = GET_OPCODE(op);

	/* Sign extending variants are not available. */
	if (opcode == SLJIT_MOV_S8 || opcode == SLJIT_MOV_S16 || opcode == SLJIT_MOV_S32)
		return SLJIT_ERR_UNSUPPORTED;

	if (opcode == SLJIT_MOV_U8)
		ins = STREXB;
	else if (opcode == SLJIT_MOV_U16)
		ins = STREXH;
	else
		ins = STREX;

	/* Caller only probed for support. */
	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	/* STREX writes its status (0 on success) into TMP_REG1. */
	FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));

	if (!(op & SLJIT_SET_ATOMIC_STORED))
		return SLJIT_SUCCESS;

	return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
}
4690
4691
/* Emits a patchable constant load: the emitted value can later be
   changed with sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative literal load; the literal itself is patched. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: MOVW/MOVT pair holding the value in the instructions. */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
4718
4719
/* Emits a patchable address load; the target address is filled in when
   the code is generated (or later via sljit_set_jump_addr). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative literal load with a patchable literal. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: placeholder instruction carrying only the target register;
	   it is rewritten to MOVW/MOVT when addresses are resolved. */
	PTR_FAIL_IF(push_inst(compiler, RD(dst_r)));
#endif /* SLJIT_CONFIG_ARM_V6 */

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 1);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	/* Reserve space for the second (MOVT) instruction. */
	compiler->size += 1;
#endif /* SLJIT_CONFIG_ARM_V7 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return jump;
}
4749
4750
/* Patches the target address of a previously emitted jump/mov_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	set_jump_addr(addr, executable_offset, new_target, 1);
}
4754
4755
/* Patches the value of a constant emitted by sljit_emit_const. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1);
}
4759
4760