GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#include <sys/auxv.h>
28
29
#ifdef __ARCH__
30
#define ENABLE_STATIC_FACILITY_DETECTION 1
31
#else
32
#define ENABLE_STATIC_FACILITY_DETECTION 0
33
#endif
34
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
36
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37
{
38
return "s390x" SLJIT_CPUINFO;
39
}
40
41
/* Instructions are stored as 64 bit values regardless of their size. */
42
typedef sljit_uw sljit_ins;
43
44
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
45
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
46
47
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
48
0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
49
};
50
51
/* there are also a[2-15] available, but they are slower to access and
52
* their use is limited as mundaym explained:
53
* https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54
*/
55
56
/* General Purpose Registers [0-15]. */
57
typedef sljit_uw sljit_gpr;
58
59
/*
60
* WARNING
61
 * the following code is non-standard and should be improved for
62
* consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
63
* registers because r0 and r1 are the ABI recommended volatiles.
64
* there is a gpr() function that maps sljit to physical register numbers
65
* that should be used instead of the usual index into reg_map[] and
66
* will be retired ASAP (TODO: carenas)
67
*/
68
69
static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70
static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71
static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
72
static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
73
static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
74
static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
75
static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
76
static const sljit_gpr r7 = 7; /* reg_map[6] */
77
static const sljit_gpr r8 = 8; /* reg_map[7] */
78
static const sljit_gpr r9 = 9; /* reg_map[8] */
79
static const sljit_gpr r10 = 10; /* reg_map[9] */
80
static const sljit_gpr r11 = 11; /* reg_map[10] */
81
static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
82
static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
83
static const sljit_gpr r14 = 14; /* reg_map[0]: return address */
84
static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85
86
/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
88
/* TODO(carenas): r13 usually points to the literal "pool" per the ABI; using a tmp
89
 * like we do now might be faster though, reserve?
90
*/
91
92
/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
93
#define tmp0 r0
94
#define tmp1 r1
95
96
/* Link register. */
97
static const sljit_gpr link_r = 14; /* r14 */
98
99
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
100
101
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
102
0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
103
};
104
105
#define R0A(r) (r)
106
#define R4A(r) ((r) << 4)
107
#define R8A(r) ((r) << 8)
108
#define R12A(r) ((r) << 12)
109
#define R16A(r) ((r) << 16)
110
#define R20A(r) ((r) << 20)
111
#define R28A(r) ((r) << 28)
112
#define R32A(r) ((r) << 32)
113
#define R36A(r) ((r) << 36)
114
115
#define R0(r) ((sljit_ins)reg_map[r])
116
117
#define F0(r) ((sljit_ins)freg_map[r])
118
#define F4(r) (R4A((sljit_ins)freg_map[r]))
119
#define F12(r) (R12A((sljit_ins)freg_map[r]))
120
#define F20(r) (R20A((sljit_ins)freg_map[r]))
121
#define F28(r) (R28A((sljit_ins)freg_map[r]))
122
#define F32(r) (R32A((sljit_ins)freg_map[r]))
123
#define F36(r) (R36A((sljit_ins)freg_map[r]))
124
125
/* Convert SLJIT register to hardware register. */
126
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
127
{
128
SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
129
return reg_map[r];
130
}
131
132
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
133
{
134
sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
135
FAIL_IF(!ibuf);
136
*ibuf = ins;
137
138
SLJIT_ASSERT(ins <= 0xffffffffffffL);
139
140
compiler->size++;
141
if (ins & 0xffff00000000L)
142
compiler->size++;
143
144
if (ins & 0xffffffff0000L)
145
compiler->size++;
146
147
return SLJIT_SUCCESS;
148
}
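/* Example of the size bookkeeping above: a 2-byte opcode such as lr (0x1800)
   sets no bits above bit 15, so only the unconditional increment applies;
   a 4-byte opcode such as lgr (0xb9040000) also matches the 0xffffffff0000
   test; a 6-byte opcode such as lg (0xe30000000004) matches both tests.
   compiler->size therefore counts halfwords: 1, 2 or 3 per instruction. */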
149
150
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
151
(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
152
&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
153
154
/* Map the given type to a 4-bit condition code mask. */
155
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
156
const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
157
const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
158
const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
159
const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
160
161
switch (type) {
162
case SLJIT_EQUAL:
163
if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
164
sljit_s32 flag_type = GET_FLAG_TYPE(compiler->status_flags_state);
165
if (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_SIG_LESS_EQUAL)
166
return cc0;
167
if (flag_type == SLJIT_OVERFLOW)
168
return (cc0 | cc3);
169
return (cc0 | cc2);
170
}
171
SLJIT_FALLTHROUGH
172
173
case SLJIT_ATOMIC_STORED:
174
case SLJIT_F_EQUAL:
175
case SLJIT_ORDERED_EQUAL:
176
return cc0;
177
178
case SLJIT_NOT_EQUAL:
179
if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
180
sljit_s32 flag_type = GET_FLAG_TYPE(compiler->status_flags_state);
181
if (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_SIG_LESS_EQUAL)
182
return (cc1 | cc2 | cc3);
183
if (flag_type == SLJIT_OVERFLOW)
184
return (cc1 | cc2);
185
return (cc1 | cc3);
186
}
187
SLJIT_FALLTHROUGH
188
189
case SLJIT_UNORDERED_OR_NOT_EQUAL:
190
return (cc1 | cc2 | cc3);
191
192
case SLJIT_LESS:
193
case SLJIT_ATOMIC_NOT_STORED:
194
return cc1;
195
196
case SLJIT_GREATER_EQUAL:
197
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
198
return (cc0 | cc2 | cc3);
199
200
case SLJIT_GREATER:
201
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
202
return cc2;
203
return cc3;
204
205
case SLJIT_LESS_EQUAL:
206
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
207
return (cc0 | cc1);
208
return (cc0 | cc1 | cc2);
209
210
case SLJIT_SIG_LESS:
211
case SLJIT_F_LESS:
212
case SLJIT_ORDERED_LESS:
213
return cc1;
214
215
case SLJIT_NOT_CARRY:
216
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
217
return (cc2 | cc3);
218
SLJIT_FALLTHROUGH
219
220
case SLJIT_SIG_LESS_EQUAL:
221
case SLJIT_F_LESS_EQUAL:
222
case SLJIT_ORDERED_LESS_EQUAL:
223
return (cc0 | cc1);
224
225
case SLJIT_CARRY:
226
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
227
return (cc0 | cc1);
228
SLJIT_FALLTHROUGH
229
230
case SLJIT_SIG_GREATER:
231
case SLJIT_UNORDERED_OR_GREATER:
232
/* Overflow is considered greater, see SLJIT_SUB. */
233
return cc2 | cc3;
234
235
case SLJIT_SIG_GREATER_EQUAL:
236
return (cc0 | cc2 | cc3);
237
238
case SLJIT_OVERFLOW:
239
if (compiler->status_flags_state & SLJIT_SET_Z)
240
return (cc2 | cc3);
241
SLJIT_FALLTHROUGH
242
243
case SLJIT_UNORDERED:
244
return cc3;
245
246
case SLJIT_NOT_OVERFLOW:
247
if (compiler->status_flags_state & SLJIT_SET_Z)
248
return (cc0 | cc1);
249
SLJIT_FALLTHROUGH
250
251
case SLJIT_ORDERED:
252
return (cc0 | cc1 | cc2);
253
254
case SLJIT_F_NOT_EQUAL:
255
case SLJIT_ORDERED_NOT_EQUAL:
256
return (cc1 | cc2);
257
258
case SLJIT_F_GREATER:
259
case SLJIT_ORDERED_GREATER:
260
return cc2;
261
262
case SLJIT_F_GREATER_EQUAL:
263
case SLJIT_ORDERED_GREATER_EQUAL:
264
return (cc0 | cc2);
265
266
case SLJIT_UNORDERED_OR_LESS_EQUAL:
267
return (cc0 | cc1 | cc3);
268
269
case SLJIT_UNORDERED_OR_EQUAL:
270
return (cc0 | cc3);
271
272
case SLJIT_UNORDERED_OR_LESS:
273
return (cc1 | cc3);
274
}
275
276
SLJIT_UNREACHABLE();
277
return (sljit_u8)-1;
278
}
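/* Example: after a comparison, SLJIT_EQUAL maps to cc0 (mask 0x8), so a
   conditional branch can be emitted as brc(0x8, target) and is taken only
   when the condition code is 0; SLJIT_NOT_EQUAL after a comparison maps to
   cc1 | cc2 | cc3 (mask 0x7), i.e. any non-zero condition code. */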
279
280
/* Facility to bit index mappings.
281
Note: some facilities share the same bit index. */
282
typedef sljit_uw facility_bit;
283
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
284
#define FAST_LONG_DISPLACEMENT_FACILITY 19
285
#define EXTENDED_IMMEDIATE_FACILITY 21
286
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
287
#define DISTINCT_OPERAND_FACILITY 45
288
#define HIGH_WORD_FACILITY 45
289
#define POPULATION_COUNT_FACILITY 45
290
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
291
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
292
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
293
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
294
#define VECTOR_FACILITY 129
295
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
296
297
/* Report whether a facility is known to be present due to the compiler
298
settings. This function should always be compiled to a constant
299
value given a constant argument. */
300
static SLJIT_INLINE int have_facility_static(facility_bit x)
301
{
302
#if ENABLE_STATIC_FACILITY_DETECTION
303
switch (x) {
304
case FAST_LONG_DISPLACEMENT_FACILITY:
305
return (__ARCH__ >= 6 /* z990 */);
306
case EXTENDED_IMMEDIATE_FACILITY:
307
case STORE_FACILITY_LIST_EXTENDED_FACILITY:
308
return (__ARCH__ >= 7 /* z9-109 */);
309
case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
310
return (__ARCH__ >= 8 /* z10 */);
311
case DISTINCT_OPERAND_FACILITY:
312
return (__ARCH__ >= 9 /* z196 */);
313
case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
314
return (__ARCH__ >= 10 /* zEC12 */);
315
case LOAD_STORE_ON_CONDITION_2_FACILITY:
316
case VECTOR_FACILITY:
317
return (__ARCH__ >= 11 /* z13 */);
318
case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
319
case VECTOR_ENHANCEMENTS_1_FACILITY:
320
return (__ARCH__ >= 12 /* z14 */);
321
default:
322
SLJIT_UNREACHABLE();
323
}
324
#endif
325
return 0;
326
}
327
328
static SLJIT_INLINE unsigned long get_hwcap(void)
329
{
330
static unsigned long hwcap = 0;
331
if (SLJIT_UNLIKELY(!hwcap)) {
332
hwcap = getauxval(AT_HWCAP);
333
SLJIT_ASSERT(hwcap != 0);
334
}
335
return hwcap;
336
}
337
338
static SLJIT_INLINE int have_stfle(void)
339
{
340
if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
341
return 1;
342
343
return (get_hwcap() & HWCAP_S390_STFLE);
344
}
345
346
/* Report whether the given facility is available. This function always
347
performs a runtime check. */
348
static int have_facility_dynamic(facility_bit x)
349
{
350
#if ENABLE_DYNAMIC_FACILITY_DETECTION
351
static struct {
352
sljit_uw bits[4];
353
} cpu_features;
354
size_t size = sizeof(cpu_features);
355
const sljit_uw word_index = x >> 6;
356
const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
357
358
SLJIT_ASSERT(x < size * 8);
359
if (SLJIT_UNLIKELY(!have_stfle()))
360
return 0;
361
362
if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
363
__asm__ __volatile__ (
364
"lgr %%r0, %0;"
365
"stfle 0(%1);"
366
/* outputs */:
367
/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
368
/* clobbers */: "r0", "cc", "memory"
369
);
370
SLJIT_ASSERT(cpu_features.bits[0] != 0);
371
}
372
return (cpu_features.bits[word_index] & bit_index) != 0;
373
#else
374
return 0;
375
#endif
376
}
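/* The stfle above stores the facility list as big-endian bit strings:
   facility bit x lives in doubleword x / 64, counted from the most
   significant bit, which is why bit_index starts from 1UL << 63. r0 is
   loaded with (size / 8) - 1, i.e. the number of doublewords to store
   minus one, before stfle is issued. */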
377
378
#define HAVE_FACILITY(name, bit) \
379
static SLJIT_INLINE int name() \
380
{ \
381
static int have = -1; \
382
/* Static check first. May allow the function to be optimized away. */ \
383
if (have_facility_static(bit)) \
384
have = 1; \
385
else if (SLJIT_UNLIKELY(have < 0)) \
386
have = have_facility_dynamic(bit) ? 1 : 0; \
387
\
388
return have; \
389
}
390
391
HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
392
HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
393
HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
394
HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
395
HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
396
HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
397
#undef HAVE_FACILITY
398
399
#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)
400
#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)
401
402
#define CHECK_SIGNED(v, bitlen) \
403
((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
404
405
#define is_s8(d) ((sljit_sw)(d) == (sljit_s8)(d))
406
#define is_s16(d) ((sljit_sw)(d) == (sljit_s16)(d))
407
#define is_s20(d) CHECK_SIGNED((d), 20)
408
#define is_s32(d) ((sljit_sw)(d) == (sljit_s32)(d))
409
410
static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
411
{
412
sljit_uw dh, dl;
413
414
SLJIT_ASSERT(is_s20(d));
415
416
dh = (d >> 12) & 0xff;
417
dl = ((sljit_uw)d << 8) & 0xfff00;
418
return (dh | dl) << 8;
419
}
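/* Example: disp_s20(0x12345) computes dh = 0x12 and dl = 0x34500, giving
   0x3451200: the low 12 bits of the displacement land in bits 16-27 and the
   high 8 bits in bits 8-15 of the returned value, which is the layout the
   RXY/RSY style emitters below combine with R28A(base) and R32A(index). */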
420
421
/* TODO(carenas): variadic macro is not strictly needed */
422
#define SLJIT_S390X_INSTRUCTION(op, ...) \
423
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
424
425
/* RR form instructions. */
426
#define SLJIT_S390X_RR(name, pattern) \
427
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
428
{ \
429
return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
430
}
431
432
/* AND */
433
SLJIT_S390X_RR(nr, 0x1400)
434
435
/* BRANCH AND SAVE */
436
SLJIT_S390X_RR(basr, 0x0d00)
437
438
/* BRANCH ON CONDITION */
439
SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */
440
441
/* DIVIDE */
442
SLJIT_S390X_RR(dr, 0x1d00)
443
444
/* EXCLUSIVE OR */
445
SLJIT_S390X_RR(xr, 0x1700)
446
447
/* LOAD */
448
SLJIT_S390X_RR(lr, 0x1800)
449
450
/* LOAD COMPLEMENT */
451
SLJIT_S390X_RR(lcr, 0x1300)
452
453
/* OR */
454
SLJIT_S390X_RR(or, 0x1600)
455
456
#undef SLJIT_S390X_RR
457
458
/* RRE form instructions */
459
#define SLJIT_S390X_RRE(name, pattern) \
460
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
461
{ \
462
return (pattern) | R4A(dst) | R0A(src); \
463
}
464
465
/* AND */
466
SLJIT_S390X_RRE(ngr, 0xb9800000)
467
468
/* DIVIDE LOGICAL */
469
SLJIT_S390X_RRE(dlr, 0xb9970000)
470
SLJIT_S390X_RRE(dlgr, 0xb9870000)
471
472
/* DIVIDE SINGLE */
473
SLJIT_S390X_RRE(dsgr, 0xb90d0000)
474
475
/* EXCLUSIVE OR */
476
SLJIT_S390X_RRE(xgr, 0xb9820000)
477
478
/* LOAD */
479
SLJIT_S390X_RRE(lgr, 0xb9040000)
480
SLJIT_S390X_RRE(lgfr, 0xb9140000)
481
482
/* LOAD BYTE */
483
SLJIT_S390X_RRE(lbr, 0xb9260000)
484
SLJIT_S390X_RRE(lgbr, 0xb9060000)
485
486
/* LOAD COMPLEMENT */
487
SLJIT_S390X_RRE(lcgr, 0xb9030000)
488
489
/* LOAD HALFWORD */
490
SLJIT_S390X_RRE(lhr, 0xb9270000)
491
SLJIT_S390X_RRE(lghr, 0xb9070000)
492
493
/* LOAD LOGICAL */
494
SLJIT_S390X_RRE(llgfr, 0xb9160000)
495
496
/* LOAD LOGICAL CHARACTER */
497
SLJIT_S390X_RRE(llcr, 0xb9940000)
498
SLJIT_S390X_RRE(llgcr, 0xb9840000)
499
500
/* LOAD LOGICAL HALFWORD */
501
SLJIT_S390X_RRE(llhr, 0xb9950000)
502
SLJIT_S390X_RRE(llghr, 0xb9850000)
503
504
/* MULTIPLY LOGICAL */
505
SLJIT_S390X_RRE(mlgr, 0xb9860000)
506
507
/* MULTIPLY SINGLE */
508
SLJIT_S390X_RRE(msgfr, 0xb91c0000)
509
510
/* OR */
511
SLJIT_S390X_RRE(ogr, 0xb9810000)
512
513
/* SUBTRACT */
514
SLJIT_S390X_RRE(sgr, 0xb9090000)
515
516
#undef SLJIT_S390X_RRE
517
518
/* RI-a form instructions */
519
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
520
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
521
{ \
522
return (pattern) | R20A(reg) | (imm & 0xffff); \
523
}
524
525
/* ADD HALFWORD IMMEDIATE */
526
SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)
527
528
/* LOAD HALFWORD IMMEDIATE */
529
SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
530
SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)
531
532
/* LOAD LOGICAL IMMEDIATE */
533
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
534
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
535
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
536
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
537
538
/* MULTIPLY HALFWORD IMMEDIATE */
539
SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
540
SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)
541
542
/* OR IMMEDIATE */
543
SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)
544
545
#undef SLJIT_S390X_RIA
546
547
/* RIL-a form instructions (requires extended immediate facility) */
548
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
549
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
550
{ \
551
SLJIT_ASSERT(have_eimm()); \
552
return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
553
}
554
555
/* ADD IMMEDIATE */
556
SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)
557
558
/* ADD IMMEDIATE HIGH */
559
SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
560
561
/* AND IMMEDIATE */
562
SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)
563
564
/* EXCLUSIVE OR IMMEDIATE */
565
SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)
566
567
/* INSERT IMMEDIATE */
568
SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
569
SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)
570
571
/* LOAD IMMEDIATE */
572
SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)
573
574
/* LOAD LOGICAL IMMEDIATE */
575
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
576
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
577
578
/* SUBTRACT LOGICAL IMMEDIATE */
579
SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)
580
581
#undef SLJIT_S390X_RILA
582
583
/* RX-a form instructions */
584
#define SLJIT_S390X_RXA(name, pattern) \
585
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
586
{ \
587
SLJIT_ASSERT((d & 0xfff) == d); \
588
\
589
return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
590
}
591
592
/* LOAD */
593
SLJIT_S390X_RXA(l, 0x58000000)
594
595
/* LOAD ADDRESS */
596
SLJIT_S390X_RXA(la, 0x41000000)
597
598
/* LOAD HALFWORD */
599
SLJIT_S390X_RXA(lh, 0x48000000)
600
601
/* MULTIPLY SINGLE */
602
SLJIT_S390X_RXA(ms, 0x71000000)
603
604
/* STORE */
605
SLJIT_S390X_RXA(st, 0x50000000)
606
607
/* STORE CHARACTER */
608
SLJIT_S390X_RXA(stc, 0x42000000)
609
610
/* STORE HALFWORD */
611
SLJIT_S390X_RXA(sth, 0x40000000)
612
613
#undef SLJIT_S390X_RXA
614
615
/* RXY-a instructions */
616
#define SLJIT_S390X_RXYA(name, pattern, cond) \
617
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
618
{ \
619
SLJIT_ASSERT(cond); \
620
\
621
return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
622
}
623
624
/* LOAD */
625
SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
626
SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
627
SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)
628
629
/* LOAD BYTE */
630
SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
631
SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())
632
633
/* LOAD HALFWORD */
634
SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
635
SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)
636
637
/* LOAD LOGICAL */
638
SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)
639
640
/* LOAD LOGICAL CHARACTER */
641
SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
642
SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)
643
644
/* LOAD LOGICAL HALFWORD */
645
SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
646
SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)
647
648
/* MULTIPLY SINGLE */
649
SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
650
SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)
651
652
/* STORE */
653
SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
654
SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)
655
656
/* STORE CHARACTER */
657
SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())
658
659
/* STORE HALFWORD */
660
SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())
661
662
#undef SLJIT_S390X_RXYA
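/* Putting the pieces together, lg(r2, 16, r0, r15) evaluates to
   0xe30000000004 | R36A(2) | R32A(0) | R28A(15) | disp_s20(16)
   = 0xe320f0100004, i.e. the 6-byte encoding of lg %r2, 16(%r15). */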
663
664
/* RSY-a instructions */
665
#define SLJIT_S390X_RSYA(name, pattern, cond) \
666
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
667
{ \
668
SLJIT_ASSERT(cond); \
669
\
670
return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
671
}
672
673
/* LOAD MULTIPLE */
674
SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)
675
676
/* SHIFT LEFT LOGICAL */
677
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)
678
679
/* SHIFT RIGHT SINGLE */
680
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)
681
682
/* STORE MULTIPLE */
683
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)
684
685
#undef SLJIT_S390X_RSYA
686
687
/* RIE-f instructions (require general-instructions-extension facility) */
688
#define SLJIT_S390X_RIEF(name, pattern) \
689
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
690
{ \
691
sljit_ins i3, i4, i5; \
692
\
693
SLJIT_ASSERT(have_genext()); \
694
i3 = (sljit_ins)start << 24; \
695
i4 = (sljit_ins)end << 16; \
696
i5 = (sljit_ins)rot << 8; \
697
\
698
return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
699
}
700
701
/* ROTATE THEN AND SELECTED BITS */
702
/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */
703
704
/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
705
/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */
706
707
/* ROTATE THEN OR SELECTED BITS */
708
SLJIT_S390X_RIEF(rosbg, 0xec0000000056)
709
710
/* ROTATE THEN INSERT SELECTED BITS */
711
/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
712
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
713
714
/* ROTATE THEN INSERT SELECTED BITS HIGH */
715
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
716
717
/* ROTATE THEN INSERT SELECTED BITS LOW */
718
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
719
720
#undef SLJIT_S390X_RIEF
721
722
/* RRF-c instructions (require load/store-on-condition 1 facility) */
723
#define SLJIT_S390X_RRFC(name, pattern) \
724
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
725
{ \
726
sljit_ins m3; \
727
\
728
SLJIT_ASSERT(have_lscond1()); \
729
m3 = (sljit_ins)(mask & 0xf) << 12; \
730
\
731
return (pattern) | m3 | R4A(dst) | R0A(src); \
732
}
733
734
/* LOAD ON CONDITION */
735
SLJIT_S390X_RRFC(locr, 0xb9f20000)
736
SLJIT_S390X_RRFC(locgr, 0xb9e20000)
737
738
#undef SLJIT_S390X_RRFC
739
740
/* RIE-g instructions (require load/store-on-condition 2 facility) */
741
#define SLJIT_S390X_RIEG(name, pattern) \
742
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
743
{ \
744
sljit_ins m3, i2; \
745
\
746
SLJIT_ASSERT(have_lscond2()); \
747
m3 = (sljit_ins)(mask & 0xf) << 32; \
748
i2 = (sljit_ins)(imm & 0xffffL) << 16; \
749
\
750
return (pattern) | R36A(reg) | m3 | i2; \
751
}
752
753
/* LOAD HALFWORD IMMEDIATE ON CONDITION */
754
SLJIT_S390X_RIEG(lochi, 0xec0000000042)
755
SLJIT_S390X_RIEG(locghi, 0xec0000000046)
756
757
#undef SLJIT_S390X_RIEG
758
759
#define SLJIT_S390X_RILB(name, pattern, cond) \
760
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
761
{ \
762
SLJIT_ASSERT(cond); \
763
\
764
return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
765
}
766
767
/* BRANCH RELATIVE AND SAVE LONG */
768
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
769
770
/* LOAD ADDRESS RELATIVE LONG */
771
SLJIT_S390X_RILB(larl, 0xc00000000000, 1)
772
773
/* LOAD RELATIVE LONG */
774
SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())
775
776
#undef SLJIT_S390X_RILB
777
778
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
779
{
780
return 0x07f0 | target;
781
}
782
783
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
784
{
785
sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
786
sljit_ins ri2 = (sljit_ins)target & 0xffff;
787
return 0xa7040000L | m1 | ri2;
788
}
789
790
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
791
{
792
sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
793
sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
794
return 0xc00400000000L | m1 | ri2;
795
}
796
797
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
798
{
799
SLJIT_ASSERT(have_eimm());
800
return 0xb9830000 | R8A(dst) | R0A(src);
801
}
802
803
/* INSERT PROGRAM MASK */
804
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
805
{
806
return 0xb2220000 | R4A(dst);
807
}
808
809
/* SET PROGRAM MASK */
810
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
811
{
812
return 0x0400 | R4A(dst);
813
}
814
815
/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
816
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
817
{
818
return risbhg(dst, src, start, 0x8 | end, rot);
819
}
820
821
#undef SLJIT_S390X_INSTRUCTION
822
823
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
824
{
825
/* Condition codes: bits 18 and 19.
826
Transformation:
827
0 (zero and no overflow) : unchanged
828
1 (non-zero and no overflow) : unchanged
829
2 (zero and overflow) : decreased by 1
830
3 (non-zero and overflow) : decreased by 1 if non-zero */
831
FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
832
FAIL_IF(push_inst(compiler, ipm(tmp1)));
833
FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
834
FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
835
FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
836
FAIL_IF(push_inst(compiler, spm(tmp1)));
837
return SLJIT_SUCCESS;
838
}
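/* The brc targets above are relative offsets counted in halfwords from the
   start of the branch itself: the first brc skips past the whole fix-up
   sequence (2 for the brc, 2 for ipm, 1 or 2 for or/ogr, 2 for the inner
   brc, 3 for slfi, 1 for spm), the second one skips just the slfi (2 + 3). */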
839
840
/* load 64-bit immediate into register without clobbering flags */
841
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
842
{
843
/* 4 byte instructions */
844
if (is_s16(v))
845
return push_inst(compiler, lghi(target, (sljit_s16)v));
846
847
if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
848
return push_inst(compiler, llill(target, (sljit_u16)v));
849
850
if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
851
return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
852
853
if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
854
return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
855
856
if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
857
return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
858
859
if (is_s32(v))
860
return push_inst(compiler, lgfi(target, (sljit_s32)v));
861
862
if (((sljit_uw)v >> 32) == 0)
863
return push_inst(compiler, llilf(target, (sljit_u32)v));
864
865
if (((sljit_uw)v << 32) == 0)
866
return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
867
868
FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
869
return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
870
}
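/* Examples: v = 0x0000123400000000 matches the 0x0000ffff00000000 mask and
   is loaded with a single llihl(target, 0x1234); v = 0x0000000123456789
   matches none of the single-instruction cases and falls through to the
   final llilf + iihf pair. */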
871
872
struct addr {
873
sljit_gpr base;
874
sljit_gpr index;
875
sljit_s32 offset;
876
};
877
878
/* transform memory operand into D(X,B) form with a signed 20-bit offset */
879
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
880
struct addr *addr, sljit_s32 mem, sljit_sw off,
881
sljit_gpr tmp /* clobbered, must not be r0 */)
882
{
883
sljit_gpr base = r0;
884
sljit_gpr index = r0;
885
886
SLJIT_ASSERT(tmp != r0);
887
if (mem & REG_MASK)
888
base = gpr(mem & REG_MASK);
889
890
if (mem & OFFS_REG_MASK) {
891
index = gpr(OFFS_REG(mem));
892
if (off != 0) {
893
/* shift and put the result into tmp */
894
SLJIT_ASSERT(0 <= off && off < 64);
895
FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
896
index = tmp;
897
off = 0; /* clear offset */
898
}
899
}
900
else if (!is_s20(off)) {
901
FAIL_IF(push_load_imm_inst(compiler, tmp, off));
902
index = tmp;
903
off = 0; /* clear offset */
904
}
905
addr->base = base;
906
addr->index = index;
907
addr->offset = (sljit_s32)off;
908
return SLJIT_SUCCESS;
909
}
910
911
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
912
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
913
struct addr *addr, sljit_s32 mem, sljit_sw off,
914
sljit_gpr tmp /* clobbered, must not be r0 */)
915
{
916
sljit_gpr base = r0;
917
sljit_gpr index = r0;
918
919
SLJIT_ASSERT(tmp != r0);
920
if (mem & REG_MASK)
921
base = gpr(mem & REG_MASK);
922
923
if (mem & OFFS_REG_MASK) {
924
index = gpr(OFFS_REG(mem));
925
if (off != 0) {
926
/* shift and put the result into tmp */
927
SLJIT_ASSERT(0 <= off && off < 64);
928
FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
929
index = tmp;
930
off = 0; /* clear offset */
931
}
932
}
933
else if (!is_u12(off)) {
934
FAIL_IF(push_load_imm_inst(compiler, tmp, off));
935
index = tmp;
936
off = 0; /* clear offset */
937
}
938
addr->base = base;
939
addr->index = index;
940
addr->offset = (sljit_s32)off;
941
return SLJIT_SUCCESS;
942
}
943
944
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
945
#define WHEN(cond, r, i1, i2, addr) \
946
(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
947
948
/* May clobber tmp1. */
949
static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
950
sljit_s32 mem, sljit_sw memw,
951
sljit_s32 is_32bit, const sljit_ins* forms)
952
{
953
struct addr addr;
954
955
SLJIT_ASSERT(mem & SLJIT_MEM);
956
957
if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
958
FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
959
return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
960
}
961
962
FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
963
return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
964
}
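/* The forms[] arrays below follow the convention used above: forms[0] is
   the 32-bit RX opcode with a 12-bit displacement, forms[1] the 32-bit RXY
   opcode with a 20-bit displacement, and forms[2] the 64-bit RXY opcode. */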
965
966
static const sljit_ins load_forms[3] = {
967
0x58000000 /* l */,
968
0xe30000000058 /* ly */,
969
0xe30000000004 /* lg */
970
};
971
972
static const sljit_ins store_forms[3] = {
973
0x50000000 /* st */,
974
0xe30000000050 /* sty */,
975
0xe30000000024 /* stg */
976
};
977
978
static const sljit_ins store_byte_forms[3] = {
979
0x42000000 /* stc */,
980
0xe30000000072 /* stcy */,
981
0
982
};
983
984
static const sljit_ins load_halfword_forms[3] = {
985
0x48000000 /* lh */,
986
0xe30000000078 /* lhy */,
987
0xe30000000015 /* lgh */
988
};
989
990
/* May clobber tmp1. */
991
static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
992
sljit_s32 src, sljit_sw srcw,
993
sljit_s32 is_32bit)
994
{
995
return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
996
}
997
998
/* May clobber tmp1. */
999
static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1000
sljit_s32 src, sljit_sw srcw,
1001
sljit_s32 is_32bit)
1002
{
1003
struct addr addr;
1004
sljit_ins ins;
1005
1006
SLJIT_ASSERT(src & SLJIT_MEM);
1007
1008
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1009
1010
ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1011
return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1012
}
1013
1014
/* May clobber tmp1. */
1015
static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1016
sljit_s32 dst, sljit_sw dstw,
1017
sljit_s32 is_32bit)
1018
{
1019
return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1020
}
1021
1022
/* May clobber tmp1. */
1023
static SLJIT_INLINE sljit_s32 store_byte(struct sljit_compiler *compiler, sljit_gpr src_r,
1024
sljit_s32 dst, sljit_sw dstw)
1025
{
1026
return load_store_op(compiler, src_r, dst, dstw, 1, store_byte_forms);
1027
}
1028
1029
#undef WHEN
1030
1031
static sljit_s32 emit_move(struct sljit_compiler *compiler,
1032
sljit_gpr dst_r,
1033
sljit_s32 src, sljit_sw srcw)
1034
{
1035
sljit_gpr src_r;
1036
1037
SLJIT_ASSERT(!FAST_IS_REG(src) || dst_r != gpr(src & REG_MASK));
1038
1039
if (src == SLJIT_IMM)
1040
return push_load_imm_inst(compiler, dst_r, srcw);
1041
1042
if (src & SLJIT_MEM)
1043
return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1044
1045
src_r = gpr(src & REG_MASK);
1046
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1047
}
1048
1049
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1050
sljit_s32 dst,
1051
sljit_s32 src1, sljit_sw src1w,
1052
sljit_s32 src2, sljit_sw src2w)
1053
{
1054
sljit_gpr dst_r = tmp0;
1055
sljit_gpr src_r = tmp1;
1056
sljit_s32 needs_move = 1;
1057
1058
if (FAST_IS_REG(dst)) {
1059
dst_r = gpr(dst);
1060
1061
if (dst == src1)
1062
needs_move = 0;
1063
else if (dst == src2) {
1064
dst_r = tmp0;
1065
needs_move = 2;
1066
}
1067
}
1068
1069
if (needs_move)
1070
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1071
1072
if (FAST_IS_REG(src2))
1073
src_r = gpr(src2);
1074
else
1075
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1076
1077
FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1078
1079
if (needs_move != 2)
1080
return SLJIT_SUCCESS;
1081
1082
dst_r = gpr(dst & REG_MASK);
1083
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1084
}
1085
1086
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1087
sljit_s32 dst,
1088
sljit_s32 src1, sljit_sw src1w)
1089
{
1090
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1091
sljit_gpr src_r = tmp1;
1092
1093
if (FAST_IS_REG(src1))
1094
src_r = gpr(src1);
1095
else
1096
FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1097
1098
return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1099
}
1100
1101
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1102
sljit_s32 dst,
1103
sljit_s32 src1, sljit_sw src1w,
1104
sljit_s32 src2, sljit_sw src2w)
1105
{
1106
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1107
sljit_gpr src1_r = tmp0;
1108
sljit_gpr src2_r = tmp1;
1109
1110
if (FAST_IS_REG(src1))
1111
src1_r = gpr(src1);
1112
else
1113
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1114
1115
if (FAST_IS_REG(src2))
1116
src2_r = gpr(src2);
1117
else
1118
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1119
1120
return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
1121
}
1122
1123
typedef enum {
1124
RI_A,
1125
RIL_A,
1126
} emit_ril_type;
1127
1128
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1129
sljit_s32 dst,
1130
sljit_s32 src1, sljit_sw src1w,
1131
sljit_sw src2w,
1132
emit_ril_type type)
1133
{
1134
sljit_gpr dst_r = tmp0;
1135
sljit_s32 needs_move = 1;
1136
1137
if (FAST_IS_REG(dst)) {
1138
dst_r = gpr(dst);
1139
1140
if (dst == src1)
1141
needs_move = 0;
1142
}
1143
1144
if (needs_move)
1145
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1146
1147
if (type == RIL_A)
1148
return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1149
return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1150
}
1151
1152
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1153
sljit_s32 dst,
1154
sljit_s32 src1, sljit_sw src1w,
1155
sljit_sw src2w)
1156
{
1157
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1158
sljit_gpr src_r = tmp0;
1159
1160
if (!FAST_IS_REG(src1))
1161
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1162
else
1163
src_r = gpr(src1 & REG_MASK);
1164
1165
return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
1166
}
1167
1168
typedef enum {
1169
RX_A,
1170
RXY_A,
1171
} emit_rx_type;
1172
1173
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
1174
sljit_s32 dst,
1175
sljit_s32 src1, sljit_sw src1w,
1176
sljit_s32 src2, sljit_sw src2w,
1177
emit_rx_type type)
1178
{
1179
sljit_gpr dst_r = tmp0;
1180
sljit_s32 needs_move = 1;
1181
sljit_gpr base, index;
1182
1183
SLJIT_ASSERT(src2 & SLJIT_MEM);
1184
1185
if (FAST_IS_REG(dst)) {
1186
dst_r = gpr(dst);
1187
1188
if (dst == src1)
1189
needs_move = 0;
1190
else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
1191
dst_r = tmp0;
1192
needs_move = 2;
1193
}
1194
}
1195
1196
if (needs_move)
1197
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1198
1199
base = gpr(src2 & REG_MASK);
1200
index = tmp0;
1201
1202
if (src2 & OFFS_REG_MASK) {
1203
index = gpr(OFFS_REG(src2));
1204
1205
if (src2w != 0) {
1206
FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
1207
src2w = 0;
1208
index = tmp1;
1209
}
1210
} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
1211
FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));
1212
1213
if (src2 & REG_MASK)
1214
index = tmp1;
1215
else
1216
base = tmp1;
1217
src2w = 0;
1218
}
1219
1220
if (type == RX_A)
1221
ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
1222
else
1223
ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
1224
1225
FAIL_IF(push_inst(compiler, ins));
1226
1227
if (needs_move != 2)
1228
return SLJIT_SUCCESS;
1229
1230
dst_r = gpr(dst);
1231
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1232
}
1233
1234
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1235
sljit_s32 dst, sljit_sw dstw,
1236
sljit_sw srcw)
1237
{
1238
sljit_gpr dst_r = tmp1;
1239
1240
SLJIT_ASSERT(dst & SLJIT_MEM);
1241
1242
if (dst & OFFS_REG_MASK) {
1243
sljit_gpr index = tmp1;
1244
1245
if ((dstw & 0x3) == 0)
1246
index = gpr(OFFS_REG(dst));
1247
else
1248
FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
1249
1250
FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));
1251
dstw = 0;
1252
}
1253
else if (!is_s20(dstw)) {
1254
FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1255
1256
if (dst & REG_MASK)
1257
FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));
1258
1259
dstw = 0;
1260
}
1261
else
1262
dst_r = gpr(dst & REG_MASK);
1263
1264
return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
1265
}
1266
1267
struct ins_forms {
1268
sljit_ins op_r;
1269
sljit_ins op_gr;
1270
sljit_ins op_rk;
1271
sljit_ins op_grk;
1272
sljit_ins op;
1273
sljit_ins op_y;
1274
sljit_ins op_g;
1275
};
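/* How the emitters below pick a form: op_r/op_gr are the two-operand
   register opcodes (32-bit/64-bit), op_rk/op_grk the three-operand variants
   used via emit_rrf() when the destination is not one of the sources, and
   op/op_y/op_g the memory forms (32-bit with 12-bit displacement, 32-bit
   with 20-bit displacement, 64-bit with 20-bit displacement). */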
1276
1277
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1278
sljit_s32 dst,
1279
sljit_s32 src1, sljit_sw src1w,
1280
sljit_s32 src2, sljit_sw src2w)
1281
{
1282
sljit_s32 mode = compiler->mode;
1283
sljit_ins ins, ins_k;
1284
1285
if ((src1 | src2) & SLJIT_MEM) {
1286
sljit_ins ins12, ins20;
1287
1288
if (mode & SLJIT_32) {
1289
ins12 = forms->op;
1290
ins20 = forms->op_y;
1291
}
1292
else {
1293
ins12 = 0;
1294
ins20 = forms->op_g;
1295
}
1296
1297
if (ins12 && ins20) {
1298
/* Extra instructions needed for address computation can be executed independently. */
1299
if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1300
|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
1301
if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1302
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1303
1304
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1305
}
1306
1307
if (src1 & SLJIT_MEM) {
1308
if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
1309
return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);
1310
1311
return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
1312
}
1313
}
1314
else if (ins12 || ins20) {
1315
emit_rx_type rx_type;
1316
1317
if (ins12) {
1318
rx_type = RX_A;
1319
ins = ins12;
1320
}
1321
else {
1322
rx_type = RXY_A;
1323
ins = ins20;
1324
}
1325
1326
if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1327
|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
1328
return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);
1329
1330
if (src1 & SLJIT_MEM)
1331
return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
1332
}
1333
}
1334
1335
if (mode & SLJIT_32) {
1336
ins = forms->op_r;
1337
ins_k = forms->op_rk;
1338
}
1339
else {
1340
ins = forms->op_gr;
1341
ins_k = forms->op_grk;
1342
}
1343
1344
SLJIT_ASSERT(ins != 0 || ins_k != 0);
1345
1346
if (ins && FAST_IS_REG(dst)) {
1347
if (dst == src1)
1348
return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1349
1350
if (dst == src2)
1351
return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
1352
}
1353
1354
if (ins_k == 0)
1355
return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1356
1357
return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
1358
}
1359
1360
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1361
sljit_s32 dst,
1362
sljit_s32 src1, sljit_sw src1w,
1363
sljit_s32 src2, sljit_sw src2w)
1364
{
1365
sljit_s32 mode = compiler->mode;
1366
sljit_ins ins;
1367
1368
if (src2 & SLJIT_MEM) {
1369
sljit_ins ins12, ins20;
1370
1371
if (mode & SLJIT_32) {
1372
ins12 = forms->op;
1373
ins20 = forms->op_y;
1374
}
1375
else {
1376
ins12 = 0;
1377
ins20 = forms->op_g;
1378
}
1379
1380
if (ins12 && ins20) {
1381
if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1382
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1383
1384
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1385
}
1386
else if (ins12)
1387
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1388
else if (ins20)
1389
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1390
}
1391
1392
ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;
1393
1394
if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
1395
return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
1396
1397
return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
1398
}
1399
1400
static SLJIT_INLINE sljit_u16 *process_extended_label(sljit_u16 *code_ptr, struct sljit_extended_label *ext_label)
1401
{
1402
SLJIT_ASSERT(ext_label->label.u.index == SLJIT_LABEL_ALIGNED);
1403
return (sljit_u16*)((sljit_uw)code_ptr & ~(ext_label->data));
1404
}
1405
1406
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
1407
{
1408
struct sljit_label *label;
1409
struct sljit_jump *jump;
1410
struct sljit_const *const_;
1411
sljit_sw executable_offset;
1412
sljit_uw ins_size = compiler->size << 1;
1413
sljit_uw pool_size = 0; /* literal pool */
1414
sljit_uw pad_size;
1415
sljit_uw half_count;
1416
SLJIT_NEXT_DEFINE_TYPES;
1417
struct sljit_memory_fragment *buf;
1418
sljit_ins *buf_ptr;
1419
sljit_ins *buf_end;
1420
sljit_u16 *code;
1421
sljit_u16 *code_ptr;
1422
sljit_uw *pool, *pool_ptr;
1423
sljit_ins ins;
1424
sljit_sw source, offset;
1425
1426
CHECK_ERROR_PTR();
1427
CHECK_PTR(check_sljit_generate_code(compiler, options));
1428
reverse_buf(compiler);
1429
1430
jump = compiler->jumps;
1431
while (jump != NULL) {
1432
if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
1433
/* encoded: */
1434
/* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
1435
/* replace with: */
1436
/* lgrl %r1, <pool_addr> */
1437
/* basr %r14, %r1 (or bcr <mask>, %r1) */
1438
if (((jump->flags & SLJIT_REWRITABLE_JUMP) || !is_s32(jump->u.target)))
1439
pool_size += sizeof(sljit_uw);
1440
else
1441
jump->flags |= PATCH_IMM32;
1442
1443
if (!(jump->flags & JUMP_MOV_ADDR))
1444
ins_size += 2;
1445
}
1446
jump = jump->next;
1447
}
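/* A rewritten jump grows from a single 6-byte brasl/brcl into a 6-byte
   lgrl/lgfi plus a 2-byte basr/bcr, hence the extra 2 bytes of ins_size;
   jumps that are rewritable, or whose absolute target does not fit in a
   signed 32-bit immediate, additionally reserve one doubleword in the
   literal pool, while the others are patched in place (PATCH_IMM32). */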
1448
1449
/* pad code size to 8 bytes so it is accessible with halfword offsets */
1450
/* the literal pool needs to be doubleword aligned */
1451
pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
1452
SLJIT_ASSERT(pad_size < 8UL);
1453
1454
/* allocate target buffer */
1455
code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
1456
PTR_FAIL_WITH_EXEC_IF(code);
1457
code_ptr = code;
1458
1459
/* TODO(carenas): pool is optional, and the ABI recommends it to
1460
* be created before the function code, instead of
1461
* globally; if the generated code is too big it could
1462
* need offsets bigger than 32-bit words and assert()
1463
*/
1464
pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
1465
pool_ptr = pool;
1466
buf = compiler->buf;
1467
half_count = 0;
1468
1469
label = compiler->labels;
1470
jump = compiler->jumps;
1471
const_ = compiler->consts;
1472
SLJIT_NEXT_INIT_TYPES();
1473
SLJIT_GET_NEXT_MIN();
1474
1475
do {
1476
buf_ptr = (sljit_ins*)buf->memory;
1477
buf_end = buf_ptr + (buf->used_size >> 3);
1478
do {
1479
ins = *buf_ptr++;
1480
1481
if (next_min_addr == half_count) {
1482
SLJIT_ASSERT(!label || label->size >= half_count);
1483
SLJIT_ASSERT(!jump || jump->addr >= half_count);
1484
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
1485
1486
if (next_min_addr == next_label_size) {
1487
if (label->u.index >= SLJIT_LABEL_ALIGNED)
1488
code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
1489
1490
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1491
label = label->next;
1492
next_label_size = SLJIT_GET_NEXT_SIZE(label);
1493
}
1494
1495
if (next_min_addr == next_jump_addr) {
1496
jump->addr = (sljit_uw)code_ptr;
1497
1498
if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
1499
if (jump->flags & PATCH_IMM32) {
1500
SLJIT_ASSERT((jump->flags & JUMP_ADDR) && is_s32(jump->u.target));
1501
ins = 0xc00100000000 /* lgfi */ | (ins & 0xf000000000);
1502
} else if (jump->flags & JUMP_ADDR) {
1503
source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1504
offset = (sljit_sw)(jump->u.target - (sljit_uw)source);
1505
1506
if ((offset & 0x1) != 0 || offset > 0xffffffffl || offset < -0x100000000l) {
1507
jump->addr = (sljit_uw)pool_ptr;
1508
jump->flags |= PATCH_POOL;
1509
1510
/* store target into pool */
1511
offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1512
pool_ptr++;
1513
1514
SLJIT_ASSERT(!(offset & 1));
1515
offset >>= 1;
1516
SLJIT_ASSERT(is_s32(offset));
1517
ins = 0xc40800000000 /* lgrl */ | (ins & 0xf000000000) | (sljit_ins)(offset & 0xffffffff);
1518
}
1519
}
1520
} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
1521
source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1522
1523
if (jump->flags & PATCH_IMM32) {
1524
SLJIT_ASSERT((jump->flags & JUMP_ADDR) && is_s32(jump->u.target));
1525
code_ptr[0] = (sljit_u16)(0xc001 /* lgfi */ | R4A(tmp1));
1526
code_ptr += 3;
1527
} else if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
1528
offset = (sljit_sw)(jump->u.target - (sljit_uw)source);
1529
1530
if ((offset & 0x1) != 0 || offset > 0xffffffffl || offset < -0x100000000l)
1531
jump->flags |= PATCH_POOL;
1532
} else
1533
jump->flags |= PATCH_POOL;
1534
1535
if (jump->flags & PATCH_POOL) {
1536
jump->addr = (sljit_uw)pool_ptr;
1537
1538
/* load address into tmp1 */
1539
offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1540
1541
SLJIT_ASSERT(!(offset & 1));
1542
offset >>= 1;
1543
SLJIT_ASSERT(is_s32(offset));
1544
1545
code_ptr[0] = (sljit_u16)(0xc408 /* lgrl */ | R4A(tmp1));
1546
code_ptr[1] = (sljit_u16)(offset >> 16);
1547
code_ptr[2] = (sljit_u16)offset;
1548
code_ptr += 3;
1549
pool_ptr++;
1550
}
1551
1552
if (jump->flags & (PATCH_POOL | PATCH_IMM32)) {
1553
/* branch to tmp1 */
1554
if (((ins >> 32) & 0xf) == 4) {
1555
/* brcl -> bcr */
1556
ins = 0x0700 /* bcr */ | ((ins >> 32) & 0xf0) | R0A(tmp1);
1557
} else {
1558
SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
1559
/* brasl -> basr */
1560
ins = 0x0d00 /* basr */ | ((ins >> 32) & 0xf0) | R0A(tmp1);
1561
}
1562
1563
/* Adjust half_count. */
1564
half_count += 2;
1565
}
1566
}
1567
1568
jump = jump->next;
1569
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
1570
} else if (next_min_addr == next_const_addr) {
1571
const_->addr = (sljit_uw)code_ptr;
1572
const_ = const_->next;
1573
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
1574
}
1575
1576
SLJIT_GET_NEXT_MIN();
1577
}
1578
1579
if (ins & 0xffff00000000L) {
1580
*code_ptr++ = (sljit_u16)(ins >> 32);
1581
half_count++;
1582
}
1583
1584
if (ins & 0xffffffff0000L) {
1585
*code_ptr++ = (sljit_u16)(ins >> 16);
1586
half_count++;
1587
}
1588
1589
*code_ptr++ = (sljit_u16)ins;
1590
half_count++;
1591
} while (buf_ptr < buf_end);
1592
1593
buf = buf->next;
1594
} while (buf);
1595
1596
if (next_label_size == half_count) {
1597
if (label->u.index >= SLJIT_LABEL_ALIGNED)
1598
code_ptr = process_extended_label(code_ptr, (struct sljit_extended_label*)label);
1599
1600
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1601
label = label->next;
1602
}
1603
1604
SLJIT_ASSERT(!label);
1605
SLJIT_ASSERT(!jump);
1606
SLJIT_ASSERT(!const_);
1607
SLJIT_ASSERT(code_ptr <= code + (ins_size >> 1));
1608
SLJIT_ASSERT((sljit_u8 *)pool_ptr <= (sljit_u8 *)pool + pool_size);
1609
1610
jump = compiler->jumps;
1611
while (jump != NULL) {
1612
offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
1613
1614
if (!(jump->flags & (PATCH_POOL | PATCH_IMM32))) {
1615
code_ptr = (sljit_u16*)jump->addr;
1616
offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1617
1618
/* Offset must be halfword aligned. */
1619
SLJIT_ASSERT(!(offset & 1));
1620
offset >>= 1;
1621
SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
1622
1623
code_ptr[1] = (sljit_u16)(offset >> 16);
1624
code_ptr[2] = (sljit_u16)offset;
1625
} else if (jump->flags & PATCH_POOL) {
1626
/* Store jump target into pool. */
1627
*(sljit_uw*)(jump->addr) = (sljit_uw)offset;
1628
} else {
1629
SLJIT_ASSERT(is_s32(offset));
1630
code_ptr = (sljit_u16*)jump->addr;
1631
code_ptr[1] = (sljit_u16)(offset >> 16);
1632
code_ptr[2] = (sljit_u16)offset;
1633
}
1634
jump = jump->next;
1635
}
1636
1637
compiler->error = SLJIT_ERR_COMPILED;
1638
compiler->executable_offset = executable_offset;
1639
compiler->executable_size = ins_size;
1640
if (pool_size)
1641
compiler->executable_size += (pad_size + pool_size);
1642
1643
code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1644
code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1645
SLJIT_CACHE_FLUSH(code, code_ptr);
1646
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1647
return code;
1648
}
1649
1650
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1651
{
1652
/* TODO(mundaym): implement all */
1653
switch (feature_type) {
1654
case SLJIT_HAS_FPU:
1655
#ifdef SLJIT_IS_FPU_AVAILABLE
1656
return (SLJIT_IS_FPU_AVAILABLE) != 0;
1657
#else
1658
return 1;
1659
#endif /* SLJIT_IS_FPU_AVAILABLE */
1660
1661
case SLJIT_HAS_CLZ:
1662
case SLJIT_HAS_REV:
1663
case SLJIT_HAS_ROT:
1664
case SLJIT_HAS_PREFETCH:
1665
case SLJIT_HAS_COPY_F32:
1666
case SLJIT_HAS_COPY_F64:
1667
case SLJIT_HAS_SIMD:
1668
case SLJIT_HAS_ATOMIC:
1669
case SLJIT_HAS_MEMORY_BARRIER:
1670
return 1;
1671
1672
case SLJIT_HAS_CTZ:
1673
return 2;
1674
1675
case SLJIT_HAS_CMOV:
1676
return have_lscond1() ? 1 : 0;
1677
}
1678
return 0;
1679
}
1680
1681
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1682
{
1683
SLJIT_UNUSED_ARG(type);
1684
return 0;
1685
}
1686
1687
/* --------------------------------------------------------------------- */
1688
/* Entry, exit */
1689
/* --------------------------------------------------------------------- */
1690
1691
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1692
sljit_s32 options, sljit_s32 arg_types,
1693
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
1694
{
1695
sljit_s32 fscratches;
1696
sljit_s32 fsaveds;
1697
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1698
sljit_s32 offset, i, tmp;
1699
1700
CHECK_ERROR();
1701
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
1702
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
1703
1704
/* Saved registers are stored in callee allocated save area. */
1705
SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);
1706
1707
scratches = ENTER_GET_REGS(scratches);
1708
saveds = ENTER_GET_REGS(saveds);
1709
fscratches = compiler->fscratches;
1710
fsaveds = compiler->fsaveds;
1711
1712
offset = 2 * SSIZE_OF(sw);
1713
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1714
if (saved_arg_count == 0) {
1715
FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
1716
offset += 9 * SSIZE_OF(sw);
1717
} else {
1718
FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1719
offset += (8 - saved_arg_count) * SSIZE_OF(sw);
1720
}
1721
} else {
1722
if (scratches == SLJIT_FIRST_SAVED_REG) {
1723
FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
1724
offset += SSIZE_OF(sw);
1725
} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1726
FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1727
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1728
}
1729
1730
if (saved_arg_count == 0) {
1731
if (saveds == 0) {
1732
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1733
offset += SSIZE_OF(sw);
1734
} else {
1735
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
1736
offset += (saveds + 1) * SSIZE_OF(sw);
1737
}
1738
} else if (saveds > saved_arg_count) {
1739
if (saveds == saved_arg_count + 1) {
1740
FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1741
offset += SSIZE_OF(sw);
1742
} else {
1743
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1744
offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
1745
}
1746
}
1747
}
1748
1749
if (saved_arg_count > 0) {
1750
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1751
offset += SSIZE_OF(sw);
1752
}
1753
1754
tmp = SLJIT_FS0 - fsaveds;
1755
for (i = SLJIT_FS0; i > tmp; i--) {
1756
FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1757
offset += SSIZE_OF(sw);
1758
}
1759
1760
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1761
FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1762
offset += SSIZE_OF(sw);
1763
}
1764
1765
local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1766
compiler->local_size = local_size;
1767
1768
if (is_s20(-local_size))
1769
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
1770
else
1771
FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
1772
1773
if (options & SLJIT_ENTER_REG_ARG)
1774
return SLJIT_SUCCESS;
1775
1776
arg_types >>= SLJIT_ARG_SHIFT;
1777
saved_arg_count = 0;
1778
tmp = 0;
1779
while (arg_types > 0) {
1780
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1781
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1782
FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
1783
saved_arg_count++;
1784
}
1785
tmp++;
1786
}
1787
1788
arg_types >>= SLJIT_ARG_SHIFT;
1789
}
1790
1791
return SLJIT_SUCCESS;
1792
}
1793
1794
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1795
sljit_s32 options, sljit_s32 arg_types,
1796
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
1797
{
1798
CHECK_ERROR();
1799
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
1800
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
1801
1802
compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1803
return SLJIT_SUCCESS;
1804
}
1805
1806
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
1807
{
1808
sljit_s32 offset, i, tmp;
1809
sljit_s32 local_size = compiler->local_size;
1810
sljit_s32 saveds = compiler->saveds;
1811
sljit_s32 scratches = compiler->scratches;
1812
sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1813
1814
if (is_u12(local_size))
1815
FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
1816
else if (is_s20(local_size))
1817
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
1818
else
1819
FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));
1820
1821
offset = 2 * SSIZE_OF(sw);
1822
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1823
if (kept_saveds_count == 0) {
1824
FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
1825
offset += 9 * SSIZE_OF(sw);
1826
} else {
1827
FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1828
offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
1829
}
1830
} else {
1831
if (scratches == SLJIT_FIRST_SAVED_REG) {
1832
FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
1833
offset += SSIZE_OF(sw);
1834
} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1835
FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1836
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1837
}
1838
1839
if (kept_saveds_count == 0) {
1840
if (saveds == 0) {
1841
if (last_reg == r14)
1842
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1843
offset += SSIZE_OF(sw);
1844
} else if (saveds == 1 && last_reg == r13) {
1845
FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
1846
offset += 2 * SSIZE_OF(sw);
1847
} else {
1848
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
1849
offset += (saveds + 1) * SSIZE_OF(sw);
1850
}
1851
} else if (saveds > kept_saveds_count) {
1852
if (saveds == kept_saveds_count + 1) {
1853
FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1854
offset += SSIZE_OF(sw);
1855
} else {
1856
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1857
offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
1858
}
1859
}
1860
}
1861
1862
if (kept_saveds_count > 0) {
1863
if (last_reg == r14)
1864
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1865
offset += SSIZE_OF(sw);
1866
}
1867
1868
tmp = SLJIT_FS0 - compiler->fsaveds;
1869
for (i = SLJIT_FS0; i > tmp; i--) {
1870
FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1871
offset += SSIZE_OF(sw);
1872
}
1873
1874
for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1875
FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1876
offset += SSIZE_OF(sw);
1877
}
1878
1879
return SLJIT_SUCCESS;
1880
}
1881
1882
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1883
{
1884
CHECK_ERROR();
1885
CHECK(check_sljit_emit_return_void(compiler));
1886
1887
FAIL_IF(emit_stack_frame_release(compiler, r14));
1888
return push_inst(compiler, br(r14)); /* return */
1889
}
1890
1891
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1892
sljit_s32 src, sljit_sw srcw)
1893
{
1894
CHECK_ERROR();
1895
CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1896
1897
if (src & SLJIT_MEM) {
1898
ADJUST_LOCAL_OFFSET(src, srcw);
1899
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1900
src = TMP_REG2;
1901
srcw = 0;
1902
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1903
FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1904
src = TMP_REG2;
1905
srcw = 0;
1906
}
1907
1908
FAIL_IF(emit_stack_frame_release(compiler, r13));
1909
1910
SLJIT_SKIP_CHECKS(compiler);
1911
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1912
}
1913
1914
/* --------------------------------------------------------------------- */
1915
/* Operators */
1916
/* --------------------------------------------------------------------- */
1917
1918
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1919
{
1920
sljit_gpr arg0 = gpr(SLJIT_R0);
1921
sljit_gpr arg1 = gpr(SLJIT_R1);
1922
1923
CHECK_ERROR();
1924
CHECK(check_sljit_emit_op0(compiler, op));
1925
1926
op = GET_OPCODE(op) | (op & SLJIT_32);
1927
switch (op) {
1928
case SLJIT_BREAKPOINT:
1929
/* The following invalid instruction is emitted by gdb. */
1930
return push_inst(compiler, 0x0001 /* 2-byte trap */);
1931
case SLJIT_NOP:
1932
return push_inst(compiler, 0x0700 /* 2-byte nop */);
1933
case SLJIT_LMUL_UW:
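/* mlgr uses the arg0/arg1 even-odd register pair: it multiplies arg1 by the
second operand (arg0 here) and leaves the high half of the 128-bit product
in arg0 and the low half in arg1; the two halves are swapped at the end of
this function into the order sljit expects. */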
1934
FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1935
break;
1936
case SLJIT_LMUL_SW:
1937
/* signed multiplication from: */
1938
/* Hacker's Delight, Second Edition: Chapter 8-3. */
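/* The signed high half is derived from the unsigned one:
signed_hi(a * b) = unsigned_hi(a * b) - (a < 0 ? b : 0) - (b < 0 ? a : 0).
The srag/ngr pairs below compute the correction terms (a >> 63) & b and
(b >> 63) & a before the unsigned multiply, and the sgr instructions
subtract them from the high half afterwards. */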
1939
FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1940
FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1941
FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1942
FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1943
1944
/* unsigned multiplication */
1945
FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1946
1947
FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1948
FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1949
break;
1950
case SLJIT_DIV_U32:
1951
case SLJIT_DIVMOD_U32:
1952
FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1953
FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1954
FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1955
FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1956
if (op == SLJIT_DIVMOD_U32)
1957
return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1958
1959
return SLJIT_SUCCESS;
1960
case SLJIT_DIV_S32:
1961
case SLJIT_DIVMOD_S32:
1962
FAIL_IF(push_inst(compiler, 0xeb00000000dc /* srak */ | R36A(tmp0) | R32A(arg0) | (31 << 16)));
1963
FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1964
FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1965
FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1966
if (op == SLJIT_DIVMOD_S32)
1967
return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1968
1969
return SLJIT_SUCCESS;
1970
case SLJIT_DIV_UW:
1971
case SLJIT_DIVMOD_UW:
1972
FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1973
FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1974
FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1975
FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1976
if (op == SLJIT_DIVMOD_UW)
1977
return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1978
1979
return SLJIT_SUCCESS;
1980
case SLJIT_DIV_SW:
1981
case SLJIT_DIVMOD_SW:
1982
FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1983
FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1984
FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1985
if (op == SLJIT_DIVMOD_SW)
1986
return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1987
1988
return SLJIT_SUCCESS;
1989
case SLJIT_MEMORY_BARRIER:
1990
return push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0);
1991
case SLJIT_ENDBR:
1992
return SLJIT_SUCCESS;
1993
case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1994
return SLJIT_SUCCESS;
1995
default:
1996
SLJIT_UNREACHABLE();
1997
}
1998
/* swap result registers */
1999
FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
2000
FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
2001
return push_inst(compiler, lgr(arg1, tmp0));
2002
}
2003
2004
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
2005
{
2006
sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
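/* flogr stores the number of leading zero bits of src_r in tmp0 (64 when
src_r is zero). For CTZ the source is first replaced by src & -src, which
isolates the lowest set bit, and the leading-zero count is then converted
into a trailing-zero count by the aghik/rxsbg sequence below. */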
2007
2008
if ((op & SLJIT_32) && src_r != tmp0) {
2009
FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
2010
src_r = tmp0;
2011
}
2012
2013
if (is_ctz) {
2014
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
2015
2016
if (src_r == tmp0)
2017
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
2018
else
2019
FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
2020
2021
src_r = tmp0;
2022
}
2023
2024
FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
2025
2026
if (is_ctz)
2027
FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
2028
2029
if (op & SLJIT_32) {
2030
if (!is_ctz && dst_r != tmp0)
2031
return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
2032
2033
FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
2034
}
2035
2036
if (is_ctz)
2037
FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
2038
2039
if (dst_r == tmp0)
2040
return SLJIT_SUCCESS;
2041
2042
return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
2043
}
2044
2045
static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
2046
sljit_s32 dst, sljit_sw dstw,
2047
sljit_s32 src, sljit_sw srcw)
2048
{
2049
struct addr addr;
2050
sljit_gpr reg;
2051
sljit_ins ins;
2052
sljit_s32 opcode = GET_OPCODE(op);
2053
sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
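/* The load/store-reversed instructions (lrvh/lrv/lrvg, strvh/strv/strvg)
swap the bytes while moving the value, so memory operands need no separate
byte-reverse step; sign/zero extension is the only extra work for the
16 and 32 bit variants. */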
2054
2055
if (dst & SLJIT_MEM) {
2056
if (src & SLJIT_MEM) {
2057
FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
2058
reg = tmp0;
2059
} else
2060
reg = gpr(src);
2061
2062
FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
2063
2064
if (is_16bit)
2065
ins = 0xe3000000003f /* strvh */;
2066
else
2067
ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
2068
2069
return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
2070
}
2071
2072
reg = gpr(dst);
2073
2074
if (src & SLJIT_MEM) {
2075
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
2076
2077
if (is_16bit)
2078
ins = 0xe3000000001f /* lrvh */;
2079
else
2080
ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
2081
2082
FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
2083
2084
if (opcode == SLJIT_REV)
2085
return SLJIT_SUCCESS;
2086
2087
if (is_16bit) {
2088
if (op & SLJIT_32)
2089
ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
2090
else
2091
ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
2092
} else
2093
ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2094
2095
return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2096
}
2097
2098
ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
2099
FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
2100
2101
if (opcode == SLJIT_REV)
2102
return SLJIT_SUCCESS;
2103
2104
if (!is_16bit) {
2105
ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2106
return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2107
}
2108
2109
if (op & SLJIT_32) {
2110
ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
2111
return push_inst(compiler, ins | R20A(reg) | 16);
2112
}
2113
2114
ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
2115
return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
2116
}
2117
2118
/* LEVAL will be defined later with different parameters as needed */
2119
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2120
2121
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2122
sljit_s32 dst, sljit_sw dstw,
2123
sljit_s32 src, sljit_sw srcw)
2124
{
2125
sljit_ins ins;
2126
struct addr mem;
2127
sljit_gpr dst_r;
2128
sljit_gpr src_r;
2129
sljit_s32 opcode = GET_OPCODE(op);
2130
2131
CHECK_ERROR();
2132
CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2133
ADJUST_LOCAL_OFFSET(dst, dstw);
2134
ADJUST_LOCAL_OFFSET(src, srcw);
2135
2136
if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
2137
/* LOAD REGISTER */
2138
if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
2139
dst_r = gpr(dst);
2140
src_r = gpr(src);
2141
switch (opcode | (op & SLJIT_32)) {
2142
/* 32-bit */
2143
case SLJIT_MOV32_U8:
2144
ins = llcr(dst_r, src_r);
2145
break;
2146
case SLJIT_MOV32_S8:
2147
ins = lbr(dst_r, src_r);
2148
break;
2149
case SLJIT_MOV32_U16:
2150
ins = llhr(dst_r, src_r);
2151
break;
2152
case SLJIT_MOV32_S16:
2153
ins = lhr(dst_r, src_r);
2154
break;
2155
case SLJIT_MOV32:
2156
if (dst_r == src_r)
2157
return SLJIT_SUCCESS;
2158
ins = lr(dst_r, src_r);
2159
break;
2160
/* 64-bit */
2161
case SLJIT_MOV_U8:
2162
ins = llgcr(dst_r, src_r);
2163
break;
2164
case SLJIT_MOV_S8:
2165
ins = lgbr(dst_r, src_r);
2166
break;
2167
case SLJIT_MOV_U16:
2168
ins = llghr(dst_r, src_r);
2169
break;
2170
case SLJIT_MOV_S16:
2171
ins = lghr(dst_r, src_r);
2172
break;
2173
case SLJIT_MOV_U32:
2174
ins = llgfr(dst_r, src_r);
2175
break;
2176
case SLJIT_MOV_S32:
2177
ins = lgfr(dst_r, src_r);
2178
break;
2179
case SLJIT_MOV:
2180
case SLJIT_MOV_P:
2181
if (dst_r == src_r)
2182
return SLJIT_SUCCESS;
2183
ins = lgr(dst_r, src_r);
2184
break;
2185
default:
2186
ins = 0;
2187
SLJIT_UNREACHABLE();
2188
break;
2189
}
2190
FAIL_IF(push_inst(compiler, ins));
2191
return SLJIT_SUCCESS;
2192
}
2193
/* LOAD IMMEDIATE */
2194
if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
2195
switch (opcode) {
2196
case SLJIT_MOV_U8:
2197
srcw = (sljit_sw)((sljit_u8)(srcw));
2198
break;
2199
case SLJIT_MOV_S8:
2200
srcw = (sljit_sw)((sljit_s8)(srcw));
2201
break;
2202
case SLJIT_MOV_U16:
2203
srcw = (sljit_sw)((sljit_u16)(srcw));
2204
break;
2205
case SLJIT_MOV_S16:
2206
srcw = (sljit_sw)((sljit_s16)(srcw));
2207
break;
2208
case SLJIT_MOV_U32:
2209
srcw = (sljit_sw)((sljit_u32)(srcw));
2210
break;
2211
case SLJIT_MOV_S32:
2212
case SLJIT_MOV32:
2213
srcw = (sljit_sw)((sljit_s32)(srcw));
2214
break;
2215
}
2216
return push_load_imm_inst(compiler, gpr(dst), srcw);
2217
}
2218
/* LOAD */
2219
/* TODO(carenas): avoid reg being defined later */
2220
#define LEVAL(i) EVAL(i, reg, mem)
2221
if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2222
sljit_gpr reg = gpr(dst);
2223
2224
FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2225
/* TODO(carenas): convert all calls below to LEVAL */
2226
switch (opcode | (op & SLJIT_32)) {
2227
case SLJIT_MOV32_U8:
2228
ins = llc(reg, mem.offset, mem.index, mem.base);
2229
break;
2230
case SLJIT_MOV32_S8:
2231
ins = lb(reg, mem.offset, mem.index, mem.base);
2232
break;
2233
case SLJIT_MOV32_U16:
2234
ins = llh(reg, mem.offset, mem.index, mem.base);
2235
break;
2236
case SLJIT_MOV32_S16:
2237
ins = WHEN2(is_u12(mem.offset), lh, lhy);
2238
break;
2239
case SLJIT_MOV32:
2240
ins = WHEN2(is_u12(mem.offset), l, ly);
2241
break;
2242
case SLJIT_MOV_U8:
2243
ins = LEVAL(llgc);
2244
break;
2245
case SLJIT_MOV_S8:
2246
ins = lgb(reg, mem.offset, mem.index, mem.base);
2247
break;
2248
case SLJIT_MOV_U16:
2249
ins = LEVAL(llgh);
2250
break;
2251
case SLJIT_MOV_S16:
2252
ins = lgh(reg, mem.offset, mem.index, mem.base);
2253
break;
2254
case SLJIT_MOV_U32:
2255
ins = LEVAL(llgf);
2256
break;
2257
case SLJIT_MOV_S32:
2258
ins = lgf(reg, mem.offset, mem.index, mem.base);
2259
break;
2260
case SLJIT_MOV_P:
2261
case SLJIT_MOV:
2262
ins = lg(reg, mem.offset, mem.index, mem.base);
2263
break;
2264
default:
2265
ins = 0;
2266
SLJIT_UNREACHABLE();
2267
break;
2268
}
2269
FAIL_IF(push_inst(compiler, ins));
2270
return SLJIT_SUCCESS;
2271
}
2272
/* STORE and STORE IMMEDIATE */
2273
if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
2274
sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2275
2276
if (src == SLJIT_IMM) {
2277
/* TODO(mundaym): MOVE IMMEDIATE? */
2278
FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2279
}
2280
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2281
switch (opcode) {
2282
case SLJIT_MOV_U8:
2283
case SLJIT_MOV_S8:
2284
return push_inst(compiler,
2285
WHEN2(is_u12(mem.offset), stc, stcy));
2286
case SLJIT_MOV_U16:
2287
case SLJIT_MOV_S16:
2288
return push_inst(compiler,
2289
WHEN2(is_u12(mem.offset), sth, sthy));
2290
case SLJIT_MOV_U32:
2291
case SLJIT_MOV_S32:
2292
case SLJIT_MOV32:
2293
return push_inst(compiler,
2294
WHEN2(is_u12(mem.offset), st, sty));
2295
case SLJIT_MOV_P:
2296
case SLJIT_MOV:
2297
FAIL_IF(push_inst(compiler, LEVAL(stg)));
2298
return SLJIT_SUCCESS;
2299
default:
2300
SLJIT_UNREACHABLE();
2301
}
2302
}
2303
#undef LEVAL
2304
/* MOVE CHARACTERS */
2305
if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2306
FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2307
switch (opcode) {
2308
case SLJIT_MOV_U8:
2309
case SLJIT_MOV_S8:
2310
FAIL_IF(push_inst(compiler,
2311
EVAL(llgc, tmp0, mem)));
2312
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2313
return push_inst(compiler,
2314
EVAL(stcy, tmp0, mem));
2315
case SLJIT_MOV_U16:
2316
case SLJIT_MOV_S16:
2317
FAIL_IF(push_inst(compiler,
2318
EVAL(llgh, tmp0, mem)));
2319
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2320
return push_inst(compiler,
2321
EVAL(sthy, tmp0, mem));
2322
case SLJIT_MOV_U32:
2323
case SLJIT_MOV_S32:
2324
case SLJIT_MOV32:
2325
FAIL_IF(push_inst(compiler,
2326
EVAL(ly, tmp0, mem)));
2327
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2328
return push_inst(compiler,
2329
EVAL(sty, tmp0, mem));
2330
case SLJIT_MOV_P:
2331
case SLJIT_MOV:
2332
FAIL_IF(push_inst(compiler,
2333
EVAL(lg, tmp0, mem)));
2334
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2335
FAIL_IF(push_inst(compiler,
2336
EVAL(stg, tmp0, mem)));
2337
return SLJIT_SUCCESS;
2338
default:
2339
SLJIT_UNREACHABLE();
2340
}
2341
}
2342
SLJIT_UNREACHABLE();
2343
}
2344
2345
SLJIT_ASSERT(src != SLJIT_IMM);
2346
2347
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
2348
src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
2349
2350
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2351
2352
/* TODO(mundaym): optimize loads and stores */
2353
switch (opcode) {
2354
case SLJIT_CLZ:
2355
case SLJIT_CTZ:
2356
if (src & SLJIT_MEM)
2357
FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
2358
2359
FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
2360
break;
2361
case SLJIT_REV_U32:
2362
case SLJIT_REV_S32:
2363
op |= SLJIT_32;
2364
SLJIT_FALLTHROUGH
2365
case SLJIT_REV:
2366
case SLJIT_REV_U16:
2367
case SLJIT_REV_S16:
2368
return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
2369
default:
2370
SLJIT_UNREACHABLE();
2371
}
2372
2373
if (dst & SLJIT_MEM)
2374
return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
2375
2376
return SLJIT_SUCCESS;
2377
}
2378
2379
static SLJIT_INLINE int is_commutative(sljit_s32 op)
2380
{
2381
switch (GET_OPCODE(op)) {
2382
case SLJIT_ADD:
2383
case SLJIT_ADDC:
2384
case SLJIT_MUL:
2385
case SLJIT_AND:
2386
case SLJIT_OR:
2387
case SLJIT_XOR:
2388
return 1;
2389
}
2390
return 0;
2391
}
2392
2393
static const struct ins_forms add_forms = {
2394
0x1a00, /* ar */
2395
0xb9080000, /* agr */
2396
0xb9f80000, /* ark */
2397
0xb9e80000, /* agrk */
2398
0x5a000000, /* a */
2399
0xe3000000005a, /* ay */
2400
0xe30000000008, /* ag */
2401
};
2402
2403
static const struct ins_forms logical_add_forms = {
2404
0x1e00, /* alr */
2405
0xb90a0000, /* algr */
2406
0xb9fa0000, /* alrk */
2407
0xb9ea0000, /* algrk */
2408
0x5e000000, /* al */
2409
0xe3000000005e, /* aly */
2410
0xe3000000000a, /* alg */
2411
};
2412
2413
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2414
sljit_s32 dst, sljit_sw dstw,
2415
sljit_s32 src1, sljit_sw src1w,
2416
sljit_s32 src2, sljit_sw src2w)
2417
{
2418
int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2419
int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2420
const struct ins_forms *forms;
2421
sljit_ins ins;
2422
2423
if (src2 == SLJIT_IMM) {
2424
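/* When the zero+overflow flag combination is not requested, src1 is the
same memory operand as the destination, and the immediate fits in 8 bits,
add directly in storage (asi/agsi, or the logical alsi/algsi forms),
avoiding a separate load and store. */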
if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2425
if (sets_overflow)
2426
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2427
else
2428
ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2429
return emit_siy(compiler, ins, dst, dstw, src2w);
2430
}
2431
2432
if (is_s16(src2w)) {
2433
if (sets_overflow)
2434
ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2435
else
2436
ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2437
FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2438
goto done;
2439
}
2440
2441
if (!sets_overflow) {
2442
if ((op & SLJIT_32) || is_u32(src2w)) {
2443
ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2444
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2445
goto done;
2446
}
2447
if (is_u32(-src2w)) {
2448
FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2449
goto done;
2450
}
2451
}
2452
else if ((op & SLJIT_32) || is_s32(src2w)) {
2453
ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2454
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2455
goto done;
2456
}
2457
}
2458
2459
forms = sets_overflow ? &add_forms : &logical_add_forms;
2460
FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2461
2462
done:
2463
if (sets_zero_overflow)
2464
FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2465
2466
if (dst & SLJIT_MEM)
2467
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2468
2469
return SLJIT_SUCCESS;
2470
}
2471
2472
static const struct ins_forms sub_forms = {
2473
0x1b00, /* sr */
2474
0xb9090000, /* sgr */
2475
0xb9f90000, /* srk */
2476
0xb9e90000, /* sgrk */
2477
0x5b000000, /* s */
2478
0xe3000000005b, /* sy */
2479
0xe30000000009, /* sg */
2480
};
2481
2482
static const struct ins_forms logical_sub_forms = {
2483
0x1f00, /* slr */
2484
0xb90b0000, /* slgr */
2485
0xb9fb0000, /* slrk */
2486
0xb9eb0000, /* slgrk */
2487
0x5f000000, /* sl */
2488
0xe3000000005f, /* sly */
2489
0xe3000000000b, /* slg */
2490
};
2491
2492
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2493
sljit_s32 dst, sljit_sw dstw,
2494
sljit_s32 src1, sljit_sw src1w,
2495
sljit_s32 src2, sljit_sw src2w)
2496
{
2497
sljit_s32 flag_type = GET_FLAG_TYPE(op);
2498
int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2499
int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2500
const struct ins_forms *forms;
2501
sljit_ins ins;
2502
2503
if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2504
int compare_signed = flag_type >= SLJIT_SIG_LESS;
2505
2506
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2507
2508
if (src2 == SLJIT_IMM) {
2509
if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
2510
if ((op & SLJIT_32) || is_s32(src2w)) {
2511
ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2512
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2513
}
2514
} else if ((op & SLJIT_32) || is_u32(src2w)) {
2515
ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2516
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2517
}
2518
}
2519
else if (src2 & SLJIT_MEM) {
2520
if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2521
ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2522
return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2523
}
2524
2525
if (compare_signed)
2526
ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2527
else
2528
ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2529
return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2530
}
2531
2532
if (compare_signed)
2533
ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2534
else
2535
ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2536
return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2537
}
2538
2539
if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2540
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2541
FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2542
goto done;
2543
}
2544
2545
if (src2 == SLJIT_IMM) {
2546
sljit_sw neg_src2w = -src2w;
2547
2548
if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2549
if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2550
if (sets_signed)
2551
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2552
else
2553
ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2554
return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2555
}
2556
2557
if (is_s16(neg_src2w)) {
2558
if (sets_signed)
2559
ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2560
else
2561
ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2562
FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2563
goto done;
2564
}
2565
}
2566
2567
if (!sets_signed) {
2568
if ((op & SLJIT_32) || is_u32(src2w)) {
2569
ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2570
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2571
goto done;
2572
}
2573
if (is_u32(neg_src2w)) {
2574
FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2575
goto done;
2576
}
2577
}
2578
else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2579
ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2580
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2581
goto done;
2582
}
2583
}
2584
2585
forms = sets_signed ? &sub_forms : &logical_sub_forms;
2586
FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2587
2588
done:
2589
if (sets_signed) {
2590
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2591
2592
if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2593
/* In case of overflow, the sign bit of the two source operands must be different, and
2594
- the first operand is greater if the sign bit of the result is set
2595
- the first operand is less if the sign bit of the result is not set
2596
The -result operation sets the correct sign, because the result cannot be zero.
2597
The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2598
FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
2599
FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2600
}
2601
else if (op & SLJIT_SET_Z)
2602
FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2603
}
2604
2605
if (dst & SLJIT_MEM)
2606
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2607
2608
return SLJIT_SUCCESS;
2609
}
2610
2611
static const struct ins_forms multiply_forms = {
2612
0xb2520000, /* msr */
2613
0xb90c0000, /* msgr */
2614
0xb9fd0000, /* msrkc */
2615
0xb9ed0000, /* msgrkc */
2616
0x71000000, /* ms */
2617
0xe30000000051, /* msy */
2618
0xe3000000000c, /* msg */
2619
};
2620
2621
static const struct ins_forms multiply_overflow_forms = {
2622
0,
2623
0,
2624
0xb9fd0000, /* msrkc */
2625
0xb9ed0000, /* msgrkc */
2626
0,
2627
0xe30000000053, /* msc */
2628
0xe30000000083, /* msgc */
2629
};
2630
2631
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2632
sljit_s32 dst,
2633
sljit_s32 src1, sljit_sw src1w,
2634
sljit_s32 src2, sljit_sw src2w)
2635
{
2636
sljit_ins ins;
2637
2638
if (HAS_FLAGS(op)) {
2639
/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2640
FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2641
FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2642
if (dst_r != tmp0) {
2643
FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2644
}
2645
FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2646
FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2647
FAIL_IF(push_inst(compiler, ipm(tmp1)));
2648
FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2649
2650
return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2651
}
2652
2653
if (src2 == SLJIT_IMM) {
2654
if (is_s16(src2w)) {
2655
ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2656
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2657
}
2658
2659
if (is_s32(src2w)) {
2660
ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2661
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2662
}
2663
}
2664
2665
return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2666
}
2667
2668
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2669
sljit_s32 dst,
2670
sljit_s32 src1, sljit_sw src1w,
2671
sljit_uw imm, sljit_s32 count16)
2672
{
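/* count16 is the number of non-zero 16-bit halfwords in imm (with imm == 0
counted as one), computed by the caller; for OR it decides whether the two
32-bit oihf/oilf forms or the cheaper individual halfword forms
(oihh/oihl/oilh/oill) are used. */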
2673
sljit_s32 mode = compiler->mode;
2674
sljit_gpr dst_r = tmp0;
2675
sljit_s32 needs_move = 1;
2676
2677
if (FAST_IS_REG(dst)) {
2678
dst_r = gpr(dst & REG_MASK);
2679
if (dst == src1)
2680
needs_move = 0;
2681
}
2682
2683
if (needs_move)
2684
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2685
2686
if (type == SLJIT_AND) {
2687
if (!(mode & SLJIT_32))
2688
FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2689
return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2690
}
2691
else if (type == SLJIT_OR) {
2692
if (count16 >= 3) {
2693
FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2694
return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2695
}
2696
2697
if (count16 >= 2) {
2698
if ((imm & 0x00000000ffffffffull) == 0)
2699
return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2700
if ((imm & 0xffffffff00000000ull) == 0)
2701
return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2702
}
2703
2704
if ((imm & 0xffff000000000000ull) != 0)
2705
FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2706
if ((imm & 0x0000ffff00000000ull) != 0)
2707
FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2708
if ((imm & 0x00000000ffff0000ull) != 0)
2709
FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2710
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2711
return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2712
return SLJIT_SUCCESS;
2713
}
2714
2715
if ((imm & 0xffffffff00000000ull) != 0)
2716
FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2717
if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2718
return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2719
return SLJIT_SUCCESS;
2720
}
2721
2722
static const struct ins_forms bitwise_and_forms = {
2723
0x1400, /* nr */
2724
0xb9800000, /* ngr */
2725
0xb9f40000, /* nrk */
2726
0xb9e40000, /* ngrk */
2727
0x54000000, /* n */
2728
0xe30000000054, /* ny */
2729
0xe30000000080, /* ng */
2730
};
2731
2732
static const struct ins_forms bitwise_or_forms = {
2733
0x1600, /* or */
2734
0xb9810000, /* ogr */
2735
0xb9f60000, /* ork */
2736
0xb9e60000, /* ogrk */
2737
0x56000000, /* o */
2738
0xe30000000056, /* oy */
2739
0xe30000000081, /* og */
2740
};
2741
2742
static const struct ins_forms bitwise_xor_forms = {
2743
0x1700, /* xr */
2744
0xb9820000, /* xgr */
2745
0xb9f70000, /* xrk */
2746
0xb9e70000, /* xgrk */
2747
0x57000000, /* x */
2748
0xe30000000057, /* xy */
2749
0xe30000000082, /* xg */
2750
};
2751
2752
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2753
sljit_s32 dst,
2754
sljit_s32 src1, sljit_sw src1w,
2755
sljit_s32 src2, sljit_sw src2w)
2756
{
2757
sljit_s32 type = GET_OPCODE(op);
2758
const struct ins_forms *forms;
2759
2760
if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
2761
sljit_s32 count16 = 0;
2762
sljit_uw imm = (sljit_uw)src2w;
2763
2764
if (op & SLJIT_32)
2765
imm &= 0xffffffffull;
2766
2767
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2768
count16++;
2769
if ((imm & 0x00000000ffff0000ull) != 0)
2770
count16++;
2771
if ((imm & 0x0000ffff00000000ull) != 0)
2772
count16++;
2773
if ((imm & 0xffff000000000000ull) != 0)
2774
count16++;
2775
2776
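/* If only the flags are needed (dst == TMP_REG2) and the mask has a single
non-zero halfword, one test-under-mask instruction is enough; no AND result
has to be materialized. */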
if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
2777
sljit_gpr src_r = tmp1;
2778
2779
if (FAST_IS_REG(src1))
2780
src_r = gpr(src1 & REG_MASK);
2781
else
2782
FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
2783
2784
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2785
return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
2786
if ((imm & 0x00000000ffff0000ull) != 0)
2787
return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
2788
if ((imm & 0x0000ffff00000000ull) != 0)
2789
return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
2790
return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
2791
}
2792
2793
if (!(op & SLJIT_SET_Z))
2794
return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2795
}
2796
2797
if (type == SLJIT_AND)
2798
forms = &bitwise_and_forms;
2799
else if (type == SLJIT_OR)
2800
forms = &bitwise_or_forms;
2801
else
2802
forms = &bitwise_xor_forms;
2803
2804
return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2805
}
2806
2807
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2808
sljit_s32 dst,
2809
sljit_s32 src1, sljit_sw src1w,
2810
sljit_s32 src2, sljit_sw src2w)
2811
{
2812
sljit_s32 type = GET_OPCODE(op);
2813
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2814
sljit_gpr src_r = tmp0;
2815
sljit_gpr base_r = tmp0;
2816
sljit_ins imm = 0;
2817
sljit_ins ins;
2818
2819
if (FAST_IS_REG(src1))
2820
src_r = gpr(src1);
2821
else
2822
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2823
2824
if (src2 != SLJIT_IMM) {
2825
if (FAST_IS_REG(src2))
2826
base_r = gpr(src2);
2827
else {
2828
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2829
base_r = tmp1;
2830
}
2831
2832
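/* The shift instructions use the low 6 bits of the amount, so for the
modulo-shift variants the 32-bit amount is first reduced mod 32: risbg
extracts the low 5 bits into tmp1, or nill masks tmp1 in place. */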
if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
2833
if (base_r != tmp1) {
2834
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2835
base_r = tmp1;
2836
} else
2837
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2838
}
2839
} else
2840
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2841
2842
if ((op & SLJIT_32) && dst_r == src_r) {
2843
if (type == SLJIT_SHL || type == SLJIT_MSHL)
2844
ins = 0x89000000 /* sll */;
2845
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2846
ins = 0x88000000 /* srl */;
2847
else
2848
ins = 0x8a000000 /* sra */;
2849
2850
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2851
} else {
2852
if (type == SLJIT_SHL || type == SLJIT_MSHL)
2853
ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2854
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2855
ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2856
else
2857
ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2858
2859
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2860
}
2861
2862
if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2863
return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2864
2865
return SLJIT_SUCCESS;
2866
}
2867
2868
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2869
sljit_s32 dst,
2870
sljit_s32 src1, sljit_sw src1w,
2871
sljit_s32 src2, sljit_sw src2w)
2872
{
2873
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2874
sljit_gpr src_r = tmp0;
2875
sljit_gpr base_r = tmp0;
2876
sljit_ins imm = 0;
2877
sljit_ins ins;
2878
2879
if (FAST_IS_REG(src1))
2880
src_r = gpr(src1);
2881
else
2882
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2883
2884
if (src2 != SLJIT_IMM) {
2885
if (FAST_IS_REG(src2))
2886
base_r = gpr(src2);
2887
else {
2888
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2889
base_r = tmp1;
2890
}
2891
}
2892
2893
if (GET_OPCODE(op) == SLJIT_ROTR) {
2894
if (src2 != SLJIT_IMM) {
2895
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2896
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2897
base_r = tmp1;
2898
} else
2899
src2w = -src2w;
2900
}
2901
2902
if (src2 == SLJIT_IMM)
2903
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2904
2905
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2906
return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2907
}
2908
2909
static const struct ins_forms addc_forms = {
2910
0xb9980000, /* alcr */
2911
0xb9880000, /* alcgr */
2912
0,
2913
0,
2914
0,
2915
0xe30000000098, /* alc */
2916
0xe30000000088, /* alcg */
2917
};
2918
2919
static const struct ins_forms subc_forms = {
2920
0xb9990000, /* slbr */
2921
0xb9890000, /* slbgr */
2922
0,
2923
0,
2924
0,
2925
0xe30000000099, /* slb */
2926
0xe30000000089, /* slbg */
2927
};
2928
2929
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2930
sljit_s32 dst, sljit_sw dstw,
2931
sljit_s32 src1, sljit_sw src1w,
2932
sljit_s32 src2, sljit_sw src2w)
2933
{
2934
CHECK_ERROR();
2935
CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2936
ADJUST_LOCAL_OFFSET(dst, dstw);
2937
ADJUST_LOCAL_OFFSET(src1, src1w);
2938
ADJUST_LOCAL_OFFSET(src2, src2w);
2939
2940
compiler->mode = op & SLJIT_32;
2941
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2942
2943
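/* For commutative operations move the immediate into src2 so that the
immediate-operand encodings below can be used; the xor triples swap the
operands in place. */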
if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2944
src1 ^= src2;
2945
src2 ^= src1;
2946
src1 ^= src2;
2947
2948
src1w ^= src2w;
2949
src2w ^= src1w;
2950
src1w ^= src2w;
2951
}
2952
2953
switch (GET_OPCODE(op)) {
2954
case SLJIT_ADD:
2955
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2956
return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2957
case SLJIT_ADDC:
2958
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2959
FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2960
if (dst & SLJIT_MEM)
2961
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2962
return SLJIT_SUCCESS;
2963
case SLJIT_SUB:
2964
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2965
return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2966
case SLJIT_SUBC:
2967
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2968
FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2969
if (dst & SLJIT_MEM)
2970
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2971
return SLJIT_SUCCESS;
2972
case SLJIT_MUL:
2973
FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2974
break;
2975
case SLJIT_AND:
2976
case SLJIT_OR:
2977
case SLJIT_XOR:
2978
FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2979
break;
2980
case SLJIT_SHL:
2981
case SLJIT_MSHL:
2982
case SLJIT_LSHR:
2983
case SLJIT_MLSHR:
2984
case SLJIT_ASHR:
2985
case SLJIT_MASHR:
2986
FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2987
break;
2988
case SLJIT_ROTL:
2989
case SLJIT_ROTR:
2990
FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2991
break;
2992
}
2993
2994
if (dst & SLJIT_MEM)
2995
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2996
return SLJIT_SUCCESS;
2997
}
2998
2999
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
3000
sljit_s32 src1, sljit_sw src1w,
3001
sljit_s32 src2, sljit_sw src2w)
3002
{
3003
sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
3004
3005
CHECK_ERROR();
3006
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
3007
3008
SLJIT_SKIP_CHECKS(compiler);
3009
return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
3010
}
3011
3012
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
3013
sljit_s32 dst_reg,
3014
sljit_s32 src1, sljit_sw src1w,
3015
sljit_s32 src2, sljit_sw src2w)
3016
{
3017
CHECK_ERROR();
3018
CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
3019
3020
switch (GET_OPCODE(op)) {
3021
case SLJIT_MULADD:
3022
SLJIT_SKIP_CHECKS(compiler);
3023
FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
3024
return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
3025
}
3026
3027
return SLJIT_SUCCESS;
3028
}
3029
3030
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
3031
sljit_s32 dst_reg,
3032
sljit_s32 src1_reg,
3033
sljit_s32 src2_reg,
3034
sljit_s32 src3, sljit_sw src3w)
3035
{
3036
sljit_s32 is_right;
3037
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
3038
sljit_gpr dst_r = gpr(dst_reg);
3039
sljit_gpr src1_r = gpr(src1_reg);
3040
sljit_gpr src2_r = gpr(src2_reg);
3041
sljit_gpr src3_r = tmp1;
3042
sljit_ins ins;
3043
3044
CHECK_ERROR();
3045
CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
3046
3047
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
3048
3049
if (src1_reg == src2_reg) {
3050
SLJIT_SKIP_CHECKS(compiler);
3051
return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
3052
}
3053
3054
ADJUST_LOCAL_OFFSET(src3, src3w);
3055
3056
if (src3 == SLJIT_IMM) {
3057
src3w &= bit_length - 1;
3058
3059
if (src3w == 0)
3060
return SLJIT_SUCCESS;
3061
3062
if (op & SLJIT_32) {
3063
if (dst_r == src1_r) {
3064
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3065
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
3066
} else {
3067
ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3068
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3069
}
3070
} else {
3071
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3072
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3073
}
3074
3075
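/* risbg rotates src2_r so that the bits belonging in the positions just
vacated in dst_r line up there and inserts only those bits, leaving the
already shifted src1 bits of dst_r untouched. */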
ins = 0xec0000000055 /* risbg */;
3076
3077
if (is_right) {
3078
src3w = bit_length - src3w;
3079
ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
3080
} else
3081
ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
3082
3083
return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
3084
}
3085
3086
if (!(src3 & SLJIT_MEM)) {
3087
src3_r = gpr(src3);
3088
3089
if (dst_r == src3_r) {
3090
FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
3091
src3_r = tmp1;
3092
}
3093
} else
3094
FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
3095
3096
if (op & SLJIT_32) {
3097
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
3098
if (src3_r != tmp1) {
3099
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
3100
src3_r = tmp1;
3101
} else
3102
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
3103
}
3104
3105
if (dst_r == src1_r) {
3106
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3107
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
3108
} else {
3109
ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3110
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3111
}
3112
3113
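/* For a 5-bit amount n, xor-ing with 31 yields 31 - n; together with the
extra shift of one encoded in the displacement field below, src2_r is
shifted by 32 - n in the opposite direction. */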
if (src3_r != tmp1) {
3114
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
3115
FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
3116
} else
3117
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
3118
3119
ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
3120
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
3121
3122
return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
3123
}
3124
3125
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3126
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3127
3128
ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
3129
3130
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
3131
if (src3_r != tmp1)
3132
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
3133
3134
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
3135
src2_r = tmp0;
3136
3137
if (src3_r != tmp1)
3138
FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
3139
else
3140
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
3141
} else
3142
FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
3143
3144
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
3145
return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
3146
}
3147
3148
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2_shift(struct sljit_compiler *compiler, sljit_s32 op,
3149
sljit_s32 dst, sljit_sw dstw,
3150
sljit_s32 src1, sljit_sw src1w,
3151
sljit_s32 src2, sljit_sw src2w,
3152
sljit_sw shift_arg)
3153
{
3154
sljit_gpr dst_r, tmp_r, src_r;
3155
struct addr addr;
3156
3157
CHECK_ERROR();
3158
CHECK(check_sljit_emit_op2_shift(compiler, op, dst, dstw, src1, src1w, src2, src2w, shift_arg));
3159
ADJUST_LOCAL_OFFSET(dst, dstw);
3160
ADJUST_LOCAL_OFFSET(src1, src1w);
3161
ADJUST_LOCAL_OFFSET(src2, src2w);
3162
3163
shift_arg &= 0x3f;
3164
3165
if (src2 == SLJIT_IMM) {
3166
src2w = src2w << shift_arg;
3167
shift_arg = 0;
3168
}
3169
3170
if (shift_arg == 0) {
3171
SLJIT_SKIP_CHECKS(compiler);
3172
return sljit_emit_op2(compiler, GET_OPCODE(op), dst, dstw, src1, src1w, src2, src2w);
3173
}
3174
3175
tmp_r = FAST_IS_REG(dst) && (dst != src1) ? gpr(dst) : tmp0;
3176
3177
if (src2 & SLJIT_MEM) {
3178
FAIL_IF(load_word(compiler, tmp_r, src2, src2w, 0 /* 64-bit */));
3179
src_r = tmp_r;
3180
} else {
3181
src_r = gpr(src2);
3182
}
3183
3184
FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp_r) | R32A(src_r) | ((sljit_ins)shift_arg << 16)));
3185
3186
if (src1 & SLJIT_MEM) {
3187
FAIL_IF(make_addr_bxy(compiler, &addr, src1, src1w, tmp1));
3188
FAIL_IF(push_inst(compiler, 0xe30000000008 /* ag */ | R36A(tmp_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
3189
src_r = tmp_r;
3190
} else if (src1 == SLJIT_IMM) {
3191
if (is_s32(src1w)) {
3192
FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp_r) | (sljit_u32)src1w));
3193
src_r = tmp_r;
3194
} else {
3195
src_r = tmp_r != tmp0 ? tmp0 : tmp1;
3196
FAIL_IF(push_load_imm_inst(compiler, src_r, src1w));
3197
}
3198
} else {
3199
src_r = gpr(src1);
3200
}
3201
3202
dst_r = (FAST_IS_REG(dst) ? gpr(dst) : tmp0);
3203
3204
if (src_r != tmp_r) {
3205
if (src_r == dst_r) {
3206
FAIL_IF(push_inst(compiler, 0xb9080000 /* agr */ | R4A(dst_r) | R0A(tmp_r)));
3207
} else {
3208
FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp_r) | R4A(dst_r) | R0A(src_r)));
3209
}
3210
}
3211
3212
if (dst & SLJIT_MEM)
3213
return store_word(compiler, dst_r, dst, dstw, 0 /* 64-bit */);
3214
return SLJIT_SUCCESS;
3215
}
3216
3217
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3218
sljit_s32 src, sljit_sw srcw)
3219
{
3220
sljit_gpr src_r;
3221
struct addr addr;
3222
3223
CHECK_ERROR();
3224
CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3225
ADJUST_LOCAL_OFFSET(src, srcw);
3226
3227
switch (op) {
3228
case SLJIT_FAST_RETURN:
3229
if (FAST_IS_REG(src)) {
3230
src_r = gpr(src);
3231
if (src_r != link_r)
3232
FAIL_IF(push_inst(compiler, lgr(link_r, src_r)));
3233
} else
3234
FAIL_IF(load_word(compiler, link_r, src, srcw, 0));
3235
3236
return push_inst(compiler, br(link_r));
3237
case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3238
return SLJIT_SUCCESS;
3239
case SLJIT_PREFETCH_L1:
3240
case SLJIT_PREFETCH_L2:
3241
case SLJIT_PREFETCH_L3:
3242
case SLJIT_PREFETCH_ONCE:
3243
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3244
return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3245
default:
3246
return SLJIT_SUCCESS;
3247
}
3248
3249
return SLJIT_SUCCESS;
3250
}
3251
3252
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3253
sljit_s32 dst, sljit_sw dstw)
3254
{
3255
sljit_gpr dst_r = link_r;
3256
sljit_s32 size;
3257
3258
CHECK_ERROR();
3259
CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3260
ADJUST_LOCAL_OFFSET(dst, dstw);
3261
3262
switch (op) {
3263
case SLJIT_FAST_ENTER:
3264
if (FAST_IS_REG(dst)) {
3265
dst_r = gpr(dst);
3266
3267
if (dst_r == link_r)
3268
return SLJIT_SUCCESS;
3269
return push_inst(compiler, lgr(dst_r, link_r));
3270
}
3271
break;
3272
case SLJIT_GET_RETURN_ADDRESS:
3273
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3274
3275
size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3276
FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3277
break;
3278
}
3279
3280
if (dst & SLJIT_MEM)
3281
return store_word(compiler, dst_r, dst, dstw, 0);
3282
3283
return SLJIT_SUCCESS;
3284
}
3285
3286
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3287
{
3288
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3289
3290
if (type == SLJIT_GP_REGISTER)
3291
return (sljit_s32)gpr(reg);
3292
3293
if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128)
3294
return -1;
3295
3296
return (sljit_s32)freg_map[reg];
3297
}
3298
3299
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3300
void *instruction, sljit_u32 size)
3301
{
3302
sljit_ins ins = 0;
3303
3304
CHECK_ERROR();
3305
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3306
3307
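/* s390x instructions are 2, 4 or 6 bytes long; push_inst expects them
right-aligned in the 64-bit ins value, hence the copy to its end. */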
memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3308
return push_inst(compiler, ins);
3309
}
3310
3311
/* --------------------------------------------------------------------- */
3312
/* Floating point operators */
3313
/* --------------------------------------------------------------------- */
3314
3315
#define FLOAT_LOAD 0
3316
#define FLOAT_STORE 1
3317
3318
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
3319
sljit_s32 reg,
3320
sljit_s32 mem, sljit_sw memw)
3321
{
3322
struct addr addr;
3323
sljit_ins ins;
3324
3325
SLJIT_ASSERT(mem & SLJIT_MEM);
3326
3327
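/* Prefer the short RX forms (12-bit unsigned displacement) when the offset
fits or has to be materialized in a register anyway; otherwise fall back to
the long-displacement RXY forms with a signed 20-bit displacement. */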
if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
3328
FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
3329
3330
if (op & FLOAT_STORE)
3331
ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
3332
else
3333
ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
3334
3335
return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
3336
}
3337
3338
FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
3339
3340
if (op & FLOAT_STORE)
3341
ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
3342
else
3343
ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
3344
3345
return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3346
}
3347
3348
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
3349
sljit_s32 reg,
3350
sljit_s32 src, sljit_sw srcw)
3351
{
3352
struct addr addr;
3353
3354
if (!(src & SLJIT_MEM))
3355
return push_inst(compiler, ins_r | F4(reg) | F0(src));
3356
3357
FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
3358
return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
3359
}
3360
3361
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3362
sljit_s32 dst, sljit_sw dstw,
3363
sljit_s32 src, sljit_sw srcw)
3364
{
3365
sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3366
sljit_ins ins;
3367
3368
if (src & SLJIT_MEM) {
3369
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3370
src = TMP_FREG1;
3371
}
3372
3373
/* M3 is set to 5 (round toward zero) */
3374
if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3375
ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3376
else
3377
ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3378
3379
FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3380
3381
if (dst & SLJIT_MEM)
3382
return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3383
3384
return SLJIT_SUCCESS;
3385
}
3386
3387
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
3388
sljit_s32 dst, sljit_sw dstw,
3389
sljit_s32 src, sljit_sw srcw)
3390
{
3391
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3392
3393
if (src == SLJIT_IMM) {
3394
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3395
src = (sljit_s32)tmp0;
3396
}
3397
else if (src & SLJIT_MEM) {
3398
FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
3399
src = (sljit_s32)tmp0;
3400
}
3401
3402
FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
3403
3404
if (dst & SLJIT_MEM)
3405
return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
3406
3407
return SLJIT_SUCCESS;
3408
}
3409
3410
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3411
sljit_s32 dst, sljit_sw dstw,
3412
sljit_s32 src, sljit_sw srcw)
3413
{
3414
sljit_ins ins;
3415
3416
if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3417
srcw = (sljit_s32)srcw;
3418
3419
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3420
ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3421
else
3422
ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3423
3424
return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3425
}
3426
3427
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3428
sljit_s32 dst, sljit_sw dstw,
3429
sljit_s32 src, sljit_sw srcw)
3430
{
3431
sljit_ins ins;
3432
3433
if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3434
srcw = (sljit_u32)srcw;
3435
3436
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3437
ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3438
else
3439
ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3440
3441
return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3442
}
3443
3444
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3445
sljit_s32 src1, sljit_sw src1w,
3446
sljit_s32 src2, sljit_sw src2w)
3447
{
3448
sljit_ins ins_r, ins;
3449
3450
if (src1 & SLJIT_MEM) {
3451
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3452
src1 = TMP_FREG1;
3453
}
3454
3455
if (op & SLJIT_32) {
3456
ins_r = 0xb3090000 /* cebr */;
3457
ins = 0xed0000000009 /* ceb */;
3458
} else {
3459
ins_r = 0xb3190000 /* cdbr */;
3460
ins = 0xed0000000019 /* cdb */;
3461
}
3462
3463
return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3464
}
3465
3466
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3467
sljit_s32 dst, sljit_sw dstw,
3468
sljit_s32 src, sljit_sw srcw)
3469
{
3470
sljit_s32 dst_r;
3471
sljit_ins ins;
3472
3473
CHECK_ERROR();
3474
3475
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3476
3477
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3478
3479
if (op == SLJIT_CONV_F64_FROM_F32)
3480
FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3481
else {
3482
if (src & SLJIT_MEM) {
3483
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3484
src = dst_r;
3485
}
3486
3487
switch (GET_OPCODE(op)) {
3488
case SLJIT_MOV_F64:
3489
if (FAST_IS_REG(dst)) {
3490
if (dst == src)
3491
return SLJIT_SUCCESS;
3492
3493
ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3494
break;
3495
}
3496
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3497
case SLJIT_CONV_F64_FROM_F32:
3498
/* SLJIT_CONV_F64_FROM_F32 was handled above, so only SLJIT_CONV_F32_FROM_F64 reaches this case. */
3499
ins = 0xb3440000 /* ledbr */;
3500
break;
3501
case SLJIT_NEG_F64:
3502
ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3503
break;
3504
default:
3505
SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3506
ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3507
break;
3508
}
3509
3510
FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3511
}
3512
3513
if (dst & SLJIT_MEM)
3514
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3515
3516
return SLJIT_SUCCESS;
3517
}
3518
3519
#define FLOAT_MOV(op, dst_r, src_r) \
3520
(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3521
3522
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3523
sljit_s32 dst, sljit_sw dstw,
3524
sljit_s32 src1, sljit_sw src1w,
3525
sljit_s32 src2, sljit_sw src2w)
3526
{
3527
sljit_s32 dst_r = TMP_FREG1;
3528
sljit_ins ins_r, ins;
3529
3530
CHECK_ERROR();
3531
CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3532
ADJUST_LOCAL_OFFSET(dst, dstw);
3533
ADJUST_LOCAL_OFFSET(src1, src1w);
3534
ADJUST_LOCAL_OFFSET(src2, src2w);
3535
3536
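/* Move src1 into dst_r first. When dst aliases src2, the operands are swapped for
the commutative add/mul cases; otherwise src2 is saved to TMP_FREG1 so the move
does not clobber it. */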
do {
3537
if (FAST_IS_REG(dst)) {
3538
dst_r = dst;
3539
3540
if (dst == src1)
3541
break;
3542
3543
if (dst == src2) {
3544
if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
3545
src2 = src1;
3546
src2w = src1w;
3547
src1 = dst;
3548
break;
3549
}
3550
3551
FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
3552
src2 = TMP_FREG1;
3553
}
3554
}
3555
3556
if (src1 & SLJIT_MEM)
3557
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
3558
else
3559
FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
3560
} while (0);
3561
3562
switch (GET_OPCODE(op)) {
3563
case SLJIT_ADD_F64:
3564
ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
3565
ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
3566
break;
3567
case SLJIT_SUB_F64:
3568
ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
3569
ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
3570
break;
3571
case SLJIT_MUL_F64:
3572
ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
3573
ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
3574
break;
3575
default:
3576
SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
3577
ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
3578
ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
3579
break;
3580
}
3581
3582
FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));
3583
3584
if (dst & SLJIT_MEM)
3585
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3586
3587
return SLJIT_SUCCESS;
3588
}
3589
3590
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3591
sljit_s32 dst_freg,
3592
sljit_s32 src1, sljit_sw src1w,
3593
sljit_s32 src2, sljit_sw src2w)
3594
{
3595
sljit_s32 reg;
3596
3597
CHECK_ERROR();
3598
CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3599
ADJUST_LOCAL_OFFSET(src1, src1w);
3600
ADJUST_LOCAL_OFFSET(src2, src2w);
3601
3602
if (src2 & SLJIT_MEM) {
3603
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3604
src2 = TMP_FREG1;
3605
}
3606
3607
if (src1 & SLJIT_MEM) {
3608
reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3609
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3610
src1 = reg;
3611
}
3612
3613
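/* cpsdr (copy sign) builds the result from the magnitude of src1 and the sign of src2. */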
return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3614
}
3615
3616
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3617
sljit_s32 freg, sljit_f32 value)
3618
{
3619
union {
3620
sljit_s32 imm;
3621
sljit_f32 value;
3622
} u;
3623
3624
CHECK_ERROR();
3625
CHECK(check_sljit_emit_fset32(compiler, freg, value));
3626
3627
u.value = value;
3628
3629
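/* Short (32-bit) float values occupy the upper half of a floating point register,
so the bits are shifted into the high 32 bits before ldgr transfers the GPR. */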
FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3630
return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3631
}
3632
3633
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3634
sljit_s32 freg, sljit_f64 value)
3635
{
3636
union {
3637
sljit_sw imm;
3638
sljit_f64 value;
3639
} u;
3640
3641
CHECK_ERROR();
3642
CHECK(check_sljit_emit_fset64(compiler, freg, value));
3643
3644
u.value = value;
3645
3646
FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3647
return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3648
}
3649
3650
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
3651
sljit_s32 freg, sljit_s32 reg)
3652
{
3653
sljit_gpr gen_r;
3654
3655
CHECK_ERROR();
3656
CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
3657
3658
gen_r = gpr(reg);
3659
3660
if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
3661
if (op & SLJIT_32) {
3662
FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
3663
gen_r = tmp0;
3664
}
3665
3666
return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
3667
}
3668
3669
FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
3670
3671
if (!(op & SLJIT_32))
3672
return SLJIT_SUCCESS;
3673
3674
return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
3675
}
3676
3677
/* --------------------------------------------------------------------- */
3678
/* Conditional instructions */
3679
/* --------------------------------------------------------------------- */
3680
3681
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3682
{
3683
struct sljit_label *label;
3684
3685
CHECK_ERROR_PTR();
3686
CHECK_PTR(check_sljit_emit_label(compiler));
3687
3688
if (compiler->last_label && compiler->last_label->size == compiler->size)
3689
return compiler->last_label;
3690
3691
label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3692
PTR_FAIL_IF(!label);
3693
set_label(label, compiler);
3694
return label;
3695
}
3696
3697
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_aligned_label(struct sljit_compiler *compiler,
3698
sljit_s32 alignment, struct sljit_read_only_buffer *buffers)
3699
{
3700
sljit_uw mask, i;
3701
struct sljit_label *label;
3702
struct sljit_label *next_label;
3703
struct sljit_extended_label *ext_label;
3704
3705
CHECK_ERROR_PTR();
3706
CHECK_PTR(check_sljit_emit_aligned_label(compiler, alignment, buffers));
3707
3708
sljit_reset_read_only_buffers(buffers);
3709
3710
if (alignment <= SLJIT_LABEL_ALIGN_2) {
3711
SLJIT_SKIP_CHECKS(compiler);
3712
label = sljit_emit_label(compiler);
3713
PTR_FAIL_IF(!label);
3714
} else {
3715
/* The space reserved for alignment is filled with 2-byte NOPs. */
3716
mask = ((sljit_uw)1 << alignment) - sizeof(sljit_u16);
3717
3718
for (i = (mask >> 1); i != 0; i--)
3719
PTR_FAIL_IF(push_inst(compiler, 0x0700 /* 2-byte nop */));
3720
3721
ext_label = (struct sljit_extended_label*)ensure_abuf(compiler, sizeof(struct sljit_extended_label));
3722
PTR_FAIL_IF(!ext_label);
3723
set_extended_label(ext_label, compiler, SLJIT_LABEL_ALIGNED, mask);
3724
label = &ext_label->label;
3725
}
3726
3727
if (buffers == NULL)
3728
return label;
3729
3730
next_label = label;
3731
3732
while (1) {
3733
buffers->u.label = next_label;
3734
3735
for (i = (buffers->size + 1) >> 1; i > 0; i--)
3736
PTR_FAIL_IF(push_inst(compiler, 0x0700 /* 2-byte nop */));
3737
3738
buffers = buffers->next;
3739
3740
if (buffers == NULL)
3741
break;
3742
3743
SLJIT_SKIP_CHECKS(compiler);
3744
next_label = sljit_emit_label(compiler);
3745
PTR_FAIL_IF(!next_label);
3746
}
3747
3748
return label;
3749
}
3750
3751
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3752
{
3753
struct sljit_jump *jump;
3754
sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3755
3756
CHECK_ERROR_PTR();
3757
CHECK_PTR(check_sljit_emit_jump(compiler, type));
3758
3759
/* record jump */
3760
jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3761
PTR_FAIL_IF(!jump);
3762
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3763
jump->addr = compiler->size;
3764
3765
/* Emit the jump with a zero displacement; the real target is patched in during code generation. */
3766
type &= 0xff;
3767
if (type >= SLJIT_FAST_CALL)
3768
PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3769
else
3770
PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3771
3772
return jump;
3773
}
3774
3775
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3776
sljit_s32 arg_types)
3777
{
3778
SLJIT_UNUSED_ARG(arg_types);
3779
CHECK_ERROR_PTR();
3780
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3781
3782
if (type & SLJIT_CALL_RETURN) {
3783
PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3784
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3785
}
3786
3787
SLJIT_SKIP_CHECKS(compiler);
3788
return sljit_emit_jump(compiler, type);
3789
}
3790
3791
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3792
{
3793
struct sljit_jump *jump;
3794
sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3795
3796
CHECK_ERROR();
3797
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3798
3799
if (src != SLJIT_IMM) {
3800
if (src & SLJIT_MEM) {
3801
ADJUST_LOCAL_OFFSET(src, srcw);
3802
FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3803
}
3804
3805
/* emit jump instruction */
3806
if (type >= SLJIT_FAST_CALL)
3807
return push_inst(compiler, basr(link_r, src_r));
3808
3809
return push_inst(compiler, br(src_r));
3810
}
3811
3812
jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3813
FAIL_IF(!jump);
3814
set_jump(jump, compiler, JUMP_ADDR);
3815
jump->addr = compiler->size;
3816
jump->u.target = (sljit_uw)srcw;
3817
3818
type &= 0xff;
3819
if (type >= SLJIT_FAST_CALL)
3820
return push_inst(compiler, brasl(link_r, 0));
3821
3822
return push_inst(compiler, brcl(0xf, 0));
3823
}
3824
3825
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
3826
sljit_s32 arg_types,
3827
sljit_s32 src, sljit_sw srcw)
3828
{
3829
SLJIT_UNUSED_ARG(arg_types);
3830
3831
CHECK_ERROR();
3832
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
3833
3834
SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);
3835
3836
if (src & SLJIT_MEM) {
3837
ADJUST_LOCAL_OFFSET(src, srcw);
3838
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
3839
src = TMP_REG2;
3840
srcw = 0;
3841
}
3842
3843
if (type & SLJIT_CALL_RETURN) {
3844
if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
3845
FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
3846
src = TMP_REG2;
3847
srcw = 0;
3848
}
3849
3850
FAIL_IF(emit_stack_frame_release(compiler, r14));
3851
type = SLJIT_JUMP;
3852
}
3853
3854
SLJIT_SKIP_CHECKS(compiler);
3855
return sljit_emit_ijump(compiler, type, src, srcw);
3856
}
3857
3858
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3859
sljit_s32 dst, sljit_sw dstw,
3860
sljit_s32 type)
3861
{
3862
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3863
sljit_gpr loc_r = tmp1;
3864
sljit_u8 mask = get_cc(compiler, type);
3865
3866
CHECK_ERROR();
3867
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3868
3869
switch (GET_OPCODE(op)) {
3870
case SLJIT_AND:
3871
case SLJIT_OR:
3872
case SLJIT_XOR:
3873
compiler->status_flags_state = op & SLJIT_SET_Z;
3874
3875
/* dst is also source operand */
3876
if (dst & SLJIT_MEM)
3877
FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3878
3879
break;
3880
case SLJIT_MOV32:
3881
op |= SLJIT_32;
3882
SLJIT_FALLTHROUGH
3883
case SLJIT_MOV:
3884
/* can write straight into destination */
3885
loc_r = dst_r;
3886
break;
3887
default:
3888
SLJIT_UNREACHABLE();
3889
}
3890
3891
/* TODO(mundaym): fold into cmov helper function? */
3892
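/* With the load/store-on-condition 2 facility the flag value is produced by
lochi/locghi; otherwise 1 is loaded, a brc skips the following 4-byte load when the
condition holds (the offset is counted in halfwords), and 0 is loaded on the
fall-through path. */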
#define LEVAL(i) i(loc_r, 1, mask)
3893
if (have_lscond2()) {
3894
FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3895
FAIL_IF(push_inst(compiler,
3896
WHEN2(op & SLJIT_32, lochi, locghi)));
3897
} else {
3898
FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
3899
FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
3900
FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3901
}
3902
#undef LEVAL
3903
3904
/* apply bitwise op and set condition codes */
3905
switch (GET_OPCODE(op)) {
3906
#define LEVAL(i) i(dst_r, loc_r)
3907
case SLJIT_AND:
3908
FAIL_IF(push_inst(compiler,
3909
WHEN2(op & SLJIT_32, nr, ngr)));
3910
break;
3911
case SLJIT_OR:
3912
FAIL_IF(push_inst(compiler,
3913
WHEN2(op & SLJIT_32, or, ogr)));
3914
break;
3915
case SLJIT_XOR:
3916
FAIL_IF(push_inst(compiler,
3917
WHEN2(op & SLJIT_32, xr, xgr)));
3918
break;
3919
#undef LEVAL
3920
}
3921
3922
/* store result to memory if required */
3923
if (dst & SLJIT_MEM)
3924
return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3925
3926
return SLJIT_SUCCESS;
3927
}
3928
3929
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
3930
sljit_s32 dst_reg,
3931
sljit_s32 src1, sljit_sw src1w,
3932
sljit_s32 src2_reg)
3933
{
3934
sljit_ins mask;
3935
sljit_gpr src_r;
3936
sljit_gpr dst_r = gpr(dst_reg);
3937
sljit_s32 is_32bit = (type & SLJIT_32) != 0;
3938
sljit_ins ins;
3939
3940
CHECK_ERROR();
3941
CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3942
3943
ADJUST_LOCAL_OFFSET(src1, src1w);
3944
3945
type &= ~SLJIT_32;
3946
if (src1 == SLJIT_IMM && is_32bit)
3947
src1w = (sljit_s32)src1w;
3948
3949
if (type & SLJIT_COMPARE_SELECT) {
3950
type ^= SLJIT_COMPARE_SELECT;
3951
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE;
3952
3953
if (src1 & SLJIT_MEM) {
3954
FAIL_IF(load_word(compiler, tmp0, src1, src1w, is_32bit));
3955
src1 = TMP_REG1;
3956
src1w = 0;
3957
} else if (src1 == SLJIT_IMM) {
3958
if (type >= SLJIT_LESS && type <= SLJIT_LESS_EQUAL && src1w >= 0 && src1w <= 0x7fff) {
3959
ins = is_32bit ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
3960
FAIL_IF(push_inst(compiler, ins | R36A(gpr(src2_reg)) | (sljit_ins)src1w));
3961
type ^= 0x1;
3962
} else if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL && is_s20(src1w)) {
3963
ins = is_32bit ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
3964
FAIL_IF(push_inst(compiler, ins | R36A(gpr(src2_reg)) | ((sljit_ins)src1w & 0xffffffff)));
3965
type ^= 0x1;
3966
} else {
3967
FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
3968
src1 = TMP_REG1;
3969
src1w = 0;
3970
}
3971
}
3972
3973
if (FAST_IS_REG(src1)) {
3974
if (type >= SLJIT_LESS && type <= SLJIT_LESS_EQUAL)
3975
ins = is_32bit ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
3976
else
3977
ins = is_32bit ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
3978
FAIL_IF(push_inst(compiler, ins | R4A(gpr(src1)) | R0A(gpr(src2_reg))));
3979
}
3980
}
3981
3982
if (dst_reg != src2_reg) {
3983
if (src1 == dst_reg) {
3984
src1 = src2_reg;
3985
src1w = 0;
3986
type ^= 0x1;
3987
} else {
3988
if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3989
FAIL_IF(load_word(compiler, dst_r, src1, src1w, is_32bit));
3990
src1 = src2_reg;
3991
src1w = 0;
3992
type ^= 0x1;
3993
} else
3994
FAIL_IF(push_inst(compiler, (is_32bit ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
3995
}
3996
}
3997
3998
mask = get_cc(compiler, type);
3999
4000
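/* loc/locg only take a base register and a signed 20-bit displacement, so an index
register or an out-of-range offset is folded into tmp1 first. */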
if (src1 & SLJIT_MEM) {
4001
if (src1 & OFFS_REG_MASK) {
4002
src_r = gpr(OFFS_REG(src1));
4003
4004
if (src1w != 0) {
4005
FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
4006
src_r = tmp1;
4007
}
4008
4009
FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
4010
src_r = tmp1;
4011
src1w = 0;
4012
} else if (!is_s20(src1w)) {
4013
FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
4014
4015
if (src1 & REG_MASK)
4016
FAIL_IF(push_inst(compiler, 0xb9080000 /* agr */ | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
4017
4018
src_r = tmp1;
4019
src1w = 0;
4020
} else
4021
src_r = gpr(src1 & REG_MASK);
4022
4023
ins = is_32bit ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
4024
return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
4025
}
4026
4027
if (src1 == SLJIT_IMM) {
4028
if (have_lscond2() && is_s16(src1w)) {
4029
ins = is_32bit ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
4030
return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
4031
}
4032
4033
FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
4034
src_r = tmp1;
4035
} else
4036
src_r = gpr(src1);
4037
4038
ins = is_32bit ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
4039
return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
4040
}
4041
4042
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
4043
sljit_s32 dst_freg,
4044
sljit_s32 src1, sljit_sw src1w,
4045
sljit_s32 src2_freg)
4046
{
4047
sljit_ins ins;
4048
struct sljit_label *label;
4049
struct sljit_jump *jump;
4050
4051
CHECK_ERROR();
4052
CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
4053
4054
ADJUST_LOCAL_OFFSET(src1, src1w);
4055
4056
if (dst_freg != src2_freg) {
4057
if (dst_freg == src1) {
4058
src1 = src2_freg;
4059
src1w = 0;
4060
type ^= 0x1;
4061
} else {
4062
ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
4063
FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
4064
}
4065
}
4066
4067
SLJIT_SKIP_CHECKS(compiler);
4068
jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
4069
FAIL_IF(!jump);
4070
4071
if (!(src1 & SLJIT_MEM)) {
4072
ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
4073
FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
4074
} else
4075
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
4076
4077
SLJIT_SKIP_CHECKS(compiler);
4078
label = sljit_emit_label(compiler);
4079
FAIL_IF(!label);
4080
4081
sljit_set_label(jump, label);
4082
return SLJIT_SUCCESS;
4083
}
4084
4085
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
4086
sljit_s32 reg,
4087
sljit_s32 mem, sljit_sw memw)
4088
{
4089
sljit_ins ins, reg1, reg2, base, offs = 0;
4090
4091
CHECK_ERROR();
4092
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
4093
4094
if (!(reg & REG_PAIR_MASK))
4095
return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
4096
4097
ADJUST_LOCAL_OFFSET(mem, memw);
4098
4099
base = gpr(mem & REG_MASK);
4100
reg1 = gpr(REG_PAIR_FIRST(reg));
4101
reg2 = gpr(REG_PAIR_SECOND(reg));
4102
4103
if (mem & OFFS_REG_MASK) {
4104
memw &= 0x3;
4105
offs = gpr(OFFS_REG(mem));
4106
4107
if (memw != 0) {
4108
FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
4109
offs = tmp1;
4110
} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
4111
FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(base) | R0A(offs)));
4112
base = tmp1;
4113
offs = 0;
4114
}
4115
4116
memw = 0;
4117
} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
4118
FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
4119
4120
if (base == 0)
4121
base = tmp1;
4122
else
4123
offs = tmp1;
4124
4125
memw = 0;
4126
}
4127
4128
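/* A single lmg/stmg is used when the pair maps to consecutive machine registers;
otherwise two lg/stg are emitted, ordered so that a base register which is also a
destination is overwritten last. */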
if (offs == 0 && reg2 == (reg1 + 1)) {
4129
ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
4130
return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
4131
}
4132
4133
ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
4134
4135
if (!(type & SLJIT_MEM_STORE) && base == reg1) {
4136
FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
4137
return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
4138
}
4139
4140
FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
4141
return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
4142
}
4143
4144
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4145
sljit_s32 vreg,
4146
sljit_s32 srcdst, sljit_sw srcdstw)
4147
{
4148
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4149
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4150
sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4151
struct addr addr;
4152
sljit_ins ins;
4153
4154
CHECK_ERROR();
4155
CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));
4156
4157
ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4158
4159
if (reg_size != 4)
4160
return SLJIT_ERR_UNSUPPORTED;
4161
4162
if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4163
return SLJIT_ERR_UNSUPPORTED;
4164
4165
if (type & SLJIT_SIMD_TEST)
4166
return SLJIT_SUCCESS;
4167
4168
if (!(srcdst & SLJIT_MEM)) {
4169
if (type & SLJIT_SIMD_STORE)
4170
ins = F36(srcdst) | F32(vreg);
4171
else
4172
ins = F36(vreg) | F32(srcdst);
4173
4174
return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
4175
}
4176
4177
FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4178
ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4179
4180
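/* Bits 12-15 of vl/vst carry the alignment hint: 4 for a 16-byte aligned operand,
3 for an 8-byte aligned one. */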
if (alignment >= 4)
4181
ins |= 4 << 12;
4182
else if (alignment == 3)
4183
ins |= 3 << 12;
4184
4185
return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
4186
}
4187
4188
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4189
sljit_s32 vreg,
4190
sljit_s32 src, sljit_sw srcw)
4191
{
4192
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4193
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4194
struct addr addr;
4195
sljit_gpr reg;
4196
sljit_sw sign_ext;
4197
4198
CHECK_ERROR();
4199
CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));
4200
4201
ADJUST_LOCAL_OFFSET(src, srcw);
4202
4203
if (reg_size != 4)
4204
return SLJIT_ERR_UNSUPPORTED;
4205
4206
if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4207
return SLJIT_ERR_UNSUPPORTED;
4208
4209
if (type & SLJIT_SIMD_TEST)
4210
return SLJIT_SUCCESS;
4211
4212
if (src & SLJIT_MEM) {
4213
FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4214
return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(vreg)
4215
| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4216
}
4217
4218
if (type & SLJIT_SIMD_FLOAT) {
4219
if (src == SLJIT_IMM)
4220
return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg));
4221
4222
return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)elem_size << 12));
4223
}
4224
4225
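/* Immediate sources: all-zero/all-one values are handled by vgbm, values that fit
in a signed 16-bit field by vrepi, anything else goes through a GPR with vlvg + vrep. */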
if (src == SLJIT_IMM) {
4226
sign_ext = 0x10000;
4227
4228
switch (elem_size) {
4229
case 0:
4230
srcw &= 0xff;
4231
sign_ext = (sljit_s8)srcw;
4232
break;
4233
case 1:
4234
srcw &= 0xffff;
4235
sign_ext = (sljit_s16)srcw;
4236
break;
4237
case 2:
4238
if ((sljit_s32)srcw == (sljit_s16)srcw) {
4239
srcw &= 0xffff;
4240
sign_ext = (sljit_s16)srcw;
4241
} else
4242
srcw &= 0xffffffff;
4243
break;
4244
default:
4245
if (srcw == (sljit_s16)srcw) {
4246
srcw &= 0xffff;
4247
sign_ext = (sljit_s16)srcw;
4248
}
4249
break;
4250
}
4251
4252
if (sign_ext != 0x10000) {
4253
if (sign_ext == 0 || sign_ext == -1)
4254
return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)
4255
| (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4256
4257
return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(vreg)
4258
| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4259
}
4260
4261
push_load_imm_inst(compiler, tmp0, srcw);
4262
reg = tmp0;
4263
} else
4264
reg = gpr(src);
4265
4266
FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4267
return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(vreg) | ((sljit_ins)elem_size << 12));
4268
}
4269
4270
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4271
sljit_s32 vreg, sljit_s32 lane_index,
4272
sljit_s32 srcdst, sljit_sw srcdstw)
4273
{
4274
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4275
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4276
struct addr addr;
4277
sljit_gpr reg;
4278
sljit_ins ins = 0;
4279
4280
CHECK_ERROR();
4281
CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));
4282
4283
ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4284
4285
if (reg_size != 4)
4286
return SLJIT_ERR_UNSUPPORTED;
4287
4288
if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4289
return SLJIT_ERR_UNSUPPORTED;
4290
4291
if (type & SLJIT_SIMD_TEST)
4292
return SLJIT_SUCCESS;
4293
4294
if (srcdst & SLJIT_MEM) {
4295
FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4296
ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4297
}
4298
4299
if (type & SLJIT_SIMD_LANE_ZERO) {
4300
if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4301
return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4302
4303
if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
4304
FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(vreg)));
4305
srcdst = TMP_FREG1;
4306
srcdstw = 0;
4307
}
4308
4309
FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)));
4310
}
4311
4312
if (srcdst & SLJIT_MEM) {
4313
switch (elem_size) {
4314
case 0:
4315
ins |= 0xe70000000000 /* vleb */;
4316
break;
4317
case 1:
4318
ins |= 0xe70000000001 /* vleh */;
4319
break;
4320
case 2:
4321
ins |= 0xe70000000003 /* vlef */;
4322
break;
4323
default:
4324
ins |= 0xe70000000002 /* vleg */;
4325
break;
4326
}
4327
4328
/* Convert to vsteb - vsteg */
4329
if (type & SLJIT_SIMD_STORE)
4330
ins |= 0x8;
4331
4332
return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4333
}
4334
4335
if (type & SLJIT_SIMD_FLOAT) {
4336
if (type & SLJIT_SIMD_STORE)
4337
return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(vreg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4338
4339
if (elem_size == 3) {
4340
if (lane_index == 0)
4341
ins = F32(srcdst) | F28(vreg) | (1 << 12);
4342
else
4343
ins = F32(vreg) | F28(srcdst);
4344
4345
return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(vreg) | ins);
4346
}
4347
4348
FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4349
return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4350
}
4351
4352
if (srcdst == SLJIT_IMM) {
4353
switch (elem_size) {
4354
case 0:
4355
ins = 0xe70000000040 /* vleib */;
4356
srcdstw &= 0xff;
4357
break;
4358
case 1:
4359
ins = 0xe70000000041 /* vleih */;
4360
srcdstw &= 0xffff;
4361
break;
4362
case 2:
4363
if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4364
srcdstw &= 0xffff;
4365
ins = 0xe70000000043 /* vleif */;
4366
} else
4367
srcdstw &= 0xffffffff;
4368
break;
4369
default:
4370
if (srcdstw == (sljit_s16)srcdstw) {
4371
srcdstw &= 0xffff;
4372
ins = 0xe70000000042 /* vleig */;
4373
}
4374
break;
4375
}
4376
4377
if (ins != 0)
4378
return push_inst(compiler, ins | F36(vreg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4379
4380
push_load_imm_inst(compiler, tmp0, srcdstw);
4381
reg = tmp0;
4382
} else
4383
reg = gpr(srcdst);
4384
4385
ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4386
4387
if (!(type & SLJIT_SIMD_STORE))
4388
return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ins);
4389
4390
FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(vreg) | ins));
4391
4392
if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4393
return SLJIT_SUCCESS;
4394
4395
switch (elem_size) {
4396
case 0:
4397
ins = 0xb9060000 /* lgbr */;
4398
break;
4399
case 1:
4400
ins = 0xb9070000 /* lghr */;
4401
break;
4402
default:
4403
ins = 0xb9140000 /* lgfr */;
4404
break;
4405
}
4406
4407
return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4408
}
4409
4410
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4411
sljit_s32 vreg,
4412
sljit_s32 src, sljit_s32 src_lane_index)
4413
{
4414
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4415
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4416
4417
CHECK_ERROR();
4418
CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));
4419
4420
if (reg_size != 4)
4421
return SLJIT_ERR_UNSUPPORTED;
4422
4423
if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4424
return SLJIT_ERR_UNSUPPORTED;
4425
4426
if (type & SLJIT_SIMD_TEST)
4427
return SLJIT_SUCCESS;
4428
4429
return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src)
4430
| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4431
}
4432
4433
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4434
sljit_s32 vreg,
4435
sljit_s32 src, sljit_sw srcw)
4436
{
4437
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4438
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4439
sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4440
struct addr addr;
4441
sljit_ins ins;
4442
4443
CHECK_ERROR();
4444
CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));
4445
4446
ADJUST_LOCAL_OFFSET(src, srcw);
4447
4448
if (reg_size != 4)
4449
return SLJIT_ERR_UNSUPPORTED;
4450
4451
if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4452
return SLJIT_ERR_UNSUPPORTED;
4453
4454
if (type & SLJIT_SIMD_TEST)
4455
return SLJIT_SUCCESS;
4456
4457
if (src & SLJIT_MEM) {
4458
FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4459
ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4460
4461
switch (elem2_size - elem_size) {
4462
case 1:
4463
ins |= 0xe70000000002 /* vleg */;
4464
break;
4465
case 2:
4466
ins |= 0xe70000000003 /* vlef */;
4467
break;
4468
default:
4469
ins |= 0xe70000000001 /* vleh */;
4470
break;
4471
}
4472
4473
FAIL_IF(push_inst(compiler, ins));
4474
src = vreg;
4475
}
4476
4477
if (type & SLJIT_SIMD_FLOAT) {
4478
FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(vreg) | F32(src) | (2 << 12)));
4479
FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(vreg) | F32(vreg) | (32 << 16) | (3 << 12)));
4480
return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(vreg) | F32(vreg) | (2 << 12));
4481
}
4482
4483
ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(vreg);
4484
4485
do {
4486
FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
4487
src = vreg;
4488
} while (++elem_size < elem2_size);
4489
4490
return SLJIT_SUCCESS;
4491
}
4492
4493
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4494
sljit_s32 vreg,
4495
sljit_s32 dst, sljit_sw dstw)
4496
{
4497
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4498
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4499
sljit_gpr dst_r;
4500
4501
CHECK_ERROR();
4502
CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));
4503
4504
ADJUST_LOCAL_OFFSET(dst, dstw);
4505
4506
if (reg_size != 4)
4507
return SLJIT_ERR_UNSUPPORTED;
4508
4509
if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4510
return SLJIT_ERR_UNSUPPORTED;
4511
4512
if (type & SLJIT_SIMD_TEST)
4513
return SLJIT_SUCCESS;
4514
4515
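/* The constants below are bit indexes of each element's sign bit; vbperm collects
the selected bits so vlgv can extract the resulting sign mask into a GPR. */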
switch (elem_size) {
4516
case 0:
4517
push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4518
push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4519
FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4520
break;
4521
case 1:
4522
push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4523
break;
4524
case 2:
4525
push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4526
break;
4527
default:
4528
push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4529
break;
4530
}
4531
4532
if (elem_size != 0)
4533
FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4534
4535
FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(vreg) | F28(TMP_FREG1)));
4536
4537
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4538
FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4539
| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4540
4541
if (dst_r == tmp0)
4542
return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4543
4544
return SLJIT_SUCCESS;
4545
}
4546
4547
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4548
sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
4549
{
4550
sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4551
sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4552
sljit_s32 alignment;
4553
struct addr addr;
4554
sljit_ins ins = 0, load_ins;
4555
4556
CHECK_ERROR();
4557
CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
4558
ADJUST_LOCAL_OFFSET(src2, src2w);
4559
4560
if (reg_size != 4)
4561
return SLJIT_ERR_UNSUPPORTED;
4562
4563
if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4564
return SLJIT_ERR_UNSUPPORTED;
4565
4566
if (type & SLJIT_SIMD_TEST)
4567
return SLJIT_SUCCESS;
4568
4569
switch (SLJIT_SIMD_GET_OPCODE(type)) {
4570
case SLJIT_SIMD_OP2_AND:
4571
ins = 0xe70000000068 /* vn */;
4572
break;
4573
case SLJIT_SIMD_OP2_OR:
4574
ins = 0xe7000000006a /* vo */;
4575
break;
4576
case SLJIT_SIMD_OP2_XOR:
4577
ins = 0xe7000000006d /* vx */;
4578
break;
4579
case SLJIT_SIMD_OP2_SHUFFLE:
4580
ins = 0xe7000000008c /* vperm */;
4581
break;
4582
}
4583
4584
if (src2 & SLJIT_MEM) {
4585
FAIL_IF(make_addr_bx(compiler, &addr, src2, src2w, tmp1));
4586
load_ins = 0xe70000000006 /* vl */ | F36(TMP_FREG1) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4587
alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4588
4589
if (alignment >= 4)
4590
load_ins |= 4 << 12;
4591
else if (alignment == 3)
4592
load_ins |= 3 << 12;
4593
4594
FAIL_IF(push_inst(compiler, load_ins));
4595
src2 = TMP_FREG1;
4596
}
4597
4598
if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE)
4599
return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src1_vreg) | F12(src2));
4600
4601
return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src2));
4602
}
4603
4604
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4605
sljit_s32 dst_reg,
4606
sljit_s32 mem_reg)
4607
{
4608
CHECK_ERROR();
4609
CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4610
4611
if (op & SLJIT_ATOMIC_USE_LS)
4612
return SLJIT_ERR_UNSUPPORTED;
4613
4614
switch (GET_OPCODE(op)) {
4615
case SLJIT_MOV32:
4616
case SLJIT_MOV_U32:
4617
case SLJIT_MOV:
4618
case SLJIT_MOV_P:
4619
if (op & SLJIT_ATOMIC_TEST)
4620
return SLJIT_SUCCESS;
4621
4622
SLJIT_SKIP_CHECKS(compiler);
4623
return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4624
default:
4625
return SLJIT_ERR_UNSUPPORTED;
4626
}
4627
}
4628
4629
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4630
sljit_s32 src_reg,
4631
sljit_s32 mem_reg,
4632
sljit_s32 temp_reg)
4633
{
4634
sljit_ins ins;
4635
sljit_gpr tmp_r = gpr(temp_reg);
4636
sljit_gpr mem_r = gpr(mem_reg);
4637
4638
CHECK_ERROR();
4639
CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4640
4641
if (op & SLJIT_ATOMIC_USE_LS)
4642
return SLJIT_ERR_UNSUPPORTED;
4643
4644
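/* cs/csg implement compare-and-swap: the value in temp_reg is compared with the
word at mem_reg and src_reg is stored only on a match; on a mismatch temp_reg
receives the current memory contents and the condition code reports the failure. */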
switch (GET_OPCODE(op)) {
4645
case SLJIT_MOV32:
4646
case SLJIT_MOV_U32:
4647
ins = 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r);
4648
break;
4649
case SLJIT_MOV:
4650
case SLJIT_MOV_P:
4651
ins = 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r);
4652
break;
4653
default:
4654
return SLJIT_ERR_UNSUPPORTED;
4655
}
4656
4657
if (op & SLJIT_ATOMIC_TEST)
4658
return SLJIT_SUCCESS;
4659
4660
return push_inst(compiler, ins);
4661
}
4662
4663
/* --------------------------------------------------------------------- */
4664
/* Other instructions */
4665
/* --------------------------------------------------------------------- */
4666
4667
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 op,
4668
sljit_s32 dst, sljit_sw dstw,
4669
sljit_sw init_value)
4670
{
4671
struct sljit_const *const_;
4672
sljit_gpr dst_r;
4673
int is_32 = 0;
4674
4675
CHECK_ERROR_PTR();
4676
CHECK_PTR(check_sljit_emit_const(compiler, op, dst, dstw, init_value));
4677
ADJUST_LOCAL_OFFSET(dst, dstw);
4678
4679
const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4680
PTR_FAIL_IF(!const_);
4681
set_const((struct sljit_const*)const_, compiler);
4682
4683
dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4684
4685
switch (GET_OPCODE(op)) {
4686
case SLJIT_MOV_U8:
4687
if (init_value & 0x100)
4688
init_value |= 0xff00;
4689
else
4690
init_value &= 0xff;
4691
4692
PTR_FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(dst_r) | (sljit_ins)(init_value & 0xffff)));
4693
4694
if (dst & SLJIT_MEM)
4695
PTR_FAIL_IF(store_byte(compiler, dst_r, dst, dstw));
4696
return (struct sljit_const*)const_;
4697
4698
case SLJIT_MOV32:
4699
is_32 = 1;
4700
SLJIT_FALLTHROUGH
4701
case SLJIT_MOV_S32:
4702
PTR_FAIL_IF(push_inst(compiler, 0xc00100000000 /* lgfi */ | R36A(dst_r) | (sljit_ins)(init_value & 0xffffffff)));
4703
break;
4704
4705
default:
4706
PTR_FAIL_IF(push_inst(compiler, 0xc00f00000000 /* llilf */ | R36A(dst_r) | (sljit_ins)(init_value & 0xffffffff)));
4707
PTR_FAIL_IF(push_inst(compiler, 0xc00800000000 /* iihf */ | R36A(dst_r) | (sljit_ins)((init_value >> 32) & 0xffffffff)));
4708
break;
4709
}
4710
4711
if (dst & SLJIT_MEM)
4712
PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, is_32));
4713
4714
return (struct sljit_const*)const_;
4715
}
4716
4717
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4718
{
4719
/* Update the constant pool. */
4720
sljit_uw *ptr = (sljit_uw *)addr;
4721
SLJIT_UNUSED_ARG(executable_offset);
4722
4723
SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4724
*ptr = new_target;
4725
SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4726
SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4727
}
4728
4729
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_s32 op, sljit_sw new_constant, sljit_sw executable_offset)
4730
{
4731
sljit_u16 *inst = (sljit_u16*)addr;
4732
SLJIT_UNUSED_ARG(executable_offset);
4733
4734
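/* Patch the immediate halfwords of the lghi / lgfi / llilf+iihf sequences emitted
by sljit_emit_const above. */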
switch (GET_OPCODE(op)) {
4735
case SLJIT_MOV_U8:
4736
SLJIT_ASSERT((inst[0] & 0xff0f) == 0xa709 /* lghi */);
4737
4738
if (new_constant & 0x100)
4739
new_constant |= 0xff00;
4740
else
4741
new_constant &= 0xff;
4742
4743
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
4744
inst[1] = (sljit_u16)new_constant;
4745
SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
4746
inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
4747
SLJIT_CACHE_FLUSH(inst, inst + 2);
4748
return;
4749
4750
case SLJIT_MOV32:
4751
case SLJIT_MOV_S32:
4752
SLJIT_ASSERT((inst[0] & 0xff0f) == 0xc001 /* lgfi */);
4753
4754
SLJIT_UPDATE_WX_FLAGS(inst, inst + 3, 0);
4755
inst[1] = (sljit_u16)(new_constant >> 16);
4756
inst[2] = (sljit_u16)new_constant;
4757
SLJIT_UPDATE_WX_FLAGS(inst, inst + 3, 1);
4758
inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
4759
SLJIT_CACHE_FLUSH(inst, inst + 3);
4760
return;
4761
4762
default:
4763
SLJIT_ASSERT((inst[0] & 0xff0f) == 0xc00f /* llilf */ && (inst[3] & 0xff0f) == 0xc008 /* iihf */);
4764
4765
SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 0);
4766
inst[1] = (sljit_u16)(new_constant >> 16);
4767
inst[2] = (sljit_u16)new_constant;
4768
inst[4] = (sljit_u16)(new_constant >> 48);
4769
inst[5] = (sljit_u16)(new_constant >> 32);
4770
SLJIT_UPDATE_WX_FLAGS(inst, inst + 6, 1);
4771
inst = (sljit_u16*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
4772
SLJIT_CACHE_FLUSH(inst, inst + 6);
4773
return;
4774
}
4775
}
4776
4777
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_op_addr(struct sljit_compiler *compiler, sljit_s32 op,
4778
sljit_s32 dst, sljit_sw dstw)
4779
{
4780
struct sljit_jump *jump;
4781
sljit_gpr dst_r, target_r;
4782
SLJIT_UNUSED_ARG(op);
4783
4784
CHECK_ERROR_PTR();
4785
CHECK_PTR(check_sljit_emit_op_addr(compiler, op, dst, dstw));
4786
ADJUST_LOCAL_OFFSET(dst, dstw);
4787
4788
dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4789
4790
if (op != SLJIT_ADD_ABS_ADDR)
4791
target_r = dst_r;
4792
else {
4793
target_r = tmp1;
4794
4795
if (dst & SLJIT_MEM)
4796
PTR_FAIL_IF(load_word(compiler, dst_r, dst, dstw, 0));
4797
}
4798
4799
jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4800
PTR_FAIL_IF(!jump);
4801
set_mov_addr(jump, compiler, 0);
4802
4803
/* Might be converted to lgrl. */
4804
PTR_FAIL_IF(push_inst(compiler, 0xc00000000000 /* larl */ | R36A(target_r)));
4805
4806
if (op == SLJIT_ADD_ABS_ADDR)
4807
PTR_FAIL_IF(push_inst(compiler, 0xb90a0000 /* algr */ | R4A(dst_r) | R0A(tmp1)));
4808
4809
if (dst & SLJIT_MEM)
4810
PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4811
4812
return jump;
4813
}
4814
4815
/* TODO(carenas): EVAL probably should move up or be refactored */
4816
#undef WHEN2
4817
#undef EVAL
4818
4819
#undef tmp1
4820
#undef tmp0
4821
4822
/* TODO(carenas): undef other macros that spill like is_u12? */