GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/pcre2/deps/sljit/sljit_src/sljitNativeS390X.c
1
/*
2
* Stack-less Just-In-Time compiler
3
*
4
* Copyright Zoltan Herczeg ([email protected]). All rights reserved.
5
*
6
* Redistribution and use in source and binary forms, with or without modification, are
7
* permitted provided that the following conditions are met:
8
*
9
* 1. Redistributions of source code must retain the above copyright notice, this list of
10
* conditions and the following disclaimer.
11
*
12
* 2. Redistributions in binary form must reproduce the above copyright notice, this list
13
* of conditions and the following disclaimer in the documentation and/or other materials
14
* provided with the distribution.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19
* SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22
* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
*/
26
27
#include <sys/auxv.h>
28
29
#ifdef __ARCH__
30
#define ENABLE_STATIC_FACILITY_DETECTION 1
31
#else
32
#define ENABLE_STATIC_FACILITY_DETECTION 0
33
#endif
34
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
36
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37
{
38
return "s390x" SLJIT_CPUINFO;
39
}
40
41
/* Instructions are stored as 64-bit values regardless of their size. */
42
typedef sljit_uw sljit_ins;
43
44
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
45
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
46
47
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
48
0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
49
};
50
51
/* there are also a[2-15] available, but they are slower to access and
52
* their use is limited as mundaym explained:
53
* https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54
*/
55
56
/* General Purpose Registers [0-15]. */
57
typedef sljit_uw sljit_gpr;
58
59
/*
60
* WARNING
61
* The following code is non-standard and should be improved for
62
* consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
63
* registers because r0 and r1 are the ABI recommended volatiles.
64
* There is a gpr() function that maps SLJIT registers to physical register numbers
65
* that should be used instead of the usual index into reg_map[] and
66
* will be retired ASAP (TODO: carenas)
67
*/
68
69
static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70
static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71
static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */
72
static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */
73
static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */
74
static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */
75
static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */
76
static const sljit_gpr r7 = 7; /* reg_map[6] */
77
static const sljit_gpr r8 = 8; /* reg_map[7] */
78
static const sljit_gpr r9 = 9; /* reg_map[8] */
79
static const sljit_gpr r10 = 10; /* reg_map[9] */
80
static const sljit_gpr r11 = 11; /* reg_map[10] */
81
static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
82
static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
83
static const sljit_gpr r14 = 14; /* reg_map[0]: return address */
84
static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85
86
/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
88
/* TODO(carenas): r13 usually points to the "pool" per the ABI; using a tmp
89
* like we do now might be faster though; reserve?
90
*/
91
92
/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
93
#define tmp0 r0
94
#define tmp1 r1
95
96
/* When reg cannot be unused. */
97
#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
98
99
/* Link register. */
100
static const sljit_gpr link_r = 14; /* r14 */
101
102
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
103
104
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
105
0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
106
};
107
108
#define R0A(r) (r)
109
#define R4A(r) ((r) << 4)
110
#define R8A(r) ((r) << 8)
111
#define R12A(r) ((r) << 12)
112
#define R16A(r) ((r) << 16)
113
#define R20A(r) ((r) << 20)
114
#define R28A(r) ((r) << 28)
115
#define R32A(r) ((r) << 32)
116
#define R36A(r) ((r) << 36)
117
118
#define R0(r) ((sljit_ins)reg_map[r])
119
120
#define F0(r) ((sljit_ins)freg_map[r])
121
#define F4(r) (R4A((sljit_ins)freg_map[r]))
122
#define F12(r) (R12A((sljit_ins)freg_map[r]))
123
#define F20(r) (R20A((sljit_ins)freg_map[r]))
124
#define F28(r) (R28A((sljit_ins)freg_map[r]))
125
#define F32(r) (R32A((sljit_ins)freg_map[r]))
126
#define F36(r) (R36A((sljit_ins)freg_map[r]))
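/* Illustrative note (not part of the original source): these helpers shift a
 * register-number field into its slot within the (up to 48-bit) encoded
 * instruction value.  For an RRE pattern such as 0xb9800000 (ngr, defined
 * below), 0xb9800000 | R4A(2) | R0A(3) == 0xb9800023, i.e. ngr(r2, r3). */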
127
128
struct sljit_s390x_const {
129
struct sljit_const const_; /* must be first */
130
sljit_sw init_value; /* required to build literal pool */
131
};
132
133
/* Convert SLJIT register to hardware register. */
134
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
135
{
136
SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
137
return reg_map[r];
138
}
139
140
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
141
{
142
sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
143
FAIL_IF(!ibuf);
144
*ibuf = ins;
145
146
SLJIT_ASSERT(ins <= 0xffffffffffffL);
147
148
compiler->size++;
149
if (ins & 0xffff00000000L)
150
compiler->size++;
151
152
if (ins & 0xffffffff0000L)
153
compiler->size++;
154
155
return SLJIT_SUCCESS;
156
}
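/* Example (illustrative, not from the original source): the length of each
 * instruction is recovered from the encoded value itself, so
 *     push_inst(compiler, lgr(r2, r3));        // 0xb9040023: 4-byte RRE, 2 halfwords
 *     push_inst(compiler, lg(r2, 8, r0, r15)); // 6-byte RXY form, 3 halfwords
 * advance compiler->size by 2 and 3 respectively. */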
157
158
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
159
(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
160
&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
161
162
/* Map the given type to a 4-bit condition code mask. */
163
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
164
const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
165
const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
166
const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
167
const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
168
169
switch (type) {
170
case SLJIT_EQUAL:
171
if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
172
sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
173
if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
174
return cc0;
175
if (type == SLJIT_OVERFLOW)
176
return (cc0 | cc3);
177
return (cc0 | cc2);
178
}
179
/* fallthrough */
180
181
case SLJIT_ATOMIC_STORED:
182
case SLJIT_F_EQUAL:
183
case SLJIT_ORDERED_EQUAL:
184
return cc0;
185
186
case SLJIT_NOT_EQUAL:
187
if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
188
sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
189
if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
190
return (cc1 | cc2 | cc3);
191
if (type == SLJIT_OVERFLOW)
192
return (cc1 | cc2);
193
return (cc1 | cc3);
194
}
195
/* fallthrough */
196
197
case SLJIT_UNORDERED_OR_NOT_EQUAL:
198
return (cc1 | cc2 | cc3);
199
200
case SLJIT_LESS:
201
case SLJIT_ATOMIC_NOT_STORED:
202
return cc1;
203
204
case SLJIT_GREATER_EQUAL:
205
case SLJIT_UNORDERED_OR_GREATER_EQUAL:
206
return (cc0 | cc2 | cc3);
207
208
case SLJIT_GREATER:
209
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
210
return cc2;
211
return cc3;
212
213
case SLJIT_LESS_EQUAL:
214
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
215
return (cc0 | cc1);
216
return (cc0 | cc1 | cc2);
217
218
case SLJIT_SIG_LESS:
219
case SLJIT_F_LESS:
220
case SLJIT_ORDERED_LESS:
221
return cc1;
222
223
case SLJIT_NOT_CARRY:
224
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
225
return (cc2 | cc3);
226
/* fallthrough */
227
228
case SLJIT_SIG_LESS_EQUAL:
229
case SLJIT_F_LESS_EQUAL:
230
case SLJIT_ORDERED_LESS_EQUAL:
231
return (cc0 | cc1);
232
233
case SLJIT_CARRY:
234
if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
235
return (cc0 | cc1);
236
/* fallthrough */
237
238
case SLJIT_SIG_GREATER:
239
case SLJIT_UNORDERED_OR_GREATER:
240
/* Overflow is considered greater, see SLJIT_SUB. */
241
return cc2 | cc3;
242
243
case SLJIT_SIG_GREATER_EQUAL:
244
return (cc0 | cc2 | cc3);
245
246
case SLJIT_OVERFLOW:
247
if (compiler->status_flags_state & SLJIT_SET_Z)
248
return (cc2 | cc3);
249
/* fallthrough */
250
251
case SLJIT_UNORDERED:
252
return cc3;
253
254
case SLJIT_NOT_OVERFLOW:
255
if (compiler->status_flags_state & SLJIT_SET_Z)
256
return (cc0 | cc1);
257
/* fallthrough */
258
259
case SLJIT_ORDERED:
260
return (cc0 | cc1 | cc2);
261
262
case SLJIT_F_NOT_EQUAL:
263
case SLJIT_ORDERED_NOT_EQUAL:
264
return (cc1 | cc2);
265
266
case SLJIT_F_GREATER:
267
case SLJIT_ORDERED_GREATER:
268
return cc2;
269
270
case SLJIT_F_GREATER_EQUAL:
271
case SLJIT_ORDERED_GREATER_EQUAL:
272
return (cc0 | cc2);
273
274
case SLJIT_UNORDERED_OR_LESS_EQUAL:
275
return (cc0 | cc1 | cc3);
276
277
case SLJIT_UNORDERED_OR_EQUAL:
278
return (cc0 | cc3);
279
280
case SLJIT_UNORDERED_OR_LESS:
281
return (cc1 | cc3);
282
}
283
284
SLJIT_UNREACHABLE();
285
return (sljit_u8)-1;
286
}
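/* Usage sketch (assumed, not verbatim from this file): the returned mask plugs
 * straight into the mask field of a conditional branch.  After a comparison
 * (SLJIT_CURRENT_FLAGS_COMPARE set), SLJIT_EQUAL maps to cc0 == 0x8, so
 *     push_inst(compiler, brc(get_cc(compiler, SLJIT_EQUAL), rel_halfwords));
 * branches only when the condition code is 0 (operands equal). */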
287
288
/* Facility to bit index mappings.
289
Note: some facilities share the same bit index. */
290
typedef sljit_uw facility_bit;
291
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
292
#define FAST_LONG_DISPLACEMENT_FACILITY 19
293
#define EXTENDED_IMMEDIATE_FACILITY 21
294
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
295
#define DISTINCT_OPERAND_FACILITY 45
296
#define HIGH_WORD_FACILITY 45
297
#define POPULATION_COUNT_FACILITY 45
298
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
299
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
300
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
301
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
302
#define VECTOR_FACILITY 129
303
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
304
305
/* Report whether a facility is known to be present due to the compiler
306
settings. This function should always be compiled to a constant
307
value given a constant argument. */
308
static SLJIT_INLINE int have_facility_static(facility_bit x)
309
{
310
#if ENABLE_STATIC_FACILITY_DETECTION
311
switch (x) {
312
case FAST_LONG_DISPLACEMENT_FACILITY:
313
return (__ARCH__ >= 6 /* z990 */);
314
case EXTENDED_IMMEDIATE_FACILITY:
315
case STORE_FACILITY_LIST_EXTENDED_FACILITY:
316
return (__ARCH__ >= 7 /* z9-109 */);
317
case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
318
return (__ARCH__ >= 8 /* z10 */);
319
case DISTINCT_OPERAND_FACILITY:
320
return (__ARCH__ >= 9 /* z196 */);
321
case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
322
return (__ARCH__ >= 10 /* zEC12 */);
323
case LOAD_STORE_ON_CONDITION_2_FACILITY:
324
case VECTOR_FACILITY:
325
return (__ARCH__ >= 11 /* z13 */);
326
case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
327
case VECTOR_ENHANCEMENTS_1_FACILITY:
328
return (__ARCH__ >= 12 /* z14 */);
329
default:
330
SLJIT_UNREACHABLE();
331
}
332
#endif
333
return 0;
334
}
335
336
static SLJIT_INLINE unsigned long get_hwcap()
337
{
338
static unsigned long hwcap = 0;
339
if (SLJIT_UNLIKELY(!hwcap)) {
340
hwcap = getauxval(AT_HWCAP);
341
SLJIT_ASSERT(hwcap != 0);
342
}
343
return hwcap;
344
}
345
346
static SLJIT_INLINE int have_stfle()
347
{
348
if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
349
return 1;
350
351
return (get_hwcap() & HWCAP_S390_STFLE);
352
}
353
354
/* Report whether the given facility is available. This function always
355
performs a runtime check. */
356
static int have_facility_dynamic(facility_bit x)
357
{
358
#if ENABLE_DYNAMIC_FACILITY_DETECTION
359
static struct {
360
sljit_uw bits[4];
361
} cpu_features;
362
size_t size = sizeof(cpu_features);
363
const sljit_uw word_index = x >> 6;
364
const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
365
366
SLJIT_ASSERT(x < size * 8);
367
if (SLJIT_UNLIKELY(!have_stfle()))
368
return 0;
369
370
if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
371
__asm__ __volatile__ (
372
"lgr %%r0, %0;"
373
"stfle 0(%1);"
374
/* outputs */:
375
/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
376
/* clobbers */: "r0", "cc", "memory"
377
);
378
SLJIT_ASSERT(cpu_features.bits[0] != 0);
379
}
380
return (cpu_features.bits[word_index] & bit_index) != 0;
381
#else
382
return 0;
383
#endif
384
}
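/* Example (illustrative): have_facility_dynamic(VECTOR_FACILITY) looks at
 * word 129 / 64 == 2 of the cached STFLE result and tests bit 129 % 64 == 1,
 * counting from the most-significant bit; the facility list itself is fetched
 * only once and reused on later calls. */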
385
386
#define HAVE_FACILITY(name, bit) \
387
static SLJIT_INLINE int name() \
388
{ \
389
static int have = -1; \
390
/* Static check first. May allow the function to be optimized away. */ \
391
if (have_facility_static(bit)) \
392
have = 1; \
393
else if (SLJIT_UNLIKELY(have < 0)) \
394
have = have_facility_dynamic(bit) ? 1 : 0; \
395
\
396
return have; \
397
}
398
399
HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
400
HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
401
HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
402
HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
403
HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
404
HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
405
#undef HAVE_FACILITY
406
407
#define is_u12(d) (0 <= (d) && (d) <= 0x00000fffL)
408
#define is_u32(d) (0 <= (d) && (d) <= 0xffffffffL)
409
410
#define CHECK_SIGNED(v, bitlen) \
411
((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
412
413
#define is_s8(d) CHECK_SIGNED((d), 8)
414
#define is_s16(d) CHECK_SIGNED((d), 16)
415
#define is_s20(d) CHECK_SIGNED((d), 20)
416
#define is_s32(d) ((d) == (sljit_s32)(d))
417
418
static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
419
{
420
sljit_uw dh, dl;
421
422
SLJIT_ASSERT(is_s20(d));
423
424
dh = (d >> 12) & 0xff;
425
dl = ((sljit_uw)d << 8) & 0xfff00;
426
return (dh | dl) << 8;
427
}
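/* Worked example (illustrative): d = -4 is 0xffffc as a 20-bit value, so
 * DL (low 12 bits) = 0xffc and DH (high 8 bits) = 0xff; disp_s20(-4) thus
 * returns 0x0ffcff00, placing DL at bits 16-27 and DH at bits 8-15 of the
 * 48-bit RXY/RSY instruction pattern it is OR'd into. */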
428
429
/* TODO(carenas): variadic macro is not strictly needed */
430
#define SLJIT_S390X_INSTRUCTION(op, ...) \
431
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
432
433
/* RR form instructions. */
434
#define SLJIT_S390X_RR(name, pattern) \
435
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
436
{ \
437
return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
438
}
439
440
/* AND */
441
SLJIT_S390X_RR(nr, 0x1400)
442
443
/* BRANCH AND SAVE */
444
SLJIT_S390X_RR(basr, 0x0d00)
445
446
/* BRANCH ON CONDITION */
447
SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */
448
449
/* DIVIDE */
450
SLJIT_S390X_RR(dr, 0x1d00)
451
452
/* EXCLUSIVE OR */
453
SLJIT_S390X_RR(xr, 0x1700)
454
455
/* LOAD */
456
SLJIT_S390X_RR(lr, 0x1800)
457
458
/* LOAD COMPLEMENT */
459
SLJIT_S390X_RR(lcr, 0x1300)
460
461
/* OR */
462
SLJIT_S390X_RR(or, 0x1600)
463
464
#undef SLJIT_S390X_RR
465
466
/* RRE form instructions */
467
#define SLJIT_S390X_RRE(name, pattern) \
468
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
469
{ \
470
return (pattern) | R4A(dst) | R0A(src); \
471
}
472
473
/* AND */
474
SLJIT_S390X_RRE(ngr, 0xb9800000)
475
476
/* DIVIDE LOGICAL */
477
SLJIT_S390X_RRE(dlr, 0xb9970000)
478
SLJIT_S390X_RRE(dlgr, 0xb9870000)
479
480
/* DIVIDE SINGLE */
481
SLJIT_S390X_RRE(dsgr, 0xb90d0000)
482
483
/* EXCLUSIVE OR */
484
SLJIT_S390X_RRE(xgr, 0xb9820000)
485
486
/* LOAD */
487
SLJIT_S390X_RRE(lgr, 0xb9040000)
488
SLJIT_S390X_RRE(lgfr, 0xb9140000)
489
490
/* LOAD BYTE */
491
SLJIT_S390X_RRE(lbr, 0xb9260000)
492
SLJIT_S390X_RRE(lgbr, 0xb9060000)
493
494
/* LOAD COMPLEMENT */
495
SLJIT_S390X_RRE(lcgr, 0xb9030000)
496
497
/* LOAD HALFWORD */
498
SLJIT_S390X_RRE(lhr, 0xb9270000)
499
SLJIT_S390X_RRE(lghr, 0xb9070000)
500
501
/* LOAD LOGICAL */
502
SLJIT_S390X_RRE(llgfr, 0xb9160000)
503
504
/* LOAD LOGICAL CHARACTER */
505
SLJIT_S390X_RRE(llcr, 0xb9940000)
506
SLJIT_S390X_RRE(llgcr, 0xb9840000)
507
508
/* LOAD LOGICAL HALFWORD */
509
SLJIT_S390X_RRE(llhr, 0xb9950000)
510
SLJIT_S390X_RRE(llghr, 0xb9850000)
511
512
/* MULTIPLY LOGICAL */
513
SLJIT_S390X_RRE(mlgr, 0xb9860000)
514
515
/* MULTIPLY SINGLE */
516
SLJIT_S390X_RRE(msgfr, 0xb91c0000)
517
518
/* OR */
519
SLJIT_S390X_RRE(ogr, 0xb9810000)
520
521
/* SUBTRACT */
522
SLJIT_S390X_RRE(sgr, 0xb9090000)
523
524
#undef SLJIT_S390X_RRE
525
526
/* RI-a form instructions */
527
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
528
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
529
{ \
530
return (pattern) | R20A(reg) | (imm & 0xffff); \
531
}
532
533
/* ADD HALFWORD IMMEDIATE */
534
SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)
535
536
/* LOAD HALFWORD IMMEDIATE */
537
SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
538
SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)
539
540
/* LOAD LOGICAL IMMEDIATE */
541
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
542
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
543
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
544
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
545
546
/* MULTIPLY HALFWORD IMMEDIATE */
547
SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
548
SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)
549
550
/* OR IMMEDIATE */
551
SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)
552
553
#undef SLJIT_S390X_RIA
554
555
/* RIL-a form instructions (require extended immediate facility) */
556
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
557
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
558
{ \
559
SLJIT_ASSERT(have_eimm()); \
560
return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
561
}
562
563
/* ADD IMMEDIATE */
564
SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)
565
566
/* ADD IMMEDIATE HIGH */
567
SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
568
569
/* AND IMMEDIATE */
570
SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)
571
572
/* EXCLUSIVE OR IMMEDIATE */
573
SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)
574
575
/* INSERT IMMEDIATE */
576
SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
577
SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)
578
579
/* LOAD IMMEDIATE */
580
SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)
581
582
/* LOAD LOGICAL IMMEDIATE */
583
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
584
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
585
586
/* SUBTRACT LOGICAL IMMEDIATE */
587
SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)
588
589
#undef SLJIT_S390X_RILA
590
591
/* RX-a form instructions */
592
#define SLJIT_S390X_RXA(name, pattern) \
593
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
594
{ \
595
SLJIT_ASSERT((d & 0xfff) == d); \
596
\
597
return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
598
}
599
600
/* LOAD */
601
SLJIT_S390X_RXA(l, 0x58000000)
602
603
/* LOAD ADDRESS */
604
SLJIT_S390X_RXA(la, 0x41000000)
605
606
/* LOAD HALFWORD */
607
SLJIT_S390X_RXA(lh, 0x48000000)
608
609
/* MULTIPLY SINGLE */
610
SLJIT_S390X_RXA(ms, 0x71000000)
611
612
/* STORE */
613
SLJIT_S390X_RXA(st, 0x50000000)
614
615
/* STORE CHARACTER */
616
SLJIT_S390X_RXA(stc, 0x42000000)
617
618
/* STORE HALFWORD */
619
SLJIT_S390X_RXA(sth, 0x40000000)
620
621
#undef SLJIT_S390X_RXA
622
623
/* RXY-a instructions */
624
#define SLJIT_S390X_RXYA(name, pattern, cond) \
625
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
626
{ \
627
SLJIT_ASSERT(cond); \
628
\
629
return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
630
}
631
632
/* LOAD */
633
SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
634
SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
635
SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)
636
637
/* LOAD BYTE */
638
SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
639
SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())
640
641
/* LOAD HALFWORD */
642
SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
643
SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)
644
645
/* LOAD LOGICAL */
646
SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)
647
648
/* LOAD LOGICAL CHARACTER */
649
SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
650
SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)
651
652
/* LOAD LOGICAL HALFWORD */
653
SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
654
SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)
655
656
/* MULTIPLY SINGLE */
657
SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
658
SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)
659
660
/* STORE */
661
SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
662
SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)
663
664
/* STORE CHARACTER */
665
SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())
666
667
/* STORE HALFWORD */
668
SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())
669
670
#undef SLJIT_S390X_RXYA
671
672
/* RSY-a instructions */
673
#define SLJIT_S390X_RSYA(name, pattern, cond) \
674
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
675
{ \
676
SLJIT_ASSERT(cond); \
677
\
678
return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
679
}
680
681
/* LOAD MULTIPLE */
682
SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)
683
684
/* SHIFT LEFT LOGICAL */
685
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)
686
687
/* SHIFT RIGHT SINGLE */
688
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)
689
690
/* STORE MULTIPLE */
691
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)
692
693
#undef SLJIT_S390X_RSYA
694
695
/* RIE-f instructions (require general-instructions-extension facility) */
696
#define SLJIT_S390X_RIEF(name, pattern) \
697
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
698
{ \
699
sljit_ins i3, i4, i5; \
700
\
701
SLJIT_ASSERT(have_genext()); \
702
i3 = (sljit_ins)start << 24; \
703
i4 = (sljit_ins)end << 16; \
704
i5 = (sljit_ins)rot << 8; \
705
\
706
return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
707
}
708
709
/* ROTATE THEN AND SELECTED BITS */
710
/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */
711
712
/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
713
/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */
714
715
/* ROTATE THEN OR SELECTED BITS */
716
SLJIT_S390X_RIEF(rosbg, 0xec0000000056)
717
718
/* ROTATE THEN INSERT SELECTED BITS */
719
/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
720
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
721
722
/* ROTATE THEN INSERT SELECTED BITS HIGH */
723
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
724
725
/* ROTATE THEN INSERT SELECTED BITS LOW */
726
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
727
728
#undef SLJIT_S390X_RIEF
729
730
/* RRF-c instructions (require load/store-on-condition 1 facility) */
731
#define SLJIT_S390X_RRFC(name, pattern) \
732
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
733
{ \
734
sljit_ins m3; \
735
\
736
SLJIT_ASSERT(have_lscond1()); \
737
m3 = (sljit_ins)(mask & 0xf) << 12; \
738
\
739
return (pattern) | m3 | R4A(dst) | R0A(src); \
740
}
741
742
/* LOAD ON CONDITION */
743
SLJIT_S390X_RRFC(locr, 0xb9f20000)
744
SLJIT_S390X_RRFC(locgr, 0xb9e20000)
745
746
#undef SLJIT_S390X_RRFC
747
748
/* RIE-g instructions (require load/store-on-condition 2 facility) */
749
#define SLJIT_S390X_RIEG(name, pattern) \
750
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
751
{ \
752
sljit_ins m3, i2; \
753
\
754
SLJIT_ASSERT(have_lscond2()); \
755
m3 = (sljit_ins)(mask & 0xf) << 32; \
756
i2 = (sljit_ins)(imm & 0xffffL) << 16; \
757
\
758
return (pattern) | R36A(reg) | m3 | i2; \
759
}
760
761
/* LOAD HALFWORD IMMEDIATE ON CONDITION */
762
SLJIT_S390X_RIEG(lochi, 0xec0000000042)
763
SLJIT_S390X_RIEG(locghi, 0xec0000000046)
764
765
#undef SLJIT_S390X_RIEG
766
767
#define SLJIT_S390X_RILB(name, pattern, cond) \
768
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
769
{ \
770
SLJIT_ASSERT(cond); \
771
\
772
return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
773
}
774
775
/* BRANCH RELATIVE AND SAVE LONG */
776
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
777
778
/* LOAD ADDRESS RELATIVE LONG */
779
SLJIT_S390X_RILB(larl, 0xc00000000000, 1)
780
781
/* LOAD RELATIVE LONG */
782
SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())
783
784
#undef SLJIT_S390X_RILB
785
786
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
787
{
788
return 0x07f0 | target;
789
}
790
791
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
792
{
793
sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
794
sljit_ins ri2 = (sljit_ins)target & 0xffff;
795
return 0xa7040000L | m1 | ri2;
796
}
797
798
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
799
{
800
sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
801
sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
802
return 0xc00400000000L | m1 | ri2;
803
}
804
805
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
806
{
807
SLJIT_ASSERT(have_eimm());
808
return 0xb9830000 | R8A(dst) | R0A(src);
809
}
810
811
/* INSERT PROGRAM MASK */
812
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
813
{
814
return 0xb2220000 | R4A(dst);
815
}
816
817
/* SET PROGRAM MASK */
818
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
819
{
820
return 0x0400 | R4A(dst);
821
}
822
823
/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
824
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
825
{
826
return risbhg(dst, src, start, 0x8 | end, rot);
827
}
828
829
#undef SLJIT_S390X_INSTRUCTION
830
831
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
832
{
833
/* Condition codes: bits 18 and 19.
834
Transformation:
835
0 (zero and no overflow) : unchanged
836
1 (non-zero and no overflow) : unchanged
837
2 (zero and overflow) : decreased by 1
838
3 (non-zero and overflow) : decreased by 1 if non-zero */
839
FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
840
FAIL_IF(push_inst(compiler, ipm(tmp1)));
841
FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
842
FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
843
FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
844
FAIL_IF(push_inst(compiler, spm(tmp1)));
845
return SLJIT_SUCCESS;
846
}
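/* Sketch of the emitted sequence (illustrative, 64-bit case):
 *     brc  0xc, done          ; CC 0 and 1 are left untouched
 *     ipm  %r1                ; capture the program mask (CC scaled by 0x10000000)
 *     ogr  dst, dst           ; re-test the result for zero
 *     brc  0x8, restore       ; zero result: keep the captured CC
 *     slfi %r1, 0x10000000    ; otherwise decrement the captured CC
 * restore:
 *     spm  %r1                ; install the (possibly adjusted) CC
 * done: */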
847
848
/* load 64-bit immediate into register without clobbering flags */
849
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
850
{
851
/* 4 byte instructions */
852
if (is_s16(v))
853
return push_inst(compiler, lghi(target, (sljit_s16)v));
854
855
if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
856
return push_inst(compiler, llill(target, (sljit_u16)v));
857
858
if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
859
return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
860
861
if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
862
return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
863
864
if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
865
return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
866
867
if (is_s32(v))
868
return push_inst(compiler, lgfi(target, (sljit_s32)v));
869
870
if (((sljit_uw)v >> 32) == 0)
871
return push_inst(compiler, llilf(target, (sljit_u32)v));
872
873
if (((sljit_uw)v << 32) == 0)
874
return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
875
876
FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
877
return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
878
}
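/* Examples (illustrative): v = 0x12345678 satisfies is_s32(), so a single lgfi
 * is emitted; v = 0x123456789abcdef0 matches none of the short forms and falls
 * back to the final pair:
 *     llilf(target, 0x9abcdef0);   // load the low 32 bits, clear the rest
 *     iihf(target, 0x12345678);    // insert the high 32 bits
 */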
879
880
struct addr {
881
sljit_gpr base;
882
sljit_gpr index;
883
sljit_s32 offset;
884
};
885
886
/* transform memory operand into D(X,B) form with a signed 20-bit offset */
887
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
888
struct addr *addr, sljit_s32 mem, sljit_sw off,
889
sljit_gpr tmp /* clobbered, must not be r0 */)
890
{
891
sljit_gpr base = r0;
892
sljit_gpr index = r0;
893
894
SLJIT_ASSERT(tmp != r0);
895
if (mem & REG_MASK)
896
base = gpr(mem & REG_MASK);
897
898
if (mem & OFFS_REG_MASK) {
899
index = gpr(OFFS_REG(mem));
900
if (off != 0) {
901
/* shift and put the result into tmp */
902
SLJIT_ASSERT(0 <= off && off < 64);
903
FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
904
index = tmp;
905
off = 0; /* clear offset */
906
}
907
}
908
else if (!is_s20(off)) {
909
FAIL_IF(push_load_imm_inst(compiler, tmp, off));
910
index = tmp;
911
off = 0; /* clear offset */
912
}
913
addr->base = base;
914
addr->index = index;
915
addr->offset = (sljit_s32)off;
916
return SLJIT_SUCCESS;
917
}
918
919
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
920
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
921
struct addr *addr, sljit_s32 mem, sljit_sw off,
922
sljit_gpr tmp /* clobbered, must not be r0 */)
923
{
924
sljit_gpr base = r0;
925
sljit_gpr index = r0;
926
927
SLJIT_ASSERT(tmp != r0);
928
if (mem & REG_MASK)
929
base = gpr(mem & REG_MASK);
930
931
if (mem & OFFS_REG_MASK) {
932
index = gpr(OFFS_REG(mem));
933
if (off != 0) {
934
/* shift and put the result into tmp */
935
SLJIT_ASSERT(0 <= off && off < 64);
936
FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
937
index = tmp;
938
off = 0; /* clear offset */
939
}
940
}
941
else if (!is_u12(off)) {
942
FAIL_IF(push_load_imm_inst(compiler, tmp, off));
943
index = tmp;
944
off = 0; /* clear offset */
945
}
946
addr->base = base;
947
addr->index = index;
948
addr->offset = (sljit_s32)off;
949
return SLJIT_SUCCESS;
950
}
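/* Illustrative example (hypothetical operands): for mem = SLJIT_MEM1(SLJIT_R0)
 * and off = 0x100000 the displacement fits neither the signed 20-bit nor the
 * unsigned 12-bit field, so both helpers load it into tmp via
 * push_load_imm_inst() and return it as the index register, leaving
 * addr->offset == 0. */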
951
952
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
953
#define WHEN(cond, r, i1, i2, addr) \
954
(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
955
956
/* May clobber tmp1. */
957
static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
958
sljit_s32 mem, sljit_sw memw,
959
sljit_s32 is_32bit, const sljit_ins* forms)
960
{
961
struct addr addr;
962
963
SLJIT_ASSERT(mem & SLJIT_MEM);
964
965
if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
966
FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
967
return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
968
}
969
970
FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
971
return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
972
}
973
974
static const sljit_ins load_forms[3] = {
975
0x58000000 /* l */,
976
0xe30000000058 /* ly */,
977
0xe30000000004 /* lg */
978
};
979
980
static const sljit_ins store_forms[3] = {
981
0x50000000 /* st */,
982
0xe30000000050 /* sty */,
983
0xe30000000024 /* stg */
984
};
985
986
static const sljit_ins load_halfword_forms[3] = {
987
0x48000000 /* lh */,
988
0xe30000000078 /* lhy */,
989
0xe30000000015 /* lgh */
990
};
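/* Usage sketch (assumed, not verbatim from this file): load_store_op() prefers
 * forms[0] (RX, unsigned 12-bit displacement) for 32-bit accesses whenever a
 * 12-bit displacement or an index register can be used, and falls back to
 * forms[1]/forms[2] (RXY, signed 20-bit displacement) otherwise, so
 *     load_store_op(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), 8, 1, load_forms);
 * emits the 4-byte l instruction rather than the 6-byte ly. */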
991
992
/* May clobber tmp1. */
993
static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
994
sljit_s32 src, sljit_sw srcw,
995
sljit_s32 is_32bit)
996
{
997
return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
998
}
999
1000
/* May clobber tmp1. */
1001
static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1002
sljit_s32 src, sljit_sw srcw,
1003
sljit_s32 is_32bit)
1004
{
1005
struct addr addr;
1006
sljit_ins ins;
1007
1008
SLJIT_ASSERT(src & SLJIT_MEM);
1009
1010
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1011
1012
ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1013
return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1014
}
1015
1016
/* May clobber tmp1. */
1017
static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1018
sljit_s32 dst, sljit_sw dstw,
1019
sljit_s32 is_32bit)
1020
{
1021
return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1022
}
1023
1024
#undef WHEN
1025
1026
static sljit_s32 emit_move(struct sljit_compiler *compiler,
1027
sljit_gpr dst_r,
1028
sljit_s32 src, sljit_sw srcw)
1029
{
1030
sljit_gpr src_r;
1031
1032
SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
1033
1034
if (src == SLJIT_IMM)
1035
return push_load_imm_inst(compiler, dst_r, srcw);
1036
1037
if (src & SLJIT_MEM)
1038
return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1039
1040
src_r = gpr(src & REG_MASK);
1041
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1042
}
1043
1044
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1045
sljit_s32 dst,
1046
sljit_s32 src1, sljit_sw src1w,
1047
sljit_s32 src2, sljit_sw src2w)
1048
{
1049
sljit_gpr dst_r = tmp0;
1050
sljit_gpr src_r = tmp1;
1051
sljit_s32 needs_move = 1;
1052
1053
if (FAST_IS_REG(dst)) {
1054
dst_r = gpr(dst);
1055
1056
if (dst == src1)
1057
needs_move = 0;
1058
else if (dst == src2) {
1059
dst_r = tmp0;
1060
needs_move = 2;
1061
}
1062
}
1063
1064
if (needs_move)
1065
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1066
1067
if (FAST_IS_REG(src2))
1068
src_r = gpr(src2);
1069
else
1070
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1071
1072
FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1073
1074
if (needs_move != 2)
1075
return SLJIT_SUCCESS;
1076
1077
dst_r = gpr(dst & REG_MASK);
1078
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1079
}
1080
1081
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1082
sljit_s32 dst,
1083
sljit_s32 src1, sljit_sw src1w)
1084
{
1085
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1086
sljit_gpr src_r = tmp1;
1087
1088
if (FAST_IS_REG(src1))
1089
src_r = gpr(src1);
1090
else
1091
FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1092
1093
return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1094
}
1095
1096
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1097
sljit_s32 dst,
1098
sljit_s32 src1, sljit_sw src1w,
1099
sljit_s32 src2, sljit_sw src2w)
1100
{
1101
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1102
sljit_gpr src1_r = tmp0;
1103
sljit_gpr src2_r = tmp1;
1104
1105
if (FAST_IS_REG(src1))
1106
src1_r = gpr(src1);
1107
else
1108
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1109
1110
if (FAST_IS_REG(src2))
1111
src2_r = gpr(src2);
1112
else
1113
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1114
1115
return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
1116
}
1117
1118
typedef enum {
1119
RI_A,
1120
RIL_A,
1121
} emit_ril_type;
1122
1123
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1124
sljit_s32 dst,
1125
sljit_s32 src1, sljit_sw src1w,
1126
sljit_sw src2w,
1127
emit_ril_type type)
1128
{
1129
sljit_gpr dst_r = tmp0;
1130
sljit_s32 needs_move = 1;
1131
1132
if (FAST_IS_REG(dst)) {
1133
dst_r = gpr(dst);
1134
1135
if (dst == src1)
1136
needs_move = 0;
1137
}
1138
1139
if (needs_move)
1140
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1141
1142
if (type == RIL_A)
1143
return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1144
return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1145
}
1146
1147
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1148
sljit_s32 dst,
1149
sljit_s32 src1, sljit_sw src1w,
1150
sljit_sw src2w)
1151
{
1152
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1153
sljit_gpr src_r = tmp0;
1154
1155
if (!FAST_IS_REG(src1))
1156
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1157
else
1158
src_r = gpr(src1 & REG_MASK);
1159
1160
return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
1161
}
1162
1163
typedef enum {
1164
RX_A,
1165
RXY_A,
1166
} emit_rx_type;
1167
1168
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
1169
sljit_s32 dst,
1170
sljit_s32 src1, sljit_sw src1w,
1171
sljit_s32 src2, sljit_sw src2w,
1172
emit_rx_type type)
1173
{
1174
sljit_gpr dst_r = tmp0;
1175
sljit_s32 needs_move = 1;
1176
sljit_gpr base, index;
1177
1178
SLJIT_ASSERT(src2 & SLJIT_MEM);
1179
1180
if (FAST_IS_REG(dst)) {
1181
dst_r = gpr(dst);
1182
1183
if (dst == src1)
1184
needs_move = 0;
1185
else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
1186
dst_r = tmp0;
1187
needs_move = 2;
1188
}
1189
}
1190
1191
if (needs_move)
1192
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1193
1194
base = gpr(src2 & REG_MASK);
1195
index = tmp0;
1196
1197
if (src2 & OFFS_REG_MASK) {
1198
index = gpr(OFFS_REG(src2));
1199
1200
if (src2w != 0) {
1201
FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
1202
src2w = 0;
1203
index = tmp1;
1204
}
1205
} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
1206
FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));
1207
1208
if (src2 & REG_MASK)
1209
index = tmp1;
1210
else
1211
base = tmp1;
1212
src2w = 0;
1213
}
1214
1215
if (type == RX_A)
1216
ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
1217
else
1218
ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
1219
1220
FAIL_IF(push_inst(compiler, ins));
1221
1222
if (needs_move != 2)
1223
return SLJIT_SUCCESS;
1224
1225
dst_r = gpr(dst);
1226
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1227
}
1228
1229
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1230
sljit_s32 dst, sljit_sw dstw,
1231
sljit_sw srcw)
1232
{
1233
sljit_gpr dst_r = tmp1;
1234
1235
SLJIT_ASSERT(dst & SLJIT_MEM);
1236
1237
if (dst & OFFS_REG_MASK) {
1238
sljit_gpr index = tmp1;
1239
1240
if ((dstw & 0x3) == 0)
1241
index = gpr(OFFS_REG(dst));
1242
else
1243
FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
1244
1245
FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));
1246
dstw = 0;
1247
}
1248
else if (!is_s20(dstw)) {
1249
FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1250
1251
if (dst & REG_MASK)
1252
FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));
1253
1254
dstw = 0;
1255
}
1256
else
1257
dst_r = gpr(dst & REG_MASK);
1258
1259
return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
1260
}
1261
1262
struct ins_forms {
1263
sljit_ins op_r;
1264
sljit_ins op_gr;
1265
sljit_ins op_rk;
1266
sljit_ins op_grk;
1267
sljit_ins op;
1268
sljit_ins op_y;
1269
sljit_ins op_g;
1270
};
1271
1272
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1273
sljit_s32 dst,
1274
sljit_s32 src1, sljit_sw src1w,
1275
sljit_s32 src2, sljit_sw src2w)
1276
{
1277
sljit_s32 mode = compiler->mode;
1278
sljit_ins ins, ins_k;
1279
1280
if ((src1 | src2) & SLJIT_MEM) {
1281
sljit_ins ins12, ins20;
1282
1283
if (mode & SLJIT_32) {
1284
ins12 = forms->op;
1285
ins20 = forms->op_y;
1286
}
1287
else {
1288
ins12 = 0;
1289
ins20 = forms->op_g;
1290
}
1291
1292
if (ins12 && ins20) {
1293
/* Extra instructions needed for address computation can be executed independently. */
1294
if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1295
|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
1296
if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1297
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1298
1299
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1300
}
1301
1302
if (src1 & SLJIT_MEM) {
1303
if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
1304
return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);
1305
1306
return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
1307
}
1308
}
1309
else if (ins12 || ins20) {
1310
emit_rx_type rx_type;
1311
1312
if (ins12) {
1313
rx_type = RX_A;
1314
ins = ins12;
1315
}
1316
else {
1317
rx_type = RXY_A;
1318
ins = ins20;
1319
}
1320
1321
if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1322
|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
1323
return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);
1324
1325
if (src1 & SLJIT_MEM)
1326
return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
1327
}
1328
}
1329
1330
if (mode & SLJIT_32) {
1331
ins = forms->op_r;
1332
ins_k = forms->op_rk;
1333
}
1334
else {
1335
ins = forms->op_gr;
1336
ins_k = forms->op_grk;
1337
}
1338
1339
SLJIT_ASSERT(ins != 0 || ins_k != 0);
1340
1341
if (ins && FAST_IS_REG(dst)) {
1342
if (dst == src1)
1343
return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1344
1345
if (dst == src2)
1346
return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
1347
}
1348
1349
if (ins_k == 0)
1350
return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1351
1352
return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
1353
}
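/* Design note (sketch): because the operation is commutative, a register
 * destination that aliases src2 can be handled by swapping the operands, so
 * the two-operand op_r/op_gr form remains usable; the three-operand
 * op_rk/op_grk form is mainly needed when neither source aliases dst. */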
1354
1355
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1356
sljit_s32 dst,
1357
sljit_s32 src1, sljit_sw src1w,
1358
sljit_s32 src2, sljit_sw src2w)
1359
{
1360
sljit_s32 mode = compiler->mode;
1361
sljit_ins ins;
1362
1363
if (src2 & SLJIT_MEM) {
1364
sljit_ins ins12, ins20;
1365
1366
if (mode & SLJIT_32) {
1367
ins12 = forms->op;
1368
ins20 = forms->op_y;
1369
}
1370
else {
1371
ins12 = 0;
1372
ins20 = forms->op_g;
1373
}
1374
1375
if (ins12 && ins20) {
1376
if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1377
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1378
1379
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1380
}
1381
else if (ins12)
1382
return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1383
else if (ins20)
1384
return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1385
}
1386
1387
ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;
1388
1389
if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
1390
return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
1391
1392
return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
1393
}
1394
1395
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
1396
{
1397
struct sljit_label *label;
1398
struct sljit_jump *jump;
1399
struct sljit_const *const_;
1400
sljit_sw executable_offset;
1401
sljit_uw ins_size = compiler->size << 1;
1402
sljit_uw pool_size = 0; /* literal pool */
1403
sljit_uw pad_size;
1404
sljit_uw half_count;
1405
SLJIT_NEXT_DEFINE_TYPES;
1406
struct sljit_memory_fragment *buf;
1407
sljit_ins *buf_ptr;
1408
sljit_ins *buf_end;
1409
sljit_u16 *code;
1410
sljit_u16 *code_ptr;
1411
sljit_uw *pool, *pool_ptr;
1412
sljit_ins ins;
1413
sljit_sw source, offset;
1414
1415
CHECK_ERROR_PTR();
1416
CHECK_PTR(check_sljit_generate_code(compiler));
1417
reverse_buf(compiler);
1418
1419
jump = compiler->jumps;
1420
while (jump != NULL) {
1421
if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1422
/* encoded: */
1423
/* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
1424
/* replace with: */
1425
/* lgrl %r1, <pool_addr> */
1426
/* bras %r14, %r1 (or bcr <mask>, %r1) */
1427
pool_size += sizeof(*pool);
1428
if (!(jump->flags & JUMP_MOV_ADDR))
1429
ins_size += 2;
1430
}
1431
jump = jump->next;
1432
}
1433
1434
const_ = compiler->consts;
1435
while (const_) {
1436
pool_size += sizeof(*pool);
1437
const_ = const_->next;
1438
}
1439
1440
/* pad code size to 8 bytes so it is accessible with halfword offsets */
1441
/* the literal pool needs to be doubleword aligned */
1442
pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
1443
SLJIT_ASSERT(pad_size < 8UL);
1444
1445
/* allocate target buffer */
1446
code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
1447
PTR_FAIL_WITH_EXEC_IF(code);
1448
code_ptr = code;
1449
1450
/* TODO(carenas): pool is optional, and the ABI recommends it to
1451
* be created before the function code, instead of
1452
* globally; if the generated code is too big it could
1453
* need offsets bigger than 32-bit words and assert()
1454
*/
1455
pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
1456
pool_ptr = pool;
1457
buf = compiler->buf;
1458
half_count = 0;
1459
1460
label = compiler->labels;
1461
jump = compiler->jumps;
1462
const_ = compiler->consts;
1463
SLJIT_NEXT_INIT_TYPES();
1464
SLJIT_GET_NEXT_MIN();
1465
1466
do {
1467
buf_ptr = (sljit_ins*)buf->memory;
1468
buf_end = buf_ptr + (buf->used_size >> 3);
1469
do {
1470
ins = *buf_ptr++;
1471
1472
if (next_min_addr == half_count) {
1473
SLJIT_ASSERT(!label || label->size >= half_count);
1474
SLJIT_ASSERT(!jump || jump->addr >= half_count);
1475
SLJIT_ASSERT(!const_ || const_->addr >= half_count);
1476
1477
if (next_min_addr == next_label_size) {
1478
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1479
label = label->next;
1480
next_label_size = SLJIT_GET_NEXT_SIZE(label);
1481
}
1482
1483
if (next_min_addr == next_jump_addr) {
1484
if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
1485
source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1486
1487
jump->addr = (sljit_uw)pool_ptr;
1488
1489
/* store target into pool */
1490
offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1491
pool_ptr++;
1492
1493
SLJIT_ASSERT(!(offset & 1));
1494
offset >>= 1;
1495
SLJIT_ASSERT(is_s32(offset));
1496
ins |= (sljit_ins)offset & 0xffffffff;
1497
} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
1498
sljit_ins arg;
1499
1500
jump->addr = (sljit_uw)pool_ptr;
1501
1502
/* load address into tmp1 */
1503
source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1504
offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1505
1506
SLJIT_ASSERT(!(offset & 1));
1507
offset >>= 1;
1508
SLJIT_ASSERT(is_s32(offset));
1509
1510
code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);
1511
code_ptr[1] = (sljit_u16)(offset >> 16);
1512
code_ptr[2] = (sljit_u16)offset;
1513
code_ptr += 3;
1514
pool_ptr++;
1515
1516
/* branch to tmp1 */
1517
arg = (ins >> 36) & 0xf;
1518
if (((ins >> 32) & 0xf) == 4) {
1519
/* brcl -> bcr */
1520
ins = bcr(arg, tmp1);
1521
} else {
1522
SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
1523
/* brasl -> basr */
1524
ins = basr(arg, tmp1);
1525
}
1526
1527
/* Adjust half_count. */
1528
half_count += 2;
1529
} else
1530
jump->addr = (sljit_uw)code_ptr;
1531
1532
jump = jump->next;
1533
next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
1534
} else if (next_min_addr == next_const_addr) {
1535
/* update instruction with relative address of constant */
1536
source = (sljit_sw)code_ptr;
1537
offset = (sljit_sw)pool_ptr - source;
1538
1539
SLJIT_ASSERT(!(offset & 0x1));
1540
offset >>= 1; /* halfword (not byte) offset */
1541
SLJIT_ASSERT(is_s32(offset));
1542
1543
ins |= (sljit_ins)offset & 0xffffffff;
1544
1545
/* update address */
1546
const_->addr = (sljit_uw)pool_ptr;
1547
1548
/* store initial value into pool and update pool address */
1549
*(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);
1550
1551
/* move to next constant */
1552
const_ = const_->next;
1553
next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
1554
}
1555
1556
SLJIT_GET_NEXT_MIN();
1557
}
1558
1559
if (ins & 0xffff00000000L) {
1560
*code_ptr++ = (sljit_u16)(ins >> 32);
1561
half_count++;
1562
}
1563
1564
if (ins & 0xffffffff0000L) {
1565
*code_ptr++ = (sljit_u16)(ins >> 16);
1566
half_count++;
1567
}
1568
1569
*code_ptr++ = (sljit_u16)ins;
1570
half_count++;
1571
} while (buf_ptr < buf_end);
1572
1573
buf = buf->next;
1574
} while (buf);
1575
1576
if (next_label_size == half_count) {
1577
label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1578
label = label->next;
1579
}
1580
1581
SLJIT_ASSERT(!label);
1582
SLJIT_ASSERT(!jump);
1583
SLJIT_ASSERT(!const_);
1584
SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);
1585
SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
1586
1587
jump = compiler->jumps;
1588
while (jump != NULL) {
1589
offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
1590
1591
if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1592
/* Store jump target into pool. */
1593
*(sljit_uw*)(jump->addr) = (sljit_uw)offset;
1594
} else {
1595
code_ptr = (sljit_u16*)jump->addr;
1596
offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1597
1598
/* offset must be halfword aligned */
1599
SLJIT_ASSERT(!(offset & 1));
1600
offset >>= 1;
1601
SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
1602
1603
code_ptr[1] = (sljit_u16)(offset >> 16);
1604
code_ptr[2] = (sljit_u16)offset;
1605
}
1606
jump = jump->next;
1607
}
1608
1609
compiler->error = SLJIT_ERR_COMPILED;
1610
compiler->executable_offset = executable_offset;
1611
compiler->executable_size = ins_size;
1612
if (pool_size)
1613
compiler->executable_size += (pad_size + pool_size);
1614
1615
code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1616
code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1617
SLJIT_CACHE_FLUSH(code, code_ptr);
1618
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1619
return code;
1620
}
1621
1622
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1623
{
1624
/* TODO(mundaym): implement all */
1625
switch (feature_type) {
1626
case SLJIT_HAS_FPU:
1627
#ifdef SLJIT_IS_FPU_AVAILABLE
1628
return (SLJIT_IS_FPU_AVAILABLE) != 0;
1629
#else
1630
return 1;
1631
#endif /* SLJIT_IS_FPU_AVAILABLE */
1632
1633
case SLJIT_HAS_CLZ:
1634
case SLJIT_HAS_REV:
1635
case SLJIT_HAS_ROT:
1636
case SLJIT_HAS_PREFETCH:
1637
case SLJIT_HAS_COPY_F32:
1638
case SLJIT_HAS_COPY_F64:
1639
case SLJIT_HAS_SIMD:
1640
case SLJIT_HAS_ATOMIC:
1641
case SLJIT_HAS_MEMORY_BARRIER:
1642
return 1;
1643
1644
case SLJIT_HAS_CTZ:
1645
return 2;
1646
1647
case SLJIT_HAS_CMOV:
1648
return have_lscond1() ? 1 : 0;
1649
}
1650
return 0;
1651
}
1652
1653
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1654
{
1655
SLJIT_UNUSED_ARG(type);
1656
return 0;
1657
}
1658
1659
/* --------------------------------------------------------------------- */
1660
/* Entry, exit */
1661
/* --------------------------------------------------------------------- */
1662
1663
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1664
sljit_s32 options, sljit_s32 arg_types,
1665
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
1666
{
1667
sljit_s32 fscratches;
1668
sljit_s32 fsaveds;
1669
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1670
sljit_s32 offset, i, tmp;
1671
1672
CHECK_ERROR();
1673
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, local_size));
1674
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
1675
1676
/* Saved registers are stored in the callee-allocated save area. */
1677
SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);
1678
1679
scratches = ENTER_GET_REGS(scratches);
1680
saveds = ENTER_GET_REGS(saveds);
1681
fscratches = compiler->fscratches;
1682
fsaveds = compiler->fsaveds;
1683
1684
offset = 2 * SSIZE_OF(sw);
1685
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1686
if (saved_arg_count == 0) {
1687
FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
1688
offset += 9 * SSIZE_OF(sw);
1689
} else {
1690
FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1691
offset += (8 - saved_arg_count) * SSIZE_OF(sw);
1692
}
1693
} else {
1694
if (scratches == SLJIT_FIRST_SAVED_REG) {
1695
FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
1696
offset += SSIZE_OF(sw);
1697
} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1698
FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1699
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1700
}
1701
1702
if (saved_arg_count == 0) {
1703
if (saveds == 0) {
1704
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1705
offset += SSIZE_OF(sw);
1706
} else {
1707
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
1708
offset += (saveds + 1) * SSIZE_OF(sw);
1709
}
1710
} else if (saveds > saved_arg_count) {
1711
if (saveds == saved_arg_count + 1) {
1712
FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1713
offset += SSIZE_OF(sw);
1714
} else {
1715
FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1716
offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
1717
}
1718
}
1719
}
1720
1721
if (saved_arg_count > 0) {
1722
FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1723
offset += SSIZE_OF(sw);
1724
}
1725
1726
tmp = SLJIT_FS0 - fsaveds;
1727
for (i = SLJIT_FS0; i > tmp; i--) {
1728
FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1729
offset += SSIZE_OF(sw);
1730
}
1731
1732
for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1733
FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1734
offset += SSIZE_OF(sw);
1735
}
1736
1737
local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1738
compiler->local_size = local_size;
1739
1740
if (is_s20(-local_size))
1741
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
1742
else
1743
FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
1744
1745
if (options & SLJIT_ENTER_REG_ARG)
1746
return SLJIT_SUCCESS;
1747
1748
arg_types >>= SLJIT_ARG_SHIFT;
1749
saved_arg_count = 0;
1750
tmp = 0;
1751
while (arg_types > 0) {
1752
if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1753
if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1754
FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
1755
saved_arg_count++;
1756
}
1757
tmp++;
1758
}
1759
1760
arg_types >>= SLJIT_ARG_SHIFT;
1761
}
1762
1763
return SLJIT_SUCCESS;
1764
}
1765
1766
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1767
sljit_s32 options, sljit_s32 arg_types,
1768
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
1769
{
1770
CHECK_ERROR();
1771
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
1772
set_emit_enter(compiler, options, arg_types, scratches, saveds, local_size);
1773
1774
compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1775
return SLJIT_SUCCESS;
1776
}
1777
1778
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
1779
{
1780
sljit_s32 offset, i, tmp;
1781
sljit_s32 local_size = compiler->local_size;
1782
sljit_s32 saveds = compiler->saveds;
1783
sljit_s32 scratches = compiler->scratches;
1784
sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1785
1786
if (is_u12(local_size))
1787
FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
else if (is_s20(local_size))
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
else
FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));

offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
if (kept_saveds_count == 0) {
FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
offset += 9 * SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
}
} else {
if (scratches == SLJIT_FIRST_SAVED_REG) {
FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else if (scratches > SLJIT_FIRST_SAVED_REG) {
FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
}

if (kept_saveds_count == 0) {
if (saveds == 0) {
if (last_reg == r14)
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else if (saveds == 1 && last_reg == r13) {
FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
offset += 2 * SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
offset += (saveds + 1) * SSIZE_OF(sw);
}
} else if (saveds > kept_saveds_count) {
if (saveds == kept_saveds_count + 1) {
FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
offset += SSIZE_OF(sw);
} else {
FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
}
}
}

if (kept_saveds_count > 0) {
if (last_reg == r14)
FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
offset += SSIZE_OF(sw);
}

tmp = SLJIT_FS0 - compiler->fsaveds;
for (i = SLJIT_FS0; i > tmp; i--) {
FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
offset += SSIZE_OF(sw);
}

for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
offset += SSIZE_OF(sw);
}

return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));

FAIL_IF(emit_stack_frame_release(compiler, r14));
return push_inst(compiler, br(r14)); /* return */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
sljit_s32 src, sljit_sw srcw)
{
CHECK_ERROR();
CHECK(check_sljit_emit_return_to(compiler, src, srcw));

if (src & SLJIT_MEM) {
ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
src = TMP_REG2;
srcw = 0;
} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
src = TMP_REG2;
srcw = 0;
}

FAIL_IF(emit_stack_frame_release(compiler, r13));

SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}

/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
sljit_gpr arg0 = gpr(SLJIT_R0);
sljit_gpr arg1 = gpr(SLJIT_R1);

CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));

op = GET_OPCODE(op) | (op & SLJIT_32);
switch (op) {
case SLJIT_BREAKPOINT:
/* The following invalid instruction is emitted by gdb. */
return push_inst(compiler, 0x0001 /* 2-byte trap */);
case SLJIT_NOP:
return push_inst(compiler, 0x0700 /* 2-byte nop */);
case SLJIT_LMUL_UW:
FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
break;
case SLJIT_LMUL_SW:
/* signed multiplication from: */
/* Hacker's Delight, Second Edition: Chapter 8-3. */
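/* The srag/ngr pairs below set tmp0 to (arg0 < 0 ? arg1 : 0) and tmp1 to (arg1 < 0 ? arg0 : 0);
subtracting both from the high word of the unsigned product computed by mlgr yields the
high word of the signed product. */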
FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

/* unsigned multiplication */
FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
break;
case SLJIT_DIV_U32:
case SLJIT_DIVMOD_U32:
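/* The divide instructions operate on the even/odd register pair tmp0:tmp1: the dividend is
placed in tmp1 (with tmp0 cleared or sign-extended where the instruction requires it), the
quotient comes back in the odd register (tmp1) and the remainder in the even one (tmp0). */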
FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
if (op == SLJIT_DIVMOD_U32)
return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

return SLJIT_SUCCESS;
case SLJIT_DIV_S32:
case SLJIT_DIVMOD_S32:
FAIL_IF(push_inst(compiler, 0xeb00000000dc /* srak */ | R36A(tmp0) | R32A(arg0) | (31 << 16)));
FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
if (op == SLJIT_DIVMOD_S32)
return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

return SLJIT_SUCCESS;
case SLJIT_DIV_UW:
case SLJIT_DIVMOD_UW:
FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
if (op == SLJIT_DIVMOD_UW)
return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

return SLJIT_SUCCESS;
case SLJIT_DIV_SW:
case SLJIT_DIVMOD_SW:
FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
if (op == SLJIT_DIVMOD_SW)
return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

return SLJIT_SUCCESS;
case SLJIT_MEMORY_BARRIER:
return push_inst(compiler, 0x0700 /* bcr */ | (0xe << 4) | 0);
case SLJIT_ENDBR:
return SLJIT_SUCCESS;
case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
return SLJIT_SUCCESS;
default:
SLJIT_UNREACHABLE();
}
/* swap result registers */
FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
return push_inst(compiler, lgr(arg1, tmp0));
}

static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
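/* FLOGR places the number of leading zero bits in tmp0, which is CLZ directly. CTZ is
derived from it: x & -x isolates the lowest set bit, so ctz(x) == (width - 1) - clz(x & -x);
the aghik/rxsbg sequence below applies that reflection (and keeps the x == 0 result equal
to the operand width) without a branch. */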

if ((op & SLJIT_32) && src_r != tmp0) {
FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
src_r = tmp0;
}

if (is_ctz) {
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));

if (src_r == tmp0)
FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
else
FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));

src_r = tmp0;
}

FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));

if (is_ctz)
FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));

if (op & SLJIT_32) {
if (!is_ctz && dst_r != tmp0)
return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));

FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
}

if (is_ctz)
FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));

if (dst_r == tmp0)
return SLJIT_SUCCESS;

return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
2016
2017
static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
2018
sljit_s32 dst, sljit_sw dstw,
2019
sljit_s32 src, sljit_sw srcw)
2020
{
2021
struct addr addr;
2022
sljit_gpr reg;
2023
sljit_ins ins;
2024
sljit_s32 opcode = GET_OPCODE(op);
2025
sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
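/* Byte reversal uses the reversing loads/stores (lrvh/lrv/lrvg, strvh/strv/strvg) when a
memory operand is involved and lrvr/lrvgr for register operands. The 16-bit variants leave
the reversed halfword in the low bits (memory forms) or the high bits (register forms), so
an extension or right shift is still needed afterwards. */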
2026
2027
if (dst & SLJIT_MEM) {
2028
if (src & SLJIT_MEM) {
2029
FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
2030
reg = tmp0;
2031
} else
2032
reg = gpr(src);
2033
2034
FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
2035
2036
if (is_16bit)
2037
ins = 0xe3000000003f /* strvh */;
2038
else
2039
ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
2040
2041
return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
2042
}
2043
2044
reg = gpr(dst);
2045
2046
if (src & SLJIT_MEM) {
2047
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
2048
2049
if (is_16bit)
2050
ins = 0xe3000000001f /* lrvh */;
2051
else
2052
ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
2053
2054
FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
2055
2056
if (opcode == SLJIT_REV)
2057
return SLJIT_SUCCESS;
2058
2059
if (is_16bit) {
2060
if (op & SLJIT_32)
2061
ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
2062
else
2063
ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
2064
} else
2065
ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2066
2067
return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2068
}
2069
2070
ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
2071
FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
2072
2073
if (opcode == SLJIT_REV)
2074
return SLJIT_SUCCESS;
2075
2076
if (!is_16bit) {
2077
ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2078
return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2079
}
2080
2081
if (op & SLJIT_32) {
2082
ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
2083
return push_inst(compiler, ins | R20A(reg) | 16);
2084
}
2085
2086
ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
2087
return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
2088
}
2089
2090
/* LEVAL will be defined later with different parameters as needed */
2091
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2092
2093
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2094
sljit_s32 dst, sljit_sw dstw,
2095
sljit_s32 src, sljit_sw srcw)
2096
{
2097
sljit_ins ins;
2098
struct addr mem;
2099
sljit_gpr dst_r;
2100
sljit_gpr src_r;
2101
sljit_s32 opcode = GET_OPCODE(op);
2102
2103
CHECK_ERROR();
2104
CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2105
ADJUST_LOCAL_OFFSET(dst, dstw);
2106
ADJUST_LOCAL_OFFSET(src, srcw);
2107
2108
if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
2109
/* LOAD REGISTER */
2110
if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
2111
dst_r = gpr(dst);
2112
src_r = gpr(src);
2113
switch (opcode | (op & SLJIT_32)) {
2114
/* 32-bit */
2115
case SLJIT_MOV32_U8:
2116
ins = llcr(dst_r, src_r);
2117
break;
2118
case SLJIT_MOV32_S8:
2119
ins = lbr(dst_r, src_r);
2120
break;
2121
case SLJIT_MOV32_U16:
2122
ins = llhr(dst_r, src_r);
2123
break;
2124
case SLJIT_MOV32_S16:
2125
ins = lhr(dst_r, src_r);
2126
break;
2127
case SLJIT_MOV32:
2128
if (dst_r == src_r)
2129
return SLJIT_SUCCESS;
2130
ins = lr(dst_r, src_r);
2131
break;
2132
/* 64-bit */
2133
case SLJIT_MOV_U8:
2134
ins = llgcr(dst_r, src_r);
2135
break;
2136
case SLJIT_MOV_S8:
2137
ins = lgbr(dst_r, src_r);
2138
break;
2139
case SLJIT_MOV_U16:
2140
ins = llghr(dst_r, src_r);
2141
break;
2142
case SLJIT_MOV_S16:
2143
ins = lghr(dst_r, src_r);
2144
break;
2145
case SLJIT_MOV_U32:
2146
ins = llgfr(dst_r, src_r);
2147
break;
2148
case SLJIT_MOV_S32:
2149
ins = lgfr(dst_r, src_r);
2150
break;
2151
case SLJIT_MOV:
2152
case SLJIT_MOV_P:
2153
if (dst_r == src_r)
2154
return SLJIT_SUCCESS;
2155
ins = lgr(dst_r, src_r);
2156
break;
2157
default:
2158
ins = 0;
2159
SLJIT_UNREACHABLE();
2160
break;
2161
}
2162
FAIL_IF(push_inst(compiler, ins));
2163
return SLJIT_SUCCESS;
2164
}
2165
/* LOAD IMMEDIATE */
2166
if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
2167
switch (opcode) {
2168
case SLJIT_MOV_U8:
2169
srcw = (sljit_sw)((sljit_u8)(srcw));
2170
break;
2171
case SLJIT_MOV_S8:
2172
srcw = (sljit_sw)((sljit_s8)(srcw));
2173
break;
2174
case SLJIT_MOV_U16:
2175
srcw = (sljit_sw)((sljit_u16)(srcw));
2176
break;
2177
case SLJIT_MOV_S16:
2178
srcw = (sljit_sw)((sljit_s16)(srcw));
2179
break;
2180
case SLJIT_MOV_U32:
2181
srcw = (sljit_sw)((sljit_u32)(srcw));
2182
break;
2183
case SLJIT_MOV_S32:
2184
case SLJIT_MOV32:
2185
srcw = (sljit_sw)((sljit_s32)(srcw));
2186
break;
2187
}
2188
return push_load_imm_inst(compiler, gpr(dst), srcw);
2189
}
2190
/* LOAD */
2191
/* TODO(carenas): avoid reg being defined later */
2192
#define LEVAL(i) EVAL(i, reg, mem)
2193
if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2194
sljit_gpr reg = gpr(dst);
2195
2196
FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2197
/* TODO(carenas): convert all calls below to LEVAL */
2198
switch (opcode | (op & SLJIT_32)) {
2199
case SLJIT_MOV32_U8:
2200
ins = llc(reg, mem.offset, mem.index, mem.base);
2201
break;
2202
case SLJIT_MOV32_S8:
2203
ins = lb(reg, mem.offset, mem.index, mem.base);
2204
break;
2205
case SLJIT_MOV32_U16:
2206
ins = llh(reg, mem.offset, mem.index, mem.base);
2207
break;
2208
case SLJIT_MOV32_S16:
2209
ins = WHEN2(is_u12(mem.offset), lh, lhy);
2210
break;
2211
case SLJIT_MOV32:
2212
ins = WHEN2(is_u12(mem.offset), l, ly);
2213
break;
2214
case SLJIT_MOV_U8:
2215
ins = LEVAL(llgc);
2216
break;
2217
case SLJIT_MOV_S8:
2218
ins = lgb(reg, mem.offset, mem.index, mem.base);
2219
break;
2220
case SLJIT_MOV_U16:
2221
ins = LEVAL(llgh);
2222
break;
2223
case SLJIT_MOV_S16:
2224
ins = lgh(reg, mem.offset, mem.index, mem.base);
2225
break;
2226
case SLJIT_MOV_U32:
2227
ins = LEVAL(llgf);
2228
break;
2229
case SLJIT_MOV_S32:
2230
ins = lgf(reg, mem.offset, mem.index, mem.base);
2231
break;
2232
case SLJIT_MOV_P:
2233
case SLJIT_MOV:
2234
ins = lg(reg, mem.offset, mem.index, mem.base);
2235
break;
2236
default:
2237
ins = 0;
2238
SLJIT_UNREACHABLE();
2239
break;
2240
}
2241
FAIL_IF(push_inst(compiler, ins));
2242
return SLJIT_SUCCESS;
2243
}
2244
/* STORE and STORE IMMEDIATE */
2245
if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
2246
struct addr mem;
2247
sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2248
2249
if (src == SLJIT_IMM) {
2250
/* TODO(mundaym): MOVE IMMEDIATE? */
2251
FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2252
}
2253
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2254
switch (opcode) {
2255
case SLJIT_MOV_U8:
2256
case SLJIT_MOV_S8:
2257
return push_inst(compiler,
2258
WHEN2(is_u12(mem.offset), stc, stcy));
2259
case SLJIT_MOV_U16:
2260
case SLJIT_MOV_S16:
2261
return push_inst(compiler,
2262
WHEN2(is_u12(mem.offset), sth, sthy));
2263
case SLJIT_MOV_U32:
2264
case SLJIT_MOV_S32:
2265
case SLJIT_MOV32:
2266
return push_inst(compiler,
2267
WHEN2(is_u12(mem.offset), st, sty));
2268
case SLJIT_MOV_P:
2269
case SLJIT_MOV:
2270
FAIL_IF(push_inst(compiler, LEVAL(stg)));
2271
return SLJIT_SUCCESS;
2272
default:
2273
SLJIT_UNREACHABLE();
2274
}
2275
}
2276
#undef LEVAL
2277
/* MOVE CHARACTERS */
2278
if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2279
struct addr mem;
2280
FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2281
switch (opcode) {
2282
case SLJIT_MOV_U8:
2283
case SLJIT_MOV_S8:
2284
FAIL_IF(push_inst(compiler,
2285
EVAL(llgc, tmp0, mem)));
2286
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2287
return push_inst(compiler,
2288
EVAL(stcy, tmp0, mem));
2289
case SLJIT_MOV_U16:
2290
case SLJIT_MOV_S16:
2291
FAIL_IF(push_inst(compiler,
2292
EVAL(llgh, tmp0, mem)));
2293
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2294
return push_inst(compiler,
2295
EVAL(sthy, tmp0, mem));
2296
case SLJIT_MOV_U32:
2297
case SLJIT_MOV_S32:
2298
case SLJIT_MOV32:
2299
FAIL_IF(push_inst(compiler,
2300
EVAL(ly, tmp0, mem)));
2301
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2302
return push_inst(compiler,
2303
EVAL(sty, tmp0, mem));
2304
case SLJIT_MOV_P:
2305
case SLJIT_MOV:
2306
FAIL_IF(push_inst(compiler,
2307
EVAL(lg, tmp0, mem)));
2308
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2309
FAIL_IF(push_inst(compiler,
2310
EVAL(stg, tmp0, mem)));
2311
return SLJIT_SUCCESS;
2312
default:
2313
SLJIT_UNREACHABLE();
2314
}
2315
}
2316
SLJIT_UNREACHABLE();
2317
}
2318
2319
SLJIT_ASSERT(src != SLJIT_IMM);
2320
2321
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
2322
src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
2323
2324
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2325
2326
/* TODO(mundaym): optimize loads and stores */
2327
switch (opcode) {
2328
case SLJIT_CLZ:
2329
case SLJIT_CTZ:
2330
if (src & SLJIT_MEM)
2331
FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
2332
2333
FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
2334
break;
2335
case SLJIT_REV_U32:
2336
case SLJIT_REV_S32:
2337
op |= SLJIT_32;
2338
/* fallthrough */
2339
case SLJIT_REV:
2340
case SLJIT_REV_U16:
2341
case SLJIT_REV_S16:
2342
return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
2343
default:
2344
SLJIT_UNREACHABLE();
2345
}
2346
2347
if (dst & SLJIT_MEM)
2348
return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
2349
2350
return SLJIT_SUCCESS;
2351
}
2352
2353
static SLJIT_INLINE int is_commutative(sljit_s32 op)
2354
{
2355
switch (GET_OPCODE(op)) {
2356
case SLJIT_ADD:
2357
case SLJIT_ADDC:
2358
case SLJIT_MUL:
2359
case SLJIT_AND:
2360
case SLJIT_OR:
2361
case SLJIT_XOR:
2362
return 1;
2363
}
2364
return 0;
2365
}
2366
2367
static const struct ins_forms add_forms = {
2368
0x1a00, /* ar */
2369
0xb9080000, /* agr */
2370
0xb9f80000, /* ark */
2371
0xb9e80000, /* agrk */
2372
0x5a000000, /* a */
2373
0xe3000000005a, /* ay */
2374
0xe30000000008, /* ag */
2375
};
2376
2377
static const struct ins_forms logical_add_forms = {
2378
0x1e00, /* alr */
2379
0xb90a0000, /* algr */
2380
0xb9fa0000, /* alrk */
2381
0xb9ea0000, /* algrk */
2382
0x5e000000, /* al */
2383
0xe3000000005e, /* aly */
2384
0xe3000000000a, /* alg */
2385
};
2386
2387
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2388
sljit_s32 dst, sljit_sw dstw,
2389
sljit_s32 src1, sljit_sw src1w,
2390
sljit_s32 src2, sljit_sw src2w)
2391
{
2392
int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2393
int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2394
const struct ins_forms *forms;
2395
sljit_ins ins;
2396
2397
if (src2 == SLJIT_IMM) {
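/* Immediate second operands try the shortest encoding first: asi/agsi (or alsi/algsi) add an
8-bit immediate directly to memory when the destination is the same operand, ahik/aghik and
their logical forms cover 16-bit immediates, and the 32-bit alfi/slgfi/afi/agfi forms handle
values that fit in 32 bits; everything else falls through to emit_commutative() below. */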
2398
if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2399
if (sets_overflow)
2400
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2401
else
2402
ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2403
return emit_siy(compiler, ins, dst, dstw, src2w);
2404
}
2405
2406
if (is_s16(src2w)) {
2407
if (sets_overflow)
2408
ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2409
else
2410
ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2411
FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2412
goto done;
2413
}
2414
2415
if (!sets_overflow) {
2416
if ((op & SLJIT_32) || is_u32(src2w)) {
2417
ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2418
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2419
goto done;
2420
}
2421
if (is_u32(-src2w)) {
2422
FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2423
goto done;
2424
}
2425
}
2426
else if ((op & SLJIT_32) || is_s32(src2w)) {
2427
ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2428
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2429
goto done;
2430
}
2431
}
2432
2433
forms = sets_overflow ? &add_forms : &logical_add_forms;
2434
FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2435
2436
done:
2437
if (sets_zero_overflow)
2438
FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2439
2440
if (dst & SLJIT_MEM)
2441
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2442
2443
return SLJIT_SUCCESS;
2444
}
2445
2446
static const struct ins_forms sub_forms = {
2447
0x1b00, /* sr */
2448
0xb9090000, /* sgr */
2449
0xb9f90000, /* srk */
2450
0xb9e90000, /* sgrk */
2451
0x5b000000, /* s */
2452
0xe3000000005b, /* sy */
2453
0xe30000000009, /* sg */
2454
};
2455
2456
static const struct ins_forms logical_sub_forms = {
2457
0x1f00, /* slr */
2458
0xb90b0000, /* slgr */
2459
0xb9fb0000, /* slrk */
2460
0xb9eb0000, /* slgrk */
2461
0x5f000000, /* sl */
2462
0xe3000000005f, /* sly */
2463
0xe3000000000b, /* slg */
2464
};
2465
2466
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2467
sljit_s32 dst, sljit_sw dstw,
2468
sljit_s32 src1, sljit_sw src1w,
2469
sljit_s32 src2, sljit_sw src2w)
2470
{
2471
sljit_s32 flag_type = GET_FLAG_TYPE(op);
2472
int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2473
int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2474
const struct ins_forms *forms;
2475
sljit_ins ins;
2476
2477
if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2478
int compare_signed = flag_type >= SLJIT_SIG_LESS;
2479
2480
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2481
2482
if (src2 == SLJIT_IMM) {
2483
if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
2484
if ((op & SLJIT_32) || is_s32(src2w)) {
2485
ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2486
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2487
}
2488
} else if ((op & SLJIT_32) || is_u32(src2w)) {
2489
ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2490
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2491
}
2492
}
2493
else if (src2 & SLJIT_MEM) {
2494
if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2495
ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2496
return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2497
}
2498
2499
if (compare_signed)
2500
ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2501
else
2502
ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2503
return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2504
}
2505
2506
if (compare_signed)
2507
ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2508
else
2509
ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2510
return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2511
}
2512
2513
if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2514
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2515
FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2516
goto done;
2517
}
2518
2519
if (src2 == SLJIT_IMM) {
2520
sljit_sw neg_src2w = -src2w;
2521
2522
if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2523
if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2524
if (sets_signed)
2525
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2526
else
2527
ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2528
return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2529
}
2530
2531
if (is_s16(neg_src2w)) {
2532
if (sets_signed)
2533
ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2534
else
2535
ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2536
FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2537
goto done;
2538
}
2539
}
2540
2541
if (!sets_signed) {
2542
if ((op & SLJIT_32) || is_u32(src2w)) {
2543
ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2544
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2545
goto done;
2546
}
2547
if (is_u32(neg_src2w)) {
2548
FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2549
goto done;
2550
}
2551
}
2552
else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2553
ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2554
FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2555
goto done;
2556
}
2557
}
2558
2559
forms = sets_signed ? &sub_forms : &logical_sub_forms;
2560
FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2561
2562
done:
2563
if (sets_signed) {
2564
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2565
2566
if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2567
/* In case of overflow, the sign bit of the two source operands must be different, and
2568
- the first operand is greater if the sign bit of the result is set
2569
- the first operand is less if the sign bit of the result is not set
2570
The -result operation sets the correct sign, because the result cannot be zero.
2571
The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2572
FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
2573
FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2574
}
2575
else if (op & SLJIT_SET_Z)
2576
FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2577
}
2578
2579
if (dst & SLJIT_MEM)
2580
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2581
2582
return SLJIT_SUCCESS;
2583
}
2584
2585
static const struct ins_forms multiply_forms = {
2586
0xb2520000, /* msr */
2587
0xb90c0000, /* msgr */
2588
0xb9fd0000, /* msrkc */
2589
0xb9ed0000, /* msgrkc */
2590
0x71000000, /* ms */
2591
0xe30000000051, /* msy */
2592
0xe3000000000c, /* msg */
2593
};
2594
2595
static const struct ins_forms multiply_overflow_forms = {
2596
0,
2597
0,
2598
0xb9fd0000, /* msrkc */
2599
0xb9ed0000, /* msgrkc */
2600
0,
2601
0xe30000000053, /* msc */
2602
0xe30000000083, /* msgc */
2603
};
2604
2605
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2606
sljit_s32 dst,
2607
sljit_s32 src1, sljit_sw src1w,
2608
sljit_s32 src2, sljit_sw src2w)
2609
{
2610
sljit_ins ins;
2611
2612
if (HAS_FLAGS(op)) {
2613
/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2614
FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2615
FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2616
if (dst_r != tmp0) {
2617
FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2618
}
2619
FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2620
FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2621
FAIL_IF(push_inst(compiler, ipm(tmp1)));
2622
FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2623
2624
return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2625
}
2626
2627
if (src2 == SLJIT_IMM) {
2628
if (is_s16(src2w)) {
2629
ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2630
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2631
}
2632
2633
if (is_s32(src2w)) {
2634
ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2635
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2636
}
2637
}
2638
2639
return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2640
}
2641
2642
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2643
sljit_s32 dst,
2644
sljit_s32 src1, sljit_sw src1w,
2645
sljit_uw imm, sljit_s32 count16)
2646
{
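/* count16 is the number of non-zero 16-bit halfwords in imm, computed by the caller. For OR
it selects between the 32-bit oihf/oilf forms and individual oihh/oihl/oilh/oill instructions
so the shortest sequence is emitted; AND is emitted with nihf/nilf and XOR with xihf/xilf,
restricted to the halves that are needed. */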
2647
sljit_s32 mode = compiler->mode;
2648
sljit_gpr dst_r = tmp0;
2649
sljit_s32 needs_move = 1;
2650
2651
if (IS_GPR_REG(dst)) {
2652
dst_r = gpr(dst & REG_MASK);
2653
if (dst == src1)
2654
needs_move = 0;
2655
}
2656
2657
if (needs_move)
2658
FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2659
2660
if (type == SLJIT_AND) {
2661
if (!(mode & SLJIT_32))
2662
FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2663
return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2664
}
2665
else if (type == SLJIT_OR) {
2666
if (count16 >= 3) {
2667
FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2668
return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2669
}
2670
2671
if (count16 >= 2) {
2672
if ((imm & 0x00000000ffffffffull) == 0)
2673
return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2674
if ((imm & 0xffffffff00000000ull) == 0)
2675
return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2676
}
2677
2678
if ((imm & 0xffff000000000000ull) != 0)
2679
FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2680
if ((imm & 0x0000ffff00000000ull) != 0)
2681
FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2682
if ((imm & 0x00000000ffff0000ull) != 0)
2683
FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2684
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2685
return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2686
return SLJIT_SUCCESS;
2687
}
2688
2689
if ((imm & 0xffffffff00000000ull) != 0)
2690
FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2691
if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2692
return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2693
return SLJIT_SUCCESS;
2694
}
2695
2696
static const struct ins_forms bitwise_and_forms = {
2697
0x1400, /* nr */
2698
0xb9800000, /* ngr */
2699
0xb9f40000, /* nrk */
2700
0xb9e40000, /* ngrk */
2701
0x54000000, /* n */
2702
0xe30000000054, /* ny */
2703
0xe30000000080, /* ng */
2704
};
2705
2706
static const struct ins_forms bitwise_or_forms = {
2707
0x1600, /* or */
2708
0xb9810000, /* ogr */
2709
0xb9f60000, /* ork */
2710
0xb9e60000, /* ogrk */
2711
0x56000000, /* o */
2712
0xe30000000056, /* oy */
2713
0xe30000000081, /* og */
2714
};
2715
2716
static const struct ins_forms bitwise_xor_forms = {
2717
0x1700, /* xr */
2718
0xb9820000, /* xgr */
2719
0xb9f70000, /* xrk */
2720
0xb9e70000, /* xgrk */
2721
0x57000000, /* x */
2722
0xe30000000057, /* xy */
2723
0xe30000000082, /* xg */
2724
};
2725
2726
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2727
sljit_s32 dst,
2728
sljit_s32 src1, sljit_sw src1w,
2729
sljit_s32 src2, sljit_sw src2w)
2730
{
2731
sljit_s32 type = GET_OPCODE(op);
2732
const struct ins_forms *forms;
2733
2734
if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
2735
sljit_s32 count16 = 0;
2736
sljit_uw imm = (sljit_uw)src2w;
2737
2738
if (op & SLJIT_32)
2739
imm &= 0xffffffffull;
2740
2741
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2742
count16++;
2743
if ((imm & 0x00000000ffff0000ull) != 0)
2744
count16++;
2745
if ((imm & 0x0000ffff00000000ull) != 0)
2746
count16++;
2747
if ((imm & 0xffff000000000000ull) != 0)
2748
count16++;
2749
2750
if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
2751
sljit_gpr src_r = tmp1;
2752
2753
if (FAST_IS_REG(src1))
2754
src_r = gpr(src1 & REG_MASK);
2755
else
2756
FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
2757
2758
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2759
return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
2760
if ((imm & 0x00000000ffff0000ull) != 0)
2761
return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
2762
if ((imm & 0x0000ffff00000000ull) != 0)
2763
return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
2764
return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
2765
}
2766
2767
if (!(op & SLJIT_SET_Z))
2768
return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2769
}
2770
2771
if (type == SLJIT_AND)
2772
forms = &bitwise_and_forms;
2773
else if (type == SLJIT_OR)
2774
forms = &bitwise_or_forms;
2775
else
2776
forms = &bitwise_xor_forms;
2777
2778
return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2779
}
2780
2781
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2782
sljit_s32 dst,
2783
sljit_s32 src1, sljit_sw src1w,
2784
sljit_s32 src2, sljit_sw src2w)
2785
{
2786
sljit_s32 type = GET_OPCODE(op);
2787
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2788
sljit_gpr src_r = tmp0;
2789
sljit_gpr base_r = tmp0;
2790
sljit_ins imm = 0;
2791
sljit_ins ins;
2792
2793
if (FAST_IS_REG(src1))
2794
src_r = gpr(src1);
2795
else
2796
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2797
2798
if (src2 != SLJIT_IMM) {
2799
if (FAST_IS_REG(src2))
2800
base_r = gpr(src2);
2801
else {
2802
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2803
base_r = tmp1;
2804
}
2805
2806
if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
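/* The masked 32-bit shifts must use the amount modulo 32, while the shift instructions
themselves consume the low 6 bits of the amount, so the amount is reduced here: risbg
extracts bits 59..63 into tmp1, or nill masks tmp1 with 0x1f in place. */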
2807
if (base_r != tmp1) {
2808
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2809
base_r = tmp1;
2810
} else
2811
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2812
}
2813
} else
2814
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2815
2816
if ((op & SLJIT_32) && dst_r == src_r) {
2817
if (type == SLJIT_SHL || type == SLJIT_MSHL)
2818
ins = 0x89000000 /* sll */;
2819
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2820
ins = 0x88000000 /* srl */;
2821
else
2822
ins = 0x8a000000 /* sra */;
2823
2824
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2825
} else {
2826
if (type == SLJIT_SHL || type == SLJIT_MSHL)
2827
ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2828
else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2829
ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2830
else
2831
ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2832
2833
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2834
}
2835
2836
if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2837
return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2838
2839
return SLJIT_SUCCESS;
2840
}
2841
2842
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2843
sljit_s32 dst,
2844
sljit_s32 src1, sljit_sw src1w,
2845
sljit_s32 src2, sljit_sw src2w)
2846
{
2847
sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2848
sljit_gpr src_r = tmp0;
2849
sljit_gpr base_r = tmp0;
2850
sljit_ins imm = 0;
2851
sljit_ins ins;
2852
2853
if (FAST_IS_REG(src1))
2854
src_r = gpr(src1);
2855
else
2856
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2857
2858
if (src2 != SLJIT_IMM) {
2859
if (FAST_IS_REG(src2))
2860
base_r = gpr(src2);
2861
else {
2862
FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2863
base_r = tmp1;
2864
}
2865
}
2866
2867
if (GET_OPCODE(op) == SLJIT_ROTR) {
2868
if (src2 != SLJIT_IMM) {
2869
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2870
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2871
base_r = tmp1;
2872
} else
2873
src2w = -src2w;
2874
}
2875
2876
if (src2 == SLJIT_IMM)
2877
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2878
2879
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2880
return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2881
}
2882
2883
static const struct ins_forms addc_forms = {
2884
0xb9980000, /* alcr */
2885
0xb9880000, /* alcgr */
2886
0,
2887
0,
2888
0,
2889
0xe30000000098, /* alc */
2890
0xe30000000088, /* alcg */
2891
};
2892
2893
static const struct ins_forms subc_forms = {
2894
0xb9990000, /* slbr */
2895
0xb9890000, /* slbgr */
2896
0,
2897
0,
2898
0,
2899
0xe30000000099, /* slb */
2900
0xe30000000089, /* slbg */
2901
};
2902
2903
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2904
sljit_s32 dst, sljit_sw dstw,
2905
sljit_s32 src1, sljit_sw src1w,
2906
sljit_s32 src2, sljit_sw src2w)
2907
{
2908
CHECK_ERROR();
2909
CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2910
ADJUST_LOCAL_OFFSET(dst, dstw);
2911
ADJUST_LOCAL_OFFSET(src1, src1w);
2912
ADJUST_LOCAL_OFFSET(src2, src2w);
2913
2914
compiler->mode = op & SLJIT_32;
2915
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2916
2917
if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
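/* For commutative operations an immediate first operand is swapped into the second position
(plain XOR swap of both the operand descriptors and the immediate values), so the code below
only has to handle immediates in src2. */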
2918
src1 ^= src2;
2919
src2 ^= src1;
2920
src1 ^= src2;
2921
2922
src1w ^= src2w;
2923
src2w ^= src1w;
2924
src1w ^= src2w;
2925
}
2926
2927
switch (GET_OPCODE(op)) {
2928
case SLJIT_ADD:
2929
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2930
return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2931
case SLJIT_ADDC:
2932
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2933
FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2934
if (dst & SLJIT_MEM)
2935
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2936
return SLJIT_SUCCESS;
2937
case SLJIT_SUB:
2938
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2939
return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2940
case SLJIT_SUBC:
2941
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2942
FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2943
if (dst & SLJIT_MEM)
2944
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2945
return SLJIT_SUCCESS;
2946
case SLJIT_MUL:
2947
FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2948
break;
2949
case SLJIT_AND:
2950
case SLJIT_OR:
2951
case SLJIT_XOR:
2952
FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2953
break;
2954
case SLJIT_SHL:
2955
case SLJIT_MSHL:
2956
case SLJIT_LSHR:
2957
case SLJIT_MLSHR:
2958
case SLJIT_ASHR:
2959
case SLJIT_MASHR:
2960
FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2961
break;
2962
case SLJIT_ROTL:
2963
case SLJIT_ROTR:
2964
FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2965
break;
2966
}
2967
2968
if (dst & SLJIT_MEM)
2969
return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2970
return SLJIT_SUCCESS;
2971
}
2972
2973
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2974
sljit_s32 src1, sljit_sw src1w,
2975
sljit_s32 src2, sljit_sw src2w)
2976
{
2977
sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
2978
2979
CHECK_ERROR();
2980
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2981
2982
SLJIT_SKIP_CHECKS(compiler);
2983
return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2984
}
2985
2986
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2987
sljit_s32 dst_reg,
2988
sljit_s32 src1, sljit_sw src1w,
2989
sljit_s32 src2, sljit_sw src2w)
2990
{
2991
CHECK_ERROR();
2992
CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2993
2994
switch (GET_OPCODE(op)) {
2995
case SLJIT_MULADD:
2996
SLJIT_SKIP_CHECKS(compiler);
2997
FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
2998
return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
2999
}
3000
3001
return SLJIT_SUCCESS;
3002
}
3003
3004
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
3005
sljit_s32 dst_reg,
3006
sljit_s32 src1_reg,
3007
sljit_s32 src2_reg,
3008
sljit_s32 src3, sljit_sw src3w)
3009
{
3010
sljit_s32 is_right;
3011
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
3012
sljit_gpr dst_r = gpr(dst_reg);
3013
sljit_gpr src1_r = gpr(src1_reg);
3014
sljit_gpr src2_r = gpr(src2_reg);
3015
sljit_gpr src3_r = tmp1;
3016
sljit_ins ins;
3017
3018
CHECK_ERROR();
3019
CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
3020
3021
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
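/* shift_into produces dst = (src1 << n) | (src2 >> (bit_length - n)) for left shifts and the
mirrored form for right shifts. Immediate amounts combine the two halves with a single risbg;
register amounts shift the second source by one plus the inverted amount (or by the negated
amount when SLJIT_SHIFT_INTO_NON_ZERO guarantees n != 0) and merge the halves with or/ogr. */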
3022
3023
if (src1_reg == src2_reg) {
3024
SLJIT_SKIP_CHECKS(compiler);
3025
return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
3026
}
3027
3028
ADJUST_LOCAL_OFFSET(src3, src3w);
3029
3030
if (src3 == SLJIT_IMM) {
3031
src3w &= bit_length - 1;
3032
3033
if (src3w == 0)
3034
return SLJIT_SUCCESS;
3035
3036
if (op & SLJIT_32) {
3037
if (dst_r == src1_r) {
3038
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3039
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
3040
} else {
3041
ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3042
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3043
}
3044
} else {
3045
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3046
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3047
}
3048
3049
ins = 0xec0000000055 /* risbg */;
3050
3051
if (is_right) {
3052
src3w = bit_length - src3w;
3053
ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
3054
} else
3055
ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
3056
3057
return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
3058
}
3059
3060
if (!(src3 & SLJIT_MEM)) {
3061
src3_r = gpr(src3);
3062
3063
if (dst_r == src3_r) {
3064
FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
3065
src3_r = tmp1;
3066
}
3067
} else
3068
FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
3069
3070
if (op & SLJIT_32) {
3071
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
3072
if (src3_r != tmp1) {
3073
FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
3074
src3_r = tmp1;
3075
} else
3076
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
3077
}
3078
3079
if (dst_r == src1_r) {
3080
ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3081
FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
3082
} else {
3083
ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3084
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3085
}
3086
3087
if (src3_r != tmp1) {
3088
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
3089
FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
3090
} else
3091
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
3092
3093
ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
3094
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
3095
3096
return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
3097
}
3098
3099
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3100
FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3101
3102
ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
3103
3104
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
3105
if (src3_r != tmp1)
3106
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
3107
3108
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
3109
src2_r = tmp0;
3110
3111
if (src3_r != tmp1)
3112
FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
3113
else
3114
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
3115
} else
3116
FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
3117
3118
FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
3119
return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
3120
}
3121
3122
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3123
sljit_s32 src, sljit_sw srcw)
3124
{
3125
sljit_gpr src_r;
3126
struct addr addr;
3127
3128
CHECK_ERROR();
3129
CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3130
ADJUST_LOCAL_OFFSET(src, srcw);
3131
3132
switch (op) {
3133
case SLJIT_FAST_RETURN:
3134
src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3135
if (src & SLJIT_MEM)
3136
FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3137
3138
return push_inst(compiler, br(src_r));
3139
case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3140
return SLJIT_SUCCESS;
3141
case SLJIT_PREFETCH_L1:
3142
case SLJIT_PREFETCH_L2:
3143
case SLJIT_PREFETCH_L3:
3144
case SLJIT_PREFETCH_ONCE:
3145
FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3146
return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3147
default:
3148
return SLJIT_SUCCESS;
3149
}
3150
3151
return SLJIT_SUCCESS;
3152
}
3153
3154
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3155
sljit_s32 dst, sljit_sw dstw)
3156
{
3157
sljit_gpr dst_r = link_r;
3158
sljit_s32 size;
3159
3160
CHECK_ERROR();
3161
CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3162
ADJUST_LOCAL_OFFSET(dst, dstw);
3163
3164
switch (op) {
3165
case SLJIT_FAST_ENTER:
3166
if (FAST_IS_REG(dst))
3167
return push_inst(compiler, lgr(gpr(dst), link_r));
3168
break;
3169
case SLJIT_GET_RETURN_ADDRESS:
3170
dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3171
3172
size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3173
FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3174
break;
3175
}
3176
3177
if (dst & SLJIT_MEM)
3178
return store_word(compiler, dst_r, dst, dstw, 0);
3179
3180
return SLJIT_SUCCESS;
3181
}
3182
3183
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3184
{
3185
CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3186
3187
if (type == SLJIT_GP_REGISTER)
3188
return (sljit_s32)gpr(reg);
3189
3190
if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128)
3191
return -1;
3192
3193
return (sljit_s32)freg_map[reg];
3194
}
3195
3196
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3197
void *instruction, sljit_u32 size)
3198
{
3199
sljit_ins ins = 0;
3200
3201
CHECK_ERROR();
3202
CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3203
3204
memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3205
return push_inst(compiler, ins);
3206
}
3207
3208
/* --------------------------------------------------------------------- */
3209
/* Floating point operators */
3210
/* --------------------------------------------------------------------- */
3211
3212
#define FLOAT_LOAD 0
3213
#define FLOAT_STORE 1
3214
3215
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
3216
sljit_s32 reg,
3217
sljit_s32 mem, sljit_sw memw)
3218
{
3219
struct addr addr;
3220
sljit_ins ins;
3221
3222
SLJIT_ASSERT(mem & SLJIT_MEM);
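/* Indexed addresses and displacements that fit an unsigned 12-bit field (or that overflow the
signed 20-bit field and are materialized into a register by make_addr_bx) use the short RX
forms ste/std/le/ld; the remaining signed 20-bit displacements use the long-displacement RXY
forms stey/stdy/ley/ldy. */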
3223
3224
if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
3225
FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
3226
3227
if (op & FLOAT_STORE)
3228
ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
3229
else
3230
ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
3231
3232
return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
3233
}
3234
3235
FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
3236
3237
if (op & FLOAT_STORE)
3238
ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
3239
else
3240
ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
3241
3242
return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3243
}
3244
3245
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
3246
sljit_s32 reg,
3247
sljit_s32 src, sljit_sw srcw)
3248
{
3249
struct addr addr;
3250
3251
if (!(src & SLJIT_MEM))
3252
return push_inst(compiler, ins_r | F4(reg) | F0(src));
3253
3254
FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
3255
return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
3256
}
3257
3258
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3259
sljit_s32 dst, sljit_sw dstw,
3260
sljit_s32 src, sljit_sw srcw)
3261
{
3262
sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3263
sljit_ins ins;
3264
3265
if (src & SLJIT_MEM) {
3266
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3267
src = TMP_FREG1;
3268
}
3269
3270
/* M3 (the rounding mode) is set to 5: round toward zero. */
3271
if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3272
ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3273
else
3274
ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3275
3276
FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3277
3278
if (dst & SLJIT_MEM)
3279
return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3280
3281
return SLJIT_SUCCESS;
3282
}
3283
3284
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
3285
sljit_s32 dst, sljit_sw dstw,
3286
sljit_s32 src, sljit_sw srcw)
3287
{
3288
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3289
3290
if (src == SLJIT_IMM) {
3291
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3292
src = (sljit_s32)tmp0;
3293
}
3294
else if (src & SLJIT_MEM) {
3295
FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
3296
src = (sljit_s32)tmp0;
3297
}
3298
3299
FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
3300
3301
if (dst & SLJIT_MEM)
3302
return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
3303
3304
return SLJIT_SUCCESS;
3305
}
3306
3307
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3308
sljit_s32 dst, sljit_sw dstw,
3309
sljit_s32 src, sljit_sw srcw)
3310
{
3311
sljit_ins ins;
3312
3313
if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3314
srcw = (sljit_s32)srcw;
3315
3316
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3317
ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3318
else
3319
ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3320
3321
return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3322
}
3323
3324
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3325
sljit_s32 dst, sljit_sw dstw,
3326
sljit_s32 src, sljit_sw srcw)
3327
{
3328
sljit_ins ins;
3329
3330
if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3331
srcw = (sljit_u32)srcw;
3332
3333
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3334
ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3335
else
3336
ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3337
3338
return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3339
}
3340
3341
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3342
sljit_s32 src1, sljit_sw src1w,
3343
sljit_s32 src2, sljit_sw src2w)
3344
{
3345
sljit_ins ins_r, ins;
3346
3347
if (src1 & SLJIT_MEM) {
3348
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3349
src1 = TMP_FREG1;
3350
}
3351
3352
if (op & SLJIT_32) {
3353
ins_r = 0xb3090000 /* cebr */;
3354
ins = 0xed0000000009 /* ceb */;
3355
} else {
3356
ins_r = 0xb3190000 /* cdbr */;
3357
ins = 0xed0000000019 /* cdb */;
3358
}
3359
3360
return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3361
}
3362
3363
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3364
sljit_s32 dst, sljit_sw dstw,
3365
sljit_s32 src, sljit_sw srcw)
3366
{
3367
sljit_s32 dst_r;
3368
sljit_ins ins;
3369
3370
CHECK_ERROR();
3371
3372
SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3373
3374
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3375
3376
if (op == SLJIT_CONV_F64_FROM_F32)
3377
FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3378
else {
3379
if (src & SLJIT_MEM) {
3380
FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3381
src = dst_r;
3382
}
3383
3384
switch (GET_OPCODE(op)) {
3385
case SLJIT_MOV_F64:
3386
if (FAST_IS_REG(dst)) {
3387
if (dst == src)
3388
return SLJIT_SUCCESS;
3389
3390
ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3391
break;
3392
}
3393
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3394
case SLJIT_CONV_F64_FROM_F32:
3395
/* Only SLJIT_CONV_F32_FROM_F64. */
3396
ins = 0xb3440000 /* ledbr */;
3397
break;
3398
case SLJIT_NEG_F64:
3399
ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3400
break;
3401
default:
3402
SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3403
ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3404
break;
3405
}
3406
3407
FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3408
}
3409
3410
if (dst & SLJIT_MEM)
3411
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3412
3413
return SLJIT_SUCCESS;
3414
}
3415
3416
#define FLOAT_MOV(op, dst_r, src_r) \
3417
(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3418
3419
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3420
sljit_s32 dst, sljit_sw dstw,
3421
sljit_s32 src1, sljit_sw src1w,
3422
sljit_s32 src2, sljit_sw src2w)
3423
{
3424
sljit_s32 dst_r = TMP_FREG1;
3425
sljit_ins ins_r, ins;
3426
3427
CHECK_ERROR();
3428
CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3429
ADJUST_LOCAL_OFFSET(dst, dstw);
3430
ADJUST_LOCAL_OFFSET(src1, src1w);
3431
ADJUST_LOCAL_OFFSET(src2, src2w);
3432
3433
	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			if (dst == src1)
				break;

			if (dst == src2) {
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (src2 & SLJIT_MEM) {
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
		src2 = TMP_FREG1;
	}

	if (src1 & SLJIT_MEM) {
		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
		src1 = reg;
	}

	return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	union {
		sljit_s32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

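	/* Presumed rationale: ldgr copies the whole 64-bit GPR into the FPR, and
	   short (32-bit) BFP values occupy the upper half of an FPR, so the bit
	   pattern is shifted into the high word first. */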
	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	union {
		sljit_sw imm;
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_gpr gen_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	gen_r = gpr(reg);

	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
		if (op & SLJIT_32) {
			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
			gen_r = tmp0;
		}

		return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
	}

	FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));

	if (!(op & SLJIT_32))
		return SLJIT_SUCCESS;

	return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* record jump */
	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	jump->addr = compiler->size;

	/* emit jump instruction */
	type &= 0xff;
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
	else
		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));

	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));

	if (src == SLJIT_IMM) {
		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}
	else if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
	}

	/* emit jump instruction */
	if (type >= SLJIT_FAST_CALL)
		return push_inst(compiler, basr(link_r, src_r));

	return push_inst(compiler, br(src_r));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(arg_types);

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	}

	if (type & SLJIT_CALL_RETURN) {
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
			srcw = 0;
		}

		FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP;
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1;
	sljit_u8 mask = get_cc(compiler, type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* TODO(mundaym): fold into cmov helper function? */
	#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
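		/* No load-on-condition facility: preload 1, then branch over the
		   following 4-byte load of 0 (2 + 2 halfwords) when the condition
		   in mask holds. */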
		FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
	}
	#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
	#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
	#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins mask;
	sljit_gpr src_r;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (dst_reg != src2_reg) {
		if (src1 == dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
				src1 = src2_reg;
				src1w = 0;
				type ^= 0x1;
			} else
				FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
		}
	}

	mask = get_cc(compiler, type & ~SLJIT_32);

	if (src1 & SLJIT_MEM) {
		if (src1 & OFFS_REG_MASK) {
			src_r = gpr(OFFS_REG(src1));

			if (src1w != 0) {
				FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
				src_r = tmp1;
			}

			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
			src_r = tmp1;
			src1w = 0;
		} else if (!is_s20(src1w)) {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));

			if (src1 & REG_MASK)
				FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));

			src_r = tmp1;
			src1w = 0;
		} else
			src_r = gpr(src1 & REG_MASK);

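		/* Load-on-condition from memory: dst_reg is only overwritten when
		   the condition encoded in mask holds. */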
		ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
		return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
	}

	if (src1 == SLJIT_IMM) {
		if (type & SLJIT_32)
			src1w = (sljit_s32)src1w;

		if (have_lscond2() && is_s16(src1w)) {
			ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
			return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
		}

		FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
		src_r = tmp1;
	} else
		src_r = gpr(src1);

	ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
	return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins ins;
	struct sljit_label *label;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else {
			ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
			FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
		}
	}

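	/* Branch around the move with the inverted condition (there is no
	   floating-point load-on-condition): jump, move, then label. */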
	SLJIT_SKIP_CHECKS(compiler);
	jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
	FAIL_IF(!jump);

	if (!(src1 & SLJIT_MEM)) {
		ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
		FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
	} else
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));

	SLJIT_SKIP_CHECKS(compiler);
	label = sljit_emit_label(compiler);
	FAIL_IF(!label);

	sljit_set_label(jump, label);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_ins ins, reg1, reg2, base, offs = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	base = gpr(mem & REG_MASK);
	reg1 = gpr(REG_PAIR_FIRST(reg));
	reg2 = gpr(REG_PAIR_SECOND(reg));

	if (mem & OFFS_REG_MASK) {
		memw &= 0x3;
		offs = gpr(OFFS_REG(mem));

		if (memw != 0) {
			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
			offs = tmp1;
		} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
			FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
			base = tmp1;
			offs = 0;
		}

		memw = 0;
	} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
		FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));

		if (base == 0)
			base = tmp1;
		else
			offs = tmp1;

		memw = 0;
	}

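	/* Consecutive registers addressed with base + displacement can use a
	   single lmg/stmg; otherwise fall back to two lg/stg instructions. */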
	if (offs == 0 && reg2 == (reg1 + 1)) {
		ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
		return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
	}

	ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);

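	/* When loading and the base register is also the first destination,
	   load the second word first so the base is not clobbered early. */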
	if (!(type & SLJIT_MEM_STORE) && base == reg1) {
		FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
		return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
	}

	FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
	return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	struct addr addr;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, vreg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (!(srcdst & SLJIT_MEM)) {
		if (type & SLJIT_SIMD_STORE)
			ins = F36(srcdst) | F32(vreg);
		else
			ins = F36(vreg) | F32(srcdst);

		return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
	}

	FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
	ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);

	if (alignment >= 4)
		ins |= 4 << 12;
	else if (alignment == 3)
		ins |= 3 << 12;

	return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	struct addr addr;
	sljit_gpr reg;
	sljit_sw sign_ext;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
		return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(vreg)
			| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (src == SLJIT_IMM)
			return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg));

		return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src) | ((sljit_ins)elem_size << 12));
	}

	if (src == SLJIT_IMM) {
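		/* vrepi only encodes a 16-bit signed immediate; 0x10000 marks a
		   value that does not fit and must be loaded through a GPR. */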
		sign_ext = 0x10000;

		switch (elem_size) {
		case 0:
			srcw &= 0xff;
			sign_ext = (sljit_s8)srcw;
			break;
		case 1:
			srcw &= 0xffff;
			sign_ext = (sljit_s16)srcw;
			break;
		case 2:
			if ((sljit_s32)srcw == (sljit_s16)srcw) {
				srcw &= 0xffff;
				sign_ext = (sljit_s16)srcw;
			} else
				srcw &= 0xffffffff;
			break;
		default:
			if (srcw == (sljit_s16)srcw) {
				srcw &= 0xffff;
				sign_ext = (sljit_s16)srcw;
			}
			break;
		}

		if (sign_ext != 0x10000) {
			if (sign_ext == 0 || sign_ext == -1)
				return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)
					| (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));

			return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(vreg)
				| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
		}

		push_load_imm_inst(compiler, tmp0, srcw);
		reg = tmp0;
	} else
		reg = gpr(src);

	FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(vreg) | ((sljit_ins)elem_size << 12));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	struct addr addr;
	sljit_gpr reg;
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, vreg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (srcdst & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
		ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
	}

	if (type & SLJIT_SIMD_LANE_ZERO) {
		if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
			return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));

		if ((type & SLJIT_SIMD_FLOAT) && vreg == srcdst) {
			FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(vreg)));
			srcdst = TMP_FREG1;
			srcdstw = 0;
		}

		FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(vreg)));
	}

	if (srcdst & SLJIT_MEM) {
		switch (elem_size) {
		case 0:
			ins |= 0xe70000000000 /* vleb */;
			break;
		case 1:
			ins |= 0xe70000000001 /* vleh */;
			break;
		case 2:
			ins |= 0xe70000000003 /* vlef */;
			break;
		default:
			ins |= 0xe70000000002 /* vleg */;
			break;
		}

		/* Convert to vsteb - vsteg */
		if (type & SLJIT_SIMD_STORE)
			ins |= 0x8;

		return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (type & SLJIT_SIMD_STORE)
			return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(vreg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));

		if (elem_size == 3) {
			if (lane_index == 0)
				ins = F32(srcdst) | F28(vreg) | (1 << 12);
			else
				ins = F32(vreg) | F28(srcdst);

			return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(vreg) | ins);
		}

		FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
	}

	if (srcdst == SLJIT_IMM) {
		switch (elem_size) {
		case 0:
			ins = 0xe70000000040 /* vleib */;
			srcdstw &= 0xff;
			break;
		case 1:
			ins = 0xe70000000041 /* vleih */;
			srcdstw &= 0xffff;
			break;
		case 2:
			if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
				srcdstw &= 0xffff;
				ins = 0xe70000000043 /* vleif */;
			} else
				srcdstw &= 0xffffffff;
			break;
		default:
			if (srcdstw == (sljit_s16)srcdstw) {
				srcdstw &= 0xffff;
				ins = 0xe70000000042 /* vleig */;
			}
			break;
		}

		if (ins != 0)
			return push_inst(compiler, ins | F36(vreg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));

		push_load_imm_inst(compiler, tmp0, srcdstw);
		reg = tmp0;
	} else
		reg = gpr(srcdst);

	ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);

	if (!(type & SLJIT_SIMD_STORE))
		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(vreg) | R32A(reg) | ins);

	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(vreg) | ins));

	if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
		return SLJIT_SUCCESS;

	switch (elem_size) {
	case 0:
		ins = 0xb9060000 /* lgbr */;
		break;
	case 1:
		ins = 0xb9070000 /* lghr */;
		break;
	default:
		ins = 0xb9140000 /* lgfr */;
		break;
	}

	return push_inst(compiler, ins | R4A(reg) | R0A(reg));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, vreg, src, src_lane_index));

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(vreg) | F32(src)
		| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	struct addr addr;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, vreg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
		ins = F36(vreg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);

		switch (elem2_size - elem_size) {
		case 1:
			ins |= 0xe70000000002 /* vleg */;
			break;
		case 2:
			ins |= 0xe70000000003 /* vlef */;
			break;
		default:
			ins |= 0xe70000000001 /* vleh */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins));
		src = vreg;
	}

	if (type & SLJIT_SIMD_FLOAT) {
		FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(vreg) | F32(src) | (2 << 12)));
		FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(vreg) | F32(vreg) | (32 << 16) | (3 << 12)));
		return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(vreg) | F32(vreg) | (2 << 12));
	}

	ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(vreg);

	do {
		FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
		src = vreg;
	} while (++elem_size < elem2_size);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 vreg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_gpr dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, vreg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

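	/* The constants below are bit-index tables for vbperm, chosen so that
	   each element's sign bit is gathered into the low bits of TMP_FREG1
	   (see the vlgv extraction further down); layout depends on elem_size. */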
	switch (elem_size) {
	case 0:
		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
		push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
		FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
		break;
	case 1:
		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
		break;
	case 2:
		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
		break;
	default:
		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
		break;
	}

	if (elem_size != 0)
		FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));

	FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(vreg) | F28(TMP_FREG1)));

	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
		| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));

	if (dst_r == tmp0)
		return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_vreg, sljit_s32 src1_vreg, sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment;
	struct addr addr;
	sljit_ins ins = 0, load_ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_vreg, src1_vreg, src2, src2w));
	ADJUST_LOCAL_OFFSET(src2, src2w);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = 0xe70000000068 /* vn */;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = 0xe7000000006a /* vo */;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = 0xe7000000006d /* vx */;
		break;
	case SLJIT_SIMD_OP2_SHUFFLE:
		ins = 0xe7000000008c /* vperm */;
		break;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, src2, src2w, tmp1));
		load_ins = 0xe70000000006 /* vl */ | F36(TMP_FREG1) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
		alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);

		if (alignment >= 4)
			load_ins |= 4 << 12;
		else if (alignment == 3)
			load_ins |= 3 << 12;

		FAIL_IF(push_inst(compiler, load_ins));
		src2 = TMP_FREG1;
	}

	if (SLJIT_SIMD_GET_OPCODE(type) == SLJIT_SIMD_OP2_SHUFFLE)
		return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src1_vreg) | F12(src2));

	return push_inst(compiler, ins | F36(dst_vreg) | F32(src1_vreg) | F28(src2));
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	if (op & SLJIT_ATOMIC_USE_LS)
		return SLJIT_ERR_UNSUPPORTED;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		if (op & SLJIT_ATOMIC_TEST)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op1(compiler, op & ~SLJIT_ATOMIC_USE_CAS, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins;
	sljit_gpr tmp_r = gpr(temp_reg);
	sljit_gpr mem_r = gpr(mem_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	if (op & SLJIT_ATOMIC_USE_LS)
		return SLJIT_ERR_UNSUPPORTED;

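	/* Map to compare-and-swap: cs/csg compares temp_reg with the word at
	   mem_reg and replaces it with src_reg on a match; the condition code
	   reports whether the exchange happened. */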
	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		ins = 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r);
		break;
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		ins = 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r);
		break;
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	if (op & SLJIT_ATOMIC_TEST)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ins);
}

/* --------------------------------------------------------------------- */
/*  Other instructions                                                    */
/* --------------------------------------------------------------------- */

/* On s390x we build a literal pool to hold constants. This has two main
   advantages:

     1. we only need one instruction in the instruction stream (LGRL)
     2. we can store 64 bit addresses and use 32 bit offsets

   To retrofit the extra information needed to build the literal pool we
   add a new sljit_s390x_const struct that contains the initial value but
   can still be cast to a sljit_const. */
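/* Illustrative sketch (not emitted verbatim): with the general instruction
   extensions a constant load is a single pc-relative
       lgrl  <dst>, <pool entry>
   and the fallback is
       larl  <tmp>, <pool entry>
       lg    <dst>, 0(<tmp>)
   The pool entry itself is what sljit_set_const() / sljit_set_jump_addr()
   patch later. */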

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
		sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	if (have_genext())
		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
	else {
		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	/* Update the constant pool. */
	sljit_uw *ptr = (sljit_uw *)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	*ptr = new_target;
	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

	if (have_genext())
		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
	else {
		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));

	return jump;
}

/* TODO(carenas): EVAL probably should move up or be refactored */
#undef WHEN2
#undef EVAL

#undef tmp1
#undef tmp0

/* TODO(carenas): undef other macros that spill like is_u12? */