/* Extracted from: stenzek/duckstation, dep/lzma/src/Bra.c (vendored 7-Zip / LZMA SDK source). */
1
/* Bra.c -- Branch converters for RISC code
2024-01-20 : Igor Pavlov : Public domain */

#include "Precomp.h"

#include "Bra.h"
#include "RotateDefs.h"
#include "CpuArch.h"

/* When the pointer size is known (4 or 8 bytes), the program counter can be
   carried inside (pc + p) so BR_PC_GET is a single addition per use. */
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
/* Fold the data pointer into pc once, so BR_PC_GET == pc of the current byte. */
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p;
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
/* Fallback: track pc relative to the end-of-buffer (lim) distance. */
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/* encoding != 0 : absolute = relative + pc (compress direction)
   encoding == 0 : relative = absolute - pc (decompress direction) */
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

#define Z7_BRANCH_CONV(name) z7_ ## name

/* Shared static worker; the public enc/dec entry points below wrap it with a
   constant (encoding) flag so the compiler specializes both directions. */
#define Z7_BRANCH_FUNC_MAIN(name) \
static \
Z7_FORCE_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *Z7_BRANCH_CONV(name)(Byte *p, SizeT size, UInt32 pc, int encoding)

#define Z7_BRANCH_FUNC_IMP(name, m, encoding) \
Z7_NO_INLINE \
Z7_ATTRIB_NO_VECTOR \
Byte *m(name)(Byte *data, SizeT size, UInt32 pc) \
{ return Z7_BRANCH_CONV(name)(data, size, pc, encoding); } \

#ifdef Z7_EXTRACT_ONLY
/* Extract-only builds need only the decoder. */
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0)
#else
#define Z7_BRANCH_FUNCS_IMP(name) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_DEC_2, 0) \
  Z7_BRANCH_FUNC_IMP(name, Z7_BRANCH_CONV_ENC_2, 1)
#endif

/* Loop-shape tuning: clang prefers a single flat loop (continue), other
   compilers get an explicit outer for(;;) with break to restart scanning. */
#if defined(__clang__)
  #define BR_EXTERNAL_FOR
  #define BR_NEXT_ITERATION  continue;
#else
  #define BR_EXTERNAL_FOR    for (;;)
  #define BR_NEXT_ITERATION  break;
#endif

/* NOTE: the trailing backslash below deliberately splices the comment line
   into the #if condition; line splicing happens before comment removal, so
   this is well-formed. (__GNUC__ >= 1000) keeps the branch off for GCC. */
#if defined(__clang__) && (__clang_major__ >= 8) \
  || defined(__GNUC__) && (__GNUC__ >= 1000) \
    // GCC is not good for __builtin_expect() here
  /* || defined(_MSC_VER) && (_MSC_VER >= 1920) */
  // #define Z7_unlikely [[unlikely]]
  // #define Z7_LIKELY(x)   (__builtin_expect((x), 1))
  #define Z7_UNLIKELY(x) (__builtin_expect((x), 0))
  // #define Z7_likely [[likely]]
#else
  // #define Z7_LIKELY(x)   (x)
  #define Z7_UNLIKELY(x) (x)
  // #define Z7_likely
#endif
73
74
75
/* Branch converter for ARM64 (AArch64) code.
   Rewrites the PC-relative immediates of BL (opcode 0x94xxxxxx) and ADRP
   (opcode pattern 1??10000) instructions to/from absolute form so that
   identical call targets produce identical bytes for the compressor.
   Processes (size & ~3) bytes of 32-bit little-endian words; returns the
   pointer just past the last processed word.
   NOTE(review): GetUi32a/SetUi32a are the "aligned" accessors — data is
   presumably 4-byte aligned here; confirm against callers. */
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM64)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << (24 - 4);           // sign-test bit for the 21-bit ADRP immediate
  const UInt32 mask = ((UInt32)1 << 24) - (flag << 1); // range check mask for ADRP conversion
  size &= ~(SizeT)3;
  // if (size == 0) return p;
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction

  BR_EXTERNAL_FOR
  {
    // Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE
    for (;;)
    {
      UInt32 v;
      if Z7_UNLIKELY(p == lim)
        return p;
      v = GetUi32a(p);
      p += 4;
      // BL: top 6 bits == 100101 -> convert its 26-bit word offset
      if Z7_UNLIKELY(((v - 0x94000000) & 0xfc000000) == 0)
      {
        UInt32 c = BR_PC_GET >> 2; // byte pc -> instruction (word) offset
        BR_CONVERT_VAL(v, c)
        v &= 0x03ffffff;
        v |= 0x94000000;
        SetUi32a(p - 4, v)
        BR_NEXT_ITERATION
      }
      // v = rotlFixed(v, 8); v += (flag << 8) - 0x90; if Z7_UNLIKELY((v & ((mask << 8) + 0x9f)) == 0)
      // ADRP detection: opcode bits 1??10000 in the top byte
      v -= 0x90000000; if Z7_UNLIKELY((v & 0x9f000000) == 0)
      {
        UInt32 z, c;
        // v = rotrFixed(v, 8);
        // only convert when the immediate is in the safe +/- range
        v += flag; if Z7_UNLIKELY(v & mask) continue;
        // reassemble the split ADRP immediate (immlo in bits 30:29, immhi in 23:5)
        z = (v & 0xffffffe0) | (v >> 26);
        c = (BR_PC_GET >> (12 - 3)) & ~(UInt32)7; // pc in 4KB pages, pre-shifted into z's layout
        BR_CONVERT_VAL(z, c)
        v &= 0x1f;
        v |= 0x90000000;
        v |= z << 26;
        v |= 0x00ffffe0 & ((z & (((flag << 1) - 1))) - flag); // re-split immhi with sign handling
        SetUi32a(p - 4, v)
      }
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM64)
125
126
127
Z7_BRANCH_FUNC_MAIN(BranchConv_ARM)
128
{
129
// Byte *p = data;
130
const Byte *lim;
131
size &= ~(SizeT)3;
132
lim = p + size;
133
BR_PC_INIT
134
/* in ARM: branch offset is relative to the +2 instructions from current instruction.
135
(p) will point to next instruction */
136
pc += 8 - 4;
137
138
for (;;)
139
{
140
for (;;)
141
{
142
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
143
if Z7_UNLIKELY(p >= lim) { return p; } p += 4; if Z7_UNLIKELY(p[-1] == 0xeb) break;
144
}
145
{
146
UInt32 v = GetUi32a(p - 4);
147
UInt32 c = BR_PC_GET >> 2;
148
BR_CONVERT_VAL(v, c)
149
v &= 0x00ffffff;
150
v |= 0xeb000000;
151
SetUi32a(p - 4, v)
152
}
153
}
154
}
155
Z7_BRANCH_FUNCS_IMP(BranchConv_ARM)
156
157
158
/* Branch converter for PowerPC (big-endian) code.
   Rewrites the 24-bit byte offset of "bl" instructions
   (value & 0xfc000003 == 0x48000001, i.e. opcode 18 with AA=0, LK=1).
   Processes (size & ~3) bytes; returns the pointer past the last scanned word. */
Z7_BRANCH_FUNC_MAIN(BranchConv_PPC)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)3;
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction

  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      // v = GetBe32a(p);
      /* NOTE(review): raw aligned UInt32 load in native byte order; the
         detection constants below are pre-converted to native order, so the
         byte swap is deferred until a match is found. Assumes p is 4-aligned. */
      v = *(UInt32 *)(void *)p;
      p += 4;
      // if ((v & 0xfc000003) == 0x48000001) break;
      // if ((p[-4] & 0xFC) == 0x48 && (p[-1] & 3) == 1) break;
      if Z7_UNLIKELY(
            ((v - Z7_CONV_BE_TO_NATIVE_CONST32(0x48000001))
             & Z7_CONV_BE_TO_NATIVE_CONST32(0xfc000003)) == 0) break;
    }
    {
      v = Z7_CONV_NATIVE_TO_BE_32(v); // now convert the matched word to big-endian value
      {
        UInt32 c = BR_PC_GET; // PPC branch offsets are byte offsets (low 2 bits are flags)
        BR_CONVERT_VAL(v, c)
      }
      v &= 0x03ffffff; // keep LI field (and LK bit)
      v |= 0x48000000; // restore opcode 18
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_PPC)
196
197
198
/* Use the rotate-based match/convert path on CPUs with fast rotate instructions. */
#ifdef Z7_CPU_FAST_ROTATE_SUPPORTED
  #define BR_SPARC_USE_ROTATE
#endif

/* Branch converter for SPARC code.
   Rewrites the 30-bit word offset of CALL instructions (top 2 bits == 01),
   restricted to targets within the signed 23-bit range checked below.
   Processes (size & ~3) bytes of big-endian words; returns the pointer past
   the last scanned word. Two equivalent detection/rebuild paths are provided:
   a rotate-based one and a shift/xor-based one. */
Z7_BRANCH_FUNC_MAIN(BranchConv_SPARC)
{
  // Byte *p = data;
  const Byte *lim;
  const UInt32 flag = (UInt32)1 << 22; // sign bit of the accepted offset range
  size &= ~(SizeT)3;
  lim = p + size;
  BR_PC_INIT
  pc -= 4; // because (p) will point to next instruction
  for (;;)
  {
    UInt32 v;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      /* // the code without GetBe32a():
      { const UInt32 v = GetUi16a(p) & 0xc0ff; p += 4; if (v == 0x40 || v == 0xc07f) break; }
      */
      v = GetBe32a(p);
      p += 4;
      #ifdef BR_SPARC_USE_ROTATE
      /* rotate opcode bits to the bottom, then one add + mask tests both
         the opcode (01) and that the offset is in the +/- 2^22 word range */
      v = rotlFixed(v, 2);
      v += (flag << 2) - 1;
      if Z7_UNLIKELY((v & (3 - (flag << 3))) == 0)
      #else
      /* same check without rotates: normalize the top bits, then range-test */
      v += (UInt32)5 << 29;
      v ^= (UInt32)7 << 29;
      v += flag;
      if Z7_UNLIKELY((v & (0 - (flag << 1))) == 0)
      #endif
        break;
    }
    {
      // UInt32 v = GetBe32a(p - 4);
      #ifndef BR_SPARC_USE_ROTATE
      v <<= 2; // word offset -> byte offset position for conversion
      #endif
      {
        UInt32 c = BR_PC_GET;
        BR_CONVERT_VAL(v, c)
      }
      v &= (flag << 3) - 1; // keep low 25 bits of the converted value
      #ifdef BR_SPARC_USE_ROTATE
      v -= (flag << 2) - 1; // undo the detection bias, then rotate opcode back
      v = rotrFixed(v, 2);
      #else
      v -= (flag << 2);
      v >>= 2;
      v |= (UInt32)1 << 30; // restore CALL opcode bit
      #endif
      SetBe32a(p - 4, v)
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_SPARC)
258
259
260
/* Branch converter for ARM Thumb code.
   Rewrites 32-bit Thumb BL instruction pairs: two 16-bit halfwords whose top
   5 bits are 11110 (first half) and 11111 (second half), carrying an 11-bit
   offset field each. Scans 16-bit steps over (size & ~1) - 2 bytes; returns
   the pointer past the last fully processed position. */
Z7_BRANCH_FUNC_MAIN(BranchConv_ARMT)
{
  // Byte *p = data;
  Byte *lim;
  size &= ~(SizeT)1;
  // if (size == 0) return p;
  if (size <= 2) return p; // need at least one full 4-byte BL pair
  size -= 2;
  lim = p + size;
  BR_PC_INIT
  /* in ARM: branch offset is relative to the +2 instructions from current instruction.
     (p) will point to the +2 instructions from current instruction */
  // pc += 4 - 4;
  // if (encoding) pc -= 0xf800 << 1; else pc += 0xf800 << 1;

  // #define ARMT_TAIL_PROC { goto armt_tail; }
  /* NOTE(review): this macro is function-local by convention but is not
     #undef'd; it leaks into the rest of the translation unit. */
  #define ARMT_TAIL_PROC { return p; }

  do
  {
    /* in MSVC 32-bit x86 compilers:
       UInt32 version : it loads value from memory with movzx
       Byte version   : it loads value to 8-bit register (AL/CL)
       movzx version is slightly faster in some cpus
    */
    unsigned b1;
    // Byte / unsigned
    b1 = p[1];
    // optimized version to reduce one (p >= lim) check:
    // unsigned a1 = p[1]; b1 = p[3]; p += 2; if Z7_LIKELY((b1 & (a1 ^ 8)) < 0xf8)
    for (;;)
    {
      unsigned b3; // Byte / UInt32
      /* (Byte)(b3) normalization can use low byte computations in MSVC.
         It gives smaller code, and no loss of speed in some compilers/cpus.
         But new MSVC 32-bit x86 compilers use more slow load
         from memory to low byte register in that case.
         So we try to use full 32-bit computations for faster code.
      */
      /* (cur & (prev ^ 8)) >= 0xf8 matches prev-top-byte 0xf0..0xf7 followed
         by cur-top-byte 0xf8..0xff, i.e. a BL halfword pair; the loop is
         unrolled x2 with b1/b3 alternating as "previous high byte". */
      // if (p >= lim) { ARMT_TAIL_PROC } b3 = b1 + 8; b1 = p[3]; p += 2; if ((b3 & b1) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b3 = p[3]; p += 2; if Z7_UNLIKELY((b3 & (b1 ^ 8)) >= 0xf8) break;
      if Z7_UNLIKELY(p >= lim) { ARMT_TAIL_PROC } b1 = p[3]; p += 2; if Z7_UNLIKELY((b1 & (b3 ^ 8)) >= 0xf8) break;
    }
    {
      /* we can adjust pc for (0xf800) to rid of (& 0x7FF) operation.
         But gcc/clang for arm64 can use bfi instruction for full code here */
      /* combine the two 11-bit offset halves into one 22-bit value */
      UInt32 v =
          ((UInt32)GetUi16a(p - 2) << 11) |
          ((UInt32)GetUi16a(p) & 0x7FF);
      /*
      UInt32 v =
            ((UInt32)p[1 - 2] << 19)
          + (((UInt32)p[1] & 0x7) << 8)
          + (((UInt32)p[-2] << 11))
          + (p[0]);
      */
      p += 2;
      {
        UInt32 c = BR_PC_GET >> 1; // byte pc -> halfword offset
        BR_CONVERT_VAL(v, c)
      }
      /* split the converted offset back into the two BL halfwords */
      SetUi16a(p - 4, (UInt16)(((v >> 11) & 0x7ff) | 0xf000))
      SetUi16a(p - 2, (UInt16)(v | 0xf800))
      /*
      p[-4] = (Byte)(v >> 11);
      p[-3] = (Byte)(0xf0 | ((v >> 19) & 0x7));
      p[-2] = (Byte)v;
      p[-1] = (Byte)(0xf8 | (v >> 8));
      */
    }
  }
  while (p < lim);
  return p;
  // armt_tail:
  // if ((Byte)((lim[1] & 0xf8)) != 0xf0) { lim += 2; } return lim;
  // return (Byte *)(lim + ((Byte)((lim[1] ^ 0xf0) & 0xf8) == 0 ? 0 : 2));
  // return (Byte *)(lim + (((lim[1] ^ ~0xfu) & ~7u) == 0 ? 0 : 2));
  // return (Byte *)(lim + 2 - (((((unsigned)lim[1] ^ 8) + 8) >> 7) & 2));
}
Z7_BRANCH_FUNCS_IMP(BranchConv_ARMT)
339
340
341
// #define BR_IA64_NO_INLINE

/* Branch converter for IA-64 (Itanium) code.
   IA-64 packs three 41-bit instruction slots into 16-byte bundles; the
   bundle's 5-bit template (low bits of byte 0) determines which slots can
   hold branches. The 0x334b0000 table below maps each even template value to
   a 2-bit mask of candidate slots. Matching br.call-style slots have their
   21-bit (word-scaled) offsets converted in place.
   Processes (size & ~15) bytes; returns the pointer past the last bundle.
   NOTE(review): pc here is pre-scaled (>> 3 overall) to bundle-slot units,
   unlike the other converters — it does not use BR_PC_INIT/BR_PC_GET. */
Z7_BRANCH_FUNC_MAIN(BranchConv_IA64)
{
  // Byte *p = data;
  const Byte *lim;
  size &= ~(SizeT)15;
  lim = p + size;
  pc -= 1 << 4;   // compensate for the first "pc += 1 << 1" before any use
  pc >>= 4 - 1;   // byte pc -> scaled units used by the slot arithmetic below
  // pc -= 1 << 1;

  for (;;)
  {
    unsigned m;
    for (;;)
    {
      if Z7_UNLIKELY(p == lim)
        return p;
      // look up the branch-slot mask for this bundle's template
      m = (unsigned)((UInt32)0x334b0000 >> (*p & 0x1e));
      p += 16;
      pc += 1 << 1;
      if (m &= 3)
        break;
    }
    {
      /* position p at the first candidate slot (slots are 41 bits, stepped
         in 5-byte strides with a bit shift m for sub-byte alignment) */
      p += (ptrdiff_t)m * 5 - 20; // negative value is expected here.
      do
      {
        const UInt32 t =
          #if defined(MY_CPU_X86_OR_AMD64)
            // we use 32-bit load here to reduce code size on x86:
            GetUi32(p);
          #else
            GetUi16(p);
          #endif
        UInt32 z = GetUi32(p + 1) >> m;
        p += 5;
        // check slot opcode bits: is this a convertible branch immediate?
        if (((t >> m) & (0x70 << 1)) == 0
            && ((z - (0x5000000 << 1)) & (0xf000000 << 1)) == 0)
        {
          UInt32 v = (UInt32)((0x8fffff << 1) | 1) & z; // extract imm20b + sign bit
          z ^= v; // clear the immediate field in z; re-merged below
          #ifdef BR_IA64_NO_INLINE
          v |= (v & ((UInt32)1 << (23 + 1))) >> 3; // propagate sign bit for conversion
          {
            UInt32 c = pc;
            BR_CONVERT_VAL(v, c)
          }
          v &= (0x1fffff << 1) | 1;
          #else
          {
            /* inline add/sub with pc masked/extended so the result stays
               within the 21-bit immediate without a separate sign fixup */
            if (encoding)
            {
              // pc &= ~(0xc00000 << 1); // we just need to clear at least 2 bits
              pc &= (0x1fffff << 1) | 1;
              v += pc;
            }
            else
            {
              // pc |= 0xc00000 << 1; // we need to set at least 2 bits
              pc |= ~(UInt32)((0x1fffff << 1) | 1);
              v -= pc;
            }
          }
          v &= ~(UInt32)(0x600000 << 1);
          #endif
          v += (0x700000 << 1);          // rebias so the sign bit lands correctly
          v &= (0x8fffff << 1) | 1;      // keep immediate + sign layout
          z |= v;                        // merge converted immediate back
          z <<= m;
          SetUi32(p + 1 - 5, z)
        }
        m++;
      }
      while (m &= 3); // while (m < 4);
    }
  }
}
Z7_BRANCH_FUNCS_IMP(BranchConv_IA64)
421
422
423
/* RISC-V uses dedicated enc/dec entry points (no shared encoding flag),
   so the conversion is a plain add or subtract of the current pc. */
#define BR_CONVERT_VAL_ENC(v)  v += BR_PC_GET;
#define BR_CONVERT_VAL_DEC(v)  v -= BR_PC_GET;

#if 1 && defined(MY_CPU_LE_UNALIGN)
  #define RISCV_USE_UNALIGNED_LOAD
#endif

/* 32-bit access to possibly 2-aligned instruction words: a single unaligned
   load/store when the CPU supports it, otherwise two aligned 16-bit halves. */
#ifdef RISCV_USE_UNALIGNED_LOAD
  #define RISCV_GET_UI32(p)      GetUi32(p)
  #define RISCV_SET_UI32(p, v)   { SetUi32(p, v) }
#else
  #define RISCV_GET_UI32(p) \
    ((UInt32)GetUi16a(p) + \
    ((UInt32)GetUi16a((p) + 2) << 16))
  #define RISCV_SET_UI32(p, v) { \
    SetUi16a(p, (UInt16)(v)) \
    SetUi16a((p) + 2, (UInt16)(v >> 16)) }
#endif

#if 1 && defined(MY_CPU_LE)
  #define RISCV_USE_16BIT_LOAD
#endif

/* Scan step value: a 16-bit halfword on LE targets, a single byte otherwise
   (the low byte is enough to detect JAL/AUIPC opcodes). */
#ifdef RISCV_USE_16BIT_LOAD
  #define RISCV_LOAD_VAL(p)  GetUi16a(p)
#else
  #define RISCV_LOAD_VAL(p)  (*(p))
#endif

#define RISCV_INSTR_SIZE  2              // minimal (compressed) instruction size
#define RISCV_STEP_1      (4 + RISCV_INSTR_SIZE)
#define RISCV_STEP_2      4
#define RISCV_REG_VAL     (2 << 7)       // rd == x2 encoded in its field position
#define RISCV_CMD_VAL     3
#if 1
  // for code size optimization:
  #define RISCV_DELTA_7F  0x7f
#else
  #define RISCV_DELTA_7F  0
#endif

/* Check that the instruction pair at (v = AUIPC low bits, b = next word)
   forms a convertible AUIPC+JALR-style sequence. */
#define RISCV_CHECK_1(v, b) \
    (((((b) - RISCV_CMD_VAL) ^ ((v) << 8)) & (0xf8000 + RISCV_CMD_VAL)) == 0)

#if 1
  #define RISCV_CHECK_2(v, r) \
    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL | 8)) \
        << 18) \
      < ((r) & 0x1d))
#else
  // this branch gives larger code, because
  // compilers generate larger code for big constants.
  #define RISCV_CHECK_2(v, r) \
    ((((v) - ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
        & ((RISCV_CMD_VAL << 12) | RISCV_REG_VAL)) \
      < ((r) & 0x1d))
#endif

/* Shared scanner prologue for the enc/dec functions below.
   NOTE: deliberately leaves two braces open; each user closes them at the
   end of its body (see the "// for" comments there). On exit from the inner
   loop, (a) holds (first halfword ^ 0x10) + 1, which maps JAL opcodes
   (0x6f/0xef) and AUIPC opcodes (0x17/0x97) to values with (a & 0x77) == 0
   (see the table after the macro). */
#define RISCV_SCAN_LOOP \
  Byte *lim; \
  size &= ~(SizeT)(RISCV_INSTR_SIZE - 1); \
  if (size <= 6) return p; \
  size -= 6; \
  lim = p + size; \
  BR_PC_INIT \
  for (;;) \
  { \
    UInt32 a, v; \
    /* Z7_PRAGMA_OPT_DISABLE_LOOP_UNROLL_VECTORIZE */ \
    for (;;) \
    { \
      if Z7_UNLIKELY(p >= lim) { return p; } \
      a = (RISCV_LOAD_VAL(p) ^ 0x10u) + 1; \
      if ((a & 0x77) == 0) break; \
      a = (RISCV_LOAD_VAL(p + RISCV_INSTR_SIZE) ^ 0x10u) + 1; \
      p += RISCV_INSTR_SIZE * 2; \
      if ((a & 0x77) == 0) \
      { \
        p -= RISCV_INSTR_SIZE; \
        if Z7_UNLIKELY(p >= lim) { return p; } \
        break; \
      } \
    }

// (xx6f ^ 10) + 1 = xx7f + 1 = xx80       : JAL
// (xxef ^ 10) + 1 = xxff + 1 = xx00 + 100 : JAL
// (xx17 ^ 10) + 1 = xx07 + 1 = xx08       : AUIPC
// (xx97 ^ 10) + 1 = xx87 + 1 = xx88       : AUIPC
512
/* RISC-V branch-converter encoder (compress direction).
   Converts JAL offsets and AUIPC(+12-bit-follower) address pairs to absolute
   form. Uses the open-brace RISCV_SCAN_LOOP macro above; the two closing
   braces at the end of this function close the macro's loops. */
Byte * Z7_BRANCH_CONV_ENC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
  v = a;
  a = RISCV_GET_UI32(p);
#ifndef RISCV_USE_16BIT_LOAD
  // byte-scan path: widen v so the rd-field checks below see bits 8..15
  v += (UInt32)p[1] << 8;
#endif

  if ((v & 8) == 0) // JAL
  {
    // only convert JAL with link register rd == x1 (ra); skip others
    if ((v - (0x100 /* - RISCV_DELTA_7F */)) & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      // de-interleave the scrambled J-immediate (imm[20|10:1|11|19:12])
      v = ((a & 1u << 31) >> 11)
        | ((a & 0x3ff << 21) >> 20)
        | ((a & 1 << 20) >> 9)
        | (a & 0xff << 12);
      BR_CONVERT_VAL_ENC(v)
      // ((v & 1) == 0)
      // v: bits [1 : 20] contain offset bits
#if 0 && defined(RISCV_USE_UNALIGNED_LOAD)
      a &= 0xfff;
      a |= ((UInt32)(v << 23))
        | ((UInt32)(v << 7) & ((UInt32)0xff << 16))
        | ((UInt32)(v >> 5) & ((UInt32)0xf0 << 8));
      RISCV_SET_UI32(p, a)
#else // aligned
#if 0
      SetUi16a(p, (UInt16)(((v >> 5) & 0xf000) | (a & 0xfff)))
#else
      // store converted offset in a fixed big-endian-like layout (bytes 1..3)
      p[1] = (Byte)(((v >> 13) & 0xf0) | ((a >> 8) & 0xf));
#endif

#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      v <<= 15;
      v = Z7_BSWAP32(v);
      SetUi16a(p + 2, (UInt16)v)
#else
      p[2] = (Byte)(v >> 9);
      p[3] = (Byte)(v >> 1);
#endif
#endif // aligned
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    if (v & 0xe80) // (not x0) and (not x2)
    {
      // AUIPC rd, imm followed by a dependent 12-bit-immediate instruction
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (RISCV_CHECK_1(v, b))
      {
        {
          // stash the follower's key bits into a synthetic AUIPC-x2 marker word
          const UInt32 temp = (b << 12) | (0x17 + RISCV_REG_VAL);
          RISCV_SET_UI32(p, temp)
        }
        a &= 0xfffff000;
        {
#if 1
          // compile-time probe: does signed >> behave arithmetically here?
          const int t = -1 >> 1;
          if (t != -1)
            a += (b >> 20) - ((b >> 19) & 0x1000); // arithmetic right shift emulation
          else
#endif
            a += (UInt32)((Int32)b >> 20); // arithmetic right shift (sign-extension).
        }
        BR_CONVERT_VAL_ENC(a)
        // store the absolute address big-endian so similar targets share prefixes
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        a = Z7_BSWAP32(a);
        RISCV_SET_UI32(p + 4, a)
#else
        SetBe32(p + 4, a)
#endif
        p += 8;
      }
      else
        p += RISCV_STEP_1;
    }
    else
    {
      // marker word produced by a previous pass (AUIPC with x0/x2): unscramble
      UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        v = RISCV_GET_UI32(p + 4);
        r = (r << 7) + 0x17 + (v & 0xfffff000);
        a = (a >> 12) | (v << 20);
        RISCV_SET_UI32(p, r)
        RISCV_SET_UI32(p + 4, a)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
  }
  } // for
}
614
615
616
/* RISC-V branch-converter decoder (decompress direction).
   Exact inverse of the encoder above: restores relative JAL offsets and
   AUIPC pairs from their absolute/marker forms. Uses the open-brace
   RISCV_SCAN_LOOP macro; the trailing braces close its loops. */
Byte * Z7_BRANCH_CONV_DEC(RISCV)(Byte *p, SizeT size, UInt32 pc)
{
  RISCV_SCAN_LOOP
#ifdef RISCV_USE_16BIT_LOAD
  if ((a & 8) == 0)
  {
#else
  v = a;
  a += (UInt32)p[1] << 8; // widen byte-scan value to cover the rd field
  if ((v & 8) == 0)
  {
#endif
    // JAL
    // only rd == x1 (ra) forms were converted by the encoder; skip others
    a -= 0x100 - RISCV_DELTA_7F;
    if (a & 0xd80)
    {
      p += RISCV_INSTR_SIZE;
      continue;
    }
    {
      // rebuild the original low 12 bits (opcode + rd) of the JAL word
      const UInt32 a_old = (a + (0xef - RISCV_DELTA_7F)) & 0xfff;
#if 0 // unaligned
      a = GetUi32(p);
      v = (UInt32)(a >> 23) & ((UInt32)0xff << 1)
        | (UInt32)(a >> 7) & ((UInt32)0xff << 9)
#elif 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
      // read back the encoder's byte layout for the stored offset
      v = GetUi16a(p + 2);
      v = Z7_BSWAP32(v) >> 15
#else
      v = (UInt32)p[3] << 1
        | (UInt32)p[2] << 9
#endif
        | (UInt32)((a & 0xf000) << 5);
      BR_CONVERT_VAL_DEC(v)
      // re-interleave into the JAL J-immediate layout (imm[20|10:1|11|19:12])
      a = a_old
        | (v << 11 & 1u << 31)
        | (v << 20 & 0x3ff << 21)
        | (v << 9 & 1 << 20)
        | (v & 0xff << 12);
      RISCV_SET_UI32(p, a)
    }
    p += 4;
    continue;
  } // JAL

  {
    // AUIPC
    v = a;
#if 1 && defined(RISCV_USE_UNALIGNED_LOAD)
    a = GetUi32(p);
#else
    a |= (UInt32)GetUi16a(p + 2) << 16; // complete the 32-bit word from halves
#endif
    if ((v & 0xe80) == 0) // x0/x2
    {
      // encoder's marker word: restore the original AUIPC + follower pair
      const UInt32 r = a >> 27;
      if (RISCV_CHECK_2(v, r))
      {
        UInt32 b;
#if 1 && defined(Z7_CPU_FAST_BSWAP_SUPPORTED) && defined(MY_CPU_LE)
        b = RISCV_GET_UI32(p + 4);
        b = Z7_BSWAP32(b);
#else
        b = GetBe32(p + 4); // encoder stored the absolute address big-endian
#endif
        v = a >> 12;
        BR_CONVERT_VAL_DEC(b)
        a = (r << 7) + 0x17;                 // rebuild AUIPC opcode with original rd
        a += (b + 0x800) & 0xfffff000;       // round so the 12-bit follower sign-extends correctly
        v |= b << 20;
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
      else
        p += RISCV_STEP_2;
    }
    else
    {
      // forward direction of the marker transform (mirrors encoder's x0/x2 branch)
      const UInt32 b = RISCV_GET_UI32(p + 4);
      if (!RISCV_CHECK_1(v, b))
        p += RISCV_STEP_1;
      else
      {
        v = (a & 0xfffff000) | (b >> 20);
        a = (b << 12) | (0x17 + RISCV_REG_VAL);
        RISCV_SET_UI32(p, a)
        RISCV_SET_UI32(p + 4, v)
        p += 8;
      }
    }
  }
  } // for
}
710
711