/* Source: GitHub repository stenzek/duckstation
   Path: blob/master/dep/lzma/src/Bra86.c */
/* Bra86.c -- Branch converter for X86 code (BCJ)
2023-04-02 : Igor Pavlov : Public domain */

#include "Precomp.h"

#include "Bra.h"
#include "CpuArch.h"


/*
  BR_PC_INIT / BR_PC_GET keep track of the position value that is added to
  (or subtracted from) a branch operand. Both are code fragments that expand
  inside the converter function and use its local names: (pc), (p), and in the
  portable variant also (size) and (lim).

  BR_CONV_USE_OPT_PC_PTR is enabled when MY_CPU_SIZEOF_POINTER is defined
  as 4 or 8:
    BR_PC_INIT : subtracts the pointer value (p), truncated to 32 bits,
                 from (pc) once.
    BR_PC_GET  : (pc) plus the truncated current pointer value.
  Otherwise:
    BR_PC_INIT : advances (pc) by (size) once.
    BR_PC_GET  : (pc) minus the distance from (p) to (lim).
*/
#if defined(MY_CPU_SIZEOF_POINTER) \
    && ( MY_CPU_SIZEOF_POINTER == 4 \
      || MY_CPU_SIZEOF_POINTER == 8)
  #define BR_CONV_USE_OPT_PC_PTR
#endif

#ifdef BR_CONV_USE_OPT_PC_PTR
#define BR_PC_INIT  pc -= (UInt32)(SizeT)p; // (MY_uintptr_t)
#define BR_PC_GET   (pc + (UInt32)(SizeT)p)
#else
#define BR_PC_INIT  pc += (UInt32)size;
#define BR_PC_GET   (pc - (UInt32)(SizeT)(lim - p))
// #define BR_PC_INIT
// #define BR_PC_GET   (pc + (UInt32)(SizeT)(p - data))
#endif

/* Adds (c) to (v) when the surrounding local (encoding) is non-zero,
   subtracts it otherwise. Expands to a complete if/else statement, so it is
   used without a trailing semicolon. */
#define BR_CONVERT_VAL(v, c) if (encoding) v += c; else v -= c;
// #define BR_CONVERT_VAL(v, c) if (!encoding) c = (UInt32)0 - c; v += c;

/* Name of the inline converter for a given architecture tag,
   e.g. Z7_BRANCH_CONV_ST(X86) -> z7_BranchConvSt_X86 */
#define Z7_BRANCH_CONV_ST(name) z7_BranchConvSt_ ## name

/* True when the low 8 bits of (b) are 0x00 or 0xFF. */
#define BR86_NEED_CONV_FOR_MS_BYTE(b) ((((b) + 1) & 0xfe) == 0)

/*
  Scan helpers: test whether byte (n) (0..3) of a 4-byte window is 0xE8 or 0xE9
  (the value with its lowest bit masked off equals 0xE8).

  MY_CPU_LE_UNALIGN variant:
    BR86_PREPARE_BCJ_SCAN loads the window at (p) with GetUi32 and XORs it with
    0xe8e8e8e8; it must be expanded BEFORE (p) is advanced by 4.
    BR86_IS_BCJ_BYTE(n) then tests byte (n) of that local (v).
  Other variant:
    BR86_PREPARE_BCJ_SCAN is empty, and BR86_IS_BCJ_BYTE(n) reads p[(n) - 4],
    so it must be evaluated AFTER (p) was advanced by 4.
*/
#ifdef MY_CPU_LE_UNALIGN
  #define BR86_PREPARE_BCJ_SCAN  const UInt32 v = GetUi32(p) ^ 0xe8e8e8e8;
  #define BR86_IS_BCJ_BYTE(n)    ((v & ((UInt32)0xfe << (n) * 8)) == 0)
#else
  #define BR86_PREPARE_BCJ_SCAN
  // bad for MSVC X86 (partial write to byte reg):
  #define BR86_IS_BCJ_BYTE(n)    ((p[(n) - 4] & 0xfe) == 0xe8)
  // bad for old MSVC (partial write to byte reg):
  // #define BR86_IS_BCJ_BYTE(n)    (((*p ^ 0xe8) & 0xfe) == 0)
#endif

static
45
Z7_FORCE_INLINE
46
Z7_ATTRIB_NO_VECTOR
47
Byte *Z7_BRANCH_CONV_ST(X86)(Byte *p, SizeT size, UInt32 pc, UInt32 *state, int encoding)
48
{
49
if (size < 5)
50
return p;
51
{
52
// Byte *p = data;
53
const Byte *lim = p + size - 4;
54
unsigned mask = (unsigned)*state; // & 7;
55
#ifdef BR_CONV_USE_OPT_PC_PTR
56
/* if BR_CONV_USE_OPT_PC_PTR is defined: we need to adjust (pc) for (+4),
57
because call/jump offset is relative to the next instruction.
58
if BR_CONV_USE_OPT_PC_PTR is not defined : we don't need to adjust (pc) for (+4),
59
because BR_PC_GET uses (pc - (lim - p)), and lim was adjusted for (-4) before.
60
*/
61
pc += 4;
62
#endif
63
BR_PC_INIT
64
goto start;
65
66
for (;; mask |= 4)
67
{
68
// cont: mask |= 4;
69
start:
70
if (p >= lim)
71
goto fin;
72
{
73
BR86_PREPARE_BCJ_SCAN
74
p += 4;
75
if (BR86_IS_BCJ_BYTE(0)) { goto m0; } mask >>= 1;
76
if (BR86_IS_BCJ_BYTE(1)) { goto m1; } mask >>= 1;
77
if (BR86_IS_BCJ_BYTE(2)) { goto m2; } mask = 0;
78
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
79
}
80
goto main_loop;
81
82
m0: p--;
83
m1: p--;
84
m2: p--;
85
if (mask == 0)
86
goto a3;
87
if (p > lim)
88
goto fin_p;
89
90
// if (((0x17u >> mask) & 1) == 0)
91
if (mask > 4 || mask == 3)
92
{
93
mask >>= 1;
94
continue; // goto cont;
95
}
96
mask >>= 1;
97
if (BR86_NEED_CONV_FOR_MS_BYTE(p[mask]))
98
continue; // goto cont;
99
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
100
{
101
UInt32 v = GetUi32(p);
102
UInt32 c;
103
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
104
c = BR_PC_GET;
105
BR_CONVERT_VAL(v, c)
106
{
107
mask <<= 3;
108
if (BR86_NEED_CONV_FOR_MS_BYTE(v >> mask))
109
{
110
v ^= (((UInt32)0x100 << mask) - 1);
111
#ifdef MY_CPU_X86
112
// for X86 : we can recalculate (c) to reduce register pressure
113
c = BR_PC_GET;
114
#endif
115
BR_CONVERT_VAL(v, c)
116
}
117
mask = 0;
118
}
119
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
120
v &= (1 << 25) - 1; v -= (1 << 24);
121
SetUi32(p, v)
122
p += 4;
123
goto main_loop;
124
}
125
126
main_loop:
127
if (p >= lim)
128
goto fin;
129
for (;;)
130
{
131
BR86_PREPARE_BCJ_SCAN
132
p += 4;
133
if (BR86_IS_BCJ_BYTE(0)) { goto a0; }
134
if (BR86_IS_BCJ_BYTE(1)) { goto a1; }
135
if (BR86_IS_BCJ_BYTE(2)) { goto a2; }
136
if (BR86_IS_BCJ_BYTE(3)) { goto a3; }
137
if (p >= lim)
138
goto fin;
139
}
140
141
a0: p--;
142
a1: p--;
143
a2: p--;
144
a3:
145
if (p > lim)
146
goto fin_p;
147
// if (!BR86_NEED_CONV_FOR_MS_BYTE(p[3])) continue; // goto cont;
148
{
149
UInt32 v = GetUi32(p);
150
UInt32 c;
151
v += (1 << 24); if (v & 0xfe000000) continue; // goto cont;
152
c = BR_PC_GET;
153
BR_CONVERT_VAL(v, c)
154
// v = (v & ((1 << 24) - 1)) - (v & (1 << 24));
155
v &= (1 << 25) - 1; v -= (1 << 24);
156
SetUi32(p, v)
157
p += 4;
158
goto main_loop;
159
}
160
}
161
162
fin_p:
163
p--;
164
fin:
165
// the following processing for tail is optional and can be commented
166
/*
167
lim += 4;
168
for (; p < lim; p++, mask >>= 1)
169
if ((*p & 0xfe) == 0xe8)
170
break;
171
*/
172
*state = (UInt32)mask;
173
return p;
174
}
175
}
176
177
178
#define Z7_BRANCH_CONV_ST_FUNC_IMP(name, m, encoding) \
179
Z7_NO_INLINE \
180
Z7_ATTRIB_NO_VECTOR \
181
Byte *m(name)(Byte *data, SizeT size, UInt32 pc, UInt32 *state) \
182
{ return Z7_BRANCH_CONV_ST(name)(data, size, pc, state, encoding); }
183
184
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_DEC, 0)
185
#ifndef Z7_EXTRACT_ONLY
186
Z7_BRANCH_CONV_ST_FUNC_IMP(X86, Z7_BRANCH_CONV_ST_ENC, 1)
187
#endif
188
189