Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
35269 views
1
//===------------- OrcABISupport.cpp - ABI specific support code ----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
10
#include "llvm/Support/FormatVariadic.h"
11
#include "llvm/Support/Process.h"
12
#include "llvm/Support/raw_ostream.h"
13
14
#define DEBUG_TYPE "orc"
15
16
using namespace llvm;
17
using namespace llvm::orc;
18
19
template <typename ORCABI>
20
static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr,
21
ExecutorAddr PointerBlockAddr,
22
unsigned NumStubs) {
23
constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement;
24
ExecutorAddr FirstStub = StubBlockAddr;
25
ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize);
26
ExecutorAddr FirstPointer = PointerBlockAddr;
27
ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize);
28
29
if (FirstStub < FirstPointer) {
30
if (LastStub >= FirstPointer)
31
return false; // Ranges overlap.
32
return (FirstPointer - FirstStub <= MaxDisp) &&
33
(LastPointer - LastStub <= MaxDisp); // out-of-range.
34
}
35
36
if (LastPointer >= FirstStub)
37
return false; // Ranges overlap.
38
39
return (FirstStub - FirstPointer <= MaxDisp) &&
40
(LastStub - LastPointer <= MaxDisp);
41
}
42
43
namespace llvm {
44
namespace orc {
45
46
void OrcAArch64::writeResolverCode(char *ResolverWorkingMem,
47
ExecutorAddr ResolverTargetAddress,
48
ExecutorAddr ReentryFnAddr,
49
ExecutorAddr ReentryCtxAddr) {
50
51
const uint32_t ResolverCode[] = {
52
// resolver_entry:
53
0xa9bf47fd, // 0x000: stp x29, x17, [sp, #-16]!
54
0x910003fd, // 0x004: mov x29, sp
55
0xa9bf73fb, // 0x008: stp x27, x28, [sp, #-16]!
56
0xa9bf6bf9, // 0x00c: stp x25, x26, [sp, #-16]!
57
0xa9bf63f7, // 0x010: stp x23, x24, [sp, #-16]!
58
0xa9bf5bf5, // 0x014: stp x21, x22, [sp, #-16]!
59
0xa9bf53f3, // 0x018: stp x19, x20, [sp, #-16]!
60
0xa9bf3fee, // 0x01c: stp x14, x15, [sp, #-16]!
61
0xa9bf37ec, // 0x020: stp x12, x13, [sp, #-16]!
62
0xa9bf2fea, // 0x024: stp x10, x11, [sp, #-16]!
63
0xa9bf27e8, // 0x028: stp x8, x9, [sp, #-16]!
64
0xa9bf1fe6, // 0x02c: stp x6, x7, [sp, #-16]!
65
0xa9bf17e4, // 0x030: stp x4, x5, [sp, #-16]!
66
0xa9bf0fe2, // 0x034: stp x2, x3, [sp, #-16]!
67
0xa9bf07e0, // 0x038: stp x0, x1, [sp, #-16]!
68
0xadbf7ffe, // 0x03c: stp q30, q31, [sp, #-32]!
69
0xadbf77fc, // 0x040: stp q28, q29, [sp, #-32]!
70
0xadbf6ffa, // 0x044: stp q26, q27, [sp, #-32]!
71
0xadbf67f8, // 0x048: stp q24, q25, [sp, #-32]!
72
0xadbf5ff6, // 0x04c: stp q22, q23, [sp, #-32]!
73
0xadbf57f4, // 0x050: stp q20, q21, [sp, #-32]!
74
0xadbf4ff2, // 0x054: stp q18, q19, [sp, #-32]!
75
0xadbf47f0, // 0x058: stp q16, q17, [sp, #-32]!
76
0xadbf3fee, // 0x05c: stp q14, q15, [sp, #-32]!
77
0xadbf37ec, // 0x060: stp q12, q13, [sp, #-32]!
78
0xadbf2fea, // 0x064: stp q10, q11, [sp, #-32]!
79
0xadbf27e8, // 0x068: stp q8, q9, [sp, #-32]!
80
0xadbf1fe6, // 0x06c: stp q6, q7, [sp, #-32]!
81
0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]!
82
0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]!
83
0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]!
84
0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr
85
0xaa1e03e1, // 0x080: mov x1, x30
86
0xd1003021, // 0x084: sub x1, x1, #12
87
0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr
88
0xd63f0040, // 0x08c: blr x2
89
0xaa0003f1, // 0x090: mov x17, x0
90
0xacc107e0, // 0x094: ldp q0, q1, [sp], #32
91
0xacc10fe2, // 0x098: ldp q2, q3, [sp], #32
92
0xacc117e4, // 0x09c: ldp q4, q5, [sp], #32
93
0xacc11fe6, // 0x0a0: ldp q6, q7, [sp], #32
94
0xacc127e8, // 0x0a4: ldp q8, q9, [sp], #32
95
0xacc12fea, // 0x0a8: ldp q10, q11, [sp], #32
96
0xacc137ec, // 0x0ac: ldp q12, q13, [sp], #32
97
0xacc13fee, // 0x0b0: ldp q14, q15, [sp], #32
98
0xacc147f0, // 0x0b4: ldp q16, q17, [sp], #32
99
0xacc14ff2, // 0x0b8: ldp q18, q19, [sp], #32
100
0xacc157f4, // 0x0bc: ldp q20, q21, [sp], #32
101
0xacc15ff6, // 0x0c0: ldp q22, q23, [sp], #32
102
0xacc167f8, // 0x0c4: ldp q24, q25, [sp], #32
103
0xacc16ffa, // 0x0c8: ldp q26, q27, [sp], #32
104
0xacc177fc, // 0x0cc: ldp q28, q29, [sp], #32
105
0xacc17ffe, // 0x0d0: ldp q30, q31, [sp], #32
106
0xa8c107e0, // 0x0d4: ldp x0, x1, [sp], #16
107
0xa8c10fe2, // 0x0d8: ldp x2, x3, [sp], #16
108
0xa8c117e4, // 0x0dc: ldp x4, x5, [sp], #16
109
0xa8c11fe6, // 0x0e0: ldp x6, x7, [sp], #16
110
0xa8c127e8, // 0x0e4: ldp x8, x9, [sp], #16
111
0xa8c12fea, // 0x0e8: ldp x10, x11, [sp], #16
112
0xa8c137ec, // 0x0ec: ldp x12, x13, [sp], #16
113
0xa8c13fee, // 0x0f0: ldp x14, x15, [sp], #16
114
0xa8c153f3, // 0x0f4: ldp x19, x20, [sp], #16
115
0xa8c15bf5, // 0x0f8: ldp x21, x22, [sp], #16
116
0xa8c163f7, // 0x0fc: ldp x23, x24, [sp], #16
117
0xa8c16bf9, // 0x100: ldp x25, x26, [sp], #16
118
0xa8c173fb, // 0x104: ldp x27, x28, [sp], #16
119
0xa8c17bfd, // 0x108: ldp x29, x30, [sp], #16
120
0xd65f0220, // 0x10c: ret x17
121
0x01234567, // 0x110: Lreentry_fn_ptr:
122
0xdeadbeef, // 0x114: .quad 0
123
0x98765432, // 0x118: Lreentry_ctx_ptr:
124
0xcafef00d // 0x11c: .quad 0
125
};
126
127
const unsigned ReentryFnAddrOffset = 0x110;
128
const unsigned ReentryCtxAddrOffset = 0x118;
129
130
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
131
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
132
sizeof(uint64_t));
133
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
134
sizeof(uint64_t));
135
}
136
137
void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
138
ExecutorAddr TrampolineBlockTargetAddress,
139
ExecutorAddr ResolverAddr,
140
unsigned NumTrampolines) {
141
142
unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
143
144
memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
145
sizeof(uint64_t));
146
147
// OffsetToPtr is actually the offset from the PC for the 2nd instruction, so
148
// subtract 32-bits.
149
OffsetToPtr -= 4;
150
151
uint32_t *Trampolines =
152
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
153
154
for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
155
Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30
156
Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr
157
Trampolines[3 * I + 2] = 0xd63f0200; // blr x16
158
}
159
}
160
161
void OrcAArch64::writeIndirectStubsBlock(
162
char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
163
ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
164
// Stub format is:
165
//
166
// .section __orc_stubs
167
// stub1:
168
// ldr x16, ptr1 ; PC-rel load of ptr1
169
// br x16 ; Jump to resolver
170
// stub2:
171
// ldr x16, ptr2 ; PC-rel load of ptr2
172
// br x16 ; Jump to resolver
173
//
174
// ...
175
//
176
// .section __orc_ptrs
177
// ptr1:
178
// .quad 0x0
179
// ptr2:
180
// .quad 0x0
181
//
182
// ...
183
184
static_assert(StubSize == PointerSize,
185
"Pointer and stub size must match for algorithm below");
186
assert(stubAndPointerRangesOk<OrcAArch64>(
187
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
188
"PointersBlock is out of range");
189
uint64_t PtrDisplacement =
190
PointersBlockTargetAddress - StubsBlockTargetAddress;
191
assert((PtrDisplacement % 8 == 0) &&
192
"Displacement to pointer is not a multiple of 8");
193
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
194
uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5;
195
196
for (unsigned I = 0; I < NumStubs; ++I)
197
Stub[I] = 0xd61f020058000010 | PtrOffsetField;
198
}
199
200
void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
201
ExecutorAddr TrampolineBlockTargetAddress,
202
ExecutorAddr ResolverAddr,
203
unsigned NumTrampolines) {
204
205
unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
206
207
memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
208
sizeof(uint64_t));
209
210
uint64_t *Trampolines =
211
reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem);
212
uint64_t CallIndirPCRel = 0xf1c40000000015ff;
213
214
for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
215
Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
216
}
217
218
void OrcX86_64_Base::writeIndirectStubsBlock(
219
char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
220
ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
221
// Stub format is:
222
//
223
// .section __orc_stubs
224
// stub1:
225
// jmpq *ptr1(%rip)
226
// .byte 0xC4 ; <- Invalid opcode padding.
227
// .byte 0xF1
228
// stub2:
229
// jmpq *ptr2(%rip)
230
//
231
// ...
232
//
233
// .section __orc_ptrs
234
// ptr1:
235
// .quad 0x0
236
// ptr2:
237
// .quad 0x0
238
//
239
// ...
240
241
// Populate the stubs page stubs and mark it executable.
242
static_assert(StubSize == PointerSize,
243
"Pointer and stub size must match for algorithm below");
244
assert(stubAndPointerRangesOk<OrcX86_64_Base>(
245
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
246
"PointersBlock is out of range");
247
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
248
uint64_t PtrOffsetField =
249
(PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16;
250
for (unsigned I = 0; I < NumStubs; ++I)
251
Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
252
}
253
254
void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem,
255
ExecutorAddr ResolverTargetAddress,
256
ExecutorAddr ReentryFnAddr,
257
ExecutorAddr ReentryCtxAddr) {
258
259
LLVM_DEBUG({
260
dbgs() << "Writing resolver code to "
261
<< formatv("{0:x16}", ResolverTargetAddress) << "\n";
262
});
263
264
const uint8_t ResolverCode[] = {
265
// resolver_entry:
266
0x55, // 0x00: pushq %rbp
267
0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
268
0x50, // 0x04: pushq %rax
269
0x53, // 0x05: pushq %rbx
270
0x51, // 0x06: pushq %rcx
271
0x52, // 0x07: pushq %rdx
272
0x56, // 0x08: pushq %rsi
273
0x57, // 0x09: pushq %rdi
274
0x41, 0x50, // 0x0a: pushq %r8
275
0x41, 0x51, // 0x0c: pushq %r9
276
0x41, 0x52, // 0x0e: pushq %r10
277
0x41, 0x53, // 0x10: pushq %r11
278
0x41, 0x54, // 0x12: pushq %r12
279
0x41, 0x55, // 0x14: pushq %r13
280
0x41, 0x56, // 0x16: pushq %r14
281
0x41, 0x57, // 0x18: pushq %r15
282
0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
283
0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
284
0x48, 0xbf, // 0x26: movabsq <CBMgr>, %rdi
285
286
// 0x28: JIT re-entry ctx addr.
287
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
288
289
0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi
290
0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi
291
0x48, 0xb8, // 0x38: movabsq <REntry>, %rax
292
293
// 0x3a: JIT re-entry fn addr:
294
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
295
296
0xff, 0xd0, // 0x42: callq *%rax
297
0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp)
298
0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp)
299
0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp
300
0x41, 0x5f, // 0x54: popq %r15
301
0x41, 0x5e, // 0x56: popq %r14
302
0x41, 0x5d, // 0x58: popq %r13
303
0x41, 0x5c, // 0x5a: popq %r12
304
0x41, 0x5b, // 0x5c: popq %r11
305
0x41, 0x5a, // 0x5e: popq %r10
306
0x41, 0x59, // 0x60: popq %r9
307
0x41, 0x58, // 0x62: popq %r8
308
0x5f, // 0x64: popq %rdi
309
0x5e, // 0x65: popq %rsi
310
0x5a, // 0x66: popq %rdx
311
0x59, // 0x67: popq %rcx
312
0x5b, // 0x68: popq %rbx
313
0x58, // 0x69: popq %rax
314
0x5d, // 0x6a: popq %rbp
315
0xc3, // 0x6b: retq
316
};
317
318
const unsigned ReentryFnAddrOffset = 0x3a;
319
const unsigned ReentryCtxAddrOffset = 0x28;
320
321
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
322
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
323
sizeof(uint64_t));
324
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
325
sizeof(uint64_t));
326
}
327
328
void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem,
329
ExecutorAddr ResolverTargetAddress,
330
ExecutorAddr ReentryFnAddr,
331
ExecutorAddr ReentryCtxAddr) {
332
333
// resolverCode is similar to OrcX86_64 with differences specific to windows
334
// x64 calling convention: arguments go into rcx, rdx and come in reverse
335
// order, shadow space allocation on stack
336
const uint8_t ResolverCode[] = {
337
// resolver_entry:
338
0x55, // 0x00: pushq %rbp
339
0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
340
0x50, // 0x04: pushq %rax
341
0x53, // 0x05: pushq %rbx
342
0x51, // 0x06: pushq %rcx
343
0x52, // 0x07: pushq %rdx
344
0x56, // 0x08: pushq %rsi
345
0x57, // 0x09: pushq %rdi
346
0x41, 0x50, // 0x0a: pushq %r8
347
0x41, 0x51, // 0x0c: pushq %r9
348
0x41, 0x52, // 0x0e: pushq %r10
349
0x41, 0x53, // 0x10: pushq %r11
350
0x41, 0x54, // 0x12: pushq %r12
351
0x41, 0x55, // 0x14: pushq %r13
352
0x41, 0x56, // 0x16: pushq %r14
353
0x41, 0x57, // 0x18: pushq %r15
354
0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
355
0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
356
357
0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx
358
// 0x28: JIT re-entry ctx addr.
359
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
360
361
0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8]
362
0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6
363
364
0x48, 0xb8, // 0x38: movabsq <REntry>, %rax
365
// 0x3a: JIT re-entry fn addr:
366
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
367
368
// 0x42: sub rsp, 0x20 (Allocate shadow space)
369
0x48, 0x83, 0xEC, 0x20,
370
0xff, 0xd0, // 0x46: callq *%rax
371
372
// 0x48: add rsp, 0x20 (Free shadow space)
373
0x48, 0x83, 0xC4, 0x20,
374
375
0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp)
376
0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp)
377
0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp
378
0x41, 0x5f, // 0x5C: popq %r15
379
0x41, 0x5e, // 0x5E: popq %r14
380
0x41, 0x5d, // 0x60: popq %r13
381
0x41, 0x5c, // 0x62: popq %r12
382
0x41, 0x5b, // 0x64: popq %r11
383
0x41, 0x5a, // 0x66: popq %r10
384
0x41, 0x59, // 0x68: popq %r9
385
0x41, 0x58, // 0x6a: popq %r8
386
0x5f, // 0x6c: popq %rdi
387
0x5e, // 0x6d: popq %rsi
388
0x5a, // 0x6e: popq %rdx
389
0x59, // 0x6f: popq %rcx
390
0x5b, // 0x70: popq %rbx
391
0x58, // 0x71: popq %rax
392
0x5d, // 0x72: popq %rbp
393
0xc3, // 0x73: retq
394
};
395
396
const unsigned ReentryFnAddrOffset = 0x3a;
397
const unsigned ReentryCtxAddrOffset = 0x28;
398
399
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
400
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
401
sizeof(uint64_t));
402
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
403
sizeof(uint64_t));
404
}
405
406
void OrcI386::writeResolverCode(char *ResolverWorkingMem,
407
ExecutorAddr ResolverTargetAddress,
408
ExecutorAddr ReentryFnAddr,
409
ExecutorAddr ReentryCtxAddr) {
410
411
assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range");
412
assert((ReentryCtxAddr.getValue() >> 32) == 0 &&
413
"ReentryCtxAddr out of range");
414
415
const uint8_t ResolverCode[] = {
416
// resolver_entry:
417
0x55, // 0x00: pushl %ebp
418
0x89, 0xe5, // 0x01: movl %esp, %ebp
419
0x54, // 0x03: pushl %esp
420
0x83, 0xe4, 0xf0, // 0x04: andl $-0x10, %esp
421
0x50, // 0x07: pushl %eax
422
0x53, // 0x08: pushl %ebx
423
0x51, // 0x09: pushl %ecx
424
0x52, // 0x0a: pushl %edx
425
0x56, // 0x0b: pushl %esi
426
0x57, // 0x0c: pushl %edi
427
0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl $0x218, %esp
428
0x0f, 0xae, 0x44, 0x24, 0x10, // 0x13: fxsave 0x10(%esp)
429
0x8b, 0x75, 0x04, // 0x18: movl 0x4(%ebp), %esi
430
0x83, 0xee, 0x05, // 0x1b: subl $0x5, %esi
431
0x89, 0x74, 0x24, 0x04, // 0x1e: movl %esi, 0x4(%esp)
432
0xc7, 0x04, 0x24, 0x00, 0x00, 0x00,
433
0x00, // 0x22: movl <cbmgr>, (%esp)
434
0xb8, 0x00, 0x00, 0x00, 0x00, // 0x29: movl <reentry>, %eax
435
0xff, 0xd0, // 0x2e: calll *%eax
436
0x89, 0x45, 0x04, // 0x30: movl %eax, 0x4(%ebp)
437
0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x33: fxrstor 0x10(%esp)
438
0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl $0x218, %esp
439
0x5f, // 0x3e: popl %edi
440
0x5e, // 0x3f: popl %esi
441
0x5a, // 0x40: popl %edx
442
0x59, // 0x41: popl %ecx
443
0x5b, // 0x42: popl %ebx
444
0x58, // 0x43: popl %eax
445
0x8b, 0x65, 0xfc, // 0x44: movl -0x4(%ebp), %esp
446
0x5d, // 0x48: popl %ebp
447
0xc3 // 0x49: retl
448
};
449
450
const unsigned ReentryFnAddrOffset = 0x2a;
451
const unsigned ReentryCtxAddrOffset = 0x25;
452
453
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
454
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
455
sizeof(uint32_t));
456
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
457
sizeof(uint32_t));
458
}
459
460
void OrcI386::writeTrampolines(char *TrampolineWorkingMem,
461
ExecutorAddr TrampolineBlockTargetAddress,
462
ExecutorAddr ResolverAddr,
463
unsigned NumTrampolines) {
464
assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
465
466
uint64_t CallRelImm = 0xF1C4C400000000e8;
467
uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5;
468
469
uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem);
470
for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize)
471
Trampolines[I] = CallRelImm | (ResolverRel << 8);
472
}
473
474
void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
475
ExecutorAddr StubsBlockTargetAddress,
476
ExecutorAddr PointersBlockTargetAddress,
477
unsigned NumStubs) {
478
assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
479
"StubsBlockTargetAddress is out of range");
480
assert((PointersBlockTargetAddress.getValue() >> 32) == 0 &&
481
"PointersBlockTargetAddress is out of range");
482
483
// Stub format is:
484
//
485
// .section __orc_stubs
486
// stub1:
487
// jmpq *ptr1
488
// .byte 0xC4 ; <- Invalid opcode padding.
489
// .byte 0xF1
490
// stub2:
491
// jmpq *ptr2
492
//
493
// ...
494
//
495
// .section __orc_ptrs
496
// ptr1:
497
// .quad 0x0
498
// ptr2:
499
// .quad 0x0
500
//
501
// ...
502
503
assert(stubAndPointerRangesOk<OrcI386>(
504
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
505
"PointersBlock is out of range");
506
507
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
508
uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
509
for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4)
510
Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16);
511
}
512
513
void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem,
514
ExecutorAddr ResolverTargetAddress,
515
ExecutorAddr ReentryFnAddr,
516
ExecutorAddr ReentryCtxAddr,
517
bool isBigEndian) {
518
519
const uint32_t ResolverCode[] = {
520
// resolver_entry:
521
0x27bdff98, // 0x00: addiu $sp,$sp,-104
522
0xafa20000, // 0x04: sw $v0,0($sp)
523
0xafa30004, // 0x08: sw $v1,4($sp)
524
0xafa40008, // 0x0c: sw $a0,8($sp)
525
0xafa5000c, // 0x10: sw $a1,12($sp)
526
0xafa60010, // 0x14: sw $a2,16($sp)
527
0xafa70014, // 0x18: sw $a3,20($sp)
528
0xafb00018, // 0x1c: sw $s0,24($sp)
529
0xafb1001c, // 0x20: sw $s1,28($sp)
530
0xafb20020, // 0x24: sw $s2,32($sp)
531
0xafb30024, // 0x28: sw $s3,36($sp)
532
0xafb40028, // 0x2c: sw $s4,40($sp)
533
0xafb5002c, // 0x30: sw $s5,44($sp)
534
0xafb60030, // 0x34: sw $s6,48($sp)
535
0xafb70034, // 0x38: sw $s7,52($sp)
536
0xafa80038, // 0x3c: sw $t0,56($sp)
537
0xafa9003c, // 0x40: sw $t1,60($sp)
538
0xafaa0040, // 0x44: sw $t2,64($sp)
539
0xafab0044, // 0x48: sw $t3,68($sp)
540
0xafac0048, // 0x4c: sw $t4,72($sp)
541
0xafad004c, // 0x50: sw $t5,76($sp)
542
0xafae0050, // 0x54: sw $t6,80($sp)
543
0xafaf0054, // 0x58: sw $t7,84($sp)
544
0xafb80058, // 0x5c: sw $t8,88($sp)
545
0xafb9005c, // 0x60: sw $t9,92($sp)
546
0xafbe0060, // 0x64: sw $fp,96($sp)
547
0xafbf0064, // 0x68: sw $ra,100($sp)
548
549
// JIT re-entry ctx addr.
550
0x00000000, // 0x6c: lui $a0,ctx
551
0x00000000, // 0x70: addiu $a0,$a0,ctx
552
553
0x03e02825, // 0x74: move $a1, $ra
554
0x24a5ffec, // 0x78: addiu $a1,$a1,-20
555
556
// JIT re-entry fn addr:
557
0x00000000, // 0x7c: lui $t9,reentry
558
0x00000000, // 0x80: addiu $t9,$t9,reentry
559
560
0x0320f809, // 0x84: jalr $t9
561
0x00000000, // 0x88: nop
562
0x8fbf0064, // 0x8c: lw $ra,100($sp)
563
0x8fbe0060, // 0x90: lw $fp,96($sp)
564
0x8fb9005c, // 0x94: lw $t9,92($sp)
565
0x8fb80058, // 0x98: lw $t8,88($sp)
566
0x8faf0054, // 0x9c: lw $t7,84($sp)
567
0x8fae0050, // 0xa0: lw $t6,80($sp)
568
0x8fad004c, // 0xa4: lw $t5,76($sp)
569
0x8fac0048, // 0xa8: lw $t4,72($sp)
570
0x8fab0044, // 0xac: lw $t3,68($sp)
571
0x8faa0040, // 0xb0: lw $t2,64($sp)
572
0x8fa9003c, // 0xb4: lw $t1,60($sp)
573
0x8fa80038, // 0xb8: lw $t0,56($sp)
574
0x8fb70034, // 0xbc: lw $s7,52($sp)
575
0x8fb60030, // 0xc0: lw $s6,48($sp)
576
0x8fb5002c, // 0xc4: lw $s5,44($sp)
577
0x8fb40028, // 0xc8: lw $s4,40($sp)
578
0x8fb30024, // 0xcc: lw $s3,36($sp)
579
0x8fb20020, // 0xd0: lw $s2,32($sp)
580
0x8fb1001c, // 0xd4: lw $s1,28($sp)
581
0x8fb00018, // 0xd8: lw $s0,24($sp)
582
0x8fa70014, // 0xdc: lw $a3,20($sp)
583
0x8fa60010, // 0xe0: lw $a2,16($sp)
584
0x8fa5000c, // 0xe4: lw $a1,12($sp)
585
0x8fa40008, // 0xe8: lw $a0,8($sp)
586
0x27bd0068, // 0xec: addiu $sp,$sp,104
587
0x0300f825, // 0xf0: move $ra, $t8
588
0x03200008, // 0xf4: jr $t9
589
0x00000000, // 0xf8: move $t9, $v0/v1
590
};
591
592
const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui
593
const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui
594
const unsigned Offsett = 0xf8;
595
596
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
597
598
// Depending on endian return value will be in v0 or v1.
599
uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825;
600
memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
601
602
uint32_t ReentryCtxLUi =
603
0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
604
uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
605
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
606
sizeof(ReentryCtxLUi));
607
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu,
608
sizeof(ReentryCtxADDiu));
609
610
uint32_t ReentryFnLUi =
611
0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
612
uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF);
613
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
614
sizeof(ReentryFnLUi));
615
memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu,
616
sizeof(ReentryFnADDiu));
617
}
618
619
void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
620
ExecutorAddr TrampolineBlockTargetAddress,
621
ExecutorAddr ResolverAddr,
622
unsigned NumTrampolines) {
623
624
assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
625
626
uint32_t *Trampolines =
627
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
628
uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
629
630
for (unsigned I = 0; I < NumTrampolines; ++I) {
631
// move $t8,$ra
632
// lui $t9,ResolverAddr
633
// addiu $t9,$t9,ResolverAddr
634
// jalr $t9
635
// nop
636
Trampolines[5 * I + 0] = 0x03e0c025;
637
Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF);
638
Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF);
639
Trampolines[5 * I + 3] = 0x0320f809;
640
Trampolines[5 * I + 4] = 0x00000000;
641
}
642
}
643
644
void OrcMips32_Base::writeIndirectStubsBlock(
645
char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
646
ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
647
assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
648
"InitialPtrVal is out of range");
649
650
// Stub format is:
651
//
652
// .section __orc_stubs
653
// stub1:
654
// lui $t9, ptr1
655
// lw $t9, %lo(ptr1)($t9)
656
// jr $t9
657
// stub2:
658
// lui $t9, ptr2
659
// lw $t9,%lo(ptr1)($t9)
660
// jr $t9
661
//
662
// ...
663
//
664
// .section __orc_ptrs
665
// ptr1:
666
// .word 0x0
667
// ptr2:
668
// .word 0x0
669
//
670
// i..
671
672
assert(stubAndPointerRangesOk<OrcMips32_Base>(
673
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
674
"PointersBlock is out of range");
675
676
// Populate the stubs page stubs and mark it executable.
677
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
678
uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
679
680
for (unsigned I = 0; I < NumStubs; ++I) {
681
uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
682
Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1
683
Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9)
684
Stub[4 * I + 2] = 0x03200008; // jr $t9
685
Stub[4 * I + 3] = 0x00000000; // nop
686
PtrAddr += 4;
687
}
688
}
689
690
void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
691
ExecutorAddr ResolverTargetAddress,
692
ExecutorAddr ReentryFnAddr,
693
ExecutorAddr ReentryCtxAddr) {
694
695
const uint32_t ResolverCode[] = {
696
//resolver_entry:
697
0x67bdff30, // 0x00: daddiu $sp,$sp,-208
698
0xffa20000, // 0x04: sd v0,0(sp)
699
0xffa30008, // 0x08: sd v1,8(sp)
700
0xffa40010, // 0x0c: sd a0,16(sp)
701
0xffa50018, // 0x10: sd a1,24(sp)
702
0xffa60020, // 0x14: sd a2,32(sp)
703
0xffa70028, // 0x18: sd a3,40(sp)
704
0xffa80030, // 0x1c: sd a4,48(sp)
705
0xffa90038, // 0x20: sd a5,56(sp)
706
0xffaa0040, // 0x24: sd a6,64(sp)
707
0xffab0048, // 0x28: sd a7,72(sp)
708
0xffac0050, // 0x2c: sd t0,80(sp)
709
0xffad0058, // 0x30: sd t1,88(sp)
710
0xffae0060, // 0x34: sd t2,96(sp)
711
0xffaf0068, // 0x38: sd t3,104(sp)
712
0xffb00070, // 0x3c: sd s0,112(sp)
713
0xffb10078, // 0x40: sd s1,120(sp)
714
0xffb20080, // 0x44: sd s2,128(sp)
715
0xffb30088, // 0x48: sd s3,136(sp)
716
0xffb40090, // 0x4c: sd s4,144(sp)
717
0xffb50098, // 0x50: sd s5,152(sp)
718
0xffb600a0, // 0x54: sd s6,160(sp)
719
0xffb700a8, // 0x58: sd s7,168(sp)
720
0xffb800b0, // 0x5c: sd t8,176(sp)
721
0xffb900b8, // 0x60: sd t9,184(sp)
722
0xffbe00c0, // 0x64: sd fp,192(sp)
723
0xffbf00c8, // 0x68: sd ra,200(sp)
724
725
// JIT re-entry ctx addr.
726
0x00000000, // 0x6c: lui $a0,heighest(ctx)
727
0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx)
728
0x00000000, // 0x74: dsll $a0,$a0,16
729
0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx)
730
0x00000000, // 0x7c: dsll $a0,$a0,16
731
0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx)
732
733
0x03e02825, // 0x84: move $a1, $ra
734
0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36
735
736
// JIT re-entry fn addr:
737
0x00000000, // 0x8c: lui $t9,reentry
738
0x00000000, // 0x90: daddiu $t9,$t9,reentry
739
0x00000000, // 0x94: dsll $t9,$t9,
740
0x00000000, // 0x98: daddiu $t9,$t9,
741
0x00000000, // 0x9c: dsll $t9,$t9,
742
0x00000000, // 0xa0: daddiu $t9,$t9,
743
0x0320f809, // 0xa4: jalr $t9
744
0x00000000, // 0xa8: nop
745
0xdfbf00c8, // 0xac: ld ra, 200(sp)
746
0xdfbe00c0, // 0xb0: ld fp, 192(sp)
747
0xdfb900b8, // 0xb4: ld t9, 184(sp)
748
0xdfb800b0, // 0xb8: ld t8, 176(sp)
749
0xdfb700a8, // 0xbc: ld s7, 168(sp)
750
0xdfb600a0, // 0xc0: ld s6, 160(sp)
751
0xdfb50098, // 0xc4: ld s5, 152(sp)
752
0xdfb40090, // 0xc8: ld s4, 144(sp)
753
0xdfb30088, // 0xcc: ld s3, 136(sp)
754
0xdfb20080, // 0xd0: ld s2, 128(sp)
755
0xdfb10078, // 0xd4: ld s1, 120(sp)
756
0xdfb00070, // 0xd8: ld s0, 112(sp)
757
0xdfaf0068, // 0xdc: ld t3, 104(sp)
758
0xdfae0060, // 0xe0: ld t2, 96(sp)
759
0xdfad0058, // 0xe4: ld t1, 88(sp)
760
0xdfac0050, // 0xe8: ld t0, 80(sp)
761
0xdfab0048, // 0xec: ld a7, 72(sp)
762
0xdfaa0040, // 0xf0: ld a6, 64(sp)
763
0xdfa90038, // 0xf4: ld a5, 56(sp)
764
0xdfa80030, // 0xf8: ld a4, 48(sp)
765
0xdfa70028, // 0xfc: ld a3, 40(sp)
766
0xdfa60020, // 0x100: ld a2, 32(sp)
767
0xdfa50018, // 0x104: ld a1, 24(sp)
768
0xdfa40010, // 0x108: ld a0, 16(sp)
769
0xdfa30008, // 0x10c: ld v1, 8(sp)
770
0x67bd00d0, // 0x110: daddiu $sp,$sp,208
771
0x0300f825, // 0x114: move $ra, $t8
772
0x03200008, // 0x118: jr $t9
773
0x0040c825, // 0x11c: move $t9, $v0
774
};
775
776
const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui
777
const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui
778
779
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
780
781
uint32_t ReentryCtxLUi =
782
0x3c040000 |
783
(((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
784
uint32_t ReentryCtxDADDiu =
785
0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
786
uint32_t ReentryCtxDSLL = 0x00042438;
787
uint32_t ReentryCtxDADDiu2 =
788
0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF));
789
uint32_t ReentryCtxDSLL2 = 0x00042438;
790
uint32_t ReentryCtxDADDiu3 =
791
0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
792
793
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
794
sizeof(ReentryCtxLUi));
795
memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu,
796
sizeof(ReentryCtxDADDiu));
797
memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL,
798
sizeof(ReentryCtxDSLL));
799
memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2,
800
sizeof(ReentryCtxDADDiu2));
801
memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2,
802
sizeof(ReentryCtxDSLL2));
803
memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3,
804
sizeof(ReentryCtxDADDiu3));
805
806
uint32_t ReentryFnLUi =
807
0x3c190000 |
808
(((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
809
810
uint32_t ReentryFnDADDiu =
811
0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
812
813
uint32_t ReentryFnDSLL = 0x0019cc38;
814
815
uint32_t ReentryFnDADDiu2 =
816
0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
817
818
uint32_t ReentryFnDSLL2 = 0x0019cc38;
819
820
uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF);
821
822
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
823
sizeof(ReentryFnLUi));
824
memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu,
825
sizeof(ReentryFnDADDiu));
826
memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL,
827
sizeof(ReentryFnDSLL));
828
memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2,
829
sizeof(ReentryFnDADDiu2));
830
memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2,
831
sizeof(ReentryFnDSLL2));
832
memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3,
833
sizeof(ReentryFnDADDiu3));
834
}
835
836
void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem,
837
ExecutorAddr TrampolineBlockTargetAddress,
838
ExecutorAddr ResolverAddr,
839
unsigned NumTrampolines) {
840
841
uint32_t *Trampolines =
842
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
843
844
uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48);
845
uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32);
846
uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
847
848
for (unsigned I = 0; I < NumTrampolines; ++I) {
849
Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra
850
Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr
851
Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr)
852
Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16
853
Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
854
Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16
855
Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() &
856
0xFFFF); // daddiu $t9,$t9,%lo(ptr)
857
Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9
858
Trampolines[10 * I + 8] = 0x00000000; // nop
859
Trampolines[10 * I + 9] = 0x00000000; // nop
860
}
861
}
862
863
void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
864
ExecutorAddr StubsBlockTargetAddress,
865
ExecutorAddr PointersBlockTargetAddress,
866
unsigned NumStubs) {
867
// Stub format is:
868
//
869
// .section __orc_stubs
870
// stub1:
871
// lui $t9,ptr1
872
// dsll $t9,$t9,16
873
// daddiu $t9,$t9,%hi(ptr)
874
// dsll $t9,$t9,16
875
// ld $t9,%lo(ptr)
876
// jr $t9
877
// stub2:
878
// lui $t9,ptr1
879
// dsll $t9,$t9,16
880
// daddiu $t9,$t9,%hi(ptr)
881
// dsll $t9,$t9,16
882
// ld $t9,%lo(ptr)
883
// jr $t9
884
//
885
// ...
886
//
887
// .section __orc_ptrs
888
// ptr1:
889
// .dword 0x0
890
// ptr2:
891
// .dword 0x0
892
//
893
// ...
894
895
assert(stubAndPointerRangesOk<OrcMips64>(
896
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
897
"PointersBlock is out of range");
898
899
// Populate the stubs page stubs and mark it executable.
900
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
901
uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
902
903
for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
904
uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
905
uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32);
906
uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16);
907
Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1
908
Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr)
909
Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16
910
Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
911
Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16
912
Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr)
913
Stub[8 * I + 6] = 0x03200008; // jr $t9
914
Stub[8 * I + 7] = 0x00000000; // nop
915
}
916
}
917
918
void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
919
ExecutorAddr ResolverTargetAddress,
920
ExecutorAddr ReentryFnAddr,
921
ExecutorAddr ReentryCtxAddr) {
922
923
const uint32_t ResolverCode[] = {
924
0xef810113, // 0x00: addi sp,sp,-264
925
0x00813023, // 0x04: sd s0,0(sp)
926
0x00913423, // 0x08: sd s1,8(sp)
927
0x01213823, // 0x0c: sd s2,16(sp)
928
0x01313c23, // 0x10: sd s3,24(sp)
929
0x03413023, // 0x14: sd s4,32(sp)
930
0x03513423, // 0x18: sd s5,40(sp)
931
0x03613823, // 0x1c: sd s6,48(sp)
932
0x03713c23, // 0x20: sd s7,56(sp)
933
0x05813023, // 0x24: sd s8,64(sp)
934
0x05913423, // 0x28: sd s9,72(sp)
935
0x05a13823, // 0x2c: sd s10,80(sp)
936
0x05b13c23, // 0x30: sd s11,88(sp)
937
0x06113023, // 0x34: sd ra,96(sp)
938
0x06a13423, // 0x38: sd a0,104(sp)
939
0x06b13823, // 0x3c: sd a1,112(sp)
940
0x06c13c23, // 0x40: sd a2,120(sp)
941
0x08d13023, // 0x44: sd a3,128(sp)
942
0x08e13423, // 0x48: sd a4,136(sp)
943
0x08f13823, // 0x4c: sd a5,144(sp)
944
0x09013c23, // 0x50: sd a6,152(sp)
945
0x0b113023, // 0x54: sd a7,160(sp)
946
0x0a813427, // 0x58: fsd fs0,168(sp)
947
0x0a913827, // 0x5c: fsd fs1,176(sp)
948
0x0b213c27, // 0x60: fsd fs2,184(sp)
949
0x0d313027, // 0x64: fsd fs3,192(sp)
950
0x0d413427, // 0x68: fsd fs4,200(sp)
951
0x0d513827, // 0x6c: fsd fs5,208(sp)
952
0x0d613c27, // 0x70: fsd fs6,216(sp)
953
0x0f713027, // 0x74: fsd fs7,224(sp)
954
0x0f813427, // 0x78: fsd fs8,232(sp)
955
0x0f913827, // 0x7c: fsd fs9,240(sp)
956
0x0fa13c27, // 0x80: fsd fs10,248(sp)
957
0x11b13027, // 0x84: fsd fs11,256(sp)
958
0x00000517, // 0x88: auipc a0,0x0
959
0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
960
0x00030593, // 0x90: mv a1,t1
961
0xff458593, // 0x94: addi a1,a1,-12
962
0x00000617, // 0x98: auipc a2,0x0
963
0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
964
0x000600e7, // 0xa0: jalr a2
965
0x00050293, // 0xa4: mv t0,a0
966
0x00013403, // 0xa8: ld s0,0(sp)
967
0x00813483, // 0xac: ld s1,8(sp)
968
0x01013903, // 0xb0: ld s2,16(sp)
969
0x01813983, // 0xb4: ld s3,24(sp)
970
0x02013a03, // 0xb8: ld s4,32(sp)
971
0x02813a83, // 0xbc: ld s5,40(sp)
972
0x03013b03, // 0xc0: ld s6,48(sp)
973
0x03813b83, // 0xc4: ld s7,56(sp)
974
0x04013c03, // 0xc8: ld s8,64(sp)
975
0x04813c83, // 0xcc: ld s9,72(sp)
976
0x05013d03, // 0xd0: ld s10,80(sp)
977
0x05813d83, // 0xd4: ld s11,88(sp)
978
0x06013083, // 0xd8: ld ra,96(sp)
979
0x06813503, // 0xdc: ld a0,104(sp)
980
0x07013583, // 0xe0: ld a1,112(sp)
981
0x07813603, // 0xe4: ld a2,120(sp)
982
0x08013683, // 0xe8: ld a3,128(sp)
983
0x08813703, // 0xec: ld a4,136(sp)
984
0x09013783, // 0xf0: ld a5,144(sp)
985
0x09813803, // 0xf4: ld a6,152(sp)
986
0x0a013883, // 0xf8: ld a7,160(sp)
987
0x0a813407, // 0xfc: fld fs0,168(sp)
988
0x0b013487, // 0x100: fld fs1,176(sp)
989
0x0b813907, // 0x104: fld fs2,184(sp)
990
0x0c013987, // 0x108: fld fs3,192(sp)
991
0x0c813a07, // 0x10c: fld fs4,200(sp)
992
0x0d013a87, // 0x110: fld fs5,208(sp)
993
0x0d813b07, // 0x114: fld fs6,216(sp)
994
0x0e013b87, // 0x118: fld fs7,224(sp)
995
0x0e813c07, // 0x11c: fld fs8,232(sp)
996
0x0f013c87, // 0x120: fld fs9,240(sp)
997
0x0f813d07, // 0x124: fld fs10,248(sp)
998
0x10013d87, // 0x128: fld fs11,256(sp)
999
0x10810113, // 0x12c: addi sp,sp,264
1000
0x00028067, // 0x130: jr t0
1001
0x12345678, // 0x134: padding to align at 8 byte
1002
0x12345678, // 0x138: Lreentry_ctx_ptr:
1003
0xdeadbeef, // 0x13c: .quad 0
1004
0x98765432, // 0x140: Lreentry_fn_ptr:
1005
0xcafef00d // 0x144: .quad 0
1006
};
1007
1008
const unsigned ReentryCtxAddrOffset = 0x138;
1009
const unsigned ReentryFnAddrOffset = 0x140;
1010
1011
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1012
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1013
sizeof(uint64_t));
1014
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1015
sizeof(uint64_t));
1016
}
1017
1018
void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
1019
ExecutorAddr TrampolineBlockTargetAddress,
1020
ExecutorAddr ResolverAddr,
1021
unsigned NumTrampolines) {
1022
1023
unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1024
1025
memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1026
sizeof(uint64_t));
1027
1028
uint32_t *Trampolines =
1029
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1030
for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1031
uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
1032
uint32_t Lo12 = OffsetToPtr - Hi20;
1033
Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
1034
Trampolines[4 * I + 1] =
1035
0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
1036
Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0
1037
Trampolines[4 * I + 3] = 0xdeadface; // padding
1038
}
1039
}
1040
1041
void OrcRiscv64::writeIndirectStubsBlock(
1042
char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1043
ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1044
// Stub format is:
1045
//
1046
// .section __orc_stubs
1047
// stub1:
1048
// auipc t0, %hi(ptr1) ; PC-rel load of ptr1
1049
// ld t0, %lo(t0)
1050
// jr t0 ; Jump to resolver
1051
// .quad 0 ; Pad to 16 bytes
1052
// stub2:
1053
// auipc t0, %hi(ptr1) ; PC-rel load of ptr1
1054
// ld t0, %lo(t0)
1055
// jr t0 ; Jump to resolver
1056
// .quad 0
1057
//
1058
// ...
1059
//
1060
// .section __orc_ptrs
1061
// ptr1:
1062
// .quad 0x0
1063
// ptr2:
1064
// .quad 0x0
1065
//
1066
// ...
1067
1068
assert(stubAndPointerRangesOk<OrcRiscv64>(
1069
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1070
"PointersBlock is out of range");
1071
1072
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1073
1074
for (unsigned I = 0; I < NumStubs; ++I) {
1075
uint64_t PtrDisplacement =
1076
PointersBlockTargetAddress - StubsBlockTargetAddress;
1077
uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
1078
uint32_t Lo12 = PtrDisplacement - Hi20;
1079
Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
1080
Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
1081
Stub[4 * I + 2] = 0x00028067; // jr t0
1082
Stub[4 * I + 3] = 0xfeedbeef; // padding
1083
PointersBlockTargetAddress += PointerSize;
1084
StubsBlockTargetAddress += StubSize;
1085
}
1086
}
1087
1088
void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem,
1089
ExecutorAddr ResolverTargetAddress,
1090
ExecutorAddr ReentryFnAddr,
1091
ExecutorAddr ReentryCtxAddr) {
1092
1093
LLVM_DEBUG({
1094
dbgs() << "Writing resolver code to "
1095
<< formatv("{0:x16}", ResolverTargetAddress) << "\n";
1096
});
1097
1098
const uint32_t ResolverCode[] = {
1099
0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78)
1100
0x29c00061, // 0x4: st.d $ra, $sp, 0
1101
0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8)
1102
0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10)
1103
0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18)
1104
0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20)
1105
0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28)
1106
0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30)
1107
0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38)
1108
0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40)
1109
0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48)
1110
0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50)
1111
0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58)
1112
0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60)
1113
0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68)
1114
0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70)
1115
0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78)
1116
0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80)
1117
0x1c000004, // 0x48: pcaddu12i $a0, 0
1118
0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70)
1119
0x001501a5, // 0x50: move $a1, $t1
1120
0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4)
1121
0x1c000006, // 0x58: pcaddu12i $a2, 0
1122
0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68)
1123
0x4c0000c1, // 0x60: jirl $ra, $a2, 0
1124
0x0015008c, // 0x64: move $t0, $a0
1125
0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80)
1126
0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78)
1127
0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70)
1128
0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68)
1129
0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60)
1130
0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58)
1131
0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50)
1132
0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48)
1133
0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40)
1134
0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38)
1135
0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30)
1136
0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28)
1137
0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20)
1138
0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18)
1139
0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10)
1140
0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8)
1141
0x28c00061, // 0xa8: ld.d $ra, $sp, 0
1142
0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88)
1143
0x4c000180, // 0xb0: jr $t0
1144
0x00000000, // 0xb4: padding to align at 8 bytes
1145
0x01234567, // 0xb8: Lreentry_ctx_ptr:
1146
0xdeedbeef, // 0xbc: .dword 0
1147
0x98765432, // 0xc0: Lreentry_fn_ptr:
1148
0xcafef00d, // 0xc4: .dword 0
1149
};
1150
1151
const unsigned ReentryCtxAddrOffset = 0xb8;
1152
const unsigned ReentryFnAddrOffset = 0xc0;
1153
1154
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1155
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1156
sizeof(uint64_t));
1157
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1158
sizeof(uint64_t));
1159
}
1160
1161
void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
1162
ExecutorAddr TrampolineBlockTargetAddress,
1163
ExecutorAddr ResolverAddr,
1164
unsigned NumTrampolines) {
1165
1166
LLVM_DEBUG({
1167
dbgs() << "Writing trampoline code to "
1168
<< formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n";
1169
});
1170
1171
unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1172
1173
memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1174
sizeof(uint64_t));
1175
1176
uint32_t *Trampolines =
1177
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1178
for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1179
uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000;
1180
uint32_t Lo12 = OffsetToPtr - Hi20;
1181
Trampolines[4 * I + 0] =
1182
0x1c00000c |
1183
(((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1184
Trampolines[4 * I + 1] =
1185
0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1186
Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0
1187
Trampolines[4 * I + 3] = 0x0; // padding
1188
}
1189
}
1190
1191
void OrcLoongArch64::writeIndirectStubsBlock(
1192
char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1193
ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1194
// Stub format is:
1195
//
1196
// .section __orc_stubs
1197
// stub1:
1198
// pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1
1199
// ld.d $t0, $t0, %pc_lo12(ptr1)
1200
// jr $t0 ; Jump to resolver
1201
// .dword 0 ; Pad to 16 bytes
1202
// stub2:
1203
// pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2
1204
// ld.d $t0, $t0, %pc_lo12(ptr2)
1205
// jr $t0 ; Jump to resolver
1206
// .dword 0 ; Pad to 16 bytes
1207
// ...
1208
//
1209
// .section __orc_ptrs
1210
// ptr1:
1211
// .dword 0x0
1212
// ptr2:
1213
// .dword 0x0
1214
// ...
1215
LLVM_DEBUG({
1216
dbgs() << "Writing stubs code to "
1217
<< formatv("{0:x16}", StubsBlockTargetAddress) << "\n";
1218
});
1219
assert(stubAndPointerRangesOk<OrcLoongArch64>(
1220
StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1221
"PointersBlock is out of range");
1222
1223
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1224
1225
for (unsigned I = 0; I < NumStubs; ++I) {
1226
uint64_t PtrDisplacement =
1227
PointersBlockTargetAddress - StubsBlockTargetAddress;
1228
uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000;
1229
uint32_t Lo12 = PtrDisplacement - Hi20;
1230
Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff)
1231
<< 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1232
Stub[4 * I + 1] =
1233
0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1234
Stub[4 * I + 2] = 0x4c000180; // jr $t0
1235
Stub[4 * I + 3] = 0x0; // padding
1236
PointersBlockTargetAddress += PointerSize;
1237
StubsBlockTargetAddress += StubSize;
1238
}
1239
}
1240
1241
} // End namespace orc.
1242
} // End namespace llvm.
1243
1244