Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/x/codegen/AllocPrefetchSnippet.cpp
6004 views
1
/*******************************************************************************
 * Copyright (c) 2000, 2021 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/
22
23
#include "x/codegen/AllocPrefetchSnippet.hpp"
24
25
#include "codegen/Relocation.hpp"
26
#include "env/CompilerEnv.hpp"
27
#include "env/jittypes.h"
28
#include "il/Node.hpp"
29
#include "il/Node_inlines.hpp"
30
#include "runtime/CodeRuntime.hpp"
31
#include "runtime/J9CodeCache.hpp"
32
#include "env/VMJ9.h"
33
34
uint8_t *TR::X86AllocPrefetchSnippet::emitSnippetBody()
35
{
36
TR::Compilation *comp = cg()->comp();
37
if (comp->getOptions()->realTimeGC())
38
return 0;
39
40
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
41
42
uint8_t *buffer = cg()->getBinaryBufferCursor();
43
getSnippetLabel()->setCodeLocation(buffer);
44
45
TR::SymbolReference *helperSymRef = NULL;
46
47
bool useSharedCodeCacheSnippet = fej9->supportsCodeCacheSnippets();
48
49
bool prefetchThunkGenerated = (fej9->getAllocationPrefetchCodeSnippetAddress(comp) != 0);
50
#ifdef J9VM_GC_NON_ZERO_TLH
51
if (isNonZeroTLH())
52
{
53
prefetchThunkGenerated = (fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp) != 0);
54
}
55
#endif
56
57
TR_ASSERT(prefetchThunkGenerated, "Invalid prefetch snippet.");
58
59
// CALL [32-bit relative]
60
//
61
*buffer++ = 0xe8;
62
63
int32_t disp32;
64
uintptr_t helperAddress = 0;
65
66
if (useSharedCodeCacheSnippet)
67
{
68
#ifdef J9VM_GC_NON_ZERO_TLH
69
if(!isNonZeroTLH())
70
{
71
helperAddress = (uintptr_t)(fej9->getAllocationPrefetchCodeSnippetAddress(comp));
72
}
73
else
74
{
75
helperAddress = (uintptr_t)(fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp));
76
}
77
#else
78
helperAddress = (uintptr_t)(fej9->getAllocationPrefetchCodeSnippetAddress(comp));
79
#endif
80
}
81
82
if (helperAddress && IS_32BIT_RIP(helperAddress, (buffer + 4) ) )
83
{
84
disp32 = (int32_t)(helperAddress - (uintptr_t)(buffer+4));
85
}
86
else
87
{
88
TR_RuntimeHelper helper = (comp->getOption(TR_EnableNewX86PrefetchTLH)) ? TR_X86newPrefetchTLH : TR_X86prefetchTLH;
89
helperSymRef = cg()->symRefTab()->findOrCreateRuntimeHelper(helper);
90
disp32 = cg()->branchDisplacementToHelperOrTrampoline(buffer+4, helperSymRef);
91
if (fej9->needRelocationsForHelpers())
92
{
93
cg()->addExternalRelocation(new (cg()->trHeapMemory()) TR::ExternalRelocation(buffer,
94
(uint8_t *)helperSymRef,
95
TR_HelperAddress,
96
cg()),
97
__FILE__, __LINE__, getNode());
98
}
99
}
100
101
*(int32_t *)buffer = disp32;
102
buffer += 4;
103
104
return genRestartJump(buffer);
105
}
106
107
uint32_t TR::X86AllocPrefetchSnippet::getLength(int32_t estimatedSnippetStart)
108
{
109
return 10 + estimateRestartJumpLength(estimatedSnippetStart + 2);
110
}
111
112
/**
 * Builds the prefetch geometry from the TLH prefetch options.
 *
 * Any option that is not positive is first overwritten in TR::Options with
 * its default value, so this call has the side effect of normalising those
 * global options. The defaults were experimentally determined to be optimal
 * for Woodcrest hardware for small applications.
 *
 * Note that _TLHPrefetchSize is defaulted here as well but is not part of
 * the returned geometry.
 *
 * @return geometry built from line size, line count, staggered line count,
 *         boundary line count and TLH-end line count (in that order)
 */
TR_X86AllocPrefetchGeometry
TR::X86AllocPrefetchSnippet::generatePrefetchGeometry()
   {
   if (TR::Options::_TLHPrefetchSize <= 0)
      {
      TR::Options::_TLHPrefetchSize = 384;
      }

   if (TR::Options::_TLHPrefetchLineSize <= 0)
      {
      TR::Options::_TLHPrefetchLineSize = 64;
      }

   if (TR::Options::_TLHPrefetchLineCount <= 0)
      {
      TR::Options::_TLHPrefetchLineCount = 8;
      }

   if (TR::Options::_TLHPrefetchStaggeredLineCount <= 0)
      {
      TR::Options::_TLHPrefetchStaggeredLineCount = 4;
      }

   if (TR::Options::_TLHPrefetchBoundaryLineCount <= 0)
      {
      TR::Options::_TLHPrefetchBoundaryLineCount = 6;
      }

   if (TR::Options::_TLHPrefetchTLHEndLineCount <= 0)
      {
      TR::Options::_TLHPrefetchTLHEndLineCount = 6;
      }

   return TR_X86AllocPrefetchGeometry(TR::Options::_TLHPrefetchLineSize,
                                      TR::Options::_TLHPrefetchLineCount,
                                      TR::Options::_TLHPrefetchStaggeredLineCount,
                                      TR::Options::_TLHPrefetchBoundaryLineCount,
                                      TR::Options::_TLHPrefetchTLHEndLineCount);
   }
145
146
template <TR::HeapTypes::Type> struct vmThreadHeapOffsets;
147
148
template <> struct vmThreadHeapOffsets<TR::HeapTypes::ZeroedHeap>
149
{
150
static const int32_t offsetOfHeapAlloc = offsetof(J9VMThread, heapAlloc);
151
static const int32_t offsetOfHeapTop = offsetof(J9VMThread, heapTop);
152
static const int32_t offsetOfTLHPrefetchCount = offsetof(J9VMThread, tlhPrefetchFTA);
153
};
154
155
template <> struct vmThreadHeapOffsets<TR::HeapTypes::NonZeroedHeap>
156
{
157
static const int32_t offsetOfHeapAlloc = offsetof(J9VMThread, nonZeroHeapAlloc);
158
static const int32_t offsetOfHeapTop = offsetof(J9VMThread, nonZeroHeapTop);
159
static const int32_t offsetOfTLHPrefetchCount = offsetof(J9VMThread, nonZeroTlhPrefetchFTA);
160
};
161
162
template <TR::HeapTypes::Type HEAP_TYPE>
163
class HeapProperties
164
{
165
private:
166
167
typedef vmThreadHeapOffsets<HEAP_TYPE> HeapOffsets;
168
169
public:
170
171
static int32_t offsetOfHeapAlloc() { return HeapOffsets::offsetOfHeapAlloc; }
172
static int32_t offsetOfHeapTop() { return HeapOffsets::offsetOfHeapTop; }
173
static int32_t offsetOfTLHPrefetchCount() { return HeapOffsets::offsetOfTLHPrefetchCount; }
174
static bool needWideDisplacementForHeapAlloc() { return (offsetOfHeapAlloc() > 127 || offsetOfHeapAlloc() < -128); }
175
static bool needWideDisplacementForHeapTop() { return (offsetOfHeapTop() > 127 || offsetOfHeapTop() < -128); }
176
static bool needWideDisplacementForTLHPrefetchCount() { return (offsetOfTLHPrefetchCount() > 127 || offsetOfTLHPrefetchCount() < -128); }
177
};
178
179
template <TR::HeapTypes::Type HEAP_TYPE, bool is64Bit>
180
uint8_t* TR::X86AllocPrefetchSnippet::emitSharedBody(uint8_t* prefetchSnippetBuffer, TR::Compilation* comp)
181
{
182
183
typedef HeapProperties<HEAP_TYPE> HeapTraits;
184
185
static char * printCodeCacheSnippetAddress = feGetEnv("TR_printCodeCacheSnippetAddress");
186
if (printCodeCacheSnippetAddress)
187
{
188
fprintf(stdout, "%s Allocation snippet is at address %p, size=%d\n", TR::HeapTypes::getPrefix(HEAP_TYPE), prefetchSnippetBuffer, sizeOfSharedBody<HEAP_TYPE, is64Bit>());
189
fflush(stdout);
190
}
191
192
const TR_X86AllocPrefetchGeometry &prefetchGeometry = generatePrefetchGeometry();
193
194
int32_t lineSize = prefetchGeometry.getPrefetchLineSize();
195
int32_t numLines = prefetchGeometry.getPrefetchLineCount();
196
int32_t staggerLines = prefetchGeometry.getPrefetchStaggeredLineCount();
197
int32_t boundaryLines = prefetchGeometry.getPrefetchBoundaryLineCount();
198
199
// PUSH rcx
200
//
201
*prefetchSnippetBuffer++ = 0x51;
202
203
// MOV rcx, qword ptr [rbp + heapAlloc]
204
//
205
if (is64Bit)
206
{
207
// REX
208
//
209
*prefetchSnippetBuffer++ = 0x48;
210
}
211
212
prefetchSnippetBuffer[0] = 0x8B;
213
214
if (HeapTraits::needWideDisplacementForHeapAlloc())
215
{
216
prefetchSnippetBuffer[1] = 0x8d;
217
prefetchSnippetBuffer += 2;
218
*((int32_t *)prefetchSnippetBuffer) = HeapTraits::offsetOfHeapAlloc();
219
prefetchSnippetBuffer += 4;
220
}
221
else
222
{
223
prefetchSnippetBuffer[1] = 0x4d;
224
prefetchSnippetBuffer[2] = (uint8_t) HeapTraits::offsetOfHeapAlloc();
225
prefetchSnippetBuffer += 3;
226
}
227
228
// TR::InstOpCode::PREFETCHNTA [rcx + distance]
229
// TR::InstOpCode::PREFETCHNTA [rcx + distance + lineSize]
230
// ...
231
// TR::InstOpCode::PREFETCHNTA [rcx + distance + n*lineSize]
232
//
233
for (int32_t lineOffset = 0; lineOffset < numLines; ++lineOffset)
234
{
235
prefetchSnippetBuffer[0] = 0x0F;
236
if (comp->target().cpu.is(OMR_PROCESSOR_X86_AMDFAMILY15H))
237
prefetchSnippetBuffer[1] = 0x0D;
238
else
239
prefetchSnippetBuffer[1] = 0x18;
240
prefetchSnippetBuffer[2] = 0x81;
241
prefetchSnippetBuffer += 3;
242
*(int32_t *)prefetchSnippetBuffer = (staggerLines + lineOffset) * lineSize;
243
prefetchSnippetBuffer += 4;
244
}
245
246
// MOV dword ptr [rbp + TLH_PREFETCH_COUNT], "size"
247
//
248
*prefetchSnippetBuffer++ = 0xC7;
249
250
if (HeapTraits::needWideDisplacementForTLHPrefetchCount())
251
{
252
*prefetchSnippetBuffer++ = 0x85;
253
*(int32_t *)prefetchSnippetBuffer = HeapTraits::offsetOfTLHPrefetchCount();
254
prefetchSnippetBuffer += 4;
255
}
256
else
257
{
258
*prefetchSnippetBuffer++ = 0x45;
259
*prefetchSnippetBuffer++ = (uint8_t)HeapTraits::offsetOfTLHPrefetchCount();
260
}
261
262
*(uint32_t *)prefetchSnippetBuffer = (uint32_t)(boundaryLines*lineSize);
263
prefetchSnippetBuffer += 4;
264
265
// POP rcx
266
//
267
*prefetchSnippetBuffer++ = 0x59;
268
269
// TR::InstOpCode::RET
270
//
271
*prefetchSnippetBuffer++ = 0xC3;
272
273
return prefetchSnippetBuffer;
274
}
275
276
template <TR::HeapTypes::Type HEAP_TYPE, bool is64Bit>
277
int32_t TR::X86AllocPrefetchSnippet::sizeOfSharedBody()
278
{
279
typedef HeapProperties<HEAP_TYPE> HeapTraits;
280
281
const TR_X86AllocPrefetchGeometry &prefetchGeometry = generatePrefetchGeometry();
282
283
int32_t prefetchSnippetSize = (is64Bit ? 14 : 13) + prefetchGeometry.getPrefetchLineCount() * 7;
284
285
if (HeapTraits::needWideDisplacementForHeapAlloc())
286
{
287
prefetchSnippetSize += 3;
288
}
289
290
291
if (HeapTraits::needWideDisplacementForTLHPrefetchCount())
292
{
293
prefetchSnippetSize += 3;
294
}
295
296
/*
297
* TODO: Refactor the alignment value to use a common definition either from the code cache or from some form of target query.
298
*/
299
int32_t alignedSize = TR::alignAllocationSize<32>(prefetchSnippetSize);
300
301
return alignedSize;
302
}
303
304
/**
 * Returns the total size of the pre-loaded code: the aligned size of the
 * zeroed-heap prefetch routine plus that of the non-zeroed-heap one. The
 * 64-bit or 32-bit variant is selected at build time via TR_TARGET_64BIT.
 *
 * @return combined size, in bytes, of both shared prefetch routines
 */
uint32_t TR::getCCPreLoadedCodeSize()
   {
#if defined(TR_TARGET_64BIT)
   static const bool targetIs64Bit = true;
#else
   static const bool targetIs64Bit = false;
#endif

   uint32_t zeroedBodySize =
      TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::ZeroedHeap, targetIs64Bit>();
   uint32_t nonZeroedBodySize =
      TR::X86AllocPrefetchSnippet::sizeOfSharedBody<TR::HeapTypes::NonZeroedHeap, targetIs64Bit>();

   return zeroedBodySize + nonZeroedBodySize;
   }
315
316
void TR::createCCPreLoadedCode(uint8_t *CCPreLoadedCodeBase, uint8_t *CCPreLoadedCodeTop, void ** CCPreLoadedCodeTable, TR::CodeGenerator *cg)
317
{
318
TR::Compilation *comp = cg->comp();
319
uint8_t *cursor = CCPreLoadedCodeBase;
320
321
CCPreLoadedCodeTable[TR_CCPreLoadedCode::TR_AllocPrefetch] = static_cast<void *>(cursor);
322
if (comp->target().is64Bit())
323
cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::ZeroedHeap, true>(cursor, comp);
324
else
325
cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::ZeroedHeap, false>(cursor, comp);
326
327
cursor = static_cast<uint8_t *>( TR::alignAllocation<32>(cursor) );
328
329
CCPreLoadedCodeTable[TR_CCPreLoadedCode::TR_NonZeroAllocPrefetch] = static_cast<void *>(cursor);
330
if (comp->target().is64Bit())
331
cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::NonZeroedHeap, true>(cursor, comp);
332
else
333
cursor = TR::X86AllocPrefetchSnippet::emitSharedBody<TR::HeapTypes::NonZeroedHeap, false>(cursor, comp);
334
335
cursor = static_cast<uint8_t *>( TR::alignAllocation<32>(cursor) );
336
337
TR_ASSERT(cursor == CCPreLoadedCodeTop, "The expected and actual sizes of the emitted code differ. cursor = %p, CCPreLoadedCodeTop = %p", cursor, CCPreLoadedCodeTop);
338
}
339
340