Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/p/codegen/J9PPCSnippet.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2020 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "p/codegen/J9PPCSnippet.hpp"
24
25
#include <stdint.h>
26
#include "j9.h"
27
#include "thrdsup.h"
28
#include "thrtypes.h"
29
#include "codegen/CodeGenerator.hpp"
30
#include "codegen/Machine.hpp"
31
#include "codegen/Relocation.hpp"
32
#include "codegen/TreeEvaluator.hpp"
33
#include "codegen/SnippetGCMap.hpp"
34
#include "env/CompilerEnv.hpp"
35
#include "env/IO.hpp"
36
#include "env/jittypes.h"
37
#include "env/VMJ9.h"
38
#include "il/DataTypes.hpp"
39
#include "il/LabelSymbol.hpp"
40
#include "il/Node.hpp"
41
#include "il/Node_inlines.hpp"
42
#include "p/codegen/PPCEvaluator.hpp"
43
#include "p/codegen/PPCInstruction.hpp"
44
#include "p/codegen/GenerateInstructions.hpp"
45
#include "runtime/CodeCache.hpp"
46
#include "runtime/CodeCacheManager.hpp"
47
48
// Cache line size used when laying out the prefetch helpers below.
// When TR_HOST_POWER is defined the function is only declared here (extern);
// otherwise a fixed 32-byte fallback is compiled in.
#if !defined(TR_HOST_POWER)
uint32_t getPPCCacheLineSize()
   {
   const uint32_t fallbackLineSize = 32;
   return fallbackLineSize;
   }
#else
extern uint32_t getPPCCacheLineSize();
#endif
56
57
TR::PPCReadMonitorSnippet::PPCReadMonitorSnippet(
      TR::CodeGenerator *codeGen,
      TR::Node *monitorEnterNode,
      TR::Node *monitorExitNode,
      TR::LabelSymbol *recurCheckLabel,
      TR::LabelSymbol *monExitCallLabel,
      TR::LabelSymbol *restartLabel,
      TR::InstOpCode::Mnemonic loadOpCode,
      int32_t loadOffset,
      TR::Register *objectClassReg)
   // The base helper-call snippet is built around the monitor *exit* node and
   // its helper; the monitor *enter* helper is remembered separately.
   : TR::PPCHelperCallSnippet(codeGen, monitorExitNode, monExitCallLabel, monitorExitNode->getSymbolReference(), restartLabel),
     _monitorEnterHelper(monitorEnterNode->getSymbolReference()),
     _recurCheckLabel(recurCheckLabel),
     _loadOpCode(loadOpCode),
     _loadOffset(loadOffset),
     _objectClassReg(objectClassReg)
   {
   // The recursion-check label is the entry point of this snippet
   recurCheckLabel->setSnippet(this);

   // Helper call, preserves all registers
   //
   gcMap().setGCRegisterMask(0xFFFFFFFF);
   }
79
80
uint8_t *TR::PPCReadMonitorSnippet::emitSnippetBody()
81
{
82
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg()->fe());
83
84
85
// The 32-bit code for the snippet looks like:
86
// recurCheckLabel:
87
// rlwinm monitorReg, monitorReg, 0, LOCK_THREAD_PTR_MASK
88
// cmp cndReg, metaReg, monitorReg
89
// bne cndReg, slowPath
90
// <load>
91
// b restartLabel
92
// slowPath:
93
// bl monitorEnterHelper
94
// <load>
95
// bl monitorExitHelper
96
// b restartLabel;
97
98
// for 64-bit the rlwinm is replaced with:
99
// rldicr threadReg, monitorReg, 0, (long) LOCK_THREAD_PTR_MASK
100
101
TR::RegisterDependencyConditions *deps = getRestartLabel()->getInstruction()->getDependencyConditions();
102
103
TR::RealRegister *metaReg = cg()->getMethodMetaDataRegister();
104
TR::RealRegister *monitorReg = cg()->machine()->getRealRegister(deps->getPostConditions()->getRegisterDependency(0)->getRealRegister());
105
TR::RealRegister *cndReg = cg()->machine()->getRealRegister(deps->getPostConditions()->getRegisterDependency(2)->getRealRegister());
106
TR::RealRegister *loadResultReg = cg()->machine()->getRealRegister(deps->getPostConditions()->getRegisterDependency(3)->getRealRegister());
107
bool isResultCollectable = deps->getPostConditions()->getRegisterDependency(3)->getRegister()->containsCollectedReference();
108
TR::RealRegister *loadBaseReg = cg()->machine()->getRealRegister(deps->getPostConditions()->getRegisterDependency(4)->getRealRegister());
109
TR::Compilation *comp = cg()->comp();
110
TR::InstOpCode opcode;
111
112
uint8_t *buffer = cg()->getBinaryBufferCursor();
113
114
_recurCheckLabel->setCodeLocation(buffer);
115
116
if (comp->target().is64Bit())
117
{
118
opcode.setOpCodeValue(TR::InstOpCode::rldicr);
119
buffer = opcode.copyBinaryToBuffer(buffer);
120
monitorReg->setRegisterFieldRA((uint32_t *)buffer);
121
monitorReg->setRegisterFieldRS((uint32_t *)buffer);
122
// sh = 0
123
// assumption here that thread pointer is in upper bits, so MB = 0
124
// ME = 32 + LOCK_LAST_RECURSION_BIT_NUMBER - 1
125
int32_t ME = 32 + LOCK_LAST_RECURSION_BIT_NUMBER - 1;
126
int32_t me_field_encoding = (ME >> 5) | ((ME & 0x1F) << 1);
127
*(int32_t *)buffer |= (me_field_encoding << 5);
128
}
129
else
130
{
131
opcode.setOpCodeValue(TR::InstOpCode::rlwinm);
132
buffer = opcode.copyBinaryToBuffer(buffer);
133
monitorReg->setRegisterFieldRA((uint32_t *)buffer);
134
monitorReg->setRegisterFieldRS((uint32_t *)buffer);
135
// sh = 0
136
// assumption here that thread pointer is in upper bits, so MB = 0
137
// ME = LOCK_LAST_RECURSION_BIT_NUMBER - 1
138
*(int32_t *)buffer |= ((LOCK_LAST_RECURSION_BIT_NUMBER - 1) << 1);
139
}
140
buffer += PPC_INSTRUCTION_LENGTH;
141
142
opcode.setOpCodeValue(TR::InstOpCode::Op_cmp);
143
buffer = opcode.copyBinaryToBuffer(buffer);
144
cndReg->setRegisterFieldRT((uint32_t *)buffer);
145
metaReg->setRegisterFieldRA((uint32_t *)buffer);
146
monitorReg->setRegisterFieldRB((uint32_t *)buffer);
147
buffer += PPC_INSTRUCTION_LENGTH;
148
149
opcode.setOpCodeValue(TR::InstOpCode::bne);
150
buffer = opcode.copyBinaryToBuffer(buffer);
151
cndReg->setRegisterFieldBI((uint32_t *)buffer);
152
*(int32_t *)buffer |= 12;
153
buffer += PPC_INSTRUCTION_LENGTH;
154
155
opcode.setOpCodeValue(_loadOpCode);
156
buffer = opcode.copyBinaryToBuffer(buffer);
157
loadResultReg->setRegisterFieldRT((uint32_t *)buffer);
158
loadBaseReg->setRegisterFieldRA((uint32_t *)buffer);
159
*(int32_t *)buffer |= _loadOffset & 0xFFFF;
160
buffer += PPC_INSTRUCTION_LENGTH;
161
162
opcode.setOpCodeValue(TR::InstOpCode::b);
163
buffer = opcode.copyBinaryToBuffer(buffer);
164
*(int32_t *)buffer |= (getRestartLabel()->getCodeLocation()-buffer) & 0x03FFFFFC;
165
buffer += PPC_INSTRUCTION_LENGTH;
166
167
intptr_t helperAddress = (intptr_t)getMonitorEnterHelper()->getSymbol()->castToMethodSymbol()->getMethodAddress();
168
if (cg()->directCallRequiresTrampoline(helperAddress, (intptr_t)buffer))
169
{
170
helperAddress = TR::CodeCacheManager::instance()->findHelperTrampoline(getMonitorEnterHelper()->getReferenceNumber(), (void *)buffer);
171
TR_ASSERT_FATAL(comp->target().cpu.isTargetWithinIFormBranchRange(helperAddress, (intptr_t)buffer), "Helper address is out of range");
172
}
173
174
opcode.setOpCodeValue(TR::InstOpCode::bl);
175
buffer = opcode.copyBinaryToBuffer(buffer);
176
177
if (comp->compileRelocatableCode())
178
{
179
cg()->addExternalRelocation(new (cg()->trHeapMemory()) TR::ExternalRelocation(buffer,(uint8_t *)getMonitorEnterHelper(),TR_HelperAddress, cg()),
180
__FILE__, __LINE__, getNode());
181
}
182
183
*(int32_t *)buffer |= (helperAddress - (intptr_t)buffer) & 0x03FFFFFC;
184
buffer += PPC_INSTRUCTION_LENGTH;
185
186
gcMap().registerStackMap(buffer, cg());
187
188
opcode.setOpCodeValue(_loadOpCode);
189
buffer = opcode.copyBinaryToBuffer(buffer);
190
loadResultReg->setRegisterFieldRT((uint32_t *)buffer);
191
loadBaseReg->setRegisterFieldRA((uint32_t *)buffer);
192
*(int32_t *)buffer |= _loadOffset & 0xFFFF;
193
buffer += PPC_INSTRUCTION_LENGTH;
194
195
// this will call jitMonitorExit and return to the restart label
196
cg()->setBinaryBufferCursor(buffer);
197
198
// Defect 101811
199
TR_GCStackMap *exitMap = gcMap().getStackMap()->clone(cg()->trMemory());
200
exitMap->setByteCodeInfo(getNode()->getByteCodeInfo());
201
if (isResultCollectable)
202
exitMap->setRegisterBits(cg()->registerBitMask((int)deps->getPostConditions()->getRegisterDependency(3)->getRealRegister()));
203
204
// Throw away entry map
205
gcMap().setStackMap(exitMap);
206
buffer = TR::PPCHelperCallSnippet::emitSnippetBody();
207
208
return buffer;
209
}
210
211
void
212
TR::PPCReadMonitorSnippet::print(TR::FILE *pOutFile, TR_Debug *debug)
213
{
214
TR::Compilation *comp = cg()->comp();
215
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg()->fe());
216
uint8_t *cursor = getRecurCheckLabel()->getCodeLocation();
217
218
debug->printSnippetLabel(pOutFile, getRecurCheckLabel(), cursor, "Read Monitor Snippet");
219
220
TR::RegisterDependencyConditions *deps = getRestartLabel()->getInstruction()->getDependencyConditions();
221
222
TR::Machine *machine = cg()->machine();
223
TR::RealRegister *metaReg = cg()->getMethodMetaDataRegister();
224
TR::RealRegister *monitorReg = machine->getRealRegister(deps->getPostConditions()->getRegisterDependency(1)->getRealRegister());
225
TR::RealRegister *condReg = machine->getRealRegister(deps->getPostConditions()->getRegisterDependency(2)->getRealRegister());
226
TR::RealRegister *loadResultReg = machine->getRealRegister(deps->getPostConditions()->getRegisterDependency(3)->getRealRegister());
227
TR::RealRegister *loadBaseReg = machine->getRealRegister(deps->getPostConditions()->getRegisterDependency(4)->getRealRegister());
228
229
debug->printPrefix(pOutFile, NULL, cursor, 4);
230
if (comp->target().is64Bit())
231
trfprintf(pOutFile, "rldicr \t%s, %s, 0, " INT64_PRINTF_FORMAT_HEX "\t; Get owner thread value", debug->getName(monitorReg), debug->getName(monitorReg), (int64_t) LOCK_THREAD_PTR_MASK);
232
else
233
trfprintf(pOutFile, "rlwinm \t%s, %s, 0, 0x%x\t; Get owner thread value", debug->getName(monitorReg), debug->getName(monitorReg), LOCK_THREAD_PTR_MASK);
234
cursor+= 4;
235
236
debug->printPrefix(pOutFile, NULL, cursor, 4);
237
if (comp->target().is64Bit())
238
trfprintf(pOutFile, "cmp8 \t%s, %s, %s\t; Compare VMThread to owner thread", debug->getName(condReg), debug->getName(metaReg), debug->getName(monitorReg));
239
else
240
trfprintf(pOutFile, "cmp4 \t%s, %s, %s\t; Compare VMThread to owner thread", debug->getName(condReg), debug->getName(metaReg), debug->getName(monitorReg));
241
cursor+= 4;
242
243
debug->printPrefix(pOutFile, NULL, cursor, 4);
244
int32_t distance = *((int32_t *) cursor) & 0x0000fffc;
245
distance = (distance << 16) >> 16; // sign extend
246
trfprintf(pOutFile, "bne %s, " POINTER_PRINTF_FORMAT "\t; Use Helpers", debug->getName(condReg), (intptr_t)cursor + distance);
247
cursor+= 4;
248
249
debug->printPrefix(pOutFile, NULL, cursor, 4);
250
trfprintf(pOutFile, "%s \t%s, [%s, %d]\t; Load", TR::InstOpCode::metadata[getLoadOpCode()].name, debug->getName(loadResultReg), debug->getName(loadBaseReg), getLoadOffset());
251
cursor+= 4;
252
253
debug->printPrefix(pOutFile, NULL, cursor, 4);
254
distance = *((int32_t *) cursor) & 0x03fffffc;
255
distance = (distance << 6) >> 6; // sign extend
256
trfprintf(pOutFile, "b \t" POINTER_PRINTF_FORMAT "\t\t; ", (intptr_t)cursor + distance);
257
debug->print(pOutFile, getRestartLabel());
258
cursor+= 4;
259
260
debug->printPrefix(pOutFile, NULL, cursor, 4);
261
distance = *((int32_t *) cursor) & 0x03fffffc;
262
distance = (distance << 6) >> 6; // sign extend
263
trfprintf(pOutFile, "bl \t" POINTER_PRINTF_FORMAT "\t\t; %s", (intptr_t)cursor + distance, debug->getName(getMonitorEnterHelper()));
264
if (debug->isBranchToTrampoline(getMonitorEnterHelper(), cursor, distance))
265
trfprintf(pOutFile, " Through trampoline");
266
cursor+= 4;
267
268
debug->printPrefix(pOutFile, NULL, cursor, 4);
269
trfprintf(pOutFile, "%s \t%s, [%s, %d]\t; Load", TR::InstOpCode::metadata[getLoadOpCode()].name, debug->getName(loadResultReg), debug->getName(loadBaseReg), getLoadOffset());
270
271
debug->print(pOutFile, (TR::PPCHelperCallSnippet *)this);
272
}
273
274
uint32_t TR::PPCReadMonitorSnippet::getLength(int32_t estimatedSnippetStart)
   {
   // 28 bytes belong to this snippet itself; the helper-call snippet part is
   // estimated as starting right after them.
   const int32_t ownBytes = 28;
   int32_t total = ownBytes;
   total += TR::PPCHelperCallSnippet::getLength(estimatedSnippetStart + ownBytes);
   return total;
   }
280
281
int32_t TR::PPCReadMonitorSnippet::setEstimatedCodeLocation(int32_t estimatedSnippetStart)
282
{
283
_recurCheckLabel->setEstimatedCodeLocation(estimatedSnippetStart);
284
getSnippetLabel()->setEstimatedCodeLocation(estimatedSnippetStart+28);
285
return(estimatedSnippetStart);
286
}
287
288
// Forwards straight to the TR::Snippet constructor, passing false as its
// fourth argument; there is no state of its own to set up.
TR::PPCAllocPrefetchSnippet::PPCAllocPrefetchSnippet(TR::CodeGenerator *codeGen,
                                                     TR::Node *node,
                                                     TR::LabelSymbol *callLabel)
   : TR::Snippet(codeGen, node, callLabel, false)
   {
   }
295
296
uint32_t TR::getCCPreLoadedCodeSize()
297
{
298
uint32_t size = 0;
299
300
// XXX: Can't check if processor supports transient at this point because processor type hasn't been determined
301
// so we have to allocate for the larger of the two scenarios
302
if (false)
303
{
304
const uint32_t linesToPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 8;
305
size += (linesToPrefetch + 1) / 2 * 4;
306
}
307
else
308
{
309
static bool l3SkipLines = feGetEnv("TR_l3SkipLines") != NULL;
310
const uint32_t linesToLdPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 4;
311
const uint32_t linesToStPrefetch = linesToLdPrefetch * 2;
312
size += 4 + (linesToLdPrefetch + 1) / 2 * 4 + 1 + (l3SkipLines ? 2 : 0) + (linesToStPrefetch + 1) / 2 * 4;
313
}
314
size += 3;
315
316
//if (!TR::CodeGenerator::supportsTransientPrefetch() && !doL1Pref)
317
// XXX: Can't check if processor supports transient at this point because processor type hasn't been determined
318
// so we have to allocate for the larger of the two scenarios
319
if (false)
320
{
321
const uint32_t linesToPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 8;
322
size += (linesToPrefetch + 1) / 2 * 4;
323
}
324
else
325
{
326
static bool l3SkipLines = feGetEnv("TR_l3SkipLines") != NULL;
327
const uint32_t linesToLdPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 4;
328
const uint32_t linesToStPrefetch = linesToLdPrefetch * 2;
329
size += 4 + (linesToLdPrefetch + 1) / 2 * 4 + 1 + (l3SkipLines ? 2 : 0) + (linesToStPrefetch + 1) / 2 * 4;
330
}
331
size += 3;
332
333
//TR_writeBarrier/TR_writeBarrierAndCardMark/TR_cardMark
334
size += 12;
335
if (TR::Options::getCmdLineOptions()->getGcCardSize() > 0)
336
size += 19 + (TR::Compiler->om.writeBarrierType() != gc_modron_wrtbar_cardmark_incremental ? 13 : 10);
337
338
#if defined(TR_TARGET_32BIT)
339
// If heap base and/or size is constant we can materialize them with 1 or 2 instructions
340
// Assume 2 instructions, which means we want space for 1 additional instruction
341
// for both TR_writeBarrier and TR_writeBarrierAndCardMark
342
if (!TR::Options::getCmdLineOptions()->isVariableHeapBaseForBarrierRange0())
343
size += 2;
344
if (!TR::Options::getCmdLineOptions()->isVariableHeapSizeForBarrierRange0())
345
size += 2;
346
#endif
347
348
//TR_arrayStoreCHK
349
size += 25;
350
351
// Add size for other helpers
352
353
// Eyecatchers, one per helper
354
size += TR_numCCPreLoadedCode;
355
356
return TR::alignAllocationSize<8>(size * PPC_INSTRUCTION_LENGTH);
357
}
358
359
// Packs four byte-sized values into one 32-bit word. The shift amounts are
// mirrored depending on whether __LITTLE_ENDIAN__ is defined: without it `a`
// lands in the most significant byte, with it `a` lands in the least.
#if !defined(__LITTLE_ENDIAN__)
#define CCEYECATCHER(a, b, c, d) (((a) << 24) | ((b) << 16) | ((c) << 8) | ((d) << 0))
#else
#define CCEYECATCHER(a, b, c, d) (((a) << 0) | ((b) << 8) | ((c) << 16) | ((d) << 24))
#endif
364
365
static void performCCPreLoadedBinaryEncoding(uint8_t *buffer, TR::CodeGenerator *cg)
366
{
367
cg->setBinaryBufferStart(buffer);
368
cg->setBinaryBufferCursor(buffer);
369
for (TR::Instruction *i = cg->getFirstInstruction(); i != NULL; i = i->getNext())
370
{
371
i->estimateBinaryLength(cg->getBinaryBufferCursor() - cg->getBinaryBufferStart());
372
cg->setBinaryBufferCursor(i->generateBinaryEncoding());
373
}
374
}
375
376
static uint8_t* initializeCCPreLoadedPrefetch(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
377
{
378
TR::Compilation *comp = cg->comp();
379
TR::Node *n = cg->getFirstInstruction()->getNode();
380
381
// Prefetch helper; prefetches a number of lines and returns directly to JIT code
382
// In:
383
// r8 = object ptr
384
// Out:
385
// r8 = object ptr
386
// Clobbers:
387
// r10, r11
388
// cr0
389
390
cg->setFirstInstruction(NULL);
391
cg->setAppendInstruction(NULL);
392
393
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 5));
394
395
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
396
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
397
TR::Instruction *cursor = entry;
398
399
TR::Register *metaReg = cg->getMethodMetaDataRegister();
400
TR::Register *r8 = cg->machine()->getRealRegister(TR::RealRegister::gr8);
401
TR::Register *r10 = cg->machine()->getRealRegister(TR::RealRegister::gr10);
402
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
403
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
404
405
static bool doL1Pref = feGetEnv("TR_doL1Prefetch") != NULL;
406
const uint32_t ppcCacheLineSize = getPPCCacheLineSize();
407
uint32_t helperSize;
408
409
if (!TR::CodeGenerator::supportsTransientPrefetch() && !doL1Pref)
410
{
411
const uint32_t linesToPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 8;
412
const uint32_t restartAfterLines = TR::Options::_TLHPrefetchBoundaryLineCount > 0 ? TR::Options::_TLHPrefetchBoundaryLineCount : 8;
413
const uint32_t skipLines = TR::Options::_TLHPrefetchStaggeredLineCount > 0 ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;
414
helperSize = (linesToPrefetch + 1) / 2 * 4;
415
416
TR_ASSERT_FATAL( (skipLines + 1) * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchStaggeredLineCount (%u) is too high. Will cause imm field to overflow.", skipLines);
417
TR_ASSERT_FATAL( restartAfterLines * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchBoundaryLineCount (%u) is too high. Will cause imm field to overflow.", restartAfterLines);
418
419
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r8, skipLines * ppcCacheLineSize, cursor);
420
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r8, (skipLines + 1) * ppcCacheLineSize, cursor);
421
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
422
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
423
424
for (uint32_t i = 2; i < linesToPrefetch; i += 2)
425
{
426
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
427
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
428
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
429
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
430
}
431
432
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r11, restartAfterLines * ppcCacheLineSize, cursor);
433
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
434
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, tlhPrefetchFTA), TR::Compiler->om.sizeofReferenceAddress()),
435
r11, cursor);
436
}
437
else
438
{
439
// Transient version
440
static const char *s = feGetEnv("TR_l3SkipLines");
441
static uint32_t l3SkipLines = s ? atoi(s) : 0;
442
const uint32_t linesToLdPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 4;
443
const uint32_t linesToStPrefetch = linesToLdPrefetch * 2;
444
const uint32_t restartAfterLines = TR::Options::_TLHPrefetchBoundaryLineCount > 0 ? TR::Options::_TLHPrefetchBoundaryLineCount : 4;
445
const uint32_t skipLines = TR::Options::_TLHPrefetchStaggeredLineCount > 0 ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;
446
helperSize = 4 + (linesToLdPrefetch + 1) / 2 * 4 + 1 + (l3SkipLines ? 2 : 0) + (linesToStPrefetch + 1) / 2 * 4;
447
448
TR_ASSERT_FATAL( (skipLines + 1) * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchStaggeredLineCount (%u) is too high. Will cause imm field to overflow.", skipLines);
449
TR_ASSERT_FATAL( restartAfterLines * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchBoundaryLineCount (%u) is too high. Will cause imm field to overflow.", restartAfterLines);
450
451
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r10,
452
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, debugEventData3), TR::Compiler->om.sizeofReferenceAddress()),
453
cursor);
454
cursor = generateTrg1Src1ImmInstruction(cg,TR::InstOpCode::Op_cmpli, n, cr0, r10, 0, cursor);
455
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::xori, n, r10, r10, 1, cursor);
456
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r11, restartAfterLines * ppcCacheLineSize, cursor);
457
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
458
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, tlhPrefetchFTA), TR::Compiler->om.sizeofReferenceAddress()),
459
r11, cursor);
460
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
461
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, debugEventData3), TR::Compiler->om.sizeofReferenceAddress()),
462
r10, cursor);
463
464
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r8, skipLines * ppcCacheLineSize, cursor);
465
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r8, (skipLines + 1) * ppcCacheLineSize, cursor);
466
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
467
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
468
469
for (uint32_t i = 2; i < linesToLdPrefetch; i += 2)
470
{
471
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
472
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
473
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
474
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
475
}
476
477
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
478
479
if (l3SkipLines > 0)
480
{
481
TR_ASSERT_FATAL( ppcCacheLineSize * l3SkipLines <= UPPER_IMMED, "TR_l3SkipLines (%u) is too high. Will cause imm field to overflow.", l3SkipLines);
482
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * l3SkipLines, cursor);
483
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * l3SkipLines, cursor);
484
}
485
486
for (uint32_t i = 0; i < linesToStPrefetch; i += 2)
487
{
488
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
489
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
490
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtstt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
491
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtstt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
492
}
493
}
494
495
cursor = generateInstruction(cg, TR::InstOpCode::blr, n, cursor);
496
497
performCCPreLoadedBinaryEncoding(buffer, cg);
498
499
helperSize += 3;
500
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
501
"Per-codecache prefetch helper, unexpected size");
502
503
CCPreLoadedCodeTable[TR_AllocPrefetch] = entryLabel->getCodeLocation();
504
505
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
506
}
507
508
static uint8_t* initializeCCPreLoadedNonZeroPrefetch(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
509
{
510
TR::Compilation *comp = cg->comp();
511
TR::Node *n = cg->getFirstInstruction()->getNode();
512
513
// NonZero TLH Prefetch helper; prefetches a number of lines and returns directly to JIT code
514
// In:
515
// r8 = object ptr
516
// Out:
517
// r8 = object ptr
518
// Clobbers:
519
// r10, r11
520
// cr0
521
522
cg->setFirstInstruction(NULL);
523
cg->setAppendInstruction(NULL);
524
525
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 6));
526
527
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
528
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
529
TR::Instruction *cursor = entry;
530
531
TR::Register *metaReg = cg->getMethodMetaDataRegister();
532
TR::Register *r8 = cg->machine()->getRealRegister(TR::RealRegister::gr8);
533
TR::Register *r10 = cg->machine()->getRealRegister(TR::RealRegister::gr10);
534
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
535
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
536
537
static bool doL1Pref = feGetEnv("TR_doL1Prefetch") != NULL;
538
const uint32_t ppcCacheLineSize = getPPCCacheLineSize();
539
uint32_t helperSize;
540
541
if (!TR::CodeGenerator::supportsTransientPrefetch() && !doL1Pref)
542
{
543
const uint32_t linesToPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 8;
544
const uint32_t restartAfterLines = TR::Options::_TLHPrefetchBoundaryLineCount > 0 ? TR::Options::_TLHPrefetchBoundaryLineCount : 8;
545
const uint32_t skipLines = TR::Options::_TLHPrefetchStaggeredLineCount > 0 ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;
546
helperSize = (linesToPrefetch + 1) / 2 * 4;
547
548
TR_ASSERT_FATAL( (skipLines + 1) * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchStaggeredLineCount (%u) is too high. Will cause imm field to overflow.", skipLines);
549
TR_ASSERT_FATAL( restartAfterLines * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchBoundaryLineCount (%u) is too high. Will cause imm field to overflow.", restartAfterLines);
550
551
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r8, skipLines * ppcCacheLineSize, cursor);
552
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r8, (skipLines + 1) * ppcCacheLineSize, cursor);
553
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
554
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
555
556
for (uint32_t i = 2; i < linesToPrefetch; i += 2)
557
{
558
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
559
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
560
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
561
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtst, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
562
}
563
564
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r11, restartAfterLines * ppcCacheLineSize, cursor);
565
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
566
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, nonZeroTlhPrefetchFTA), TR::Compiler->om.sizeofReferenceAddress()),
567
r11, cursor);
568
}
569
else
570
{
571
// Transient version
572
static const char *s = feGetEnv("TR_l3SkipLines");
573
static uint32_t l3SkipLines = s ? atoi(s) : 0;
574
const uint32_t linesToLdPrefetch = TR::Options::_TLHPrefetchLineCount > 0 ? TR::Options::_TLHPrefetchLineCount : 4;
575
const uint32_t linesToStPrefetch = linesToLdPrefetch * 2;
576
const uint32_t restartAfterLines = TR::Options::_TLHPrefetchBoundaryLineCount > 0 ? TR::Options::_TLHPrefetchBoundaryLineCount : 4;
577
const uint32_t skipLines = TR::Options::_TLHPrefetchStaggeredLineCount > 0 ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;
578
helperSize = 4 + (linesToLdPrefetch + 1) / 2 * 4 + 1 + (l3SkipLines ? 2 : 0) + (linesToStPrefetch + 1) / 2 * 4;
579
580
TR_ASSERT_FATAL( (skipLines + 1) * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchStaggeredLineCount (%u) is too high. Will cause imm field to overflow.", skipLines);
581
TR_ASSERT_FATAL( restartAfterLines * ppcCacheLineSize <= UPPER_IMMED, "tlhPrefetchBoundaryLineCount (%u) is too high. Will cause imm field to overflow.", restartAfterLines);
582
583
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r10,
584
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, debugEventData3), TR::Compiler->om.sizeofReferenceAddress()),
585
cursor);
586
cursor = generateTrg1Src1ImmInstruction(cg,TR::InstOpCode::Op_cmpli, n, cr0, r10, 0, cursor);
587
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::xori, n, r10, r10, 1, cursor);
588
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r11, restartAfterLines * ppcCacheLineSize, cursor);
589
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
590
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, nonZeroTlhPrefetchFTA), TR::Compiler->om.sizeofReferenceAddress()),
591
r11, cursor);
592
cursor = generateMemSrc1Instruction(cg,TR::InstOpCode::Op_st, n,
593
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, debugEventData3), TR::Compiler->om.sizeofReferenceAddress()),
594
r10, cursor);
595
596
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r8, skipLines * ppcCacheLineSize, cursor);
597
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r8, (skipLines + 1) * ppcCacheLineSize, cursor);
598
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
599
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
600
601
for (uint32_t i = 2; i < linesToLdPrefetch; i += 2)
602
{
603
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
604
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
605
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg ,NULL, r10, 4), cursor);
606
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
607
}
608
609
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
610
611
if (l3SkipLines > 0)
612
{
613
TR_ASSERT_FATAL( ppcCacheLineSize * l3SkipLines <= UPPER_IMMED, "TR_l3SkipLines (%u) is too high. Will cause imm field to overflow.", l3SkipLines);
614
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * l3SkipLines, cursor);
615
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * l3SkipLines, cursor);
616
}
617
618
for (uint32_t i = 0; i < linesToStPrefetch; i += 2)
619
{
620
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r10, r10, ppcCacheLineSize * 2, cursor);
621
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, r11, r11, ppcCacheLineSize * 2, cursor);
622
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtstt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r10, 4), cursor);
623
cursor = generateMemInstruction(cg, TR::InstOpCode::dcbtstt, n, TR::MemoryReference::createWithIndexReg(cg, NULL, r11, 4), cursor);
624
}
625
}
626
627
cursor = generateInstruction(cg, TR::InstOpCode::blr, n, cursor);
628
629
performCCPreLoadedBinaryEncoding(buffer, cg);
630
631
helperSize += 3;
632
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
633
"Per-codecache prefetch helper, unexpected size");
634
635
CCPreLoadedCodeTable[TR_NonZeroAllocPrefetch] = entryLabel->getCodeLocation();
636
637
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
638
}
639
640
// Generates, after the given cursor, the instruction sequence that zero-fills the
// region starting at (objStartReg + initOffset) and ending at objEndReg using
// 32-bit stores, and returns the last instruction generated.
//
// The sequence begins with a conditional return (beqlr) on needsZeroInitCondReg,
// so nothing is stored when that condition register has EQ set. Otherwise:
//   - an unrolled loop driven by CTR clears 32 bytes per iteration, running
//     (objEndReg - iterReg) >> 5 times;
//   - a residue loop then clears one word at a time until iterReg >= objEndReg.
// When the unrolled iteration count is zero the residue loop is entered directly
// and stores one word before testing its bound.
//
// cg                   - code generator used to create the instructions
// n                    - node to associate with the generated instructions
// objStartReg          - register holding the start address of the object
// objEndReg            - register holding the (exclusive) end address
// needsZeroInitCondReg - condition register tested by the leading beqlr
// iterReg              - scratch register used as the running store address
// zeroReg              - scratch register; temporarily holds the loop count, then 0
// condReg              - scratch condition register for the loop compares
// initOffset           - byte offset from objStartReg at which zeroing starts;
//                        must fit in a signed 16-bit immediate
// cursor               - instruction after which the sequence is generated
static TR::Instruction* genZeroInit(TR::CodeGenerator *cg, TR::Node *n, TR::Register *objStartReg, TR::Register *objEndReg, TR::Register *needsZeroInitCondReg,
                                    TR::Register *iterReg, TR::Register *zeroReg, TR::Register *condReg, uint32_t initOffset, TR::Instruction *cursor)
   {
   TR_ASSERT_FATAL_WITH_NODE(n, initOffset <= UPPER_IMMED, "initOffset (%u) is too big to fit in a signed immediate field.", initOffset);
   // Generates 24 instructions (+6 if DEBUG)
#if defined(DEBUG)
   // Fill the object with junk to make sure zero-init is working
      {
      TR::LabelSymbol *loopStartLabel = generateLabelSymbol(cg);

      cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, iterReg, objStartReg, initOffset, cursor);
      cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, zeroReg, -1, cursor);

      cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, loopStartLabel, cursor);
      // XXX: This can be improved to use std on 64-bit, but we have to adjust for the size not being 8-byte aligned
      cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::stw, n,
                                          TR::MemoryReference::createWithDisplacement(cg, iterReg, 0, 4),
                                          zeroReg, cursor);
      cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, iterReg, iterReg, 4, cursor);
      cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, condReg, iterReg, objEndReg, cursor);
      cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, n, loopStartLabel, condReg, cursor);
      }
#endif

   TR::LabelSymbol *unrolledLoopStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *residueLoopStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneZeroInitLabel = generateLabelSymbol(cg);

   // Return to the caller straight away when no zero-initialization is required
   cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, needsZeroInitCondReg, cursor);

   cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, iterReg, objStartReg, initOffset, cursor);
   // Use the zero reg temporarily to calculate unrolled loop iterations, equal to (stop - start) >> 5
   cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, zeroReg, iterReg, objEndReg, cursor);
   cursor = generateShiftRightLogicalImmediate(cg, n, zeroReg, zeroReg, 5, cursor);
   cursor = generateTrg1Src1ImmInstruction(cg,TR::InstOpCode::Op_cmpli, n, condReg, zeroReg, 0, cursor);
   // Chain the mtctr after cursor like every other instruction in this sequence; without the
   // preceding-instruction argument it would be appended to the end of the instruction stream
   // whenever cursor is not the current append instruction.
   // NOTE(review): assumes generateSrc1Instruction takes (..., s1reg, imm, preced) - confirm against OMR.
   cursor = generateSrc1Instruction(cg, TR::InstOpCode::mtctr, n, zeroReg, 0, cursor);
   cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, zeroReg, 0, cursor);
   // Fewer than 32 bytes to clear: skip the unrolled loop entirely
   cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, n, residueLoopStartLabel, condReg, cursor);

   // Unrolled loop: eight word stores (32 bytes) per iteration, counted down in CTR
   cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, unrolledLoopStartLabel, cursor);
   for (int i = 0; i < 32; i += 4)
      cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::stw, n,
                                          TR::MemoryReference::createWithDisplacement(cg, iterReg, i, 4),
                                          zeroReg, cursor);
   cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, iterReg, iterReg, 32, cursor);
   cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bdnz, n, unrolledLoopStartLabel, /* Not used */ condReg, cursor);

   // Nothing left over after the unrolled loop: skip the residue loop
   cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, condReg, iterReg, objEndReg, cursor);
   cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bge, n, doneZeroInitLabel, condReg, cursor);

   // Residue loop
   cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, residueLoopStartLabel, cursor);
   cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::stw, n,
                                       TR::MemoryReference::createWithDisplacement(cg, iterReg, 0, 4),
                                       zeroReg, cursor);
   cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addi, n, iterReg, iterReg, 4, cursor);
   cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, condReg, iterReg, objEndReg, cursor);
   cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::blt, n, residueLoopStartLabel, condReg, cursor);

   cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, doneZeroInitLabel, cursor);

   return cursor;
   }
703
704
static uint8_t* initializeCCPreLoadedWriteBarrier(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
705
{
706
TR::Compilation *comp = cg->comp();
707
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
708
TR::Node *n = cg->getFirstInstruction()->getNode();
709
710
// Write barrier
711
// In:
712
// r3 = dst object
713
// r4 = src object
714
// Out:
715
// none
716
// Clobbers:
717
// r3-r6, r11
718
// cr0, cr1
719
720
cg->setFirstInstruction(NULL);
721
cg->setAppendInstruction(NULL);
722
723
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 2));
724
725
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
726
TR::LabelSymbol *helperTrampolineLabel = generateLabelSymbol(cg);
727
helperTrampolineLabel->setCodeLocation((uint8_t *)TR::CodeCacheManager::instance()->findHelperTrampoline(TR_writeBarrierStoreGenerational, buffer));
728
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
729
TR::Instruction *cursor = entry;
730
TR::InstOpCode::Mnemonic Op_lclass = TR::InstOpCode::Op_load;
731
if (TR::Compiler->om.compressObjectReferences())
732
Op_lclass = TR::InstOpCode::lwz;
733
const TR::InstOpCode::Mnemonic rememberedClassMaskOp = J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST > UPPER_IMMED ||
734
J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST < LOWER_IMMED ? TR::InstOpCode::andis_r : TR::InstOpCode::andi_r;
735
const uint32_t rememberedClassMask = rememberedClassMaskOp == TR::InstOpCode::andis_r ?
736
J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >> 16 : J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST;
737
738
TR::Register *metaReg = cg->getMethodMetaDataRegister();
739
TR::Register *r3 = cg->machine()->getRealRegister(TR::RealRegister::gr3);
740
TR::Register *r4 = cg->machine()->getRealRegister(TR::RealRegister::gr4);
741
TR::Register *r5 = cg->machine()->getRealRegister(TR::RealRegister::gr5);
742
TR::Register *r6 = cg->machine()->getRealRegister(TR::RealRegister::gr6);
743
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
744
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
745
TR::Register *cr1 = cg->machine()->getRealRegister(TR::RealRegister::cr1);
746
747
TR_ASSERT_FATAL((J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST <= UPPER_IMMED && J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >= LOWER_IMMED) ||
748
(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST & 0xffff) == 0, "Expecting J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST to fit in immediate field");
749
TR_ASSERT_FATAL( rememberedClassMask <= 0xFFFF, "Expecting rememberedClassMask (%u) to fit in an unsigned immediate field.", rememberedClassMask);
750
751
const bool constHeapBase = !comp->getOptions()->isVariableHeapBaseForBarrierRange0();
752
const bool constHeapSize = !comp->getOptions()->isVariableHeapSizeForBarrierRange0();
753
intptr_t heapBase;
754
intptr_t heapSize;
755
756
if (comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapBase)
757
{
758
heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
759
cursor = loadAddressConstant(cg, false, n, heapBase, r5, cursor);
760
}
761
else
762
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r5,
763
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapBaseForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
764
cursor);
765
if (comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapSize)
766
{
767
heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
768
cursor = loadAddressConstant(cg, false, n, heapSize, r6, cursor);
769
}
770
else
771
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r6,
772
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapSizeForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
773
cursor);
774
cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, r11, r5, r3, cursor);
775
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r11, r6, cursor);
776
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bgelr, n, NULL, cr0, cursor);
777
cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, r11, r5, r4, cursor);
778
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r11, r6, cursor);
779
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bltlr, n, NULL, cr0, cursor);
780
cursor = generateTrg1MemInstruction(cg,Op_lclass, n, r11,
781
TR::MemoryReference::createWithDisplacement(cg, r3, TR::Compiler->om.offsetOfObjectVftField(), TR::Compiler->om.sizeofReferenceField()),
782
cursor);
783
cursor = generateTrg1Src1ImmInstruction(cg, rememberedClassMaskOp, n, r11, r11, cr0, rememberedClassMask, cursor);
784
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bnelr, n, NULL, cr0, cursor);
785
cursor = generateLabelInstruction(cg, TR::InstOpCode::b, n, helperTrampolineLabel, cursor);
786
787
performCCPreLoadedBinaryEncoding(buffer, cg);
788
789
const uint32_t helperSize = 12 +
790
(comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapBase && heapBase > UPPER_IMMED && heapBase < LOWER_IMMED ? 1 : 0) +
791
(comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapSize && heapSize > UPPER_IMMED && heapSize < LOWER_IMMED ? 1 : 0);
792
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
793
"Per-codecache write barrier, unexpected size");
794
795
CCPreLoadedCodeTable[TR_writeBarrier] = entryLabel->getCodeLocation();
796
797
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
798
}
799
800
static uint8_t* initializeCCPreLoadedWriteBarrierAndCardMark(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
801
{
802
TR::Compilation *comp = cg->comp();
803
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
804
TR::Node *n = cg->getFirstInstruction()->getNode();
805
806
// Write barrier and card mark
807
// In:
808
// r3 = dst object
809
// r4 = src object
810
// Out:
811
// none
812
// Clobbers:
813
// r3-r6, r11
814
// cr0, cr1
815
816
cg->setFirstInstruction(NULL);
817
cg->setAppendInstruction(NULL);
818
819
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 3));
820
821
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
822
TR::LabelSymbol *doneCardMarkLabel = generateLabelSymbol(cg);
823
TR::LabelSymbol *helperTrampolineLabel = generateLabelSymbol(cg);
824
helperTrampolineLabel->setCodeLocation((uint8_t *)TR::CodeCacheManager::instance()->findHelperTrampoline(TR_writeBarrierStoreGenerationalAndConcurrentMark, buffer));
825
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
826
TR::Instruction *cursor = entry;
827
TR::InstOpCode::Mnemonic Op_lclass = TR::InstOpCode::Op_load;
828
if (TR::Compiler->om.compressObjectReferences())
829
Op_lclass = TR::InstOpCode::lwz;
830
const TR::InstOpCode::Mnemonic cmActiveMaskOp = J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE > UPPER_IMMED ||
831
J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE < LOWER_IMMED ? TR::InstOpCode::andis_r : TR::InstOpCode::andi_r;
832
const uint32_t cmActiveMask = cmActiveMaskOp == TR::InstOpCode::andis_r ?
833
J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >> 16 : J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE;
834
const TR::InstOpCode::Mnemonic rememberedClassMaskOp = J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST > UPPER_IMMED ||
835
J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST < LOWER_IMMED ? TR::InstOpCode::andis_r : TR::InstOpCode::andi_r;
836
const uint32_t rememberedClassMask = rememberedClassMaskOp == TR::InstOpCode::andis_r ?
837
J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >> 16 : J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST;
838
const uintptr_t cardTableShift = comp->target().is64Bit() ?
839
trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize()) :
840
trailingZeroes((uint32_t)comp->getOptions()->getGcCardSize());
841
842
TR::Register *metaReg = cg->getMethodMetaDataRegister();
843
TR::Register *r3 = cg->machine()->getRealRegister(TR::RealRegister::gr3);
844
TR::Register *r4 = cg->machine()->getRealRegister(TR::RealRegister::gr4);
845
TR::Register *r5 = cg->machine()->getRealRegister(TR::RealRegister::gr5);
846
TR::Register *r6 = cg->machine()->getRealRegister(TR::RealRegister::gr6);
847
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
848
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
849
TR::Register *cr1 = cg->machine()->getRealRegister(TR::RealRegister::cr1);
850
851
TR_ASSERT_FATAL((J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE <= UPPER_IMMED && J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >= LOWER_IMMED) ||
852
(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE & 0xffff) == 0, "Expecting J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE to fit in immediate field");
853
TR_ASSERT_FATAL((J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST <= UPPER_IMMED && J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >= LOWER_IMMED) ||
854
(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST & 0xffff) == 0, "Expecting J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST to fit in immediate field");
855
TR_ASSERT_FATAL( cmActiveMask <= 0xFFFF, "Expecting cmActiveMask (%u) to fit in an unsigned immediate field.", cmActiveMask);
856
TR_ASSERT_FATAL( rememberedClassMask <= 0xFFFF, "Expecting rememberedClassMask (%u) to fit in an unsigned immediate field.", rememberedClassMask);
857
858
const bool constHeapBase = !comp->getOptions()->isVariableHeapBaseForBarrierRange0();
859
const bool constHeapSize = !comp->getOptions()->isVariableHeapSizeForBarrierRange0();
860
intptr_t heapBase;
861
intptr_t heapSize;
862
863
if (comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapBase)
864
{
865
heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
866
cursor = loadAddressConstant(cg, false, n, heapBase, r5, cursor);
867
}
868
else
869
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r5,
870
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapBaseForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
871
cursor);
872
if (comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapSize)
873
{
874
heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
875
cursor = loadAddressConstant(cg, false, n, heapSize, r6, cursor);
876
}
877
else
878
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r6,
879
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapSizeForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
880
cursor);
881
cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, r11, r5, r3, cursor);
882
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r11, r6, cursor);
883
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bgelr, n, NULL, cr0, cursor);
884
cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, r5, r5, r4, cursor);
885
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr1, r5, r6, cursor);
886
cursor = generateTrg1MemInstruction(cg, TR::InstOpCode::lwz, n, r6,
887
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, privateFlags), 4),
888
cursor);
889
cursor = generateTrg1Src1ImmInstruction(cg, cmActiveMaskOp, n, r6, r6, cr0, cmActiveMask, cursor);
890
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, n, doneCardMarkLabel, cr0, cursor);
891
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r6,
892
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, activeCardTableBase), TR::Compiler->om.sizeofReferenceAddress()),
893
cursor);
894
if (comp->target().is64Bit())
895
cursor = generateShiftRightLogicalImmediateLong(cg, n, r11, r11, cardTableShift, cursor);
896
else
897
cursor = generateShiftRightLogicalImmediate(cg, n, r11, r11, cardTableShift, cursor);
898
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r5, 1, cursor);
899
cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::stbx, n,
900
TR::MemoryReference::createWithIndexReg(cg, r6, r11, 1),
901
r5, cursor);
902
cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, doneCardMarkLabel, cursor);
903
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bltlr, n, NULL, cr1, cursor);
904
cursor = generateTrg1MemInstruction(cg,Op_lclass, n, r11,
905
TR::MemoryReference::createWithDisplacement(cg, r3, TR::Compiler->om.offsetOfObjectVftField(), TR::Compiler->om.sizeofReferenceField()),
906
cursor);
907
cursor = generateTrg1Src1ImmInstruction(cg, rememberedClassMaskOp, n, r11, r11, cr0, rememberedClassMask, cursor);
908
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bnelr, n, NULL, cr0, cursor);
909
cursor = generateLabelInstruction(cg, TR::InstOpCode::b, n, helperTrampolineLabel, cursor);
910
911
performCCPreLoadedBinaryEncoding(buffer, cg);
912
913
const uint32_t helperSize = 19 +
914
(comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapBase && heapBase > UPPER_IMMED && heapBase < LOWER_IMMED ? 1 : 0) +
915
(comp->target().is32Bit() && !comp->compileRelocatableCode() && constHeapSize && heapSize > UPPER_IMMED && heapSize < LOWER_IMMED ? 1 : 0);
916
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
917
"Per-codecache write barrier with card mark, unexpected size");
918
919
CCPreLoadedCodeTable[TR_writeBarrierAndCardMark] = entryLabel->getCodeLocation();
920
921
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
922
}
923
924
static uint8_t* initializeCCPreLoadedCardMark(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
925
{
926
TR::Compilation *comp = cg->comp();
927
TR::Node *n = cg->getFirstInstruction()->getNode();
928
929
// Card mark
930
// In:
931
// r3 = dst object
932
// Out:
933
// none
934
// Clobbers:
935
// r4-r5, r11
936
// cr0
937
938
cg->setFirstInstruction(NULL);
939
cg->setAppendInstruction(NULL);
940
941
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 4));
942
943
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
944
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
945
TR::Instruction *cursor = entry;
946
const TR::InstOpCode::Mnemonic cmActiveMaskOp = J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE > UPPER_IMMED ||
947
J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE < LOWER_IMMED ? TR::InstOpCode::andis_r : TR::InstOpCode::andi_r;
948
const uint32_t cmActiveMask = cmActiveMaskOp == TR::InstOpCode::andis_r ?
949
J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >> 16 : J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE;
950
const uintptr_t cardTableShift = comp->target().is64Bit() ?
951
trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize()) :
952
trailingZeroes((uint32_t)comp->getOptions()->getGcCardSize());
953
954
TR::Register *metaReg = cg->getMethodMetaDataRegister();
955
TR::Register *r3 = cg->machine()->getRealRegister(TR::RealRegister::gr3);
956
TR::Register *r4 = cg->machine()->getRealRegister(TR::RealRegister::gr4);
957
TR::Register *r5 = cg->machine()->getRealRegister(TR::RealRegister::gr5);
958
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
959
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
960
961
TR_ASSERT_FATAL((J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE <= UPPER_IMMED && J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >= LOWER_IMMED) ||
962
(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE & 0xffff) == 0, "Expecting J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE to fit in immediate field");
963
TR_ASSERT_FATAL( cmActiveMask <= 0xFFFF, "Expecting cmActiveMask (%u) to fit in an unsigned immediate field.", cmActiveMask);
964
965
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r5,
966
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapBaseForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
967
cursor);
968
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r4,
969
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, heapSizeForBarrierRange0), TR::Compiler->om.sizeofReferenceAddress()),
970
cursor);
971
cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::subf, n, r5, r5, r3, cursor);
972
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r5, r4, cursor);
973
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bgelr, n, NULL, cr0, cursor);
974
// Incremental (i.e. balanced) always dirties the card
975
if (TR::Compiler->om.writeBarrierType() != gc_modron_wrtbar_cardmark_incremental)
976
{
977
cursor = generateTrg1MemInstruction(cg, TR::InstOpCode::lwz, n, r4,
978
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, privateFlags), 4),
979
cursor);
980
cursor = generateTrg1Src1ImmInstruction(cg, cmActiveMaskOp, n, r4, r4, cr0, cmActiveMask, cursor);
981
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
982
}
983
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r4,
984
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(struct J9VMThread, activeCardTableBase), TR::Compiler->om.sizeofReferenceAddress()),
985
cursor);
986
if (comp->target().is64Bit())
987
cursor = generateShiftRightLogicalImmediateLong(cg, n, r5, r5, cardTableShift, cursor);
988
else
989
cursor = generateShiftRightLogicalImmediate(cg, n, r5, r5, cardTableShift, cursor);
990
cursor = generateTrg1ImmInstruction(cg, TR::InstOpCode::li, n, r11, 1, cursor);
991
cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::stbx, n,
992
TR::MemoryReference::createWithIndexReg(cg, r4, r5, 1),
993
r11, cursor);
994
cursor = generateInstruction(cg, TR::InstOpCode::blr, n, cursor);
995
996
performCCPreLoadedBinaryEncoding(buffer, cg);
997
998
const uint32_t helperSize = TR::Compiler->om.writeBarrierType() != gc_modron_wrtbar_cardmark_incremental ? 13 : 10;
999
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
1000
"Per-codecache card mark, unexpected size");
1001
1002
CCPreLoadedCodeTable[TR_cardMark] = entryLabel->getCodeLocation();
1003
1004
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
1005
}
1006
1007
static uint8_t* initializeCCPreLoadedArrayStoreCHK(uint8_t *buffer, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
1008
{
1009
TR::Compilation *comp = cg->comp();
1010
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
1011
TR::Node *n = cg->getFirstInstruction()->getNode();
1012
1013
// Array store check
1014
// In:
1015
// r3 = dst object
1016
// r4 = src object
1017
// r11 = root class (j/l/Object)
1018
// Out:
1019
// none
1020
// Clobbers:
1021
// r5-r7, r11
1022
// cr0
1023
1024
cg->setFirstInstruction(NULL);
1025
cg->setAppendInstruction(NULL);
1026
1027
TR::Instruction *eyecatcher = generateImmInstruction(cg, TR::InstOpCode::dd, n, CCEYECATCHER('C', 'C', 'H', 7));
1028
1029
TR::LabelSymbol *entryLabel = generateLabelSymbol(cg);
1030
TR::LabelSymbol *skipSuperclassTestLabel = generateLabelSymbol(cg);
1031
TR::LabelSymbol *helperTrampolineLabel = generateLabelSymbol(cg);
1032
helperTrampolineLabel->setCodeLocation((uint8_t *)TR::CodeCacheManager::instance()->findHelperTrampoline(TR_typeCheckArrayStore, buffer));
1033
TR::Instruction *entry = generateLabelInstruction(cg, TR::InstOpCode::label, n, entryLabel);
1034
TR::Instruction *cursor = entry;
1035
TR::InstOpCode::Mnemonic Op_lclass = TR::InstOpCode::Op_load;
1036
if (TR::Compiler->om.compressObjectReferences())
1037
Op_lclass = TR::InstOpCode::lwz;
1038
1039
TR::Register *r3 = cg->machine()->getRealRegister(TR::RealRegister::gr3);
1040
TR::Register *r4 = cg->machine()->getRealRegister(TR::RealRegister::gr4);
1041
TR::Register *r5 = cg->machine()->getRealRegister(TR::RealRegister::gr5);
1042
TR::Register *r6 = cg->machine()->getRealRegister(TR::RealRegister::gr6);
1043
TR::Register *r7 = cg->machine()->getRealRegister(TR::RealRegister::gr7);
1044
TR::Register *r11 = cg->machine()->getRealRegister(TR::RealRegister::gr11);
1045
TR::Register *cr0 = cg->machine()->getRealRegister(TR::RealRegister::cr0);
1046
1047
cursor = generateTrg1MemInstruction(cg,Op_lclass, n, r5,
1048
TR::MemoryReference::createWithDisplacement(cg, r3, TR::Compiler->om.offsetOfObjectVftField(), TR::Compiler->om.sizeofReferenceField()),
1049
cursor);
1050
cursor = generateTrg1MemInstruction(cg,Op_lclass, n, r6,
1051
TR::MemoryReference::createWithDisplacement(cg, r4, TR::Compiler->om.offsetOfObjectVftField(), TR::Compiler->om.sizeofReferenceField()),
1052
cursor);
1053
cursor = TR::TreeEvaluator::generateVFTMaskInstruction(cg, n, r5, cursor);
1054
cursor = TR::TreeEvaluator::generateVFTMaskInstruction(cg, n, r6, cursor);
1055
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r5,
1056
TR::MemoryReference::createWithDisplacement(cg, r5, offsetof(J9ArrayClass, componentType), TR::Compiler->om.sizeofReferenceAddress()),
1057
cursor);
1058
1059
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r5, r6, cursor);
1060
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
1061
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r7,
1062
TR::MemoryReference::createWithDisplacement(cg, r6, offsetof(J9Class, castClassCache), TR::Compiler->om.sizeofReferenceAddress()),
1063
cursor);
1064
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r5, r7, cursor);
1065
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
1066
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r5, r11, cursor);
1067
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
1068
1069
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r7,
1070
TR::MemoryReference::createWithDisplacement(cg, r5, offsetof(J9Class, romClass), TR::Compiler->om.sizeofReferenceAddress()),
1071
cursor);
1072
cursor = generateTrg1MemInstruction(cg, TR::InstOpCode::lwz, n, r7,
1073
TR::MemoryReference::createWithDisplacement(cg, r7, offsetof(J9ROMClass, modifiers), 4),
1074
cursor);
1075
cursor = generateShiftRightLogicalImmediate(cg, n, r7, r7, 1, cursor);
1076
TR_ASSERT_FATAL(!(((J9AccClassArray | J9AccInterface) >> 1) & ~0xffff),
1077
"Expecting shifted ROM class modifiers to fit in immediate");
1078
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::andi_r, n, r7, r7, cr0, (J9AccClassArray | J9AccInterface) >> 1, cursor);
1079
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::bne, n, skipSuperclassTestLabel, cr0, cursor);
1080
1081
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r7,
1082
TR::MemoryReference::createWithDisplacement(cg, r6, offsetof(J9Class, classDepthAndFlags), TR::Compiler->om.sizeofReferenceAddress()),
1083
cursor);
1084
TR_ASSERT_FATAL(!(J9AccClassDepthMask & ~0xffff),
1085
"Expecting class depth mask to fit in immediate");
1086
cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::andi_r, n, r7, r7, cr0, J9AccClassDepthMask, cursor);
1087
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beq, n, skipSuperclassTestLabel, cr0, cursor);
1088
cursor = generateTrg1MemInstruction(cg,Op_lclass, n, r7,
1089
TR::MemoryReference::createWithDisplacement(cg, r6, offsetof(J9Class, superclasses), TR::Compiler->om.sizeofReferenceField()),
1090
cursor);
1091
cursor = generateTrg1MemInstruction(cg,TR::InstOpCode::Op_load, n, r7,
1092
TR::MemoryReference::createWithDisplacement(cg, r7, 0, TR::Compiler->om.sizeofReferenceAddress()),
1093
cursor);
1094
cursor = generateTrg1Src2Instruction(cg,TR::InstOpCode::Op_cmpl, n, cr0, r5, r7, cursor);
1095
cursor = generateConditionalBranchInstruction(cg, TR::InstOpCode::beqlr, n, NULL, cr0, cursor);
1096
1097
cursor = generateLabelInstruction(cg, TR::InstOpCode::label, n, skipSuperclassTestLabel, cursor);
1098
cursor = generateLabelInstruction(cg, TR::InstOpCode::b, n, helperTrampolineLabel, cursor);
1099
1100
performCCPreLoadedBinaryEncoding(buffer, cg);
1101
1102
const uint32_t helperSize = 25;
1103
TR_ASSERT(cg->getBinaryBufferCursor() - entryLabel->getCodeLocation() == helperSize * PPC_INSTRUCTION_LENGTH,
1104
"Per-codecache array store check, unexpected size");
1105
1106
CCPreLoadedCodeTable[TR_arrayStoreCHK] = entryLabel->getCodeLocation();
1107
1108
return cg->getBinaryBufferCursor() - PPC_INSTRUCTION_LENGTH;
1109
}
1110
1111
void TR::createCCPreLoadedCode(uint8_t *CCPreLoadedCodeBase, uint8_t *CCPreLoadedCodeTop, void **CCPreLoadedCodeTable, TR::CodeGenerator *cg)
1112
{
1113
/* If you modify this make sure you update CCPreLoadedCodeSize above as well */
1114
1115
// We temporarily clobber the first and append instructions so we can use high level codegen to generate pre-loaded code
1116
// So save the original values here and restore them when done
1117
TR::Compilation *comp = cg->comp();
1118
if (comp->getOptions()->realTimeGC())
1119
return;
1120
1121
TR::Instruction *curFirst = cg->getFirstInstruction();
1122
TR::Instruction *curAppend = cg->getAppendInstruction();
1123
uint8_t *curBinaryBufferStart = cg->getBinaryBufferStart();
1124
uint8_t *curBinaryBufferCursor = cg->getBinaryBufferCursor();
1125
1126
uint8_t *buffer = (uint8_t *)CCPreLoadedCodeBase;
1127
1128
buffer = initializeCCPreLoadedPrefetch(buffer, CCPreLoadedCodeTable, cg);
1129
buffer = initializeCCPreLoadedNonZeroPrefetch(buffer + PPC_INSTRUCTION_LENGTH, CCPreLoadedCodeTable, cg);
1130
buffer = initializeCCPreLoadedWriteBarrier(buffer + PPC_INSTRUCTION_LENGTH, CCPreLoadedCodeTable, cg);
1131
if (comp->getOptions()->getGcCardSize() > 0)
1132
{
1133
buffer = initializeCCPreLoadedWriteBarrierAndCardMark(buffer + PPC_INSTRUCTION_LENGTH, CCPreLoadedCodeTable, cg);
1134
buffer = initializeCCPreLoadedCardMark(buffer + PPC_INSTRUCTION_LENGTH, CCPreLoadedCodeTable, cg);
1135
}
1136
buffer = initializeCCPreLoadedArrayStoreCHK(buffer + PPC_INSTRUCTION_LENGTH, CCPreLoadedCodeTable, cg);
1137
1138
// Other Code Cache Helper Initialization will go here
1139
1140
TR_ASSERT(buffer <= (uint8_t*)CCPreLoadedCodeTop, "Exceeded CodeCache Helper Area");
1141
1142
// Apply all of our relocations now before we sync
1143
TR::list<TR::Relocation*> &relocs = cg->getRelocationList();
1144
auto iterator = relocs.begin();
1145
while (iterator != relocs.end())
1146
{
1147
if ((*iterator)->getUpdateLocation() >= CCPreLoadedCodeBase &&
1148
(*iterator)->getUpdateLocation() <= CCPreLoadedCodeTop)
1149
{
1150
(*iterator)->apply(cg);
1151
iterator = relocs.erase(iterator);
1152
}
1153
else
1154
++iterator;
1155
}
1156
1157
#if defined(TR_HOST_POWER)
1158
ppcCodeSync((uint8_t *)CCPreLoadedCodeBase, buffer - (uint8_t *)CCPreLoadedCodeBase + 1);
1159
#endif
1160
1161
cg->setFirstInstruction(curFirst);
1162
cg->setAppendInstruction(curAppend);
1163
cg->setBinaryBufferStart(curBinaryBufferStart);
1164
cg->setBinaryBufferCursor(curBinaryBufferCursor);
1165
1166
}
1167
1168
uint8_t *TR::PPCAllocPrefetchSnippet::emitSnippetBody()
1169
{
1170
TR::Compilation *comp = cg()->comp();
1171
uint8_t *buffer = cg()->getBinaryBufferCursor();
1172
getSnippetLabel()->setCodeLocation(buffer);
1173
TR::InstOpCode opcode;
1174
1175
if (comp->getOptions()->realTimeGC())
1176
return NULL;
1177
1178
TR_ASSERT((uintptr_t)((cg()->getCodeCache())->getCCPreLoadedCodeAddress(TR_AllocPrefetch, cg())) != 0xDEADBEEF,
1179
"Invalid addr for code cache helper");
1180
intptr_t distance = (intptr_t)(cg()->getCodeCache())->getCCPreLoadedCodeAddress(TR_AllocPrefetch, cg())
1181
- (intptr_t)buffer;
1182
opcode.setOpCodeValue(TR::InstOpCode::b);
1183
buffer = opcode.copyBinaryToBuffer(buffer);
1184
*(int32_t *)buffer |= distance & 0x03FFFFFC;
1185
return buffer+PPC_INSTRUCTION_LENGTH;
1186
}
1187
1188
void
1189
TR::PPCAllocPrefetchSnippet::print(TR::FILE *pOutFile, TR_Debug * debug)
1190
{
1191
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg()->fe());
1192
uint8_t *cursor = getSnippetLabel()->getCodeLocation();
1193
1194
debug->printSnippetLabel(pOutFile, getSnippetLabel(), cursor, "Allocation Prefetch Snippet");
1195
1196
int32_t distance;
1197
1198
debug->printPrefix(pOutFile, NULL, cursor, 4);
1199
distance = *((int32_t *) cursor) & 0x03fffffc;
1200
distance = (distance << 6) >> 6; // sign extend
1201
trfprintf(pOutFile, "b \t" POINTER_PRINTF_FORMAT "\t\t", (intptr_t)cursor + distance);
1202
}
1203
1204
uint32_t TR::PPCAllocPrefetchSnippet::getLength(int32_t estimatedCodeStart)
1205
{
1206
1207
if (cg()->comp()->getOptions()->realTimeGC())
1208
return 0;
1209
1210
return PPC_INSTRUCTION_LENGTH;
1211
}
1212
1213
// Constructs the non-zero TLH allocation prefetch snippet. All arguments are
// forwarded to the TR::Snippet base class; this class adds no state.
//
// codeGen   - code generator that owns the snippet
// node      - node the snippet is associated with
// callLabel - label passed to the base class as the snippet label
//
// NOTE(review): the trailing 'false' passed to TR::Snippet is presumably the
// GC-safe-point flag -- confirm against the TR::Snippet constructor.
TR::PPCNonZeroAllocPrefetchSnippet::PPCNonZeroAllocPrefetchSnippet(
   TR::CodeGenerator *codeGen,
   TR::Node *node,
   TR::LabelSymbol *callLabel)
   : TR::Snippet(codeGen, node, callLabel, false)
   {
   // Nothing further to initialize.
   }
1220
1221
uint8_t *TR::PPCNonZeroAllocPrefetchSnippet::emitSnippetBody()
1222
{
1223
TR::Compilation *comp = cg()->comp();
1224
uint8_t *buffer = cg()->getBinaryBufferCursor();
1225
getSnippetLabel()->setCodeLocation(buffer);
1226
TR::InstOpCode opcode;
1227
1228
if (comp->getOptions()->realTimeGC())
1229
return NULL;
1230
1231
TR_ASSERT((uintptr_t)((cg()->getCodeCache())->getCCPreLoadedCodeAddress(TR_NonZeroAllocPrefetch, cg())) != 0xDEADBEEF,
1232
"Invalid addr for code cache helper");
1233
intptr_t distance = (intptr_t)(cg()->getCodeCache())->getCCPreLoadedCodeAddress(TR_NonZeroAllocPrefetch, cg())
1234
- (intptr_t)buffer;
1235
opcode.setOpCodeValue(TR::InstOpCode::b);
1236
buffer = opcode.copyBinaryToBuffer(buffer);
1237
*(int32_t *)buffer |= distance & 0x03FFFFFC;
1238
return buffer+PPC_INSTRUCTION_LENGTH;
1239
}
1240
1241
void
1242
TR::PPCNonZeroAllocPrefetchSnippet::print(TR::FILE *pOutFile, TR_Debug * debug)
1243
{
1244
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg()->fe());
1245
uint8_t *cursor = getSnippetLabel()->getCodeLocation();
1246
1247
debug->printSnippetLabel(pOutFile, getSnippetLabel(), cursor, "Non Zero TLH Allocation Prefetch Snippet");
1248
1249
int32_t distance;
1250
1251
debug->printPrefix(pOutFile, NULL, cursor, 4);
1252
distance = *((int32_t *) cursor) & 0x03fffffc;
1253
distance = (distance << 6) >> 6; // sign extend
1254
trfprintf(pOutFile, "b \t" POINTER_PRINTF_FORMAT "\t\t", (intptr_t)cursor + distance);
1255
}
1256
1257
uint32_t TR::PPCNonZeroAllocPrefetchSnippet::getLength(int32_t estimatedCodeStart)
1258
{
1259
1260
if (cg()->comp()->getOptions()->realTimeGC())
1261
return 0;
1262
1263
return PPC_INSTRUCTION_LENGTH;
1264
}
1265
1266
1267