Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2019, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include <cmath>
24
#include <iterator>
25
#include "codegen/ARM64Instruction.hpp"
26
#include "codegen/ARM64JNILinkage.hpp"
27
#include "codegen/ARM64OutOfLineCodeSection.hpp"
28
#include "codegen/ARM64PrivateLinkage.hpp"
29
#include "codegen/CodeGenerator.hpp"
30
#include "codegen/CodeGenerator_inlines.hpp"
31
#include "codegen/CodeGeneratorUtils.hpp"
32
#include "codegen/GenerateInstructions.hpp"
33
#include "codegen/ARM64Instruction.hpp"
34
#include "codegen/J9ARM64Snippet.hpp"
35
#include "codegen/J9WatchedInstanceFieldSnippet.hpp"
36
#include "codegen/J9WatchedStaticFieldSnippet.hpp"
37
#include "codegen/OMRCodeGenerator.hpp"
38
#include "codegen/RegisterDependency.hpp"
39
#include "codegen/Relocation.hpp"
40
#include "codegen/TreeEvaluator.hpp"
41
#include "compile/VirtualGuard.hpp"
42
#include "il/AutomaticSymbol.hpp"
43
#include "il/DataTypes.hpp"
44
#include "il/LabelSymbol.hpp"
45
#include "il/Node.hpp"
46
#include "il/Node_inlines.hpp"
47
#include "il/OMRDataTypes_inlines.hpp"
48
#include "il/StaticSymbol.hpp"
49
#include "OMR/Bytes.hpp"
50
51
/*
52
* J9 ARM64 specific tree evaluator table overrides
53
*/
54
extern void TEMPORARY_initJ9ARM64TreeEvaluatorTable(TR::CodeGenerator *cg)
55
{
56
TR_TreeEvaluatorFunctionPointer *tet = cg->getTreeEvaluatorTable();
57
58
tet[TR::awrtbar] = TR::TreeEvaluator::awrtbarEvaluator;
59
tet[TR::awrtbari] = TR::TreeEvaluator::awrtbariEvaluator;
60
tet[TR::monexit] = TR::TreeEvaluator::monexitEvaluator;
61
tet[TR::monent] = TR::TreeEvaluator::monentEvaluator;
62
tet[TR::monexitfence] = TR::TreeEvaluator::monexitfenceEvaluator;
63
tet[TR::asynccheck] = TR::TreeEvaluator::asynccheckEvaluator;
64
tet[TR::instanceof] = TR::TreeEvaluator::instanceofEvaluator;
65
tet[TR::checkcast] = TR::TreeEvaluator::checkcastEvaluator;
66
tet[TR::checkcastAndNULLCHK] = TR::TreeEvaluator::checkcastAndNULLCHKEvaluator;
67
tet[TR::New] = TR::TreeEvaluator::newObjectEvaluator;
68
tet[TR::variableNew] = TR::TreeEvaluator::newObjectEvaluator;
69
tet[TR::newarray] = TR::TreeEvaluator::newArrayEvaluator;
70
tet[TR::anewarray] = TR::TreeEvaluator::anewArrayEvaluator;
71
tet[TR::variableNewArray] = TR::TreeEvaluator::anewArrayEvaluator;
72
tet[TR::multianewarray] = TR::TreeEvaluator::multianewArrayEvaluator;
73
tet[TR::arraylength] = TR::TreeEvaluator::arraylengthEvaluator;
74
tet[TR::ZEROCHK] = TR::TreeEvaluator::ZEROCHKEvaluator;
75
tet[TR::ResolveCHK] = TR::TreeEvaluator::resolveCHKEvaluator;
76
tet[TR::DIVCHK] = TR::TreeEvaluator::DIVCHKEvaluator;
77
tet[TR::BNDCHK] = TR::TreeEvaluator::BNDCHKEvaluator;
78
// TODO:ARM64: Enable when Implemented: tet[TR::ArrayCopyBNDCHK] = TR::TreeEvaluator::ArrayCopyBNDCHKEvaluator;
79
tet[TR::BNDCHKwithSpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
80
tet[TR::SpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
81
tet[TR::ArrayStoreCHK] = TR::TreeEvaluator::ArrayStoreCHKEvaluator;
82
tet[TR::ArrayCHK] = TR::TreeEvaluator::ArrayCHKEvaluator;
83
tet[TR::MethodEnterHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
84
tet[TR::MethodExitHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
85
tet[TR::allocationFence] = TR::TreeEvaluator::flushEvaluator;
86
tet[TR::loadFence] = TR::TreeEvaluator::flushEvaluator;
87
tet[TR::storeFence] = TR::TreeEvaluator::flushEvaluator;
88
tet[TR::fullFence] = TR::TreeEvaluator::flushEvaluator;
89
tet[TR::frem] = TR::TreeEvaluator::fremEvaluator;
90
tet[TR::drem] = TR::TreeEvaluator::dremEvaluator;
91
tet[TR::NULLCHK] = TR::TreeEvaluator::NULLCHKEvaluator;
92
tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;
93
}
94
95
// Forward declaration: selects the store mnemonic matching the given data type
// and element size. useIdxReg presumably selects the register-offset (indexed)
// addressing form — definition not visible in this chunk; confirm at its definition.
static TR::InstOpCode::Mnemonic
getStoreOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool useIdxReg);
97
98
/**
 * @brief Hook invoked at the BBStart of a catch block.
 *
 * @param[in] node: BBStart node of the catch block
 * @param[in] fenceInstruction: fence instruction of the block (currently unused here)
 * @param[in] cg: code generator
 */
void VMgenerateCatchBlockBBStartPrologue(TR::Node *node, TR::Instruction *fenceInstruction, TR::CodeGenerator *cg)
   {
   TR_J9VMBase *fej9 = reinterpret_cast<TR_J9VMBase *>(cg->fe());
   TR::Compilation *comp = cg->comp();

   // EDO instrumentation for catch blocks is not implemented on AArch64 yet.
   if (fej9->shouldPerformEDO(node->getBlock(), comp))
      {
      TR_UNIMPLEMENTED();
      }
   }
110
111
/**
112
* @brief Generates instructions to load j9class from object pointer
113
*
114
* @param[in] node: node
115
* @param[in] j9classReg: register j9class value is assigned to
116
* @param[in] objReg: register holding object pointer
117
* @param[in] cg: code generator
118
*/
119
static void
120
generateLoadJ9Class(TR::Node *node, TR::Register *j9classReg, TR::Register *objReg, TR::CodeGenerator *cg)
121
{
122
generateTrg1MemInstruction(cg, TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx, node, j9classReg,
123
TR::MemoryReference::createWithDisplacement(cg, objReg, static_cast<int32_t>(TR::Compiler->om.offsetOfObjectVftField())));
124
TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, j9classReg);
125
}
126
127
/*
 * Generate the reporting field access helper call with required arguments
 *
 * jitReportInstanceFieldRead
 * arg1 pointer to static data block
 * arg2 object being read
 *
 * jitReportInstanceFieldWrite
 * arg1 pointer to static data block
 * arg2 object being written to
 * arg3 pointer to value being written
 *
 * jitReportStaticFieldRead
 * arg1 pointer to static data block
 *
 * jitReportStaticFieldWrite
 * arg1 pointer to static data block
 * arg2 pointer to value being written
 *
 */
void generateReportFieldAccessOutlinedInstructions(TR::Node *node, TR::LabelSymbol *endLabel, TR::Register *dataBlockReg, bool isWrite, TR::CodeGenerator *cg, TR::Register *sideEffectRegister, TR::Register *valueReg)
   {
   TR::Compilation *comp = cg->comp();
   // An indirect opcode means an instance field access; otherwise static.
   bool isInstanceField = node->getOpCode().isIndirect();

   // Select the VM helper matching the access kind (read/write x instance/static).
   TR_RuntimeHelper helperIndex = isWrite ? (isInstanceField ? TR_jitReportInstanceFieldWrite: TR_jitReportStaticFieldWrite):
                                            (isInstanceField ? TR_jitReportInstanceFieldRead: TR_jitReportStaticFieldRead);

   TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
   auto linkageProperties = linkage->getProperties();
   TR::Register *valueReferenceReg = NULL;

   // Figure out the number of dependencies needed to make the VM Helper call.
   // numOfConditions is equal to the number of arguments required by the VM Helper.
   uint8_t numOfConditions = 1; // All helpers need at least one parameter.
   if (isWrite)
      {
      numOfConditions += 2; // the value register and the register holding its stack address
      }
   if (isInstanceField)
      {
      numOfConditions += 1; // the object being accessed
      }

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory())TR::RegisterDependencyConditions(numOfConditions, numOfConditions, cg->trMemory());

   /*
    * For reporting field write, reference to the valueNode is needed so we need to store
    * the value on to a stack location first and pass the stack location address as an arguement
    * to the VM helper
    */
   if (isWrite)
      {
      TR::DataType dt = node->getDataType();
      int32_t elementSize = TR::Symbol::convertTypeToSize(dt);
      TR::InstOpCode::Mnemonic storeOp = getStoreOpCodeFromDataType(cg, dt, elementSize, false);
      TR::SymbolReference *location = cg->allocateLocalTemp(dt);
      TR::MemoryReference *valueMR = TR::MemoryReference::createWithSymRef(cg, node, location);

      // Spill the value into the stack temp so its address can be passed to the helper.
      generateMemSrc1Instruction(cg, storeOp, node, valueMR, valueReg);
      deps->addPreCondition(valueReg, TR::RealRegister::NoReg);
      deps->addPostCondition(valueReg, TR::RealRegister::NoReg);
      valueReferenceReg = cg->allocateRegister();

      // Materialize the address of the stack temp into valueReferenceReg.
      generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, valueReferenceReg, valueMR);
      }

   // First Argument - DataBlock
   deps->addPreCondition(dataBlockReg, TR::RealRegister::x0);
   deps->addPostCondition(dataBlockReg, TR::RealRegister::x0);

   // Second Argument
   if (isInstanceField)
      {
      deps->addPreCondition(sideEffectRegister, TR::RealRegister::x1);
      deps->addPostCondition(sideEffectRegister, TR::RealRegister::x1);
      }
   else if (isWrite)
      {
      deps->addPreCondition(valueReferenceReg, TR::RealRegister::x1);
      deps->addPostCondition(valueReferenceReg, TR::RealRegister::x1);
      }

   // Third Argument
   if (isInstanceField && isWrite)
      {
      deps->addPreCondition(valueReferenceReg, TR::RealRegister::x2);
      deps->addPostCondition(valueReferenceReg, TR::RealRegister::x2);
      }

   // Generate branch instruction to jump into helper
   TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(helperIndex);
   TR::Instruction *call = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(helperSym->getMethodAddress()), deps, helperSym, NULL);
   // Attach the GC map of preserved registers to the helper call site.
   call->ARM64NeedsGCMap(cg, linkageProperties.getPreservedRegisterMapForGC());
   cg->machine()->setLinkRegisterKilled(true);

   // Return to mainline code after reporting.
   generateLabelInstruction(cg, TR::InstOpCode::b, node, endLabel);

   if (valueReferenceReg != NULL)
      {
      cg->stopUsingRegister(valueReferenceReg);
      }
   }
231
232
/**
 * @brief Generates the inline test of the J9ClassHasWatchedFields bit in the
 *        class flags and, out of line, the call that reports the field access
 *        when the bit is set.
 *
 * @param[in] cg: code generator
 * @param[in] node: field access node
 * @param[in] dataSnippet: watched-field data snippet for this access
 * @param[in] isWrite: true for a field write, false for a read
 * @param[in] sideEffectRegister: register holding the object (instance) or java/lang/Class (static)
 * @param[in] valueReg: register holding the value being written (writes only)
 * @param[in] dataSnippetRegister: register that receives the snippet address
 */
void
J9::ARM64::TreeEvaluator::generateTestAndReportFieldWatchInstructions(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *valueReg, TR::Register *dataSnippetRegister)
   {
   bool isInstanceField = node->getOpCode().isIndirect();
   TR_J9VMBase *fej9 = reinterpret_cast<TR_J9VMBase *>(cg->fe());

   TR::Register *scratchReg = cg->allocateRegister();

   TR::LabelSymbol* startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* fieldReportLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);
   // Materialize the snippet address (PC-relative adr).
   generateTrg1ImmSymInstruction(cg, TR::InstOpCode::adr, node, dataSnippetRegister, 0, dataSnippet->getSnippetLabel());

   // The reporting call is emitted out of line; mainline only tests the class flag.
   TR_ARM64OutOfLineCodeSection *generateReportOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(fieldReportLabel, endLabel, cg);
   cg->getARM64OutOfLineCodeSectionList().push_front(generateReportOOL);

   TR::Register *fieldClassReg = NULL;
   bool isSideEffectReg = false;
   // Load fieldClass
   if (isInstanceField)
      {
      fieldClassReg = cg->allocateRegister();
      generateLoadJ9Class(node, fieldClassReg, sideEffectRegister, cg);
      }
   else if (!(node->getSymbolReference()->isUnresolved()))
      {
      fieldClassReg = cg->allocateRegister();
      if (cg->needClassAndMethodPointerRelocations())
         {
         // If this is an AOT compile, we generate instructions to load the fieldClass directly from the snippet because the fieldClass will be invalid
         // if we load using the dataSnippet's helper query at compile time.
         TR::MemoryReference *fieldClassMemRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetof(J9JITWatchedStaticFieldData, fieldClass));
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg, fieldClassMemRef);
         }
      else
         {
         // For non-AOT compiles we don't need to use sideEffectRegister here as the class information is available to us at compile time.
         J9Class * fieldClass = static_cast<TR::J9WatchedStaticFieldSnippet *>(dataSnippet)->getFieldClass();
         loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(fieldClass), fieldClassReg);
         }
      }
   else
      {
      // Unresolved
      if (isWrite)
         {
         fieldClassReg = cg->allocateRegister();
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg,
                  TR::MemoryReference::createWithDisplacement(cg, sideEffectRegister, fej9->getOffsetOfClassFromJavaLangClassField()));
         }
      else
         {
         // For an unresolved static read the side-effect register already holds the class.
         isSideEffectReg = true;
         fieldClassReg = sideEffectRegister;
         }
      }

   TR::MemoryReference *classFlagsMemRef = TR::MemoryReference::createWithDisplacement(cg, fieldClassReg, static_cast<int32_t>(fej9->getOffsetOfClassFlags()));

   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, classFlagsMemRef);
   static_assert(J9ClassHasWatchedFields == 0x100, "We assume that J9ClassHasWatchedFields is 0x100");
   generateTestImmInstruction(cg, node, scratchReg, 0x600); // 0x600 is immr:imms for 0x100
   // Branch to the OOL report sequence when the watched-fields bit is set.
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, fieldReportLabel, TR::CC_NE);

   generateReportOOL->swapInstructionListsWithCompilation();

   generateLabelInstruction(cg, TR::InstOpCode::label, node, fieldReportLabel);
   generateReportFieldAccessOutlinedInstructions(node, endLabel, dataSnippetRegister, isWrite, cg, sideEffectRegister, valueReg);

   generateReportOOL->swapInstructionListsWithCompilation();

   generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel);

   cg->stopUsingRegister(scratchReg);
   // fieldClassReg aliases sideEffectRegister in the unresolved-static-read case;
   // only release it when we allocated it ourselves.
   if (!isSideEffectReg)
      cg->stopUsingRegister(fieldClassReg);

   }
314
315
void
316
J9::ARM64::TreeEvaluator::generateFillInDataBlockSequenceForUnresolvedField(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister)
317
{
318
TR::Compilation *comp = cg->comp();
319
TR::SymbolReference *symRef = node->getSymbolReference();
320
bool is64Bit = comp->target().is64Bit();
321
bool isStatic = symRef->getSymbol()->getKind() == TR::Symbol::IsStatic;
322
323
TR_RuntimeHelper helperIndex = isWrite? (isStatic ? TR_jitResolveStaticFieldSetterDirect: TR_jitResolveFieldSetterDirect):
324
(isStatic ? TR_jitResolveStaticFieldDirect: TR_jitResolveFieldDirect);
325
326
TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
327
auto linkageProperties = linkage->getProperties();
328
intptr_t offsetInDataBlock = isStatic ? offsetof(J9JITWatchedStaticFieldData, fieldAddress): offsetof(J9JITWatchedInstanceFieldData, offset);
329
330
331
TR::LabelSymbol* startLabel = generateLabelSymbol(cg);
332
TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
333
TR::LabelSymbol* unresolvedLabel = generateLabelSymbol(cg);
334
startLabel->setStartInternalControlFlow();
335
endLabel->setEndInternalControlFlow();
336
337
TR::Register *cpIndexReg = cg->allocateRegister();
338
TR::Register *resultReg = cg->allocateRegister();
339
TR::Register *scratchReg = cg->allocateRegister();
340
341
// Setup Dependencies
342
// Requires two argument registers: resultReg and cpIndexReg.
343
uint8_t numOfConditions = 2;
344
TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(numOfConditions, numOfConditions, cg->trMemory());
345
346
TR_ARM64OutOfLineCodeSection *generateReportOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(unresolvedLabel, endLabel, cg);
347
cg->getARM64OutOfLineCodeSectionList().push_front(generateReportOOL);
348
349
generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);
350
351
// Compare J9JITWatchedInstanceFieldData.offset or J9JITWatchedStaticFieldData.fieldAddress (Depending on Instance or Static)
352
// Load value from dataSnippet + offsetInDataBlock then compare and branch
353
generateTrg1ImmSymInstruction(cg, TR::InstOpCode::adr, node, dataSnippetRegister, 0, dataSnippet->getSnippetLabel());
354
TR::MemoryReference *fieldMemRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetInDataBlock);
355
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, fieldMemRef);
356
generateCompareImmInstruction(cg, node, scratchReg, -1, true);
357
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, unresolvedLabel, TR::CC_EQ);
358
359
generateReportOOL->swapInstructionListsWithCompilation();
360
361
generateLabelInstruction(cg, TR::InstOpCode::label, node, unresolvedLabel);
362
363
bool isSideEffectReg = false;
364
if (isStatic)
365
{
366
// Fill in J9JITWatchedStaticFieldData.fieldClass
367
TR::Register *fieldClassReg = NULL;
368
369
if (isWrite)
370
{
371
fieldClassReg = cg->allocateRegister();
372
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg,
373
TR::MemoryReference::createWithDisplacement(cg, sideEffectRegister, static_cast<int32_t>(comp->fej9()->getOffsetOfClassFromJavaLangClassField())));
374
}
375
else
376
{
377
isSideEffectReg = true;
378
fieldClassReg = sideEffectRegister;
379
}
380
TR::MemoryReference *memRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetof(J9JITWatchedStaticFieldData, fieldClass));
381
382
// Store value to fieldClass member of the snippet
383
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, memRef, fieldClassReg);
384
385
if (!isSideEffectReg)
386
cg->stopUsingRegister(fieldClassReg);
387
}
388
389
TR::ResolvedMethodSymbol *methodSymbol = node->getByteCodeInfo().getCallerIndex() == -1 ? comp->getMethodSymbol(): comp->getInlinedResolvedMethodSymbol(node->getByteCodeInfo().getCallerIndex());
390
391
uintptr_t constantPool = reinterpret_cast<uintptr_t>(methodSymbol->getResolvedMethod()->constantPool());
392
if (cg->needClassAndMethodPointerRelocations())
393
{
394
loadAddressConstantInSnippet(cg, node, constantPool, resultReg, TR_ConstantPool);
395
}
396
else
397
{
398
loadAddressConstant(cg, node, constantPool, resultReg);
399
}
400
loadConstant32(cg, node, symRef->getCPIndex(), cpIndexReg);
401
402
// cpAddress is the first argument of VMHelper
403
deps->addPreCondition(resultReg, TR::RealRegister::x0);
404
deps->addPostCondition(resultReg, TR::RealRegister::x0);
405
// cpIndexReg is the second argument
406
deps->addPreCondition(cpIndexReg, TR::RealRegister::x1);
407
deps->addPostCondition(cpIndexReg, TR::RealRegister::x1);
408
409
// Generate helper address and branch
410
TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(helperIndex);
411
TR::Instruction *call = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(helperSym->getMethodAddress()), deps, helperSym, NULL);
412
call->ARM64NeedsGCMap(cg, linkageProperties.getPreservedRegisterMapForGC());
413
cg->machine()->setLinkRegisterKilled(true);
414
415
/*
416
* For instance field offset, the result returned by the vmhelper includes header size.
417
* subtract the header size to get the offset needed by field watch helpers
418
*/
419
if (!isStatic)
420
{
421
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, resultReg, resultReg, TR::Compiler->om.objectHeaderSizeInBytes());
422
}
423
424
// store result into J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset
425
TR::MemoryReference *dataRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetInDataBlock);
426
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, dataRef, resultReg);
427
428
generateLabelInstruction(cg, TR::InstOpCode::b, node, endLabel);
429
430
generateReportOOL->swapInstructionListsWithCompilation();
431
432
generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel);
433
434
cg->stopUsingRegister(scratchReg);
435
cg->stopUsingRegister(cpIndexReg);
436
cg->stopUsingRegister(resultReg);
437
}
438
439
/**
 * @brief Generates a software read barrier for the Concurrent Scavenger GC:
 *        loads the value, compares it against the thread's evacuate base/top
 *        range, and when it falls inside that range calls the
 *        TR_softwareReadBarrier helper and reloads the (possibly forwarded)
 *        value from the same location.
 *
 * @param[in] node: load node
 * @param[in] cg: code generator
 * @param[in] isArdbari: true for ardbari (64-bit load via ldrimmx), false for irdbari (32-bit load via ldrimmw)
 *
 * @return register holding the loaded value
 */
static TR::Register *
generateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg, bool isArdbari)
   {
#ifndef OMR_GC_CONCURRENT_SCAVENGER
   TR_ASSERT_FATAL(false, "Concurrent Scavenger not supported.");
#else
   TR::Compilation *comp = cg->comp();
   TR::MemoryReference *tempMR = NULL;

   TR::Register *tempReg;
   TR::Register *locationReg = cg->allocateRegister();
   TR::Register *evacuateReg = cg->allocateRegister();
   TR::Register *x0Reg = cg->allocateRegister();
   TR::Register *vmThreadReg = cg->getMethodMetaDataRegister();

   // Choose the result register kind based on the symbol's GC properties.
   if (!node->getSymbolReference()->getSymbol()->isInternalPointer())
      {
      if (node->getSymbolReference()->getSymbol()->isNotCollected())
         tempReg = cg->allocateRegister();
      else
         tempReg = cg->allocateCollectedReferenceRegister();
      }
   else
      {
      tempReg = cg->allocateRegister();
      tempReg->setPinningArrayPointer(node->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->getPinningArrayPointer());
      tempReg->setContainsInternalPointer();
      }

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *endLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());
   deps->addPostCondition(tempReg, TR::RealRegister::NoReg);
   deps->addPostCondition(locationReg, TR::RealRegister::x1); // TR_softwareReadBarrier helper needs this in x1.
   deps->addPostCondition(evacuateReg, TR::RealRegister::NoReg);
   deps->addPostCondition(x0Reg, TR::RealRegister::x0);

   node->setRegister(tempReg);

   // Compute the address being loaded from into locationReg so the value can
   // be reloaded after the helper call.
   tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);
   if (tempMR->getUnresolvedSnippet() != NULL)
      {
      generateTrg1MemInstruction(cg, TR::InstOpCode::addx, node, locationReg, tempMR);
      }
   else
      {
      if (tempMR->useIndexedForm())
         generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, locationReg, tempMR->getBaseRegister(), tempMR->getIndexRegister());
      else
         generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, locationReg, tempMR);
      }

   TR::InstOpCode::Mnemonic loadOp = isArdbari ? TR::InstOpCode::ldrimmx : TR::InstOpCode::ldrimmw;

   auto faultingInstruction = generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));

   // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.
   // In this case, nullcheck reference register is base register of tempMR, but the memory reference of load instruction does not use it,
   // thus we need to explicitly set implicit exception point here.
   if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck())
      {
      if (cg->getImplicitExceptionPoint() == NULL)
         {
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, node->getFirstChild());
            }
         cg->setImplicitExceptionPoint(faultingInstruction);
         }
      }

   // Loads through the VFT symbol carry flag bits that must be masked off.
   if (isArdbari && node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())
      TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);

   // Skip the helper call when the loaded value lies outside [evacuateBase, evacuateTop].
   generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,
         TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateBaseAddressOffset()));
   generateCompareInstruction(cg, node, tempReg, evacuateReg, isArdbari); // 64-bit compare in ardbari
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_LT);

   generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,
         TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateTopAddressOffset()));
   generateCompareInstruction(cg, node, tempReg, evacuateReg, isArdbari); // 64-bit compare in ardbari
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_GT);

   // TR_softwareReadBarrier helper expects the vmThread in x0.
   generateMovInstruction(cg, node, x0Reg, vmThreadReg);

   TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_softwareReadBarrier);
   generateImmSymInstruction(cg, TR::InstOpCode::bl, node, (uintptr_t)helperSym->getMethodAddress(), deps, helperSym, NULL);

   // Reload: the helper may have updated the reference at the location.
   generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));

   if (isArdbari && node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())
      TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps);

   bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP());
   if (needSync)
      {
      // Issue an Acquire barrier after volatile load
      // dmb ishld
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0x9);
      }

   tempMR->decNodeReferenceCounts(cg);

   cg->stopUsingRegister(evacuateReg);
   cg->stopUsingRegister(locationReg);
   cg->stopUsingRegister(x0Reg);

   cg->machine()->setLinkRegisterKilled(true);

   return tempReg;
#endif // OMR_GC_CONCURRENT_SCAVENGER
   }
560
561
TR::Register *
J9::ARM64::TreeEvaluator::irdbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child first so the field-watch helper can report
   // the read; the load itself is delegated to the common iload evaluator.
   TR::Node *firstChild = node->getFirstChild();
   TR::Register *firstChildReg = cg->evaluate(firstChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, firstChildReg, NULL);

   cg->decReferenceCount(firstChild);
   return TR::TreeEvaluator::iloadEvaluator(node, cg);
   }
576
577
TR::Register *
J9::ARM64::TreeEvaluator::irdbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the address child for its side effects (field watch reporting).
   // It is deliberately NOT reference-count-decremented here: the load
   // evaluator (or the read barrier) consumes the same child and decrements
   // it, so decrementing here as well would double-decrement.
   TR::Register *addrReg = cg->evaluate(node->getFirstChild());
   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, addrReg, NULL);

   const bool needsReadBarrier =
      (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none) &&
      cg->comp()->useCompressedPointers() &&
      (node->getOpCode().hasSymbolReference() &&
       node->getSymbolReference()->getSymbol()->getDataType() == TR::Address);

   if (needsReadBarrier)
      return generateSoftwareReadBarrier(node, cg, false);

   return TR::TreeEvaluator::iloadEvaluator(node, cg);
   }
603
604
TR::Register *
J9::ARM64::TreeEvaluator::ardbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child first so the field-watch helper can report
   // the read; the load itself is delegated to the common aload evaluator.
   TR::Node *firstChild = node->getFirstChild();
   TR::Register *firstChildReg = cg->evaluate(firstChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, firstChildReg, NULL);

   cg->decReferenceCount(firstChild);
   return TR::TreeEvaluator::aloadEvaluator(node, cg);
   }
619
620
TR::Register *
J9::ARM64::TreeEvaluator::ardbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the address child for its side effects (field watch reporting).
   // It is deliberately NOT reference-count-decremented here: the load
   // evaluator (or the read barrier) consumes the same child and decrements
   // it, so decrementing here as well would double-decrement.
   TR::Register *addrReg = cg->evaluate(node->getFirstChild());
   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, addrReg, NULL);

   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
      return generateSoftwareReadBarrier(node, cg, true);

   return TR::TreeEvaluator::aloadEvaluator(node, cg);
   }
639
640
TR::Register *
J9::ARM64::TreeEvaluator::fwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value child and then the side-effect (second) child so the
   // field-watch helper can report the write; the store itself is delegated
   // to the common fstore evaluator.
   TR::Register *valueReg = cg->evaluate(node->getFirstChild());
   TR::Node *sideEffectNode = node->getSecondChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectNode);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, valueReg);

   // The value child's refcount is left to the store evaluator, which also
   // evaluates it; decrementing it here as well would double-decrement.
   cg->decReferenceCount(sideEffectNode);
   return TR::TreeEvaluator::fstoreEvaluator(node, cg);
   }
659
660
TR::Register *
J9::ARM64::TreeEvaluator::fwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value (second) child and then the side-effect (third) child
   // so the field-watch helper can report the write; the store itself is
   // delegated to the common fstore evaluator.
   TR::Register *valueReg = cg->evaluate(node->getSecondChild());
   TR::Node *sideEffectNode = node->getThirdChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectNode);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, valueReg);

   // The value child's refcount is left to the store evaluator, which also
   // evaluates it; decrementing it here as well would double-decrement.
   cg->decReferenceCount(sideEffectNode);
   return TR::TreeEvaluator::fstoreEvaluator(node, cg);
   }
679
680
TR::Register *
J9::ARM64::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // For rdbar and wrtbar nodes we first evaluate the children we need to
   // handle the side effects. Then we delegate the evaluation of the remaining
   // children and the load/store operation to the appropriate load/store evaluator.
   TR::Node *sideEffectNode = node->getSecondChild();
   TR::Register *valueReg = cg->evaluate(node->getFirstChild());
   // Reuse sideEffectNode rather than re-fetching node->getSecondChild(), for
   // consistency with fwrtbarEvaluator (same node, identical behavior).
   TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);
   if (cg->comp()->getOption(TR_EnableFieldWatch))
      {
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);
      }
   // Note: The reference count for valueReg's node is not decremented here because the
   // store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here
   // to avoid double decrementing.
   cg->decReferenceCount(sideEffectNode);
   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
699
700
TR::Register *
J9::ARM64::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // For rdbar and wrtbar nodes we first evaluate the children we need to
   // handle the side effects. Then we delegate the evaluation of the remaining
   // children and the load/store operation to the appropriate load/store evaluator.
   TR::Node *sideEffectNode = node->getThirdChild();
   TR::Register *valueReg = cg->evaluate(node->getSecondChild());
   // Reuse sideEffectNode rather than re-fetching node->getThirdChild(), for
   // consistency with fwrtbariEvaluator (same node, identical behavior).
   TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);
   if (cg->comp()->getOption(TR_EnableFieldWatch))
      {
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);
      }
   // Note: The reference count for valueReg's node (the second child) is not decremented
   // here because the store evaluator also uses it and will evaluate+decrement it. We must
   // skip decrementing here to avoid double decrementing.
   cg->decReferenceCount(sideEffectNode);
   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
717
718
/**
 * @brief Generates inlined code for card marking and branch to wrtbar helper
 * @details
 *    This method generates code for write barrier for generational GC policies.
 *    It generates inlined code for
 *    - checking whether the destination object is tenured
 *    - checking if concurrent mark thread is active (for gc_modron_wrtbar_cardmark_and_oldcheck)
 *    - card marking (for gc_modron_wrtbar_cardmark_and_oldcheck)
 *    - checking if source object is in new space
 *    - checking if remembered bit is set in object header
 *    When any inlined check decides the barrier is unnecessary, control branches
 *    to doneLabel; otherwise the write barrier helper (wbRef) is called via "bl".
 *
 * @param node:      node
 * @param dstReg:    register holding owning object
 * @param srcReg:    register holding source object
 * @param srm:       scratch register manager
 * @param doneLabel: done label
 * @param wbRef:     symbol reference for write barrier helper
 * @param cg:        code generator
 */
static void
VMnonNullSrcWrtBarCardCheckEvaluator(
      TR::Node *node,
      TR::Register *dstReg,
      TR::Register *srcReg,
      TR_ARM64ScratchRegisterManager *srm,
      TR::LabelSymbol *doneLabel,
      TR::SymbolReference *wbRef ,
      TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   // NOTE(review): fej9 and doWrtBar are computed but not referenced below in this
   // function; candidates for removal.
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   auto gcMode = TR::Compiler->om.writeBarrierType();
   bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
   bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck);
   // We need to do a runtime check on cardmarking for gencon policy if our dstReg is in tenure

   // For gc_modron_wrtbar_always all inlined checks are skipped and the helper is
   // called unconditionally (see the "bl" at the bottom).
   if (gcMode != gc_modron_wrtbar_always)
      {
      /*
       * Generating code checking whether an object is tenured
       *
       * movzx temp1Reg, #heapBase
       * subx  temp1Reg, dstReg, temp1Reg
       * movzx temp2Reg, #heapSize
       * cmpx  temp1Reg, temp2Reg
       * b.cs  doneLabel                 ; (dstReg - heapBase) >= heapSize => not in tenure => done
       *
       */
      TR::Register *temp1Reg = srm->findOrCreateScratchRegister();
      TR::Register *temp2Reg = srm->findOrCreateScratchRegister();
      TR::Register *metaReg = cg->getMethodMetaDataRegister();

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator"), *srm);

      // Load heapBase either from the vmThread (variable base, or AOT/relocatable
      // code where the value is not a compile-time constant) or as a constant.
      if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())
         {
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));
         }
      else
         {
         uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
         loadAddressConstant(cg, node, heapBase, temp1Reg);
         }
      generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, dstReg, temp1Reg);

      // Same variable-vs-constant treatment for heapSize.
      if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())
         {
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));
         }
      else
         {
         uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
         loadConstant64(cg, node, heapSize, temp2Reg);
         }
      generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS);
      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:01oldCheckDone"), *srm);

      // NOTE(review): noChkLabel is created but never bound or branched to in this
      // function; candidate for removal.
      TR::LabelSymbol *noChkLabel = generateLabelSymbol(cg);
      if (doCrdMrk)
         {
         /*
          * Check if J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE flag is set.
          * If not, skip card dirtying.
          *
          * ldrimmx temp2Reg, [vmThread, #privateFlag]
          * tbz     temp2Reg, #20, crdMrkDoneLabel
          * ldrimmx temp2Reg, [vmThread, #activeCardTableBase]
          * addx    temp2Reg, temp2Reg, temp1Reg, LSR #card_size_shift ; At this moment, temp1Reg contains (dstReg - #heapBase)
          * movzx   temp1Reg, 1
          * strbimm temp1Reg, [temp2Reg, 0]
          *
          * crdMrkDoneLabel:
          */
         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:02cardmark"), *srm);

         // The tbz below tests bit 20 directly, so the flag value must stay 1 << 20.
         static_assert(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE == (1 << 20), "We assume that J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE is 0x100000");
         TR::LabelSymbol *crdMrkDoneLabel = generateLabelSymbol(cg);
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, privateFlags)));
         generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, temp2Reg, 20, crdMrkDoneLabel);

         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:03markThreadActiveCheckDone"), *srm);

         // Card index = (dst - heapBase) >> log2(cardSize); the shifted add folds
         // that into the card-table base address.
         uintptr_t card_size_shift = trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize());
         if (comp->getOptions()->isVariableActiveCardTableBase() || comp->compileRelocatableCode())
            {
            generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, activeCardTableBase)));
            }
         else
            {
            uintptr_t activeCardTableBase = comp->getOptions()->getActiveCardTableBase();
            loadAddressConstant(cg, node, activeCardTableBase, temp2Reg);
            }
         generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, temp2Reg, temp2Reg, temp1Reg, TR::SH_LSR, card_size_shift);
         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, temp1Reg, 1);
         generateMemSrc1Instruction(cg, TR::InstOpCode::strbimm, node, TR::MemoryReference::createWithDisplacement(cg, temp2Reg, 0), temp1Reg);

         generateLabelInstruction(cg, TR::InstOpCode::label, node, crdMrkDoneLabel);

         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:04cardmarkDone"), *srm);
         }

      /*
       * Generating code checking whether the src is in new space
       *
       * movzx temp1Reg, #heapBase
       * subx  temp1Reg, srcReg, temp1Reg
       * movzx temp2Reg, #heapSize
       * cmpx  temp1Reg, temp2Reg
       * b.cc  doneLabel                 ; src IS in new space => no remembered-set work => done
       */
      if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())
         {
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));
         }
      else
         {
         uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
         loadAddressConstant(cg, node, heapBase, temp1Reg);
         }
      generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, srcReg, temp1Reg);

      // If doCrdMrk is false, then temp2Reg still contains heapSize
      if (doCrdMrk)
         {
         if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())
            {
            generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));
            }
         else
            {
            uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
            loadConstant64(cg, node, heapSize, temp2Reg);
            }
         }

      generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CC);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:05sourceCheckDone"), *srm);

      /*
       * Generating code checking whether the remembered bit is set
       *
       * ldrimmx temp1Reg, [dstReg, #offsetOfHeaderFlags]
       * tstimmw temp1Reg, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST
       * b.ne    doneLabel               ; already remembered => done
       * bl      jitWriteBarrierGenerational
       */
      // The hard-coded immr:imms encoding below (0x703) encodes exactly 0xf0.
      static_assert(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST == 0xf0, "We assume that J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST is 0xf0");
      generateTrg1MemInstruction(cg, (TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx), node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, dstReg, TR::Compiler->om.offsetOfHeaderFlags()));
      generateTestImmInstruction(cg, node, temp1Reg, 0x703, false); // 0x703 is immr:imms for 0xf0
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:06rememberedBitCheckDone"), *srm);

      srm->reclaimScratchRegister(temp1Reg);
      srm->reclaimScratchRegister(temp2Reg);
      }
   // Fall-back (or unconditional, for gc_modron_wrtbar_always) call to the write
   // barrier helper; "bl" clobbers the link register.
   generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(wbRef->getMethodAddress()), NULL, wbRef, NULL);
   cg->machine()->setLinkRegisterKilled(true);
   }
900
901
/**
 * @brief Generates inlined code for card marking
 * @details
 *    This method generates code for write barrier for optavgpause/balanced GC policies.
 *    It generates inlined code for
 *    - checking if concurrent mark thread is active (for optavgpause)
 *    - checking whether the destination object is in heap
 *    - card marking
 *    The card is dirtied entirely inline; no VM helper is called. Checks that
 *    decide no card marking is needed branch to doneLabel.
 *
 * @param node:      node
 * @param dstReg:    register holding owning object
 * @param srm:       scratch register manager
 * @param doneLabel: done label
 * @param cg:        code generator
 */
static void
VMCardCheckEvaluator(
      TR::Node *node,
      TR::Register *dstReg,
      TR_ARM64ScratchRegisterManager *srm,
      TR::LabelSymbol *doneLabel,
      TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();

   auto gcMode = TR::Compiler->om.writeBarrierType();
   TR::Register *temp1Reg = srm->findOrCreateScratchRegister();
   TR::Register *metaReg = cg->getMethodMetaDataRegister();

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator"), *srm);
   // If gcpolicy is balanced, we must always do card marking
   if (gcMode != gc_modron_wrtbar_cardmark_incremental)
      {
      /*
       * Check if J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE flag is set.
       * If not, skip card dirtying.
       *
       * ldrimmx temp1Reg, [vmThread, #privateFlag]
       * tbz     temp1Reg, #20, doneLabel
       */

      // The tbz below tests bit 20 directly, so the flag value must stay 1 << 20.
      static_assert(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE == (1 << 20), "We assume that J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE is 0x100000");
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, privateFlags)));
      generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, temp1Reg, 20, doneLabel);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:01markThreadActiveCheckDone"), *srm);
      }

   TR::Register *temp2Reg = srm->findOrCreateScratchRegister();
   /*
    * Generating code checking whether an object is in heap
    *
    * movzx temp1Reg, #heapBase
    * subx  temp1Reg, dstReg, temp1Reg
    * movzx temp2Reg, #heapSize
    * cmpx  temp1Reg, temp2Reg
    * b.cs  doneLabel                    ; (dstReg - heapBase) >= heapSize => not in heap => done
    *
    */

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:020heapCheck"), *srm);

   // Load heapBase either from the vmThread (variable base, or AOT/relocatable
   // code where the value is not a compile-time constant) or as a constant.
   if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())
      {
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));
      }
   else
      {
      uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
      loadAddressConstant(cg, node, heapBase, temp1Reg);
      }
   // temp1Reg = dstReg - heapBase; reused below as the card-table offset source.
   generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, dstReg, temp1Reg);

   // If we know the object is definitely in heap, then we skip the check.
   if (!node->isHeapObjectWrtBar())
      {
      if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())
         {
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));
         }
      else
         {
         uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
         loadConstant64(cg, node, heapSize, temp2Reg);
         }
      generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS);
      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:03heapCheckDone"), *srm);
      }

   /*
    * Generating card dirtying sequence.
    * We don't call out to VM helpers.
    *
    * ldrimmx temp2Reg, [vmThread, #activeCardTableBase]
    * addx    temp2Reg, temp2Reg, temp1Reg, LSR #card_size_shift ; At this moment, temp1Reg contains (dstReg - #heapBase)
    * movzx   temp1Reg, 1
    * strbimm temp1Reg, [temp2Reg, 0]
    *
    */
   // Card index = (dst - heapBase) >> log2(cardSize); the shifted add folds that
   // into the card-table base address.
   uintptr_t card_size_shift = trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize());
   if (comp->getOptions()->isVariableActiveCardTableBase() || comp->compileRelocatableCode())
      {
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, activeCardTableBase)));
      }
   else
      {
      uintptr_t activeCardTableBase = comp->getOptions()->getActiveCardTableBase();
      loadAddressConstant(cg, node, activeCardTableBase, temp2Reg);
      }
   generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, temp2Reg, temp2Reg, temp1Reg, TR::SH_LSR, card_size_shift);
   generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, temp1Reg, 1);
   generateMemSrc1Instruction(cg, TR::InstOpCode::strbimm, node, TR::MemoryReference::createWithDisplacement(cg, temp2Reg, 0), temp1Reg);

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:04cardmarkDone"), *srm);
   }
1017
1018
/**
 * @brief Generates the write barrier sequence following an object reference store
 * @details
 *    Dispatches on the GC policy's write barrier type:
 *    - doWrtBar (generational/gencon/always): inlines the checks and card marking
 *      via VMnonNullSrcWrtBarCardCheckEvaluator, falling back to a helper call
 *    - doCrdMrk (cardmark policies): inlines card marking via VMCardCheckEvaluator
 *    Emits nothing at all when the node is marked to skip the barrier or the
 *    destination is a non-heap object.
 *
 * @param node:       the wrtbar/wrtbari node
 * @param srcReg:     register holding the stored (source) object reference
 * @param dstReg:     register holding the destination (owning) object
 * @param srcNonNull: true if the source is known non-null, allowing the inline null check to be skipped
 * @param cg:         code generator
 */
static void wrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, bool srcNonNull, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   auto gcMode = TR::Compiler->om.writeBarrierType();
   bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
   bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental);

   if ((node->getOpCode().isWrtBar() && node->skipWrtBar()) || node->isNonHeapObjectWrtBar())
      return;

   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator"), *srm);

   if (doWrtBar) // generational or gencon
      {
      TR::SymbolReference *wbRef = (gcMode == gc_modron_wrtbar_always) ?
         comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef() :
         // use jitWriteBarrierStoreGenerational for both generational and gencon, because we inline card marking.
         comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();

      if (!srcNonNull)
         {
         // If object is NULL, done
         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);
         generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, doneLabel);
         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);
         }
      // Inlines cardmarking and remembered bit check for gencon.
      VMnonNullSrcWrtBarCardCheckEvaluator(node, dstReg, srcReg, srm, doneLabel, wbRef, cg);
      }
   else if (doCrdMrk)
      {
      TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
      if (!srcNonNull)
         {
         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);
         generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, doneLabel);
         cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);
         }
      VMCardCheckEvaluator(node, dstReg, srm, doneLabel, cg);
      }

   // In the doWrtBar case the helper call expects dst in x0 and src in x1;
   // otherwise no particular real registers are required.
   TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());
   conditions->addPostCondition(dstReg, doWrtBar ? TR::RealRegister::x0 : TR::RealRegister::NoReg);
   conditions->addPostCondition(srcReg, doWrtBar ? TR::RealRegister::x1 : TR::RealRegister::NoReg);
   srm->addScratchRegistersToDependencyList(conditions);
   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);

   srm->stopUsingRegisters();
   }
1072
1073
/**
 * Evaluator for conditional-helper nodes: compares the first child of the test
 * node against a constant (or evaluated register) and, when the condition
 * matches, branches to a snippet that calls the node's helper with the call
 * node's arguments in private-linkage argument registers.
 *
 * Returns NULL; all children of the test and call nodes are decremented here.
 */
TR::Register *
J9::ARM64::TreeEvaluator::conditionalHelperEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *testNode = node->getFirstChild();
   TR::Node *callNode = node->getSecondChild();
   TR::Node *firstChild = testNode->getFirstChild();
   TR::Node *secondChild = testNode->getSecondChild();
   TR::Register *jumpReg = cg->evaluate(firstChild);
   TR::Register *valReg = NULL;
   int32_t i, numArgs = callNode->getNumChildren();
   TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(3, 3, cg->trMemory());

   TR_ASSERT(numArgs <= 2, "Unexpected number of arguments for helper.");

   // Helper arguments are in reversed order of the private linkage
   // Argument registers are not needed to be split since the helper will
   // preserve all of them.
   int32_t iArgIndex = 0, fArgIndex = 0;
   TR::Linkage *linkage = cg->createLinkage(TR_Private);
   for (i = numArgs - 1; i >= 0; i--)
      {
      TR::Register *argReg = cg->evaluate(callNode->getChild(i));
      TR::addDependency(conditions, argReg, (argReg->getKind() == TR_GPR) ? // Didn't consider Long here
         linkage->getProperties().getIntegerArgumentRegister(iArgIndex++) : linkage->getProperties().getFloatArgumentRegister(fArgIndex++), argReg->getKind(), cg);
      }

   TR::addDependency(conditions, jumpReg, TR::RealRegister::x8, TR_GPR, cg);
   // NOTE(review): node->getSecondChild() here is callNode, yet the result is used
   // to pick getLongInt()/getInt() on testNode's second child. Verify whether this
   // should be secondChild->getType().isInt64() instead.
   bool is64Bit = node->getSecondChild()->getType().isInt64();
   int64_t value = is64Bit ? secondChild->getLongInt() : secondChild->getInt();
   // Use an immediate compare when the constant fits in an unsigned 12-bit field;
   // otherwise materialize the value in a register.
   if (secondChild->getOpCode().isLoadConst() && constantIsUnsignedImm12(value))
      {
      generateCompareImmInstruction(cg, testNode, jumpReg, value);
      }
   else
      {
      valReg = cg->evaluate(secondChild);
      generateCompareInstruction(cg, testNode, jumpReg, valReg);
      }

   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
   TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
   cg->addSnippet(snippet);
   // Branch to the helper snippet when the test condition holds (EQ for icmpeq,
   // NE otherwise); the branch is a GC point.
   TR::ARM64ConditionCode cc = (testNode->getOpCodeValue() == TR::icmpeq) ? TR::CC_EQ : TR::CC_NE;
   TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, cc, conditions);
   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
   snippet->gcMap().setGCRegisterMask(0xffffffff);
   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);

   for (i = numArgs - 1; i >= 0; i--)
      cg->decReferenceCount(callNode->getChild(i));
   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   cg->decReferenceCount(testNode);
   cg->decReferenceCount(callNode);
   return NULL;
   }
1130
1131
/**
 * Evaluator for awrtbar (direct address store with write barrier).
 *
 * Emits the field-watch helper when enabled, performs the 64-bit reference
 * store (with dmb barriers around volatile/ordered stores), then generates
 * the GC write barrier via wrtbarEvaluator. Returns NULL.
 */
TR::Register *
J9::ARM64::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Node *firstChild = node->getFirstChild();
   TR::Register *valueReg = cg->evaluate(firstChild);

   TR::Register *destinationRegister = cg->evaluate(node->getSecondChild());
   TR::Register *sideEffectRegister = destinationRegister;

   if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isShadow())
      {
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);
      }

   TR::Register *sourceRegister;
   bool killSource = false;
   bool isVolatileMode = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && cg->comp()->target().isSMP());
   bool isOrderedMode = (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && cg->comp()->target().isSMP());

   // If the value register is shared with other uses, copy it into a fresh
   // register (preserving collected-reference / internal-pointer properties)
   // so the barrier code below can't disturb the commoned value.
   if (firstChild->getReferenceCount() > 1 && firstChild->getRegister() != NULL)
      {
      if (!firstChild->getRegister()->containsInternalPointer())
         sourceRegister = cg->allocateCollectedReferenceRegister();
      else
         {
         sourceRegister = cg->allocateRegister();
         sourceRegister->setPinningArrayPointer(firstChild->getRegister()->getPinningArrayPointer());
         sourceRegister->setContainsInternalPointer();
         }
      generateMovInstruction(cg, node, sourceRegister, firstChild->getRegister());
      killSource = true;
      }
   else
      sourceRegister = valueReg;

   TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);

   // Issue a StoreStore barrier before each volatile store.
   // dmb ishst
   if (isVolatileMode || isOrderedMode)
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);

   generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, tempMR, sourceRegister, NULL);

   // Issue a StoreLoad barrier after each volatile store.
   // dmb ish
   if (isVolatileMode)
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB);

   // GC write barrier for the reference store just emitted.
   wrtbarEvaluator(node, sourceRegister, destinationRegister, firstChild->isNonNull(), cg);

   if (killSource)
      cg->stopUsingRegister(sourceRegister);

   cg->decReferenceCount(node->getFirstChild());
   cg->decReferenceCount(node->getSecondChild());
   tempMR->decNodeReferenceCounts(cg);

   return NULL;
   }
1192
1193
/**
 * Evaluator for awrtbari (indirect address store with write barrier).
 *
 * Handles compressed references (32-bit store of the translated value),
 * field watch, dmb barriers around volatile/ordered stores, and the GC
 * write barrier via wrtbarEvaluator. Returns NULL.
 */
TR::Register *
J9::ARM64::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();

   TR::Register *destinationRegister = cg->evaluate(node->getChild(2));
   TR::Node *secondChild = node->getSecondChild();
   TR::Register *sourceRegister;
   bool killSource = false;
   // NOTE(review): getIndirectWrtbarValueNode appears to take secondChild by
   // reference and may redirect it to the uncompressed value child when
   // compressed pointers are in use — confirm against its declaration.
   bool usingCompressedPointers = TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, secondChild, true);
   bool isVolatileMode = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && cg->comp()->target().isSMP());
   bool isOrderedMode = (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && cg->comp()->target().isSMP());

   // If the value register is shared with other uses, copy it into a fresh
   // register (preserving collected-reference / internal-pointer properties)
   // so the barrier code below can't disturb the commoned value.
   if (secondChild->getReferenceCount() > 1 && secondChild->getRegister() != NULL)
      {
      if (!secondChild->getRegister()->containsInternalPointer())
         sourceRegister = cg->allocateCollectedReferenceRegister();
      else
         {
         sourceRegister = cg->allocateRegister();
         sourceRegister->setPinningArrayPointer(secondChild->getRegister()->getPinningArrayPointer());
         sourceRegister->setContainsInternalPointer();
         }
      generateMovInstruction(cg, node, sourceRegister, secondChild->getRegister());
      killSource = true;
      }
   else
      {
      sourceRegister = cg->evaluate(secondChild);
      }

   // Handle fieldwatch side effect first if it's enabled.
   if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())
      {
      // The Third child (sideEffectNode) and valueReg's node is also used by the store evaluator below.
      // The store evaluator will also evaluate+decrement it. In order to avoid double
      // decrementing the node we skip doing it here and let the store evaluator do it.
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, destinationRegister /* sideEffectRegister */, sourceRegister /* valueReg */);
      }

   // Compressed references store the 32-bit translated value; otherwise the
   // full 64-bit reference is stored.
   TR::InstOpCode::Mnemonic storeOp = usingCompressedPointers ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx;
   TR::Register *translatedSrcReg = usingCompressedPointers ? cg->evaluate(node->getSecondChild()) : sourceRegister;

   TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);

   // Issue a StoreStore barrier before each volatile store.
   // dmb ishst
   if (isVolatileMode || isOrderedMode)
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);

   generateMemSrc1Instruction(cg, storeOp, node, tempMR, translatedSrcReg);

   // Issue a StoreLoad barrier after each volatile store.
   // dmb ish
   if (isVolatileMode)
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB);

   // GC write barrier uses the untranslated (uncompressed) source reference.
   wrtbarEvaluator(node, sourceRegister, destinationRegister, secondChild->isNonNull(), cg);

   if (killSource)
      cg->stopUsingRegister(sourceRegister);

   if (usingCompressedPointers)
      {
      // The reference count of secondChild has been bumped up.
      cg->decReferenceCount(secondChild);
      }
   cg->decReferenceCount(node->getSecondChild());
   cg->decReferenceCount(node->getChild(2));
   tempMR->decNodeReferenceCounts(cg);

   if (comp->useCompressedPointers())
      node->setStoreAlreadyEvaluated(true);

   return NULL;
   }
1269
1270
TR::Register *
J9::ARM64::TreeEvaluator::DIVCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // DIVCHK throws ArithmeticException when the divisor of the child
   // division/remainder operation is zero. A non-zero constant divisor
   // needs no check at all; a constant zero branches unconditionally.
   TR::Node *divCheckedNode = node->getFirstChild();
   TR::Node *divisorNode = divCheckedNode->getSecondChild();
   bool is64BitOp = divCheckedNode->getType().isInt64();
   bool divisorIsConst = divisorNode->getOpCode().isLoadConst();
   bool divisorIsZeroConst = divisorIsConst &&
      (is64BitOp ? (divisorNode->getLongInt() == 0) : (divisorNode->getInt() == 0));

   if (!divisorIsConst || divisorIsZeroConst)
      {
      // Slow path: a helper-call snippet that raises the exception.
      TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
      TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
      cg->addSnippet(snippet);

      TR::Instruction *gcPoint;
      if (divisorIsConst)
         {
         // Divisor is the constant zero: no runtime test needed, branch straight
         // to the snippet.
         gcPoint = generateLabelInstruction(cg, TR::InstOpCode::b, node, snippetLabel);
         }
      else
         {
         // Runtime divisor: compare-and-branch-if-zero to the snippet, using the
         // width that matches the division.
         TR::Register *divisorReg = cg->evaluate(divisorNode);
         gcPoint = generateCompareBranchInstruction(cg, is64BitOp ? TR::InstOpCode::cbzx : TR::InstOpCode::cbzw, node, divisorReg, snippetLabel);
         }
      gcPoint->ARM64NeedsGCMap(cg, 0xffffffff);
      snippet->gcMap().setGCRegisterMask(0xffffffff);
      }

   cg->evaluate(divCheckedNode);
   cg->decReferenceCount(divCheckedNode);
   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);
   return NULL;
   }
1305
1306
/**
 * Emits a check of the object's class flags against classFlag; when any of the
 * tested flags are set (value type / value-based class instance), branches to
 * helperCallLabel so the VM helper can raise IllegalMonitorStateException.
 * If helperCallLabel is NULL, a label and an out-of-line helper-call section
 * (merging back at mergeLabel) are created here. temp1Reg and temp2Reg are
 * clobbered.
 */
void
J9::ARM64::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(TR::Node *node, TR::LabelSymbol *mergeLabel, TR::LabelSymbol *helperCallLabel, TR::Register *objReg, TR::Register *temp1Reg, TR::Register *temp2Reg, TR::CodeGenerator *cg, int32_t classFlag)
   {
   // get class of object
   generateLoadJ9Class(node, temp1Reg, objReg, cg);

   // get memory reference to class flags
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR::MemoryReference *classFlagsMemRef = TR::MemoryReference::createWithDisplacement(cg, temp1Reg, static_cast<uintptr_t>(fej9->getOffsetOfClassFlags()));

   // temp1Reg = classFlags & classFlag, setting condition flags (andsw).
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, temp1Reg, classFlagsMemRef);
   loadConstant32(cg, node, classFlag, temp2Reg);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::andsw, node, temp1Reg, temp1Reg, temp2Reg);

   bool generateOOLSection = helperCallLabel == NULL;
   if (generateOOLSection)
      helperCallLabel = generateLabelSymbol(cg);

   // If obj is value type or value based class instance, call VM helper and throw IllegalMonitorState exception, else continue as usual
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, helperCallLabel, TR::CC_NE);

   // TODO: There is now the possibility of multiple distinct OOL sections with helper calls to be generated when
   // evaluating the TR::monent or TR::monexit nodes:
   //
   // 1. Monitor cache lookup OOL (AArch64 does not use OOL for monitor cache lookup at the moment)
   // 2. Lock reservation OOL (AArch64 does not implement lock reservation yet)
   // 3. Value types or value based object OOL
   // 4. Recursive CAS sequence for Locking
   //
   // These distinct OOL sections may perform non-trivial logic but what they all have in common is they all have a
   // call to the same JIT helper which acts as a fall back. This complexity exists because of the way the evaluators
   // are currently architected and due to the restriction that we cannot have nested OOL code sections. Whenever
   // making future changes to these evaluators we should consider refactoring them to reduce the complexity and
   // attempt to consolidate the calls to the JIT helper so as to not have multiple copies.
   if (generateOOLSection)
      {
      TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, helperCallLabel, mergeLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
      }
   }
1346
1347
/**
 * @brief Generates instruction sequence for looking up the address of lockword of the object
 *
 * @param[in] cg: Code Generator
 * @param[in] node: node
 * @param[in] objReg: register holding object pointer
 * @param[in] addrReg: register for assigning address of the lockword
 * @param[in] metaReg: register holding vmthread struct pointer
 * @param[in] srm: scratch register manager
 * @param[in] callLabel: label for slow path (VM helper call)
 */
static void
generateLockwordAddressLookup(TR::CodeGenerator *cg, TR::Node *node, TR::Register *objReg, TR::Register *addrReg, TR::Register *metaReg,
                              TR_ARM64ScratchRegisterManager *srm, TR::LabelSymbol *callLabel)
   {
   /*
    * Generating following instruction sequence.
    *
    *    ldrimmw  objectClassReg, [objReg, #0] ; throws an implicit NPE
    *    andimmw  objectClassReg, 0xffffff00
    *    ldrimmx  tempReg, [objectClassReg, offsetOfLockOffset]
    *    cmpimmx  tempReg, #0
    *    b.le     monitorLookupCacheLabel
    *    addx     addrReg, objReg, tempReg
    *    b        fallThruFromMonitorLookupCacheLabel
    * monitorLookupCacheLabel:
    *    ; slot = (object >> objectAlignmentShift) & (J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE-1)
    *    ubfx     tempReg, objReg, #alignmentBits, #maskWidth ; maskWidth is popcount(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1)
    *
    *    ; vmThread->objectMonitorLookupCache[slot]
    *    addx     tempReg, metaReg, tempReg, lsl #elementWidth ; elementWidth is log2(sizeof(j9objectmonitor_t))
    *    ldrimmw  monitorReg, [tempReg, offsetOfMonitorLookupCache]
    *
    *    cbzx     monitorReg, callLabel ; if monitor is not found, then call out to helper
    *    ldrimmx  tempReg, [monitorReg, offsetOfMonitor]
    *    ldrimmx  tempReg, [tempReg, offsetOfUserData]
    *    cmpx     tempReg, objReg
    *    b.ne     callLabel ; if userData does not match object, then call out to helper
    *    addimmx  addrReg, monitorReg, offsetOfAlternateLockWord
    *
    * fallThruFromMonitorLookupCacheLabel:
    *
    */
   TR::Compilation *comp = TR::comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR::Register *tempReg = srm->findOrCreateScratchRegister();

   TR::Register *objectClassReg = srm->findOrCreateScratchRegister();

   // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses the nullcheck reference register.
   // In this case, the nullcheck reference register is objReg and the memory reference does use it,
   // so let InstructionDelegate::setupImplicitNullPointerException handle it.
   generateLoadJ9Class(node, objectClassReg, objReg, cg);

   // tempReg = objectClass->lockOffset
   TR::MemoryReference *lockOffsetMR = TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, lockOffset));
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, lockOffsetMR);
   srm->reclaimScratchRegister(objectClassReg);

   generateCompareImmInstruction(cg, node, tempReg, 0, true);

   if (comp->getOption(TR_EnableMonitorCacheLookup))
      {
      TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);

      // If the lockword offset in the class pointer <= 0, then lookup monitor from the cache
      auto branchInstrToLookup = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, monitorLookupCacheLabel, TR::CC_LE);
      TR_Debug * debugObj = cg->getDebug();
      if (debugObj)
         {
         debugObj->addInstructionComment(branchInstrToLookup, "Branch to monitor lookup cache label");
         }
      // Lockword offset is positive: the lockword lives in the object itself
      generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, tempReg);
      auto branchInstrToFallThru = generateLabelInstruction(cg, TR::InstOpCode::b, node, fallThruFromMonitorLookupCacheLabel);
      if (debugObj)
         {
         debugObj->addInstructionComment(branchInstrToFallThru, "Branch to fall through label as lockOffset is positive");
         }
      generateLabelInstruction(cg, TR::InstOpCode::label, node, monitorLookupCacheLabel);
      static const uint32_t maskWidth = populationCount(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1);
      uint32_t shiftAmount = trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes()); // shift amount
      // slot = (object >> objectAlignmentShift) & (J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1)
      generateUBFXInstruction(cg, node, tempReg, objReg, shiftAmount, maskWidth, true);

#ifdef OMR_GC_FULL_POINTERS
      // In mixed refs and large heap builds, the element type of objectMonitorLookupCache is UDATA.
      uint32_t elementWidth = trailingZeroes((uint32_t)sizeof(UDATA));
#else
      uint32_t elementWidth = trailingZeroes((uint32_t)sizeof(U_32));
#endif
      // tempReg = metaReg + slot * sizeof(element); the cache base offset is applied in the memory reference below
      generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, tempReg, metaReg, tempReg, TR::ARM64ShiftCode::SH_LSL, elementWidth);

      int32_t offsetOfObjectMonitorLookpCache = offsetof(J9VMThread, objectMonitorLookupCache);
      TR::MemoryReference *monitorLookupMR = TR::MemoryReference::createWithDisplacement(cg, tempReg, offsetOfObjectMonitorLookpCache);
      TR::Register *monitorReg = srm->findOrCreateScratchRegister();

      generateTrg1MemInstruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx, node, monitorReg, monitorLookupMR);
      // if monitor is not found in the cache, then call out to helper
      generateCompareBranchInstruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::cbzw : TR::InstOpCode::cbzx, node, monitorReg, callLabel);

      int32_t offsetOfMonitor = offsetof(J9ObjectMonitor, monitor);
      TR::MemoryReference *monitorMR = TR::MemoryReference::createWithDisplacement(cg, monitorReg, offsetOfMonitor);
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, monitorMR);

      int32_t offsetOfUserData = offsetof(J9ThreadAbstractMonitor, userData);
      TR::MemoryReference *userDataMR = TR::MemoryReference::createWithDisplacement(cg, tempReg, offsetOfUserData);
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, userDataMR);

      // if the cached monitor's userData does not match the object, call out to helper
      generateCompareInstruction(cg, node, tempReg, objReg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_NE);

      int32_t offsetOfAlternateLockword = offsetof(J9ObjectMonitor, alternateLockword);
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, monitorReg, offsetOfAlternateLockword);

      srm->reclaimScratchRegister(monitorReg);
      generateLabelInstruction(cg, TR::InstOpCode::label, node, fallThruFromMonitorLookupCacheLabel);
      }
   else
      {
      // Lockword offset <= 0 and the monitor cache lookup is disabled: take the slow path
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_LE);
      generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, tempReg);
      }

   srm->reclaimScratchRegister(tempReg);
   }
1470
1471
/**
 * @brief Evaluates a TR::monexit node: releases the object monitor inline when possible,
 *        falling back to a VM helper call (via snippet or OOL section) otherwise.
 *
 * @param[in] node: the monexit node
 * @param[in] cg: Code Generator
 * @return NULL (monexit produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::monexitEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = TR::comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   int32_t staticLwOffset = fej9->getByteOffsetToLockword(cg->getMonClass(node));
   TR::InstOpCode::Mnemonic op;
   TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);

   // Inlining is not possible (FSD, value-based/value-type object, or disabled by option):
   // recreate the node as a call to the monexit helper and evaluate it as a direct call.
   if (comp->getOption(TR_FullSpeedDebug) ||
       (isMonitorValueBasedOrValueType == TR_yes) ||
       comp->getOption(TR_DisableInlineMonExit))
      {
      TR::ILOpCodes opCode = node->getOpCodeValue();
      TR::Node::recreate(node, TR::call);
      TR::Register *targetRegister = directCallEvaluator(node, cg);
      TR::Node::recreate(node, opCode);
      return targetRegister;
      }

   TR::Node *objNode = node->getFirstChild();
   TR::Register *objReg = cg->evaluate(objNode);
   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
   TR::Register *metaReg = cg->getMethodMetaDataRegister();

   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *OOLLabel = generateLabelSymbol(cg);


   generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);
   startLabel->setStartInternalControlFlow();

   // When either the value-type runtime check or the lockword lookup runs, the first load
   // through objReg (inside generateLoadJ9Class) already serves as the implicit null check.
   const bool isImplicitNullChkIsDoneAtLoadJ9Class = (isMonitorValueBasedOrValueType == TR_maybe) || (staticLwOffset <= 0);
   // If lockword offset is not known at compile time, we need to jump into the OOL code section for helper call if monitor lookup fails.
   // In that case, we cannot have inline recursive code in the OOL code section.
   const bool inlineRecursive = staticLwOffset > 0;

   // If object is not known to be value type or value based class at compile time, check at run time
   if (isMonitorValueBasedOrValueType == TR_maybe)
      {
      TR::Register *temp1Reg = srm->findOrCreateScratchRegister();
      TR::Register *temp2Reg = srm->findOrCreateScratchRegister();

      // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses the nullcheck reference register.
      // In this case, the nullcheck reference register is objReg and the memory reference does use it,
      // so let InstructionDelegate::setupImplicitNullPointerException handle it.
      //
      // If we are generating code for MonitorCacheLookup then we will not have a separate OOL for inlineRecursive, and OOLLabel points
      // to the OOL Containing only helper call. Otherwise, OOL will have other code apart from helper call which we do not want to execute
      // for ValueType or ValueBased object and in that scenario we will need to generate another OOL that just contains helper call.
      generateCheckForValueMonitorEnterOrExit(node, doneLabel, inlineRecursive ? NULL : OOLLabel, objReg, temp1Reg, temp2Reg, cg, J9_CLASS_DISALLOWS_LOCKING_FLAGS);

      srm->reclaimScratchRegister(temp1Reg);
      srm->reclaimScratchRegister(temp2Reg);
      }

   TR::Register *addrReg = srm->findOrCreateScratchRegister();

   // If we do not know the lockword offset at compile time, obtain it from the class pointer of the object being locked
   if (staticLwOffset <= 0)
      {
      generateLockwordAddressLookup(cg, node, objReg, addrReg, metaReg, srm, OOLLabel);
      }
   else
      {
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, staticLwOffset); // the stlr instruction does not take an immediate offset
      }
   TR::Register *dataReg = srm->findOrCreateScratchRegister();

   // dataReg = current lockword value
   op = fej9->generateCompressedLockWord() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx;
   auto faultingInstruction = generateTrg1MemInstruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));

   // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses the nullcheck reference register.
   // In this case, the nullcheck reference register is objReg, but the memory reference does not use it,
   // thus we need to explicitly set the implicit exception point here.
   if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck() && (!isImplicitNullChkIsDoneAtLoadJ9Class))
      {
      if (cg->getImplicitExceptionPoint() == NULL)
         {
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, objNode);
            }
         cg->setImplicitExceptionPoint(faultingInstruction);
         }
      }

   // If the lockword does not hold exactly this thread (flat, non-recursive lock), take the slow path
   generateCompareInstruction(cg, node, dataReg, metaReg, true);

   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, OOLLabel, TR::CC_NE);

   // Release the lock: either dmb + plain store, or a store-release (stlr), selected by env var
   static const bool useMemoryBarrierForMonitorExit = feGetEnv("TR_aarch64UseMemoryBarrierForMonitorExit") != NULL;
   if (useMemoryBarrierForMonitorExit)
      {
      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)
      op = fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx;
      }
   else
      {
      op = fej9->generateCompressedLockWord() ? TR::InstOpCode::stlrw : TR::InstOpCode::stlrx;
      }

   // Avoid zeroReg from being reused by scratch register manager
   TR::Register *zeroReg = cg->allocateRegister();

   // Store zero (via a register pinned to xzr in the dependencies below) to clear the lockword
   generateMemSrc1Instruction(cg, op, node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), zeroReg);

   if (inlineRecursive)
      {
      /*
       * OOLLabel:
       *    subimmx dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT
       *    andimmx tempReg, dataReg, ~OBJECT_HEADER_LOCK_RECURSION_MASK
       *    cmpx    metaReg, tempReg
       *    b.ne    snippetLabel
       *    strimmx dataReg, [addrReg]
       * OOLEndLabel:
       *    b       doneLabel
       *
       */

      // This register is only required for OOL code section
      // If we obtain this from scratch register manager, then one more register is used in mainline.
      TR::Register *tempReg = cg->allocateRegister();

      TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *OOLEndLabel = generateLabelSymbol(cg);
      TR_ARM64OutOfLineCodeSection *oolSection = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(OOLLabel, doneLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(oolSection);
      oolSection->swapInstructionListsWithCompilation();
      generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLLabel);

      // Decrement the recursion count
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
      // OBJECT_HEADER_LOCK_RECURSION_MASK is 0xF0, immr=0x38, imms=0x3b for ~(0xF0)
      generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, dataReg, true, 0xe3b);
      generateCompareInstruction(cg, node, metaReg, tempReg, true);

      TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), OOLEndLabel);
      cg->addSnippet(snippet);
      // Not a flat lock held by this thread with a non-zero recursion count: call the helper
      TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
      gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
      snippet->gcMap().setGCRegisterMask(0xffffffff);

      // Store back the decremented lockword
      generateMemSrc1Instruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,
                                 node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), dataReg);

      TR::RegisterDependencyConditions *ooldeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());
      ooldeps->addPostCondition(objReg, TR::RealRegister::x0);
      ooldeps->addPostCondition(tempReg, TR::RealRegister::NoReg);
      ooldeps->addPostCondition(dataReg, TR::RealRegister::NoReg);
      ooldeps->addPostCondition(addrReg, TR::RealRegister::NoReg);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLEndLabel, ooldeps);
      generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);

      cg->stopUsingRegister(tempReg);
      // ARM64HelperCallSnippet generates "bl" instruction
      cg->machine()->setLinkRegisterKilled(true);
      oolSection->swapInstructionListsWithCompilation();
      }
   else
      {
      // Slow path is a plain helper call in an OOL section
      TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, OOLLabel, doneLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
      }

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());
   deps->addPostCondition(objReg, TR::RealRegister::NoReg);
   deps->addPostCondition(zeroReg, TR::RealRegister::xzr);
   srm->addScratchRegistersToDependencyList(deps);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);

   doneLabel->setEndInternalControlFlow();

   cg->stopUsingRegister(zeroReg);
   srm->stopUsingRegisters();

   cg->decReferenceCount(objNode);
   cg->machine()->setLinkRegisterKilled(true);
   return NULL;
   }
1654
1655
/**
 * @brief Evaluates a TR::asynccheck node.
 *
 * The first child is an inline test (an lcmpeq of a value against -1); when the
 * test succeeds, the async-check helper referenced by this node is invoked via
 * an out-of-line helper-call snippet.
 *
 * @param[in] node: the asynccheck node
 * @param[in] cg: Code Generator
 * @return NULL (asynccheck produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::asynccheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *compareNode = node->getFirstChild();
   TR::Node *valueChild = compareNode->getFirstChild();
   TR::Node *constChild = compareNode->getSecondChild();
   TR::Register *valueReg = cg->evaluate(valueChild);

   TR_ASSERT(compareNode->getOpCodeValue() == TR::lcmpeq && constChild->getLongInt() == -1L, "asynccheck bad format");

   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *mergeLabel = generateLabelSymbol(cg);
   TR::SymbolReference *asynccheckHelper = node->getSymbolReference();
   TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, asynccheckHelper, mergeLabel);
   cg->addSnippet(snippet);

   // 64-bit compare of the tested value against the expected constant (-1)
   generateCompareImmInstruction(cg, node, valueReg, constChild->getLongInt(), true);

   // Branch to the helper-call snippet when the inline test succeeds; this is a GC point
   TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_EQ);
   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
   snippet->gcMap().setGCRegisterMask(0xffffffff);
   generateLabelInstruction(cg, TR::InstOpCode::label, node, mergeLabel);

   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);

   cg->decReferenceCount(valueChild);
   cg->decReferenceCount(constChild);
   cg->decReferenceCount(compareNode);
   return NULL;
   }
1689
1690
/**
 * @brief Evaluates a TR::instanceof node by delegating to the common implementation,
 *        which selects and emits the most profitable sequence of inline type tests
 *        (with a helper-call fallback) for this call site.
 */
TR::Register *
J9::ARM64::TreeEvaluator::instanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return VMinstanceofEvaluator(node, cg);
   }
1695
1696
/**
 * @brief Generates Superclass Test for checkcast/instanceof/ArrayStoreCHK nodes.
 * @details
 * It will generate pseudocode as follows.
 * if (objectClassDepth <= castClassDepth) call Helper
 * else
 * load superClassArrReg,superClassOfObjectClass
 * cmp superClassArrReg[castClassDepth], castClass
 * Here it sets up the condition code for the caller to react on.
 *
 * @param[in] node: node
 * @param[in] instanceClassReg: register contains instance class
 * @param[in] instanceClassRegCanBeReclaimed: if true, instanceClassReg is reclaimed
 * @param[in] castClassReg: register contains cast class
 * @param[in] castClassDepth: class depth of the cast class. If -1 is passed, depth is loaded at runtime
 * @param[in] falseLabel: label to jump when test fails
 * @param[in] srm: scratch register manager
 * @param[in] cg: code generator
 */
static
void genSuperClassTest(TR::Node *node, TR::Register *instanceClassReg, bool instanceClassRegCanBeReclaimed, TR::Register *castClassReg, int32_t castClassDepth,
                       TR::LabelSymbol *falseLabel, TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)
   {
   // Compare the instance class depth to the cast class depth. If the instance class depth is less than or equal
   // to the cast class depth then the cast class cannot be a superclass of the instance class.
   //
   TR::Register *instanceClassDepthReg = srm->findOrCreateScratchRegister();
   TR::Register *castClassDepthReg = NULL;
   static_assert(J9AccClassDepthMask == 0xffff, "J9_JAVA_CLASS_DEPTH_MASK must be 0xffff");
   // load lower 16bit of classDepthAndFlags
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrhimm, node, instanceClassDepthReg,
                              TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, classDepthAndFlags)));
   if (castClassDepth != -1)
      {
      // castClassDepth is known at compile time
      if (constantIsUnsignedImm12(castClassDepth))
         {
         generateCompareImmInstruction(cg, node, instanceClassDepthReg, castClassDepth);
         }
      else
         {
         // depth does not fit in a 12-bit immediate; materialize it in a register
         castClassDepthReg = srm->findOrCreateScratchRegister();
         loadConstant32(cg, node, castClassDepth, castClassDepthReg);
         generateCompareInstruction(cg, node, instanceClassDepthReg, castClassDepthReg);
         }
      }
   else
      {
      // castClassDepth needs to be loaded from castClass
      castClassDepthReg = srm->findOrCreateScratchRegister();
      // load lower 16bit of classDepthAndFlags
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrhimm, node, castClassDepthReg,
                                 TR::MemoryReference::createWithDisplacement(cg, castClassReg, offsetof(J9Class, classDepthAndFlags)));
      generateCompareInstruction(cg, node, instanceClassDepthReg, castClassDepthReg);
      }
   srm->reclaimScratchRegister(instanceClassDepthReg);
   instanceClassDepthReg = NULL; // prevent re-using this register by error

   // if objectClassDepth is less than or equal to castClassDepth, then call Helper
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, falseLabel, TR::CC_LE);

   // Load the superclasses array of the instance class and check if the superclass that appears at the depth of the cast class is in fact the cast class.
   // If not, the instance class and cast class are not in the same hierarchy.
   //
   TR::Register *instanceClassSuperClassesArrayReg = srm->findOrCreateScratchRegister();

   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, instanceClassSuperClassesArrayReg,
                              TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, superclasses)));

   if (instanceClassRegCanBeReclaimed)
      {
      srm->reclaimScratchRegister(instanceClassReg);
      instanceClassReg = NULL; // prevent re-using this register by error
      }

   TR::Register *instanceClassSuperClassReg = srm->findOrCreateScratchRegister();

   // Offset is only meaningful (and only used) when castClassDepth != -1; see the guard below
   int32_t castClassDepthOffset = castClassDepth * TR::Compiler->om.sizeofReferenceAddress();
   if ((castClassDepth != -1) && constantIsUnsignedImm12(castClassDepthOffset))
      {
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, instanceClassSuperClassReg,
                                 TR::MemoryReference::createWithDisplacement(cg, instanceClassSuperClassesArrayReg, castClassDepthOffset));
      }
   else
      {
      if (!castClassDepthReg)
         {
         castClassDepthReg = srm->findOrCreateScratchRegister();
         loadConstant32(cg, node, castClassDepth, castClassDepthReg);
         }
      // Scale the depth by the superclass-array element size: shift by 3 assumes 8-byte
      // entries, matching sizeofReferenceAddress() on this 64-bit target.
      generateLogicalShiftLeftImmInstruction(cg, node, castClassDepthReg, castClassDepthReg, 3, false);
      generateTrg1MemInstruction(cg, TR::InstOpCode::ldroffx, node, instanceClassSuperClassReg, TR::MemoryReference::createWithIndexReg(cg, instanceClassSuperClassesArrayReg, castClassDepthReg));
      }
   generateCompareInstruction(cg, node, instanceClassSuperClassReg, castClassReg, true);

   if (castClassDepthReg)
      srm->reclaimScratchRegister(castClassDepthReg);
   srm->reclaimScratchRegister(instanceClassSuperClassesArrayReg);
   srm->reclaimScratchRegister(instanceClassSuperClassReg);

   // At this point EQ flag will be set if the cast class is a superclass of the instance class. Caller is responsible for acting on the result.
   }
1798
1799
/**
1800
* @brief Generates Arbitrary Class Test for instanceOf or checkCast node
1801
*/
1802
static
1803
void genInstanceOfOrCheckCastArbitraryClassTest(TR::Node *node, TR::Register *instanceClassReg, TR_OpaqueClassBlock *arbitraryClass,
1804
TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)
1805
{
1806
TR::Compilation *comp = cg->comp();
1807
TR::Register *arbitraryClassReg = srm->findOrCreateScratchRegister();
1808
TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(comp->fe());
1809
1810
if (comp->compileRelocatableCode())
1811
{
1812
loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, TR_ClassPointer);
1813
}
1814
else
1815
{
1816
bool isUnloadAssumptionRequired = fej9->isUnloadAssumptionRequired(arbitraryClass, comp->getCurrentMethod());
1817
1818
if (isUnloadAssumptionRequired)
1819
{
1820
loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, TR_NoRelocation, true);
1821
}
1822
else
1823
{
1824
loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, NULL, true);
1825
}
1826
}
1827
generateCompareInstruction(cg, node, instanceClassReg, arbitraryClassReg, true);
1828
1829
srm->reclaimScratchRegister(arbitraryClassReg);
1830
1831
// At this point EQ flag will be set if the cast class matches the arbitrary class. Caller is responsible for acting on the result.
1832
}
1833
1834
/**
 * @brief Generates ArrayOfJavaLangObjectTest (object class is reference array) for instanceOf or checkCast node
 * @details
 *    scratchReg1 = load (objectClassReg+offset_romClass)
 *    scratchReg1 = load (ROMClass+J9ROMClass+modifiers)
 *    tstImmediate with J9AccClassArray(0x10000)
 *    If not Array -> Branch to Fail Label (using tbz if useTBZ is true, tst + b.eq otherwise)
 *    testerReg = load (objectClassReg + leafcomponent_offset)
 *    testerReg = load (objectClassReg + offset_romClass)
 *    testerReg = load (objectClassReg + offset_modifiers)
 *    tstImmediate with J9AccClassInternalPrimitiveType(0x20000)
 *    If not an array of a primitive type, the EQ condition code is set (true result);
 *    the caller is responsible for branching/acting on the result.
 */
static
void genInstanceOfOrCheckCastObjectArrayTest(TR::Node *node, TR::Register *instanceClassReg, TR::LabelSymbol *falseLabel, bool useTBZ,
                                             TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)
   {
   // Load the object ROM class and test the modifiers to see if this is an array.
   //
   TR::Register *scratchReg = srm->findOrCreateScratchRegister();
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, romClass)));
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9ROMClass, modifiers)));
   static_assert(J9AccClassArray == 0x10000, "J9AccClassArray must be 0x10000");
   // If not array, branch to falseLabel
   if (useTBZ)
      {
      // tbz tests bit 16 (the J9AccClassArray bit) directly
      generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, scratchReg, 16, falseLabel);
      }
   else
      {
      generateTestImmInstruction(cg, node, scratchReg, 0x400); // 0x400 is immr:imms for 0x10000
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, falseLabel, TR::CC_EQ);
      }

   // If it's an array, load the component ROM class and test the modifiers to see if this is a primitive array.
   //
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9ArrayClass, componentType)));
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9Class, romClass)));
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9ROMClass, modifiers)));

   static_assert(J9AccClassInternalPrimitiveType == 0x20000, "J9AccClassInternalPrimitiveType must be 0x20000");
   generateTestImmInstruction(cg, node, scratchReg, 0x3c0); // 0x3c0 is immr:imms for 0x20000

   srm->reclaimScratchRegister(scratchReg);

   // At this point EQ flag will be set if this is not a primitive array. Caller is responsible for acting on the result.
   }
1884
1885
/**
 * @brief Returns true when the iterator refers to the last element of the sequence range.
 */
template<class It>
bool
isTerminalSequence(It it, It itEnd)
   {
   return std::next(it) == itEnd;
   }
1891
1892
template<class It>
1893
bool
1894
isNextItemGoToTrue(It it, It itEnd)
1895
{
1896
return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::GoToTrue;
1897
}
1898
1899
template<class It>
1900
bool
1901
isNextItemGoToFalse(It it, It itEnd)
1902
{
1903
return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::GoToFalse;
1904
}
1905
1906
template<class It>
1907
bool
1908
isNextItemHelperCall(It it, It itEnd)
1909
{
1910
return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::HelperCall;
1911
}
1912
1913
TR::Register *
1914
J9::ARM64::TreeEvaluator::VMinstanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)
1915
{
1916
TR::Compilation *comp = cg->comp();
1917
TR_OpaqueClassBlock *compileTimeGuessClass;
1918
int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();
1919
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);
1920
TR_ASSERT_FATAL(maxProfiledClasses <= 4, "Maximum 4 profiled classes per site allowed because we use a fixed stack allocated buffer for profiled classes\n");
1921
InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];
1922
bool topClassWasCastClass = false;
1923
float topClassProbability = 0.0;
1924
1925
bool profiledClassIsInstanceOf;
1926
InstanceOfOrCheckCastProfiledClasses profiledClassesList[4];
1927
uint32_t numberOfProfiledClass;
1928
uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);
1929
1930
1931
TR::Node *objectNode = node->getFirstChild();
1932
TR::Node *castClassNode = node->getSecondChild();
1933
TR::Register *objectReg = cg->evaluate(objectNode);
1934
TR::Register *castClassReg = NULL;
1935
TR::Register *resultReg = cg->allocateRegister();
1936
1937
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
1938
TR::LabelSymbol *callHelperLabel = generateLabelSymbol(cg);
1939
TR::LabelSymbol *nextSequenceLabel = generateLabelSymbol(cg);
1940
1941
TR::Instruction *gcPoint;
1942
1943
TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
1944
TR::Register *objectClassReg = NULL;
1945
1946
// initial result is false
1947
generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0);
1948
1949
auto itBegin = std::begin(sequences);
1950
const auto itEnd = std::next(itBegin, numSequencesRemaining);
1951
1952
for (auto it = itBegin; it != itEnd; it++)
1953
{
1954
auto current = *it;
1955
switch (current)
1956
{
1957
case EvaluateCastClass:
1958
TR_ASSERT(!castClassReg, "Cast class already evaluated");
1959
castClassReg = cg->gprClobberEvaluate(castClassNode);
1960
break;
1961
case LoadObjectClass:
1962
TR_ASSERT(!objectClassReg, "Object class already loaded");
1963
objectClassReg = srm->findOrCreateScratchRegister();
1964
generateLoadJ9Class(node, objectClassReg, objectReg, cg);
1965
break;
1966
case NullTest:
1967
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());
1968
TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");
1969
if (isNextItemGoToTrue(it, itEnd))
1970
{
1971
generateCompareImmInstruction(cg, node, objectReg, 0, true);
1972
generateCSetInstruction(cg, node, resultReg, TR::CC_NE);
1973
// consume GoToTrue
1974
it++;
1975
}
1976
else
1977
{
1978
auto nullLabel = isNextItemHelperCall(it, itEnd) ? callHelperLabel : doneLabel;
1979
// branch to doneLabel to return false
1980
generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objectReg, nullLabel);
1981
}
1982
break;
1983
case GoToTrue:
1984
TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToTrue should be the terminal sequence");
1985
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToTrue\n", node->getOpCode().getName());
1986
generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1);
1987
break;
1988
case GoToFalse:
1989
TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToFalse should be the terminal sequence");
1990
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToFalse\n", node->getOpCode().getName());
1991
break;
1992
case ClassEqualityTest:
1993
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ClassEqualityTest\n", node->getOpCode().getName());
1994
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Equality", comp->signature()),1,TR::DebugCounter::Undetermined);
1995
1996
generateCompareInstruction(cg, node, objectClassReg, castClassReg, true);
1997
generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
1998
break;
1999
case SuperClassTest:
2000
{
2001
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting SuperClassTest\n", node->getOpCode().getName());
2002
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/SuperClassTest", comp->signature()),1,TR::DebugCounter::Undetermined);
2003
2004
int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);
2005
auto falseLabel = isNextItemGoToFalse(it, itEnd) ? doneLabel : (isNextItemHelperCall(it, itEnd) ? callHelperLabel : nextSequenceLabel);
2006
genSuperClassTest(node, objectClassReg, false, castClassReg, castClassDepth, falseLabel, srm, cg);
2007
generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
2008
}
2009
break;
2010
case ProfiledClassTest:
2011
{
2012
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ProfiledClassTest\n", node->getOpCode().getName());
2013
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Profile", comp->signature()),1,TR::DebugCounter::Undetermined);
2014
2015
auto profiledClassesIt = std::begin(profiledClassesList);
2016
auto profiledClassesItEnd = std::next(profiledClassesIt, numberOfProfiledClass);
2017
while (profiledClassesIt != profiledClassesItEnd)
2018
{
2019
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: ProfiledClassTest: profiledClass = %p, isProfiledClassInstanceOfCastClass = %s\n",
2020
node->getOpCode().getName(), profiledClassesIt->profiledClass,
2021
(profiledClassesIt->isProfiledClassInstanceOfCastClass) ? "true" : "false");
2022
2023
genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, profiledClassesIt->profiledClass, srm, cg);
2024
/**
2025
* At this point EQ flag will be set if the profiledClass matches the cast class.
2026
* Set resultReg to 1 if isProfiledClassInstanceOfCastClass is true
2027
*/
2028
if (profiledClassesIt->isProfiledClassInstanceOfCastClass)
2029
{
2030
generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
2031
}
2032
profiledClassesIt++;
2033
if (profiledClassesIt != profiledClassesItEnd)
2034
{
2035
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
2036
}
2037
}
2038
}
2039
break;
2040
case CompileTimeGuessClassTest:
2041
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CompileTimeGuessClassTest\n", node->getOpCode().getName());
2042
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/compTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);
2043
2044
genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, compileTimeGuessClass, srm, cg);
2045
generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
2046
2047
break;
2048
case CastClassCacheTest:
2049
{
2050
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CastClassCacheTest\n", node->getOpCode().getName());
2051
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/CastClassCache", comp->signature()),1,TR::DebugCounter::Undetermined);
2052
2053
/**
2054
* Compare the cast class against the cache on the instance class.
2055
* If they are the same the cast is successful.
2056
* If not it's either because the cache class does not match the cast class,
2057
* or it does match except the cache class has the low bit set, which means the cast is not successful.
2058
*/
2059
TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();
2060
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,
2061
TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, castClassCache)));
2062
generateTrg1Src2Instruction(cg, TR::InstOpCode::eorx, node, castClassCacheReg, castClassCacheReg, castClassReg);
2063
generateCompareImmInstruction(cg, node, castClassCacheReg, 1, true);
2064
2065
/**
2066
* At this point LT flag will be set if the cast is successful, EQ flag will be set if the cast is unsuccessful,
2067
* and GT flag will be set if the cache class did not match the cast class.
2068
*/
2069
generateCSetInstruction(cg, node, resultReg, TR::CC_LT);
2070
srm->reclaimScratchRegister(castClassCacheReg);
2071
}
2072
break;
2073
case ArrayOfJavaLangObjectTest:
2074
{
2075
TR_ASSERT_FATAL(isNextItemGoToFalse(it, itEnd), "ArrayOfJavaLangObjectTest is always followed by GoToFalse");
2076
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ArrayOfJavaLangObjectTest\n", node->getOpCode().getName());
2077
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);
2078
genInstanceOfOrCheckCastObjectArrayTest(node, objectClassReg, doneLabel, true, srm, cg);
2079
generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
2080
}
2081
break;
2082
case DynamicCacheObjectClassTest:
2083
TR_ASSERT_FATAL(false, "%s: DynamicCacheObjectClassTest is not implemented on aarch64\n", node->getOpCode().getName());
2084
break;
2085
case DynamicCacheDynamicCastClassTest:
2086
TR_ASSERT_FATAL(false, "%s: DynamicCacheDynamicCastClassTest is not implemented on aarch64\n", node->getOpCode().getName());
2087
break;
2088
case HelperCall:
2089
{
2090
TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "HelperCall should be the terminal sequence");
2091
if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting HelperCall\n", node->getOpCode().getName());
2092
TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::icall, resultReg, callHelperLabel, doneLabel, cg);
2093
2094
cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
2095
2096
if (it == itBegin)
2097
{
2098
// If HelperCall is only the item in the sequence, branch to OOL
2099
generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);
2100
}
2101
}
2102
break;
2103
}
2104
2105
switch (current)
2106
{
2107
case ClassEqualityTest:
2108
case SuperClassTest:
2109
case ProfiledClassTest:
2110
case CompileTimeGuessClassTest:
2111
case ArrayOfJavaLangObjectTest:
2112
/**
2113
* For those tests, EQ flag is set if the cache hit
2114
*/
2115
if (isNextItemHelperCall(it, itEnd))
2116
{
2117
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_NE);
2118
}
2119
else if (!isNextItemGoToFalse(it, itEnd))
2120
{
2121
// If other tests follow, branch to doneLabel
2122
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
2123
}
2124
break;
2125
case CastClassCacheTest:
2126
if (isNextItemHelperCall(it, itEnd))
2127
{
2128
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_GT);
2129
}
2130
else if (!isNextItemGoToFalse(it, itEnd))
2131
{
2132
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_LE);
2133
}
2134
break;
2135
case NullTest:
2136
break;
2137
default:
2138
if (isNextItemHelperCall(it, itEnd))
2139
{
2140
generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);
2141
}
2142
break;
2143
}
2144
2145
if (!isTerminalSequence(it, itEnd))
2146
{
2147
generateLabelInstruction(cg, TR::InstOpCode::label, node, nextSequenceLabel);
2148
nextSequenceLabel = generateLabelSymbol(cg);
2149
}
2150
2151
}
2152
2153
if (objectClassReg)
2154
srm->reclaimScratchRegister(objectClassReg);
2155
2156
TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3 + srm->numAvailableRegisters(), cg->trMemory());
2157
srm->addScratchRegistersToDependencyList(deps);
2158
2159
deps->addPostCondition(resultReg, TR::RealRegister::NoReg);
2160
deps->addPostCondition(objectReg, TR::RealRegister::NoReg);
2161
2162
if (castClassReg)
2163
{
2164
deps->addPostCondition(castClassReg, TR::RealRegister::NoReg);
2165
}
2166
2167
generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);
2168
2169
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast/%s/fastPath",
2170
node->getOpCode().getName()),
2171
*srm);
2172
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast.perMethod/%s/(%s)/%d/%d/fastPath",
2173
node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
2174
*srm);
2175
2176
2177
cg->decReferenceCount(objectNode);
2178
cg->decReferenceCount(castClassNode);
2179
// Stop using every reg in the deps except these ones.
2180
//
2181
deps->stopUsingDepRegs(cg, objectReg, resultReg);
2182
2183
node->setRegister(resultReg);
2184
2185
return resultReg;
2186
}
2187
2188
/**
2189
* @brief Generates null test instructions
2190
*
2191
* @param[in] cg: code generator
2192
* @param[in] objReg: register holding object
2193
* @param[in] node: null check node
2194
* @param[in] nullSymRef: symbol reference of null check
2195
*
2196
*/
2197
static
2198
void generateNullTest(TR::CodeGenerator *cg, TR::Register *objReg, TR::Node *node, TR::SymbolReference *nullSymRef = NULL)
2199
{
2200
TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
2201
TR::Compilation *comp = cg->comp();
2202
if (nullSymRef == NULL)
2203
{
2204
nullSymRef = comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol());
2205
}
2206
TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, nullSymRef, NULL);
2207
cg->addSnippet(snippet);
2208
2209
TR::Instruction *cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objReg, snippetLabel);
2210
cbzInstruction->setNeedsGCMap(0xffffffff);
2211
snippet->gcMap().setGCRegisterMask(0xffffffff);
2212
// ARM64HelperCallSnippet generates "bl" instruction
2213
cg->machine()->setLinkRegisterKilled(true);
2214
}
2215
2216
/**
 * @brief Generates inlined fast-path code for checkcast and checkcastAndNULLCHK
 *
 * The sequence of tests to emit is chosen by calculateInstanceOfOrCheckCastSequences.
 * Each test either proves the cast succeeds (branch to doneLabel), disproves it
 * (branch to the out-of-line helper call, which throws on failure), or falls
 * through to the next test.
 *
 * @param[in] node: the checkcast or checkcastAndNULLCHK node
 * @param[in] cg:   code generator
 * @return NULL (checkcast produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::VMcheckcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_OpaqueClassBlock *compileTimeGuessClass;
   int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();
   if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);
   TR_ASSERT_FATAL(maxProfiledClasses <= 4, "Maximum 4 profiled classes per site allowed because we use a fixed stack allocated buffer for profiled classes\n");
   InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];
   bool topClassWasCastClass = false;
   float topClassProbability = 0.0;

   InstanceOfOrCheckCastProfiledClasses profiledClassesList[4];
   uint32_t numberOfProfiledClass;
   uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);

   TR::Node *objectNode = node->getFirstChild();
   TR::Node *castClassNode = node->getSecondChild();
   TR::Register *objectReg = cg->evaluate(objectNode);
   TR::Register *castClassReg = NULL;

   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *callHelperLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *nextSequenceLabel = generateLabelSymbol(cg);

   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
   TR::Register *objectClassReg = NULL;

   auto itBegin = std::begin(sequences);
   const auto itEnd = std::next(itBegin, numSequencesRemaining);

   for (auto it = itBegin; it != itEnd; it++)
      {
      auto current = *it;
      switch (current)
         {
         case EvaluateCastClass:
            TR_ASSERT(!castClassReg, "Cast class already evaluated");
            castClassReg = cg->gprClobberEvaluate(castClassNode);
            break;
         case LoadObjectClass:
            TR_ASSERT(!objectClassReg, "Object class already loaded");
            objectClassReg = srm->findOrCreateScratchRegister();
            generateLoadJ9Class(node, objectClassReg, objectReg, cg);
            break;
         case NullTest:
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());
            TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");
            if (node->getOpCodeValue() == TR::checkcastAndNULLCHK)
               {
               // checkcastAndNULLCHK must throw on a null object, so emit a real
               // null check that branches to the NULLCHK helper snippet.
               TR::Node *nullChkInfo = comp->findNullChkInfo(node);
               generateNullTest(cg, objectReg, nullChkInfo);
               }
            else
               {
               // Plain checkcast succeeds on null.
               if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))
                  {
                  // The remaining sequence always calls the helper; only a
                  // non-null object needs the helper call.
                  generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, objectReg, callHelperLabel);
                  }
               else
                  {
                  // branch to doneLabel if object is null
                  generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objectReg, doneLabel);
                  }
               }
            break;
         case GoToTrue:
            TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToTrue should be the terminal sequence");
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToTrue\n", node->getOpCode().getName());
            break;
         case ClassEqualityTest:
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ClassEqualityTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Equality", comp->signature()),1,TR::DebugCounter::Undetermined);

            // EQ flag is set when the object class equals the cast class.
            generateCompareInstruction(cg, node, objectClassReg, castClassReg, true);
            break;
         case SuperClassTest:
            {
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting SuperClassTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/SuperClassTest", comp->signature()),1,TR::DebugCounter::Undetermined);

            int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);
            auto falseLabel = (isNextItemGoToFalse(it, itEnd) || isNextItemHelperCall(it, itEnd)) ? callHelperLabel : nextSequenceLabel;
            genSuperClassTest(node, objectClassReg, false, castClassReg, castClassDepth, falseLabel, srm, cg);
            }
            break;
         /**
          * Following switch case generates sequence of instructions for profiled class test for this checkCast node
          * arbitraryClassReg1 <= profiledClass
          * if (arbitraryClassReg1 == objClassReg)
          *    JMP DoneLabel
          * else
          *    continue to NextTest
          */
         case ProfiledClassTest:
            {
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ProfiledClassTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Profile", comp->signature()),1,TR::DebugCounter::Undetermined);

            auto profiledClassesIt = std::begin(profiledClassesList);
            auto profiledClassesItEnd = std::next(profiledClassesIt, numberOfProfiledClass);
            while (profiledClassesIt != profiledClassesItEnd)
               {
               if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: ProfiledClassTest: profiledClass = %p, isProfiledClassInstanceOfCastClass = %s\n",
                                                         node->getOpCode().getName(), profiledClassesIt->profiledClass,
                                                         (profiledClassesIt->isProfiledClassInstanceOfCastClass) ? "true" : "false");

               genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, profiledClassesIt->profiledClass, srm, cg);
               /**
                * At this point EQ flag will be set if the profiledClass matches the cast class.
                */
               profiledClassesIt++;
               if (profiledClassesIt != profiledClassesItEnd)
                  {
                  generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
                  }
               }
            }
            break;
         case CompileTimeGuessClassTest:
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CompileTimeGuessClassTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/compTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);

            genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, compileTimeGuessClass, srm, cg);
            break;
         /**
          * Following switch case generates sequence of instructions for cast class cache test for this checkCast node
          * Load castClassCacheReg, offsetOf(J9Class,castClassCache)
          * if castClassCacheReg == castClassReg
          *    JMP DoneLabel
          * else
          *    continue to NextTest
          */
         case CastClassCacheTest:
            {
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CastClassCacheTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/CastClassCache", comp->signature()),1,TR::DebugCounter::Undetermined);

            /**
             * Compare the cast class against the cache on the instance class.
             * If they are the same the cast is successful.
             * If not it's either because the cache class does not match the cast class,
             * or it does match except the cache class has the low bit set, which means the cast is not successful.
             * In those cases, we need to call out to helper.
             */
            TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();
            generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,
                              TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, castClassCache)));
            generateCompareInstruction(cg, node, castClassCacheReg, castClassReg, true);
            /**
             * At this point, EQ flag will be set if the cast is successful.
             */
            srm->reclaimScratchRegister(castClassCacheReg);
            }
            break;
         case ArrayOfJavaLangObjectTest:
            {
            TR_ASSERT_FATAL(isNextItemGoToFalse(it, itEnd), "ArrayOfJavaLangObjectTest is always followed by GoToFalse");
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ArrayOfJavaLangObjectTest\n", node->getOpCode().getName());
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);

            /*
             * In this case, the false label is in the OOLCodeSection, and it can be placed far away from here.
             * The offset of tbz/tbnz instruction must be within +-32KB range, so we do not use tbz/tbnz.
             */
            genInstanceOfOrCheckCastObjectArrayTest(node, objectClassReg, callHelperLabel, false, srm, cg);
            }
            break;
         case DynamicCacheObjectClassTest:
            TR_ASSERT_FATAL(false, "%s: DynamicCacheObjectClassTest is not implemented on aarch64\n", node->getOpCode().getName());
            break;
         case DynamicCacheDynamicCastClassTest:
            TR_ASSERT_FATAL(false, "%s: DynamicCacheDynamicCastClassTest is not implemented on aarch64\n", node->getOpCode().getName());
            break;
         case GoToFalse:
         case HelperCall:
            {
            auto seq = (current == GoToFalse) ? "GoToFalse" : "HelperCall";
            TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "%s should be the terminal sequence", seq);
            if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting %s\n", node->getOpCode().getName(), seq);
            TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, callHelperLabel, doneLabel, cg);

            cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);

            if (it == itBegin)
               {
               // If HelperCall or GoToFalse is the only item in the sequence, branch to OOL
               generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);
               }
            }
            break;
         }

      // Emit the glue branch that connects this test to the next sequence item.
      switch (current)
         {
         case ClassEqualityTest:
         case SuperClassTest:
         case ProfiledClassTest:
         case CompileTimeGuessClassTest:
         case CastClassCacheTest:
         case ArrayOfJavaLangObjectTest:
            /**
             * For those tests, EQ flag is set if the cast is successful
             */
            if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))
               {
               generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_NE);
               }
            else
               {
               // When other tests follow, branch to doneLabel if EQ flag is set
               generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
               }
            break;
         case NullTest:
            break;
         default:
            if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))
               {
               generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);
               }
         }

      if (!isTerminalSequence(it, itEnd))
         {
         generateLabelInstruction(cg, TR::InstOpCode::label, node, nextSequenceLabel);
         nextSequenceLabel = generateLabelSymbol(cg);
         }

      }

   if (objectClassReg)
      srm->reclaimScratchRegister(objectClassReg);

   // doneLabel needs dependencies for objectReg, castClassReg (if evaluated),
   // and every scratch register the sequences used.
   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3 + srm->numAvailableRegisters(), cg->trMemory());
   srm->addScratchRegistersToDependencyList(deps);

   deps->addPostCondition(objectReg, TR::RealRegister::NoReg);

   if (castClassReg)
      {
      deps->addPostCondition(castClassReg, TR::RealRegister::NoReg);
      }

   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast/%s/fastPath",
                                                               node->getOpCode().getName()),
                            *srm);
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast.perMethod/%s/(%s)/%d/%d/fastPath",
                                                               node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
                            *srm);

   cg->decReferenceCount(objectNode);
   cg->decReferenceCount(castClassNode);
   // Stop using every reg in the deps except objectReg
   //
   deps->stopUsingDepRegs(cg, objectReg);

   node->setRegister(NULL);

   return NULL;
   }
2484
2485
TR::Register *
J9::ARM64::TreeEvaluator::checkcastAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // checkcastAndNULLCHK shares the checkcast code path; VMcheckcastEvaluator
   // recognizes the TR::checkcastAndNULLCHK opcode and emits the null test itself.
   return VMcheckcastEvaluator(node, cg);
   }
2490
2491
TR::Register *
J9::ARM64::TreeEvaluator::checkcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // checkcast is handled by the common checkcast sequence generator.
   return VMcheckcastEvaluator(node, cg);
   }
2496
2497
/**
 * @brief Emits the memory barrier for fence nodes
 *        (allocationFence, loadFence, storeFence, fullFence)
 *
 * @param[in] node: fence node
 * @param[in] cg:   code generator
 * @return NULL (fences produce no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::flushEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   const TR::ILOpCodes op = node->getOpCodeValue();

   if (op == TR::allocationFence)
      {
      // StoreStore barrier is required after publishing new object reference to other threads,
      // unless the fence was marked as removable.
      if (!node->canOmitSync())
         {
         // dmb ishst (Inner Shareable store barrier)
         generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);
         }
      return NULL;
      }

   uint32_t imm;
   switch (op)
      {
      case TR::loadFence:
         // TR::loadFence is used for both loadLoadFence and acquireFence.
         // Loads before the barrier are ordered before loads/stores after the barrier.
         // dmb ishld (Inner Shareable load barrier)
         imm = 0x9;
         break;
      case TR::storeFence:
         // TR::storeFence is used for both storeStoreFence and releaseFence.
         // Loads/Stores before the barrier are ordered before stores after the barrier.
         // dmb ish (Inner Shareable full barrier)
         imm = 0xB;
         break;
      default:
         // TR::fullFence is used for fullFence.
         // dmb ish (Inner Shareable full barrier)
         imm = 0xB;
         break;
      }
   generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, imm);

   return NULL;
   }
2539
2540
/**
 * Helper template function to get value clamped between low and high.
 * std::clamp is unavailable for C++11.
 *
 * @param[in] value: value to clamp
 * @param[in] low:   lower bound
 * @param[in] high:  upper bound (expected to be >= low)
 * @return value clamped to the range [low, high]
 */
template<typename T>
T clamp(const T& value, const T& low, const T& high)
   {
   return std::min(std::max(value, low), high);
   }
2549
2550
/**
 * Overload of clamp for an int value clamped between bounds of type T
 * (typically an enum). The bounds are widened to int for the comparison and
 * the clamped result is converted back to T.
 *
 * @param[in] value: int value to clamp
 * @param[in] low:   lower bound
 * @param[in] high:  upper bound (expected to be >= low when widened to int)
 * @return value clamped to [low, high], converted to T
 */
template<typename T>
T clamp(const int& value, const T& low, const T& high)
   {
   return static_cast<T>(std::min(std::max(value, static_cast<int>(low)), static_cast<int>(high)));
   }
2555
2556
/**
2557
* @brief Generates instructions for allocating heap for new/newarray/anewarray
2558
* The limitation of the current implementation:
2559
* - supports `new` only
2560
* - does not support dual TLH
2561
* - does not support realtimeGC
2562
*
2563
* @param[in] node: node
2564
* @param[in] cg: code generator
2565
* @param[in] isVariableLen: true if allocating variable length array
2566
* @param[in] allocSize: size to allocate on heap if isVariableLen is false. offset to data start if isVariableLen is true.
2567
* @param[in] elementSize: size of array elements. Used if isVariableLen is true.
2568
* @param[in] resultReg: the register that contains allocated heap address
2569
* @param[in] lengthReg: the register that contains array length (number of elements). Used if isVariableLen is true.
2570
* @param[in] heapTopReg: temporary register 1
2571
* @param[in] tempReg: temporary register 2
2572
* @param[in] dataSizeReg: temporary register 3, this register contains the number of allocated bytes if isVariableLen is true.
2573
* @param[in] conditions: dependency conditions
2574
* @param[in] callLabel: label to call when allocation fails
2575
*/
2576
static void
2577
genHeapAlloc(TR::Node *node, TR::CodeGenerator *cg, bool isVariableLen, uint32_t allocSize, int32_t elementSize, TR::Register *resultReg,
2578
TR::Register *lengthReg, TR::Register *heapTopReg, TR::Register *tempReg, TR::Register *dataSizeReg, TR::RegisterDependencyConditions *conditions,
2579
TR::LabelSymbol *callLabel)
2580
{
2581
static const char *pTLHPrefetchThresholdSize = feGetEnv("TR_AArch64PrefetchThresholdSize");
2582
static const char *pTLHPrefetchArrayLineCount = feGetEnv("TR_AArch64PrefetchArrayLineCount");
2583
static const char *pTLHPrefetchType = feGetEnv("TR_AArch64PrefetchType");
2584
static const char *pTLHPrefetchTarget = feGetEnv("TR_AArch64PrefetchTarget");
2585
static const char *pTLHPrefetchPolicy = feGetEnv("TR_AArch64PrefetchPolicy");
2586
static const int cacheLineSize = (TR::Options::_TLHPrefetchLineSize > 0) ? TR::Options::_TLHPrefetchLineSize : 64;
2587
static const int tlhPrefetchLineCount = (TR::Options::_TLHPrefetchLineCount > 0) ? TR::Options::_TLHPrefetchLineCount : 1;
2588
static const int tlhPrefetchStaggeredLineCount = (TR::Options::_TLHPrefetchStaggeredLineCount > 0) ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;
2589
static const int tlhPrefetchThresholdSize = (pTLHPrefetchThresholdSize) ? atoi(pTLHPrefetchThresholdSize) : 64;
2590
static const int tlhPrefetchArrayLineCount = (pTLHPrefetchArrayLineCount) ? atoi(pTLHPrefetchArrayLineCount) : 4;
2591
static const ARM64PrefetchType tlhPrefetchType = (pTLHPrefetchType) ? clamp(atoi(pTLHPrefetchType), ARM64PrefetchType::LOAD, ARM64PrefetchType::STORE)
2592
: ARM64PrefetchType::STORE;
2593
static const ARM64PrefetchTarget tlhPrefetchTarget = (pTLHPrefetchTarget) ? clamp(atoi(pTLHPrefetchTarget), ARM64PrefetchTarget::L1, ARM64PrefetchTarget::L3)
2594
: ARM64PrefetchTarget::L3;
2595
static const ARM64PrefetchPolicy tlhPrefetchPolicy = (pTLHPrefetchPolicy) ? clamp(atoi(pTLHPrefetchPolicy), ARM64PrefetchPolicy::KEEP, ARM64PrefetchPolicy::STRM)
2596
: ARM64PrefetchPolicy::STRM;
2597
2598
TR::Compilation *comp = cg->comp();
2599
TR::Register *metaReg = cg->getMethodMetaDataRegister();
2600
2601
uint32_t maxSafeSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();
2602
bool isTooSmallToPrefetch = false;
2603
2604
static_assert(offsetof(J9VMThread, heapAlloc) < 32760, "Expecting offset to heapAlloc fits in imm12");
2605
static_assert(offsetof(J9VMThread, heapTop) < 32760, "Expecting offset to heapTop fits in imm12");
2606
2607
if (isVariableLen)
2608
{
2609
/*
2610
* Instructions for allocating heap for variable length `newarray/anewarray`.
2611
*
2612
* cmp lengthReg, #maxObjectSizeInElements
2613
* b.hi callLabel
2614
*
2615
* uxtw tempReg, lengthReg
2616
* ldrimmx resultReg, [metaReg, offsetToHeapAlloc]
2617
* lsl tempReg, lengthReg, #shiftValue
2618
* addimmx tempReg, tempReg, #headerSize+round-1
2619
* cmpimmw lengthReg, 0; # of array elements
2620
* andimmx tempReg, tempReg, #-round
2621
* movzx tempReg2, aligned(#sizeOfDiscontiguousArrayHeader)
2622
* cselx dataSizeReg, tempReg, tempReg2, ne
2623
* ldrimmx heapTopReg, [metaReg, offsetToHeapTop]
2624
* addimmx tempReg, resultReg, dataSizeReg
2625
*
2626
* # check for overflow
2627
* cmp tempReg, heapTopReg
2628
* b.gt callLabel
2629
* # write back heapAlloc
2630
* strimmx tempReg, [metaReg, offsetToHeapAlloc]
2631
*
2632
*/
2633
// Detect large or negative number of elements in case addr wrap-around
2634
//
2635
// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes
2636
// of slush will exist between the top of the heap and the end of the address space.
2637
//
2638
uint32_t maxObjectSizeInElements = maxSafeSize / elementSize;
2639
if (constantIsUnsignedImm12(maxObjectSizeInElements))
2640
{
2641
generateCompareImmInstruction(cg, node, lengthReg, maxObjectSizeInElements, false);
2642
}
2643
else
2644
{
2645
loadConstant32(cg, node, maxObjectSizeInElements, tempReg);
2646
generateCompareInstruction(cg, node, lengthReg, tempReg, false);
2647
}
2648
// Must be an unsigned comparison on sizes.
2649
//
2650
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_HI, conditions);
2651
2652
// At this point, lengthReg must contain non-negative value.
2653
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::ubfmx, node, tempReg, lengthReg, 31); // uxtw
2654
2655
// Load the base of the next available heap storage.
2656
generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, resultReg,
2657
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)));
2658
2659
// calculate variable size, rounding up if necessary to a intptr_t multiple boundary
2660
//
2661
static const int32_t objectAlignmentInBytes = TR::Compiler->om.getObjectAlignmentInBytes();
2662
bool headerAligned = (allocSize % objectAlignmentInBytes) == 0;
2663
// zero indicates no rounding is necessary
2664
const int32_t round = ((elementSize >= objectAlignmentInBytes) && headerAligned) ? 0 : objectAlignmentInBytes;
2665
2666
// If the array is zero length, the array is a discontiguous.
2667
// Large heap builds do not need to care about this because the
2668
// contiguous and discontiguous array headers are the same size.
2669
//
2670
auto shiftAmount = trailingZeroes(elementSize);
2671
auto displacement = (round > 0) ? round - 1 : 0;
2672
uint32_t alignmentMaskEncoding;
2673
bool maskN;
2674
2675
if (round != 0)
2676
{
2677
if (round == 8)
2678
{
2679
maskN = true;
2680
alignmentMaskEncoding = 0xf7c;
2681
}
2682
else
2683
{
2684
bool canBeEncoded = logicImmediateHelper(-round, true, maskN, alignmentMaskEncoding);
2685
TR_ASSERT_FATAL(canBeEncoded, "mask for andimmx (%d) cannnot be encoded", (-round));
2686
}
2687
}
2688
if (comp->useCompressedPointers())
2689
{
2690
if (shiftAmount > 0)
2691
{
2692
generateLogicalShiftLeftImmInstruction(cg, node, tempReg, tempReg, shiftAmount, true);
2693
}
2694
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, tempReg, tempReg, (allocSize + displacement));
2695
generateCompareImmInstruction(cg, node, lengthReg, 0, false); // lengthReg is 32bit
2696
if (round != 0)
2697
{
2698
generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, tempReg, maskN, alignmentMaskEncoding);
2699
}
2700
static const int32_t zeroArraySizeAligned = OMR::align(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), objectAlignmentInBytes);
2701
loadConstant64(cg, node, zeroArraySizeAligned, heapTopReg);
2702
2703
generateCondTrg1Src2Instruction(cg, TR::InstOpCode::cselx, node, dataSizeReg, tempReg, heapTopReg, TR::CC_NE);
2704
}
2705
else
2706
{
2707
if (shiftAmount > 0)
2708
{
2709
generateLogicalShiftLeftImmInstruction(cg, node, tempReg, tempReg, shiftAmount, false);
2710
}
2711
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, dataSizeReg, tempReg, (allocSize + displacement));
2712
if (round != 0)
2713
{
2714
generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, dataSizeReg, dataSizeReg, maskN, alignmentMaskEncoding);
2715
}
2716
}
2717
2718
// Load the heap top
2719
generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, heapTopReg,
2720
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapTop)));
2721
generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, tempReg, resultReg, dataSizeReg);
2722
2723
}
2724
else
2725
{
2726
isTooSmallToPrefetch = allocSize < tlhPrefetchThresholdSize;
2727
/*
2728
* Instructions for allocating heap for fixed length `new/newarray/anewarray`.
2729
*
2730
* ldrimmx resultReg, [metaReg, offsetToHeapAlloc]
2731
* ldrimmx heapTopReg, [metaReg, offsetToHeapTop]
2732
* addsimmx tempReg, resultReg, #allocSize
2733
* # check for address wrap-around if necessary
2734
* b.cc callLabel
2735
* # check for overflow
2736
* cmp tempReg, heapTopReg
2737
* b.gt callLabel
2738
* # write back heapAlloc
2739
* strimmx tempReg, [metaReg, offsetToHeapAlloc]
2740
*
2741
*/
2742
2743
// Load the base of the next available heap storage.
2744
generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, resultReg,
2745
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)));
2746
// Load the heap top
2747
generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, heapTopReg,
2748
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapTop)));
2749
2750
// Calculate the after-allocation heapAlloc: if the size is huge,
2751
// we need to check address wrap-around also. This is unsigned
2752
// integer arithmetic, checking carry bit is enough to detect it.
2753
const bool isAllocSizeInReg = !constantIsUnsignedImm12(allocSize);
2754
const bool isWithinMaxSafeSize = allocSize <= maxSafeSize;
2755
if (isAllocSizeInReg)
2756
{
2757
loadConstant64(cg, node, allocSize, tempReg);
2758
generateTrg1Src2Instruction(cg, isWithinMaxSafeSize ? TR::InstOpCode::addx : TR::InstOpCode::addsx,
2759
node, tempReg, resultReg, tempReg);
2760
}
2761
else
2762
{
2763
generateTrg1Src1ImmInstruction(cg, isWithinMaxSafeSize ? TR::InstOpCode::addimmx : TR::InstOpCode::addsimmx,
2764
node, tempReg, resultReg, allocSize);
2765
}
2766
if (!isWithinMaxSafeSize)
2767
{
2768
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_CC, conditions);
2769
}
2770
2771
}
2772
2773
// Ok, tempReg now points to where the object will end on the TLH.
2774
// resultReg will contain the start of the object where we'll write out our
2775
// J9Class*. Should look like this in memory:
2776
// [heapAlloc == resultReg] ... tempReg ...//... heapTopReg.
2777
2778
//Here we check if we overflow the TLH Heap Top
2779
//branch to heapAlloc Snippet if we overflow (ie callLabel).
2780
generateCompareInstruction(cg, node, tempReg, heapTopReg, true);
2781
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_GT, conditions);
2782
2783
if (comp->getOption(TR_TLHPrefetch) && (!isTooSmallToPrefetch))
2784
{
2785
int offset = tlhPrefetchStaggeredLineCount * cacheLineSize;
2786
int loopCount = (node->getOpCodeValue() == TR::New) ? tlhPrefetchLineCount : tlhPrefetchArrayLineCount;
2787
2788
for (int i = 0; i < loopCount; i++)
2789
{
2790
generateMemImmInstruction(cg, TR::InstOpCode::prfmimm, node,
2791
TR::MemoryReference::createWithDisplacement(cg, tempReg, offset), toPrefetchOp(tlhPrefetchType, tlhPrefetchTarget, tlhPrefetchPolicy));
2792
offset += cacheLineSize;
2793
}
2794
}
2795
//Done, write back to heapAlloc here.
2796
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,
2797
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)), tempReg);
2798
2799
}
2800
2801
/**
2802
* @brief Generates instructions for initializing allocated memory for new/newarray/anewarray
2803
*
2804
* @param[in] node: node
2805
* @param[in] cg: code generator
2806
* @param[in] isVariableLen: true if allocating variable length array
2807
* @param[in] objectSize: size of the object
2808
* @param[in] headerSize: header size of the object
2809
* @param[in] objectReg: the register that holds object address
2810
* @param[in] dataSizeReg: the register that holds the number of allocated bytes if isVariableLength is true
2811
* @param[in] zeroReg: the register whose value is zero
2812
* @param[in] tempReg1: temporary register 1
2813
* @param[in] tempReg2: temporary register 2
2814
*/
2815
static void
2816
genZeroInitObject(TR::Node *node, TR::CodeGenerator *cg, bool isVariableLen, uint32_t objectSize, uint32_t headerSize, TR::Register *objectReg,
2817
TR::Register *dataSizeReg, TR::Register *zeroReg, TR::Register *tempReg1, TR::Register *tempReg2)
2818
{
2819
2820
if (isVariableLen)
2821
{
2822
/*
2823
* Instructions for clearing allocated memory for variable length
2824
* We assume that the objectSize is multiple of 8.
2825
* Because the size of the header of contiguous arrays are multiple of 8,
2826
* the data size to clear is also multiple of 8.
2827
*
2828
* subimmx dataSizeReg, dataSizeReg, #headerSize
2829
* cbz dataSizeReg, zeroinitdone
2830
* // Adjust tempReg1 so that (tempReg1 + 16) points to
2831
* // the memory area beyond the object header
2832
* subimmx tempReg1, objectReg, (16 - #headerSize)
2833
* cmp dataSizeReg, #64
2834
* b.lt medium
2835
* large: // dataSizeReg >= 64
2836
* lsr tempReg2, dataSizeReg, #6 // loopCount = dataSize / 64
2837
* and dataSizeReg, dataSizeReg, #63
2838
* loopStart:
2839
* stpimmx xzr, xzr, [tempReg1, #16]
2840
* stpimmx xzr, xzr, [tempReg1, #32]
2841
* stpimmx xzr, xzr, [tempReg1, #48]
2842
* stpimmx xzr, xzr, [tempReg1, #64]! // pre index
2843
* subsimmx tempReg2, tempReg2, #1
2844
* b.ne loopStart
2845
* cbz dataSizeReg, zeroinitdone
2846
* medium:
2847
* addx tempReg2, tempReg1, dataSizeReg // tempReg2 points to 16bytes before the end of the buffer
2848
* // write residues. We have at least 8bytes before (tempReg1 + 16)
2849
* cmpimmx dataSizeReg, #16
2850
* b.le write16
2851
* cmpimmx dataSizeReg, #32
2852
* b.le write32
2853
* cmpimmx dataSizeReg, #48
2854
* b.le write48
2855
* write64: // 56 bytes
2856
* stpimmx xzr, xzr, [tempReg2, #-48]
2857
* write48: // 40, 48 bytes
2858
* stpimmx xzr, xzr, [tempReg2, #-32]
2859
* write32: // 24, 32 bytes
2860
* stpimmx xzr, xzr, [tempReg2, #-16]
2861
* write16: // 8, 16 bytes
2862
* stpimmx xzr, xzr, [tempReg2]
2863
* zeroinitdone:
2864
*/
2865
TR::LabelSymbol *zeroInitDoneLabel = generateLabelSymbol(cg);
2866
TR::LabelSymbol *mediumLabel = generateLabelSymbol(cg);
2867
TR::LabelSymbol *loopStartLabel = generateLabelSymbol(cg);
2868
TR::LabelSymbol *write16Label = generateLabelSymbol(cg);
2869
TR::LabelSymbol *write32Label = generateLabelSymbol(cg);
2870
TR::LabelSymbol *write48Label = generateLabelSymbol(cg);
2871
2872
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, dataSizeReg, dataSizeReg, headerSize);
2873
if (!TR::Compiler->om.generateCompressedObjectHeaders())
2874
{
2875
// Array Header is smaller than the minimum data size in compressedrefs build, so this check is not necessary.
2876
// This check is necessary in large heap build.
2877
generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, dataSizeReg, zeroInitDoneLabel);
2878
}
2879
generateTrg1Src1ImmInstruction(cg, (headerSize > 16) ? TR::InstOpCode::addimmx : TR::InstOpCode::subimmx,
2880
node, tempReg1, objectReg, std::abs(static_cast<int>(headerSize - 16)));
2881
2882
generateCompareImmInstruction(cg, node, dataSizeReg, 64, true);
2883
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, mediumLabel, TR::CC_LT);
2884
generateLogicalShiftRightImmInstruction(cg, node, tempReg2, dataSizeReg, 6, true);
2885
generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, dataSizeReg, dataSizeReg, true, 5); // N = true, immr:imms = 5
2886
2887
generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStartLabel);
2888
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 16), zeroReg, zeroReg);
2889
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 32), zeroReg, zeroReg);
2890
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 48), zeroReg, zeroReg);
2891
generateMemSrc2Instruction(cg, TR::InstOpCode::stpprex, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 64), zeroReg, zeroReg);
2892
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmx, node, tempReg2, tempReg2, 1);
2893
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loopStartLabel, TR::CC_NE);
2894
2895
generateLabelInstruction(cg, TR::InstOpCode::label, node, mediumLabel);
2896
generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, tempReg2, tempReg1, dataSizeReg);
2897
generateCompareImmInstruction(cg, node, dataSizeReg, 16, true);
2898
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write16Label, TR::CC_LE);
2899
generateCompareImmInstruction(cg, node, dataSizeReg, 32, true);
2900
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write32Label, TR::CC_LE);
2901
generateCompareImmInstruction(cg, node, dataSizeReg, 48, true);
2902
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write48Label, TR::CC_LE);
2903
2904
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -48), zeroReg, zeroReg);
2905
generateLabelInstruction(cg, TR::InstOpCode::label, node, write48Label);
2906
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -32), zeroReg, zeroReg);
2907
generateLabelInstruction(cg, TR::InstOpCode::label, node, write32Label);
2908
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -16), zeroReg, zeroReg);
2909
generateLabelInstruction(cg, TR::InstOpCode::label, node, write16Label);
2910
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, 0), zeroReg, zeroReg);
2911
generateLabelInstruction(cg, TR::InstOpCode::label, node, zeroInitDoneLabel);
2912
}
2913
else
2914
{
2915
/*
2916
* Instructions for clearing allocated memory for fixed length
2917
* We assume that the objectSize is multiple of 4.
2918
*
2919
* // Adjust tempReg1 so that (tempReg1 + 16) points to
2920
* // the memory area beyond the object header
2921
* subimmx tempReg1, objectReg, (16 - #headerSize)
2922
* movzx tempReg2, loopCount
2923
* loop:
2924
* stpimmx xzr, xzr, [tempReg1, #16]
2925
* stpimmx xzr, xzr, [tempReg1, #32]
2926
* stpimmx xzr, xzr, [tempReg1, #48]
2927
* stpimmx xzr, xzr, [tempReg1, #64]! // pre index
2928
* subsimmx tempReg2, tempReg2, #1
2929
* b.ne loop
2930
* // write residues
2931
* stpimmx xzr, xzr [tempReg1, #16]
2932
* stpimmx xzr, xzr [tempReg1, #32]
2933
* stpimmx xzr, xzr [tempReg1, #48]
2934
* strimmx xzr, [tempReg1, #64]
2935
* strimmw xzr, [tempReg1, #72]
2936
*
2937
*/
2938
// TODO align tempReg1 to 16-byte boundary if objectSize is large
2939
// TODO use vector register
2940
// TODO use dc zva
2941
const int32_t unrollFactor = 4;
2942
const int32_t width = 16; // use stp to clear 16 bytes
2943
const int32_t loopCount = (objectSize - headerSize) / (unrollFactor * width);
2944
const int32_t res1 = (objectSize - headerSize) % (unrollFactor * width);
2945
const int32_t residueCount = res1 / width;
2946
const int32_t res2 = res1 % width;
2947
TR::LabelSymbol *loopStart = generateLabelSymbol(cg);
2948
2949
generateTrg1Src1ImmInstruction(cg, (headerSize > 16) ? TR::InstOpCode::addimmx : TR::InstOpCode::subimmx,
2950
node, tempReg1, objectReg, std::abs(static_cast<int>(headerSize - 16)));
2951
2952
if (loopCount > 0)
2953
{
2954
if (loopCount > 1)
2955
{
2956
loadConstant64(cg, node, loopCount, tempReg2);
2957
generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStart);
2958
}
2959
for (int i = 1; i < unrollFactor; i++)
2960
{
2961
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, i * width), zeroReg, zeroReg);
2962
}
2963
generateMemSrc2Instruction(cg, TR::InstOpCode::stpprex, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, unrollFactor * width), zeroReg, zeroReg);
2964
if (loopCount > 1)
2965
{
2966
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmx, node, tempReg2, tempReg2, 1);
2967
generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loopStart, TR::CC_NE);
2968
}
2969
}
2970
for (int i = 0; i < residueCount; i++)
2971
{
2972
generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, (i + 1) * width), zeroReg, zeroReg);
2973
}
2974
int offset = (residueCount + 1) * width;
2975
if (res2 >= 8)
2976
{
2977
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, offset), zeroReg);
2978
offset += 8;
2979
}
2980
if ((res2 & 4) > 0)
2981
{
2982
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, offset), zeroReg);
2983
}
2984
2985
}
2986
}
2987
2988
/**
2989
* @brief Generates instructions for initializing Object header for new/newarray/anewarray
2990
*
2991
* @param[in] node: node
2992
* @param[in] cg: code generator
2993
* @param[in] clazz: class pointer to store in the object header
2994
* @param[in] objectReg: the register that holds object address
2995
* @param[in] classReg: the register that holds class
2996
* @param[in] zeroReg: the register whose value is zero
2997
* @param[in] tempReg1: temporary register 1
2998
* @param[in] isTLHHasNotBeenCleared: true if TLH has not been cleared
2999
*/
3000
static void
3001
genInitObjectHeader(TR::Node *node, TR::CodeGenerator *cg, TR_OpaqueClassBlock *clazz, TR::Register *objectReg, TR::Register *classReg, TR::Register *zeroReg, TR::Register *tempReg1, bool isTLHHasNotBeenCleared)
3002
{
3003
TR_ASSERT(clazz, "Cannot have a null OpaqueClassBlock\n");
3004
TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());
3005
TR::Compilation *comp = cg->comp();
3006
TR::Register * clzReg = classReg;
3007
TR::Register *metaReg = cg->getMethodMetaDataRegister();
3008
3009
// For newarray/anewarray, classReg holds the class pointer of array elements
3010
// Prepare valid class pointer for arrays
3011
if (node->getOpCodeValue() != TR::New)
3012
{
3013
if (cg->needClassAndMethodPointerRelocations())
3014
{
3015
if (comp->getOption(TR_UseSymbolValidationManager))
3016
{
3017
loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(clazz), tempReg1, TR_ClassPointer);
3018
}
3019
else
3020
{
3021
if (node->getOpCodeValue() == TR::newarray)
3022
{
3023
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,
3024
TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, javaVM)));
3025
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,
3026
TR::MemoryReference::createWithDisplacement(cg, tempReg1,
3027
fej9->getPrimitiveArrayOffsetInJavaVM(node->getSecondChild()->getInt())));
3028
}
3029
else
3030
{
3031
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,
3032
TR::MemoryReference::createWithDisplacement(cg, classReg, offsetof(J9Class, arrayClass)));
3033
}
3034
}
3035
}
3036
else
3037
{
3038
loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(clazz), tempReg1);
3039
}
3040
clzReg = tempReg1;
3041
}
3042
3043
// Store the class
3044
generateMemSrc1Instruction(cg, TR::Compiler->om.generateCompressedObjectHeaders() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,
3045
node, TR::MemoryReference::createWithDisplacement(cg, objectReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField()), clzReg);
3046
3047
int32_t lwOffset = fej9->getByteOffsetToLockword(clazz);
3048
if (clazz && (lwOffset > 0))
3049
{
3050
int32_t lwInitialValue = fej9->getInitialLockword(clazz);
3051
3052
if ((0 != lwInitialValue) || isTLHHasNotBeenCleared)
3053
{
3054
bool isCompressedLockWord = fej9->generateCompressedLockWord();
3055
if (0 != lwInitialValue)
3056
{
3057
loadConstant64(cg, node, lwInitialValue, tempReg1);
3058
generateMemSrc1Instruction(cg, isCompressedLockWord ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,
3059
node, TR::MemoryReference::createWithDisplacement(cg, objectReg, lwOffset), tempReg1);
3060
}
3061
else
3062
{
3063
generateMemSrc1Instruction(cg, isCompressedLockWord ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,
3064
node, TR::MemoryReference::createWithDisplacement(cg, objectReg, lwOffset), zeroReg);
3065
}
3066
}
3067
}
3068
}
3069
3070
/**
3071
* @brief Generates instructions for initializing array header for newarray/anewarray
3072
*
3073
* @param[in] node: node
3074
* @param[in] cg: code generator
3075
* @param[in] clazz: class pointer to store in the object header
3076
* @param[in] objectReg: the register that holds object address
3077
* @param[in] classReg: the register that holds class
3078
* @param[in] sizeReg: the register that holds array length.
3079
* @param[in] zeroReg: the register whose value is zero
3080
* @param[in] tempReg1: temporary register 1
3081
* @param[in] isBatchClearTLHEnabled: true if BatchClearTLH is enabled
3082
* @param[in] isTLHHasNotBeenCleared: true if TLH has not been cleared
3083
*/
3084
static void
3085
genInitArrayHeader(TR::Node *node, TR::CodeGenerator *cg, TR_OpaqueClassBlock *clazz, TR::Register *objectReg, TR::Register *classReg, TR::Register *sizeReg, TR::Register *zeroReg, TR::Register *tempReg1,
3086
bool isBatchClearTLHEnabled, bool isTLHHasNotBeenCleared)
3087
{
3088
TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());
3089
3090
genInitObjectHeader(node, cg, clazz, objectReg, classReg, zeroReg, tempReg1, isTLHHasNotBeenCleared);
3091
if (node->getFirstChild()->getOpCode().isLoadConst() && (node->getFirstChild()->getInt() == 0))
3092
{
3093
// If BatchClearTLH is enabled, we do not need to write 0 into the header.
3094
if (!isBatchClearTLHEnabled)
3095
{
3096
// constant zero length array
3097
// Zero length arrays are discontiguous (i.e. they also need the discontiguous length field to be 0) because
3098
// they are indistinguishable from non-zero length discontiguous arrays
3099
if (TR::Compiler->om.generateCompressedObjectHeaders())
3100
{
3101
// `mustBeZero` and `size` field of J9IndexableObjectDiscontiguousCompressed must be cleared.
3102
// We cannot use `strimmx` in this case because offset would be 4 bytes, which cannot be encoded as imm12 of `strimmx`.
3103
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,
3104
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField() - 4),
3105
zeroReg);
3106
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,
3107
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField()),
3108
zeroReg);
3109
}
3110
else
3111
{
3112
// `strimmx` can be used as offset is 8 bytes.
3113
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,
3114
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField() - 4),
3115
zeroReg);
3116
}
3117
}
3118
}
3119
else
3120
{
3121
// Store the array size
3122
// If the size field of contiguous array header is 0, the array is discontiguous and
3123
// the size of discontiguous array must be in the size field of discontiguous array header.
3124
// For now, we do not create non-zero length discontigous array,
3125
// so it is safe to write 0 into the size field of discontiguous array header.
3126
//
3127
// In the compressedrefs build, the size field of discontigous array header is cleared by instructions generated by genZeroInit().
3128
// In the large heap build, we must clear size and mustBeZero field here
3129
if (TR::Compiler->om.generateCompressedObjectHeaders())
3130
{
3131
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,
3132
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField()),
3133
sizeReg);
3134
if (!isTLHHasNotBeenCleared)
3135
{
3136
// If BatchClearTLH is not enabled and TLH has not been cleared, write 0 into the size field of J9IndexableObjectDiscontiguousCompressed.
3137
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,
3138
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField()),
3139
zeroReg);
3140
}
3141
}
3142
else
3143
{
3144
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::ubfmx, node, tempReg1, sizeReg, 31); // uxtw
3145
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,
3146
TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField()),
3147
tempReg1);
3148
}
3149
}
3150
}
3151
3152
/**
 * @brief Generates instructions for inlining new/newarray/anewarray
 * The limitation of the current implementation:
 * - does not support dual TLH
 * - does not support realtimeGC
 *
 * @param node: node
 * @param cg: code generator
 *
 * @return register containing allocated object, NULL if inlining is not possible
 */
3164
TR::Register *
3165
J9::ARM64::TreeEvaluator::VMnewEvaluator(TR::Node *node, TR::CodeGenerator *cg)
3166
{
3167
TR::Compilation * comp = cg->comp();
3168
TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());
3169
3170
bool generateArraylets = comp->generateArraylets();
3171
3172
if (comp->suppressAllocationInlining() || TR::TreeEvaluator::requireHelperCallValueTypeAllocation(node, cg))
3173
return NULL;
3174
3175
if (comp->getOption(TR_DisableTarokInlineArrayletAllocation) && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))
3176
return NULL;
3177
3178
// Currently, we do not support realtime GC.
3179
if (comp->getOptions()->realTimeGC())
3180
return NULL;
3181
3182
TR_OpaqueClassBlock *clazz = NULL;
3183
3184
// --------------------------------------------------------------------------------
3185
//
3186
// Find the class info and allocation size depending on the node type.
3187
//
3188
// Returns:
3189
// size of object includes the size of the array header
3190
// -1 cannot allocate inline
3191
// 0 variable sized allocation
3192
//
3193
// --------------------------------------------------------------------------------
3194
3195
int32_t objectSize = comp->canAllocateInline(node, clazz);
3196
if (objectSize < 0)
3197
return NULL;
3198
const bool isVariableLength = (objectSize == 0);
3199
3200
static long count = 0;
3201
if (!performTransformation(comp, "O^O <%3d> Inlining Allocation of %s [0x%p].\n", count++, node->getOpCode().getName(), node))
3202
return NULL;
3203
3204
3205
// 1. Evaluate children
3206
int32_t headerSize;
3207
TR::Node *firstChild = node->getFirstChild();
3208
TR::Node *secondChild = NULL;
3209
int32_t elementSize = 0;
3210
bool isArrayNew = false;
3211
TR::Register *classReg = NULL;
3212
TR::Register *lengthReg = NULL;
3213
TR::ILOpCodes opCode = node->getOpCodeValue();
3214
if (opCode == TR::New)
3215
{
3216
// classReg is passed to the VM helper on the slow path and subsequently clobbered; copy it for later nodes if necessary
3217
classReg = cg->gprClobberEvaluate(firstChild);
3218
headerSize = TR::Compiler->om.objectHeaderSizeInBytes();
3219
lengthReg = cg->allocateRegister();
3220
}
3221
else
3222
{
3223
if (generateArraylets || TR::Compiler->om.useHybridArraylets())
3224
{
3225
if (node->getOpCodeValue() == TR::newarray)
3226
elementSize = TR::Compiler->om.getSizeOfArrayElement(node);
3227
else if (comp->useCompressedPointers())
3228
elementSize = TR::Compiler->om.sizeofReferenceField();
3229
else
3230
elementSize = TR::Compiler->om.sizeofReferenceAddress();
3231
3232
if (generateArraylets)
3233
headerSize = fej9->getArrayletFirstElementOffset(elementSize, comp);
3234
else
3235
headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
3236
}
3237
else
3238
{
3239
elementSize = TR::Compiler->om.getSizeOfArrayElement(node);
3240
headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
3241
}
3242
3243
// If the array cannot be allocated as a contiguous array, then comp->canAllocateInline should have returned -1.
3244
// The only exception is when the array length is 0.
3245
isArrayNew = true;
3246
3247
lengthReg = cg->evaluate(firstChild);
3248
secondChild = node->getSecondChild();
3249
// classReg is passed to the VM helper on the slow path and subsequently clobbered; copy it for later nodes if necessary
3250
classReg = cg->gprClobberEvaluate(secondChild);
3251
}
3252
3253
TR::Instruction *firstInstructionAfterClassAndLengthRegsAreReady = cg->getAppendInstruction();
3254
// 2. Calculate allocation size
3255
int32_t allocateSize = isVariableLength ? headerSize : (objectSize + TR::Compiler->om.getObjectAlignmentInBytes() - 1) & (-TR::Compiler->om.getObjectAlignmentInBytes());
3256
3257
// 3. Allocate registers
3258
TR::Register *resultReg = cg->allocateRegister();
3259
TR::Register *tempReg1 = cg->allocateRegister();
3260
TR::Register *tempReg2 = cg->allocateRegister();
3261
TR::Register *tempReg3 = isVariableLength ? cg->allocateRegister() : NULL;
3262
TR::Register *zeroReg = cg->allocateRegister();
3263
TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
3264
TR::LabelSymbol *callReturnLabel = generateLabelSymbol(cg);
3265
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
3266
3267
// 4. Setup register dependencies
3268
const int numReg = isVariableLength ? 7 : 6;
3269
TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(numReg, numReg, cg->trMemory());
3270
TR::addDependency(conditions, classReg, TR::RealRegister::NoReg, TR_GPR, cg);
3271
TR::addDependency(conditions, resultReg, TR::RealRegister::NoReg, TR_GPR, cg);
3272
TR::addDependency(conditions, lengthReg, TR::RealRegister::NoReg, TR_GPR, cg);
3273
TR::addDependency(conditions, zeroReg, TR::RealRegister::xzr, TR_GPR, cg);
3274
TR::addDependency(conditions, tempReg1, TR::RealRegister::NoReg, TR_GPR, cg);
3275
TR::addDependency(conditions, tempReg2, TR::RealRegister::NoReg, TR_GPR, cg);
3276
if (isVariableLength)
3277
{
3278
TR::addDependency(conditions, tempReg3, TR::RealRegister::NoReg, TR_GPR, cg);
3279
}
3280
3281
// 5. Allocate object/array on heap
3282
genHeapAlloc(node, cg, isVariableLength, allocateSize, elementSize, resultReg, lengthReg, tempReg1, tempReg2, tempReg3, conditions, callLabel);
3283
3284
// 6. Setup OOL Section for slowpath
3285
TR::Register *objReg = cg->allocateCollectedReferenceRegister();
3286
TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::acall, objReg, callLabel, callReturnLabel, cg);
3287
cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
3288
3289
// 7. Initialize the allocated memory area with zero
3290
const bool isBatchClearTLHEnabled = fej9->tlhHasBeenCleared();
3291
if (!isBatchClearTLHEnabled)
3292
{
3293
// TODO selectively initialize necessary slots
3294
if (!node->canSkipZeroInitialization())
3295
{
3296
genZeroInitObject(node, cg, isVariableLength, objectSize, headerSize, resultReg, tempReg3, zeroReg, tempReg1, tempReg2);
3297
}
3298
}
3299
const bool tlhHasNotBeenCleared = (!isBatchClearTLHEnabled) && node->canSkipZeroInitialization();
3300
3301
// 8. Initialize Object Header
3302
if (isArrayNew)
3303
{
3304
genInitArrayHeader(node, cg, clazz, resultReg, classReg, lengthReg, zeroReg, tempReg1, isBatchClearTLHEnabled, tlhHasNotBeenCleared);
3305
3306
/* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are
3307
* simple as we just need to check first child of the node for array size. For variable length arrays
3308
* runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.
3309
*
3310
* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
3311
* discontiguous header layout.
3312
*/
3313
TR::Register *offsetReg = tempReg1;
3314
TR::Register *firstDataElementReg = tempReg2;
3315
TR::MemoryReference *dataAddrSlotMR = NULL;
3316
3317
if (isVariableLength && TR::Compiler->om.compressObjectReferences())
3318
{
3319
/* We need to check lengthReg (array size) at runtime to determine correct offset of dataAddr field.
3320
* Here we deal only with compressed refs because dataAddr offset for discontiguous and contiguous
3321
* arrays is the same in full refs.
3322
*/
3323
if (comp->getOption(TR_TraceCG))
3324
traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);
3325
3326
TR_ASSERT_FATAL_WITH_NODE(node,
3327
(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
3328
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
3329
"But was %d bytes for discontigous and %d bytes for contiguous array.\n",
3330
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
3331
3332
// Since array size is capped at 32 bits, we don't need to check all 64 bits of lengthReg.
3333
generateCompareImmInstruction(cg, node, lengthReg, 0, false);
3334
generateCSetInstruction(cg, node, offsetReg, TR::CC_EQ);
3335
// offsetReg at this point is either 1 (if lengthReg == 0) or 0 (otherwise).
3336
// offsetReg = resultReg + (offsetReg << 3)
3337
generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, offsetReg, resultReg, offsetReg, TR::SH_LSL, 3);
3338
3339
dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, offsetReg, fej9->getOffsetOfContiguousDataAddrField());
3340
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, offsetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
3341
}
3342
else if (!isVariableLength && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
3343
{
3344
if (comp->getOption(TR_TraceCG))
3345
traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);
3346
3347
dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getOffsetOfDiscontiguousDataAddrField());
3348
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, resultReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes());
3349
}
3350
else
3351
{
3352
if (comp->getOption(TR_TraceCG))
3353
{
3354
traceMsg(comp,
3355
"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",
3356
node);
3357
}
3358
3359
if (!TR::Compiler->om.compressObjectReferences())
3360
{
3361
TR_ASSERT_FATAL_WITH_NODE(node,
3362
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
3363
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
3364
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
3365
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
3366
}
3367
3368
dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getOffsetOfContiguousDataAddrField());
3369
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, resultReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
3370
}
3371
3372
generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, dataAddrSlotMR, firstDataElementReg);
3373
3374
if (generateArraylets)
3375
{
3376
// write arraylet pointer to object header
3377
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, tempReg2, resultReg, headerSize);
3378
if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)
3379
generateLogicalShiftRightImmInstruction(cg, node, tempReg2, tempReg2, TR::Compiler->om.compressedReferenceShiftOffset());
3380
3381
TR::InstOpCode::Mnemonic storeOp = comp->useCompressedPointers() ? TR::InstOpCode::strimmx : TR::InstOpCode::strimmw;
3382
generateMemSrc1Instruction(cg, storeOp, node,
3383
TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getFirstArrayletPointerOffset(comp)),
3384
tempReg2);
3385
}
3386
}
3387
else
3388
{
3389
genInitObjectHeader(node, cg, clazz, resultReg, classReg, zeroReg, tempReg1, tlhHasNotBeenCleared);
3390
}
3391
3392
// 9. Setup AOT relocation
3393
if (cg->comp()->compileRelocatableCode() && (opCode == TR::New || opCode == TR::anewarray))
3394
{
3395
TR::Instruction *firstInstruction = firstInstructionAfterClassAndLengthRegsAreReady->getNext();
3396
TR_OpaqueClassBlock *classToValidate = clazz;
3397
3398
TR_RelocationRecordInformation *recordInfo = (TR_RelocationRecordInformation *) comp->trMemory()->allocateMemory(sizeof(TR_RelocationRecordInformation), heapAlloc);
3399
recordInfo->data1 = allocateSize;
3400
recordInfo->data2 = node->getInlinedSiteIndex();
3401
recordInfo->data3 = (uintptr_t) callLabel;
3402
recordInfo->data4 = (uintptr_t) firstInstruction;
3403
3404
TR::SymbolReference * classSymRef;
3405
TR_ExternalRelocationTargetKind reloKind;
3406
3407
if (opCode == TR::New)
3408
{
3409
classSymRef = node->getFirstChild()->getSymbolReference();
3410
reloKind = TR_VerifyClassObjectForAlloc;
3411
}
3412
else
3413
{
3414
classSymRef = node->getSecondChild()->getSymbolReference();
3415
reloKind = TR_VerifyRefArrayForAlloc;
3416
3417
if (comp->getOption(TR_UseSymbolValidationManager))
3418
classToValidate = comp->fej9()->getComponentClassFromArrayClass(classToValidate);
3419
}
3420
3421
if (comp->getOption(TR_UseSymbolValidationManager))
3422
{
3423
TR_ASSERT_FATAL(classToValidate, "classToValidate should not be NULL, clazz=%p\n", clazz);
3424
recordInfo->data5 = (uintptr_t)classToValidate;
3425
}
3426
3427
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(firstInstruction, (uint8_t *) classSymRef, (uint8_t *) recordInfo, reloKind, cg),
3428
__FILE__, __LINE__, node);
3429
}
3430
3431
generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
3432
3433
// At this point the object is initialized and we can move it to a collected register.
3434
// The out of line path will do the same.
3435
generateMovInstruction(cg, node, objReg, resultReg, true);
3436
3437
generateLabelInstruction(cg, TR::InstOpCode::label, node, callReturnLabel);
3438
3439
// Cleanup registers
3440
cg->stopUsingRegister(tempReg1);
3441
cg->stopUsingRegister(tempReg2);
3442
cg->stopUsingRegister(zeroReg);
3443
cg->stopUsingRegister(resultReg);
3444
if (isVariableLength)
3445
{
3446
cg->stopUsingRegister(tempReg3);
3447
}
3448
3449
cg->decReferenceCount(firstChild);
3450
if (opCode == TR::New)
3451
{
3452
if (classReg != firstChild->getRegister())
3453
{
3454
cg->stopUsingRegister(classReg);
3455
}
3456
cg->stopUsingRegister(lengthReg);
3457
}
3458
else
3459
{
3460
cg->decReferenceCount(secondChild);
3461
if (classReg != secondChild->getRegister())
3462
{
3463
cg->stopUsingRegister(classReg);
3464
}
3465
}
3466
3467
node->setRegister(objReg);
3468
return objReg;
3469
}
3470
3471
TR::Register *
J9::ARM64::TreeEvaluator::multianewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Multi-dimensional array allocation is always delegated to the VM helper:
   // temporarily recreate the node as an acall so the direct-call evaluator
   // emits the helper invocation, then restore the original opcode.
   const TR::ILOpCodes savedOpCode = node->getOpCodeValue();
   TR::Node::recreate(node, TR::acall);
   TR::Register *resultReg = directCallEvaluator(node, cg);
   TR::Node::recreate(node, savedOpCode);
   return resultReg;
   }
3480
3481
TR::Register *
J9::ARM64::TreeEvaluator::newObjectEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Try to emit the inline object-allocation sequence first.
   if (TR::Register *inlinedResult = TR::TreeEvaluator::VMnewEvaluator(node, cg))
      return inlinedResult;

   // Inline allocation was not generated; fall back to a call to the VM
   // helper by recreating the node as an acall, emitting the call, and
   // restoring the original opcode afterwards.
   const TR::ILOpCodes savedOpCode = node->getOpCodeValue();
   TR::Node::recreate(node, TR::acall);
   TR::Register *helperResult = directCallEvaluator(node, cg);
   TR::Node::recreate(node, savedOpCode);
   return helperResult;
   }
3496
3497
TR::Register *
J9::ARM64::TreeEvaluator::newArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Try to emit the inline array-allocation sequence first.
   if (TR::Register *inlinedResult = TR::TreeEvaluator::VMnewEvaluator(node, cg))
      return inlinedResult;

   // Inline allocation was not generated; fall back to a call to the VM
   // helper by recreating the node as an acall, emitting the call, and
   // restoring the original opcode afterwards.
   const TR::ILOpCodes savedOpCode = node->getOpCodeValue();
   TR::Node::recreate(node, TR::acall);
   TR::Register *helperResult = directCallEvaluator(node, cg);
   TR::Node::recreate(node, savedOpCode);
   return helperResult;
   }
3512
3513
TR::Register *
J9::ARM64::TreeEvaluator::anewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Try to emit the inline reference-array allocation sequence first.
   if (TR::Register *inlinedResult = TR::TreeEvaluator::VMnewEvaluator(node, cg))
      return inlinedResult;

   // Inline allocation was not generated; fall back to a call to the VM
   // helper by recreating the node as an acall, emitting the call, and
   // restoring the original opcode afterwards.
   const TR::ILOpCodes savedOpCode = node->getOpCodeValue();
   TR::Node::recreate(node, TR::acall);
   TR::Register *helperResult = directCallEvaluator(node, cg);
   TR::Node::recreate(node, savedOpCode);
   return helperResult;
   }
3528
3529
/**
 * @brief Evaluator for monent (monitor enter). Emits an inline lock-acquire
 *        sequence (CAS via LSE when available, otherwise an LL/SC loop) with
 *        out-of-line fallback to the VM helper, and an optional out-of-line
 *        inline-recursive-enter path when the lockword offset is known at
 *        compile time.
 *
 * @param[in] node : the monent node
 * @param[in] cg   : code generator
 * @return NULL (monent produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::monentEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = TR::comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   // staticLwOffset <= 0 means the lockword offset is not known at compile time.
   const int32_t staticLwOffset = fej9->getByteOffsetToLockword(cg->getMonClass(node));
   TR::InstOpCode::Mnemonic op;
   TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);

   // Inlining the monitor enter is not possible under FSD, when the object is
   // known to be a value-based/value-type instance, or when inlining is
   // disabled; in those cases generate a call to the VM helper instead.
   if (comp->getOption(TR_FullSpeedDebug) ||
       (isMonitorValueBasedOrValueType == TR_yes) ||
       comp->getOption(TR_DisableInlineMonEnt))
      {
      TR::ILOpCodes opCode = node->getOpCodeValue();
      TR::Node::recreate(node, TR::call);
      TR::Register *targetRegister = directCallEvaluator(node, cg);
      TR::Node::recreate(node, opCode);
      return targetRegister;
      }

   TR::Node *objNode = node->getFirstChild();
   TR::Register *objReg = cg->evaluate(objNode);
   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
   TR::Register *metaReg = cg->getMethodMetaDataRegister();

   TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *OOLLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);
   startLabel->setStartInternalControlFlow();

   // In both of these cases a J9Class load from the object header happens
   // first (value-type runtime check or lockword-offset lookup), so any
   // implicit NPE is raised by that load rather than by the lockword access.
   const bool isImplicitNullChkIsDoneAtLoadJ9Class = (isMonitorValueBasedOrValueType == TR_maybe) || (staticLwOffset <= 0);
   const bool inlineRecursive = staticLwOffset > 0;
   // If object is not known to be value type or value based class at compile time, check at run time
   if (isMonitorValueBasedOrValueType == TR_maybe)
      {
      TR::Register *temp1Reg = srm->findOrCreateScratchRegister();
      TR::Register *temp2Reg = srm->findOrCreateScratchRegister();

      // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.
      // In this case, nullcheck reference register is objReg and the memory reference does use it,
      // so let InstructionDelegate::setupImplicitNullPointerException handle it.
      //
      // If we are generating code for MonitorCacheLookup then we will not have a separate OOL for inlineRecursive, and OOLLabel points
      // to the OOL Containing only helper call. Otherwise, OOL will have other code apart from helper call which we do not want to execute
      // for ValueType or ValueBased object and in that scenario we will need to generate another OOL that just contains helper call.
      generateCheckForValueMonitorEnterOrExit(node, doneLabel, inlineRecursive ? NULL : OOLLabel, objReg, temp1Reg, temp2Reg, cg, J9_CLASS_DISALLOWS_LOCKING_FLAGS);

      srm->reclaimScratchRegister(temp1Reg);
      srm->reclaimScratchRegister(temp2Reg);
      }

   TR::Register *addrReg = srm->findOrCreateScratchRegister();

   // If we do not know the lockword offset at compile time, obtain it from the class pointer of the object being locked
   if (staticLwOffset <= 0)
      {
      generateLockwordAddressLookup(cg, node, objReg, addrReg, metaReg, srm, OOLLabel);
      }
   else
      {
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, staticLwOffset); // ldxr/stxr instructions do not take an immediate offset
      }
   TR::Register *dataReg = srm->findOrCreateScratchRegister();

   // The instruction whose fault would represent an implicit NPE (the first
   // access through the lockword address).
   TR::Instruction *faultingInstruction;
   static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;
   if (comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE))
      {
      generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, dataReg, 0); // expected value
      /*
       * We need to generate a CASAL, not a CASA because loads/stores before monitor exit can be reordered after a CASA
       * as the store to lockword for monitor exit is a plain store.
       */
      op = fej9->generateCompressedLockWord() ? TR::InstOpCode::casalw : TR::InstOpCode::casalx;
      /*
       * As Trg1MemSrc1Instruction was introduced to support ldxr/stxr instructions, target and source register convention
       * is somewhat confusing. Its `treg` register actually is a source register and `sreg` register is a target register.
       * This needs to be fixed at some point.
       */
      faultingInstruction = generateTrg1MemSrc1Instruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), metaReg);
      // CAS leaves the old lockword value in dataReg; non-zero means the lock
      // was not free, so take the out-of-line path.
      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, dataReg, OOLLabel);
      }
   else
      {
      // No LSE: classic load-exclusive / store-exclusive retry loop.
      TR::Register *tempReg = srm->findOrCreateScratchRegister();

      generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
      op = fej9->generateCompressedLockWord() ? TR::InstOpCode::ldxrw : TR::InstOpCode::ldxrx;
      faultingInstruction = generateTrg1MemInstruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));

      // Lockword non-zero: lock already held, go out of line.
      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, dataReg, OOLLabel);
      op = fej9->generateCompressedLockWord() ? TR::InstOpCode::stxrw : TR::InstOpCode::stxrx;

      generateTrg1MemSrc1Instruction(cg, op, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), metaReg);
      // stxr failed (tempReg != 0): retry the loop.
      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, tempReg, loopLabel);

      generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)

      srm->reclaimScratchRegister(tempReg);
      }

   // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.
   // In this case, nullcheck reference register is objReg, but the memory reference does not use it,
   // thus we need to explicitly set implicit exception point here.
   if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck() && (!isImplicitNullChkIsDoneAtLoadJ9Class))
      {
      if (cg->getImplicitExceptionPoint() == NULL)
         {
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, objNode);
            }
         cg->setImplicitExceptionPoint(faultingInstruction);
         }
      }

   if (inlineRecursive)
      {
      /*
       * OOLLabel:
       *    addimmx dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT
       *    andimmx tempReg, dataReg, ~(OBJECT_HEADER_LOCK_RECURSION_MASK)
       *    cmpx    metaReg, tempReg
       *    b.ne    snippetLabel
       *    strimmx dataReg, [addrReg]
       * OOLEndLabel:
       *    b       doneLabel
       *
       */
      // This register is only required for OOL code section
      // If we obtain this from scratch register manager, then one more register is used in mainline.
      TR::Register *tempReg = cg->allocateRegister();

      TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *OOLEndLabel = generateLabelSymbol(cg);
      TR_ARM64OutOfLineCodeSection *oolSection = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(OOLLabel, doneLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(oolSection);
      oolSection->swapInstructionListsWithCompilation();
      generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLLabel);

      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
      // OBJECT_HEADER_LOCK_RECURSION_MASK is 0xF0, immr=0x38, imms=0x3b for ~(0xF0)
      generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, dataReg, true, 0xe3b);
      // If the masked lockword does not match the current thread, we cannot
      // take the inline recursive-enter path; call the helper snippet.
      generateCompareInstruction(cg, node, metaReg, tempReg, true);

      TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), OOLEndLabel);
      cg->addSnippet(snippet);
      TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
      gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
      snippet->gcMap().setGCRegisterMask(0xffffffff);

      generateMemSrc1Instruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,
                                 node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), dataReg);

      TR::RegisterDependencyConditions *ooldeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());
      ooldeps->addPostCondition(objReg, TR::RealRegister::x0);
      ooldeps->addPostCondition(tempReg, TR::RealRegister::NoReg);
      ooldeps->addPostCondition(dataReg, TR::RealRegister::NoReg);
      ooldeps->addPostCondition(addrReg, TR::RealRegister::NoReg);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLEndLabel, ooldeps);
      generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);

      cg->stopUsingRegister(tempReg);
      // ARM64HelperCallSnippet generates "bl" instruction
      cg->machine()->setLinkRegisterKilled(true);
      oolSection->swapInstructionListsWithCompilation();
      }
   else
      {
      // No inline recursive path: the OOL section is just a helper call.
      TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, OOLLabel, doneLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
      }

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());
   deps->addPostCondition(objReg, TR::RealRegister::NoReg);
   srm->addScratchRegistersToDependencyList(deps);
   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);

   doneLabel->setEndInternalControlFlow();

   srm->stopUsingRegisters();

   cg->decReferenceCount(objNode);
   cg->machine()->setLinkRegisterKilled(true);
   return NULL;
   }
3718
3719
/**
 * @brief Evaluator for arraylength. Loads the contiguous size field and, if it
 *        is zero (which indicates a discontiguous array), substitutes the
 *        discontiguous size field via a conditional select.
 *
 * @param[in] node : the arraylength node
 * @param[in] cg   : code generator
 * @return register holding the array length (32-bit)
 */
TR::Register *
J9::ARM64::TreeEvaluator::arraylengthEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(cg->comp()->requiresSpineChecks(), "TR::arraylength should be lowered when hybrid arraylets are not in use");

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   // Generated sequence:
   //    ldrimmw R1, [B, contiguousSize]
   //    cmpimmw R1, 0 ; If 0, must be a discontiguous array
   //    ldrimmw R2, [B, discontiguousSize]
   //    cselw   R1, R1, R2, ne
   //
   TR::Register *objectReg = cg->evaluate(node->getFirstChild());
   TR::Register *lengthReg = cg->allocateRegister();
   TR::Register *discontiguousLengthReg = cg->allocateRegister();

   TR::MemoryReference *contiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField());
   TR::MemoryReference *discontiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField());

   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, lengthReg, contiguousArraySizeMR);
   generateCompareImmInstruction(cg, node, lengthReg, 0);
   // The discontiguous size is loaded unconditionally; csel picks it only when
   // the contiguous size compared equal to zero.
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, discontiguousLengthReg, discontiguousArraySizeMR);
   generateCondTrg1Src2Instruction(cg, TR::InstOpCode::cselw, node, lengthReg, lengthReg, discontiguousLengthReg, TR::CC_NE);

   cg->stopUsingRegister(discontiguousLengthReg);
   cg->decReferenceCount(node->getFirstChild());
   node->setRegister(lengthReg);

   return lengthReg;
   }
3748
3749
/**
 * @brief Evaluator for ZEROCHK. Branches to an out-of-line helper call when the
 *        checked value is zero; otherwise falls through to the restart label.
 *
 * @param[in] node : the ZEROCHK node (first child is the value to check; the
 *                   remaining children are arguments for the outlined helper)
 * @param[in] cg   : code generator
 * @return NULL (ZEROCHK produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::ZEROCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // NOTE: ZEROCHK is intended to be general and straightforward.  If you're
   // thinking of adding special code for specific situations in here, consider
   // whether you want to add your own CHK opcode instead.  If you feel the
   // need for special handling here, you may also want special handling in the
   // optimizer, in which case a separate opcode may be more suitable.
   //
   // On the other hand, if the improvements you're adding could benefit other
   // users of ZEROCHK, please go ahead and add them!
   //

   TR::LabelSymbol *slowPathLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *restartLabel  = generateLabelSymbol(cg);
   slowPathLabel->setStartInternalControlFlow();
   restartLabel->setEndInternalControlFlow();

   // Temporarily hide the first child so it doesn't appear in the outlined call
   //
   node->rotateChildren(node->getNumChildren()-1, 0);
   node->setNumChildren(node->getNumChildren()-1);

   // Outlined instructions for check failure
   //
   TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, slowPathLabel, restartLabel, cg);
   cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);

   // Restore the first child
   //
   node->setNumChildren(node->getNumChildren()+1);
   node->rotateChildren(0, node->getNumChildren()-1);

   // Children other than the first are only for the outlined path; we don't need them here
   //
   for (int32_t i = 1; i < node->getNumChildren(); i++)
      cg->recursivelyDecReferenceCount(node->getChild(i));

   // Instructions for the check
   // ToDo: Optimize isBooleanCompare() case
   //
   TR::Node *valueToCheck = node->getFirstChild();
   TR::Register *value = cg->evaluate(valueToCheck);

   // Zero value takes the slow path (outlined helper call).
   generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, value, slowPathLabel);

   cg->decReferenceCount(node->getFirstChild());
   generateLabelInstruction(cg, TR::InstOpCode::label, node, restartLabel);

   return NULL;
   }
3800
3801
/**
 * @brief Evaluator for BNDCHK (array bounds check). Compares the array length
 *        (first child) against the index (second child) and branches to an
 *        ArrayIndexOutOfBounds helper snippet when index >= length (unsigned).
 *        When only the length is a small constant, the comparison operands are
 *        reversed and the branch condition adjusted accordingly.
 *
 * @param[in] node : the BNDCHK node
 * @param[in] cg   : code generator
 * @return NULL (BNDCHK produces no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::BNDCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *secondChild = node->getSecondChild();
   TR::Node *firstChild = node->getFirstChild();
   TR::Register *src1Reg;
   TR::Register *src2Reg = NULL;
   uint64_t value;
   TR::LabelSymbol *snippetLabel;
   TR::Instruction *gcPoint;
   bool reversed = false;

   // If the length is an unmaterialized constant that fits in a 12-bit
   // unsigned immediate, compare "index cmp constLength" instead and reverse
   // the branch condition below.
   if ((firstChild->getOpCode().isLoadConst())
         && (constantIsUnsignedImm12(firstChild->get64bitIntegralValueAsUnsigned()))
         && (NULL == firstChild->getRegister()))
      {
      src2Reg = cg->evaluate(secondChild);
      reversed = true;
      }
   else
      {
      src1Reg = cg->evaluate(firstChild);

      // If this BNDCHK is combined with previous NULLCHK, there is
      // an instruction that will cause a hardware trap if the exception is to be
      // taken. If this method may catch the exception, a GC stack map must be
      // created for this instruction. All registers are valid at this GC point
      // TODO - if the method may not catch the exception we still need to note
      // that the GC point exists, since maps before this point and after it cannot
      // be merged.
      //
      if (cg->getHasResumableTrapHandler() && node->hasFoldedImplicitNULLCHK())
         {
         TR::Compilation *comp = cg->comp();
         TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp, "\nNode %p has foldedimplicitNULLCHK, and a faulting instruction of %p\n", node, faultingInstruction);
            }

         if (faultingInstruction)
            {
            faultingInstruction->setNeedsGCMap(0xffffffff);
            cg->machine()->setLinkRegisterKilled(true);

            TR_Debug * debugObj = cg->getDebug();
            if (debugObj)
               {
               debugObj->addInstructionComment(faultingInstruction, "Throws Implicit Null Pointer Exception");
               }
            }
         }
      // Use an immediate compare when the index is a small unmaterialized
      // constant; otherwise evaluate it into a register.
      if ((secondChild->getOpCode().isLoadConst())
            && (NULL == secondChild->getRegister()))
         {
         value = secondChild->get64bitIntegralValueAsUnsigned();
         if (!constantIsUnsignedImm12(value))
            {
            src2Reg = cg->evaluate(secondChild);
            }
         }
      else
         src2Reg = cg->evaluate(secondChild);
      }

   if (reversed)
      {
      // cmp index, constLength
      generateCompareImmInstruction(cg, node, src2Reg, firstChild->get64bitIntegralValueAsUnsigned());
      }
   else
      {
      // cmp length, index (immediate form when the index constant fit)
      if (NULL == src2Reg)
         generateCompareImmInstruction(cg, node, src1Reg, value);
      else
         generateCompareInstruction(cg, node, src1Reg, src2Reg);
      }

   snippetLabel = generateLabelSymbol(cg);
   TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
   cg->addSnippet(snippet);

   // Out-of-bounds condition: CS (index >= length, unsigned) for the reversed
   // compare, LS (length <= index, unsigned) otherwise.
   gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, (reversed ? TR::CC_CS : TR::CC_LS));

   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
   snippet->gcMap().setGCRegisterMask(0xffffffff);

   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   // A successful bounds check implies the index is non-negative.
   secondChild->setIsNonNegative(true);
   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);
   return (NULL);
   }
3894
3895
/**
 * @brief Generate instruction sequence for array store check
 *
 * The checks are attempted in increasing order of cost:
 *   1. destination component type == source class
 *   2. destination component type == java/lang/Object (when resolvable)
 *   3. source class castClassCache == destination component type
 *   4. destination component type == arrayComponentClass recorded on the node
 *   5. full superclass test (falls back to the helper on failure)
 *
 * @param[in] node: node
 * @param[in] srcReg: register contains source object
 * @param[in] dstReg: register contains destination array
 * @param[in] srm: scratch register manager
 * @param[in] doneLabel: label to jump when check is successful
 * @param[in] helperCallLabel: label to jump when helper call is needed
 * @param[in] cg: code generator
 */
static void VMarrayStoreCHKEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, TR_ARM64ScratchRegisterManager *srm,
                                     TR::LabelSymbol *doneLabel, TR::LabelSymbol *helperCallLabel, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());
   TR::Register *sourceClassReg = srm->findOrCreateScratchRegister();
   TR::Register *destArrayClassReg = srm->findOrCreateScratchRegister();

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator"), *srm);

   generateLoadJ9Class(node, sourceClassReg, srcReg, cg);
   generateLoadJ9Class(node, destArrayClassReg, dstReg, cg);

   TR::Register *destComponentClassReg = srm->findOrCreateScratchRegister();
   TR_Debug *debugObj = cg->getDebug();

   auto instr = generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, destComponentClassReg,
                        TR::MemoryReference::createWithDisplacement(cg, destArrayClassReg, offsetof(J9ArrayClass, componentType)));
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "load component type of the destination array");
      }
   srm->reclaimScratchRegister(destArrayClassReg);
   destArrayClassReg = NULL; // prevent re-using this register by error

   // Check 1: exact class-equality fast path.
   generateCompareInstruction(cg, node, destComponentClassReg, sourceClassReg, true);
   instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "done if component type of the destination array equals to source object class");
      }
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:01ClassEqualityCheckDone"), *srm);

   TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16, true);
   /*
    * objectClass is used for Object arrays check optimization: when we are storing to Object arrays we can skip all other array store checks
    * However, TR_J9SharedCacheVM::getSystemClassFromClassName can return 0 when it's impossible to relocate j9class later for AOT loads
    * in that case we don't want to generate the Object arrays check
    */
   bool doObjectArrayCheck = objectClass != NULL;
   if (doObjectArrayCheck)
      {
      // Check 2: storing into Object[] always succeeds.
      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:02JavaLangObjectCheck"), *srm);

      TR::Register *javaLangObjectClassReg = srm->findOrCreateScratchRegister();
      if (cg->wantToPatchClassPointer(objectClass, node) || cg->needClassAndMethodPointerRelocations())
         {
         loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(objectClass), javaLangObjectClassReg, TR_ClassPointer);
         }
      else
         {
         loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(objectClass), javaLangObjectClassReg);
         }
      generateCompareInstruction(cg, node, javaLangObjectClassReg, destComponentClassReg, true);
      instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
      if (debugObj)
         {
         debugObj->addInstructionComment(instr, "done if component type of the destination array equals to java/lang/Object");
         }
      srm->reclaimScratchRegister(javaLangObjectClassReg);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:03JavaLangObjectCheckDone"), *srm);
      }

   // Check 3: the source class's castClassCache remembers the last successful
   // cast target.
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:04CastClassCacheCheck"), *srm);

   TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,
                        TR::MemoryReference::createWithDisplacement(cg, sourceClassReg, offsetof(J9Class, castClassCache)));
   generateCompareInstruction(cg, node, castClassCacheReg, destComponentClassReg, true);
   instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "done if component type of the destination array equals to castClassCache of source object class");
      }
   srm->reclaimScratchRegister(castClassCacheReg);
   castClassCacheReg = NULL; // prevent re-using this register by error

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:05CastClassCacheCheckDone"), *srm);

   /*
    * If isInstanceOf (objectClass,ArrayComponentClass,true,true) was successful and stored during VP, we need to test again the real arrayComponentClass
    * Need to relocate address of arrayComponentClass under AOT compilation.
    * Need to add PICsite on class constant if the class can be unloaded.
    */
   if (node->getArrayComponentClassInNode())
      {
      // Check 4: compare against the component class value-propagation
      // recorded on the node.
      TR::Register *arrayComponentClassReg = srm->findOrCreateScratchRegister();
      TR_OpaqueClassBlock *arrayComponentClass = node->getArrayComponentClassInNode();
      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:06ArrayComponentClassCheck"), *srm);

      if (cg->wantToPatchClassPointer(arrayComponentClass, node) || cg->needClassAndMethodPointerRelocations())
         {
         loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, TR_ClassPointer);
         }
      else
         {
         bool isUnloadAssumptionRequired = fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod());

         if (isUnloadAssumptionRequired)
            {
            loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, TR_NoRelocation, true);
            }
         else
            {
            loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, NULL, true);
            }
         }
      generateCompareInstruction(cg, node, arrayComponentClassReg, destComponentClassReg, true);
      instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);

      if (debugObj)
         {
         debugObj->addInstructionComment(instr, "done if component type of the destination array equals to arrayComponentClass set in node");
         }
      srm->reclaimScratchRegister(arrayComponentClassReg);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:06ArrayComponentClassCheckDone"), *srm);
      }

   // Check 5: full superclass test; anything that still fails goes to the helper.
   genSuperClassTest(node, sourceClassReg, true, destComponentClassReg, -1, helperCallLabel, srm, cg);
   srm->reclaimScratchRegister(destComponentClassReg);

   // prevent re-using these registers by error
   sourceClassReg = NULL;
   destComponentClassReg = NULL;

   instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, helperCallLabel, TR::CC_NE);
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "Call helper if super class test fails");
      }
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:07SuperClassTestDone"), *srm);

   cg->machine()->setLinkRegisterKilled(true);
   }
4042
4043
/**
 * @brief Evaluator for an ArrayStoreCHK node: verifies that the value being
 *        stored into a reference array is assignment-compatible with the
 *        array's component type before performing the store.
 *
 * The fast-path checks are emitted inline (and skipped entirely when the
 * stored value is provably null); the type-check helper call is emitted in an
 * out-of-line code section reached via helperCallLabel and merged back at
 * OOLMergeLabel. The store itself (with any write barrier) is produced by
 * evaluating the first child after the check sequence.
 *
 * @param[in] node : the ArrayStoreCHK node
 * @param[in] cg   : CodeGenerator
 * @return NULL (this evaluator does not produce a result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::ArrayStoreCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *sourceChild = firstChild->getSecondChild();
   TR::Node *dstNode = firstChild->getThirdChild();

   bool usingCompressedPointers = false;
   if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())
      {
      usingCompressedPointers = true;

      // Under compressed refs the stored value is wrapped in a compression
      // sequence; walk down to the a2l node (if any) and then to the original
      // address-typed child so we evaluate the uncompressed reference.
      while ((sourceChild->getNumChildren() > 0) && (sourceChild->getOpCodeValue() != TR::a2l))
         sourceChild = sourceChild->getFirstChild();
      if (sourceChild->getOpCodeValue() == TR::a2l)
         sourceChild = sourceChild->getFirstChild();
      }

   TR::Register *srcReg = cg->evaluate(sourceChild);
   TR::Register *dstReg = cg->evaluate(dstNode);
   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();

   // wbLabel: target for "check passed, go do the store"; OOLMergeLabel: point
   // where the out-of-line helper call rejoins the mainline.
   TR::LabelSymbol *wbLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *OOLMergeLabel = generateLabelSymbol(cg);
   TR_Debug * debugObj = cg->getDebug();

   // A statically-null source can never fail the array store check, so no
   // check code (and no helper call) is generated at all in that case.
   if (!sourceChild->isNull())
      {
      // NOTE(review): cached as function-local static, so this latches the
      // option value of the first compilation that reaches here.
      static const bool disableArrayStoreCHKOpts = comp->getOption(TR_DisableArrayStoreCheckOpts);
      TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());
      TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);
      // Since ArrayStoreCHK doesn't have the shape of the corresponding helper call we have to create this tree
      // so we can have it evaluated out of line
      TR::Node *helperCallNode = TR::Node::createWithSymRef(node, TR::call, 2, node->getSymbolReference());
      helperCallNode->setAndIncChild(0, sourceChild);
      helperCallNode->setAndIncChild(1, dstNode);
      if (comp->getOption(TR_TraceCG))
         {
         traceMsg(comp, "%s: Creating and evaluating the following tree to generate the necessary helper call for this node\n", node->getOpCode().getName());
         cg->getDebug()->print(comp->getOutFile(), helperCallNode);
         }

      bool nopASC = node->getArrayStoreClassInNode() && comp->performVirtualGuardNOPing() &&
         (!fej9->classHasBeenExtended(node->getArrayStoreClassInNode())) && (!disableArrayStoreCHKOpts);
      if (nopASC)
         {
         // Speculatively NOP the array store check if VP is able to prove that the ASC
         // would always succeed given the current state of the class hierarchy.
         //
         TR_VirtualGuard *virtualGuard = TR_VirtualGuard::createArrayStoreCheckGuard(comp, node, node->getArrayStoreClassInNode());
         // The instruction is emitted for its side effect (registering the NOP
         // site that patches to a branch to helperCallLabel if the assumption
         // is invalidated); the returned pointer is not used further here.
         TR::Instruction *vgnopInstr = generateVirtualGuardNOPInstruction(cg, node, virtualGuard->addNOPSite(), NULL, helperCallLabel);
         }
      else
         {
         // If source is null, we can skip array store check.
         auto cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, wbLabel);
         if (debugObj)
            {
            debugObj->addInstructionComment(cbzInstruction, "jump past array store check");
            }
         if (!disableArrayStoreCHKOpts)
            {
            // Inline fast-path type checks; falls through to wbLabel on
            // success, branches to helperCallLabel when the helper is needed.
            VMarrayStoreCHKEvaluator(node, srcReg, dstReg, srm, wbLabel, helperCallLabel, cg);
            }
         else
            {
            // Opts disabled: unconditionally take the helper-call path.
            generateLabelInstruction(cg, TR::InstOpCode::b, node, helperCallLabel);
            }
         }

      // Emit the helper call out of line; it branches back to OOLMergeLabel.
      TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(helperCallNode, TR::call, NULL, helperCallLabel, OOLMergeLabel, cg);
      cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);
      // Release the extra refcounts taken by setAndIncChild above.
      cg->decReferenceCount(helperCallNode->getFirstChild());
      cg->decReferenceCount(helperCallNode->getSecondChild());
      }
   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());
   srm->addScratchRegistersToDependencyList(deps);

   deps->addPostCondition(srcReg, TR::RealRegister::NoReg);
   deps->addPostCondition(dstReg, TR::RealRegister::NoReg);
   auto instr = generateLabelInstruction(cg, TR::InstOpCode::label, node, wbLabel);
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "ArrayStoreCHK Done");
      }
   instr = generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLMergeLabel, deps);
   if (debugObj)
      {
      debugObj->addInstructionComment(instr, "OOL merge point");
      }

   srm->stopUsingRegisters();

   // Now generate the actual store (the checked tree).
   cg->evaluate(firstChild);

   cg->decReferenceCount(firstChild);

   return NULL;
   }
4143
4144
/**
 * @brief Generates inline checks for an ArrayCHK node: verifies that the two
 *        operand objects are arrays compatible for an element-wise copy
 *        (equal classes, or both reference-component arrays), calling a helper
 *        snippet (which can throw) when the check cannot be satisfied inline.
 *
 * A single helper-call snippet is lazily created the first time any failure
 * path needs it and is shared by all subsequent failure branches.
 *
 * @param[in] node : the ArrayCHK node (children: the two array objects)
 * @param[in] cg   : CodeGenerator
 * @return NULL (no result register)
 */
static TR::Register *
VMarrayCheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *obj1Reg = cg->evaluate(node->getFirstChild());
   TR::Register *obj2Reg = cg->evaluate(node->getSecondChild());
   TR::Register *tmp1Reg = cg->allocateRegister();
   TR::Register *tmp2Reg = cg->allocateRegister();

   TR::Instruction *gcPoint;
   TR::Snippet *snippet;
   TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(4, 4, cg->trMemory());;

   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   TR::addDependency(conditions, obj1Reg, TR::RealRegister::NoReg, TR_GPR, cg);
   TR::addDependency(conditions, obj2Reg, TR::RealRegister::NoReg, TR_GPR, cg);
   TR::addDependency(conditions, tmp1Reg, TR::RealRegister::NoReg, TR_GPR, cg);
   TR::addDependency(conditions, tmp2Reg, TR::RealRegister::NoReg, TR_GPR, cg);

   // We have a unique snippet sharing arrangement in this code sequence.
   // It is not generally applicable for other situations.
   // snippetLabel != NULL also serves as "a gcPoint/snippet pair exists" below.
   TR::LabelSymbol *snippetLabel = NULL;

   // Same array, we are done.
   //
   generateCompareInstruction(cg, node, obj1Reg, obj2Reg, true);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);

   // If we know nothing about either object, test object1 first. It has to be an array.
   //
   if (!node->isArrayChkPrimitiveArray1() && !node->isArrayChkReferenceArray1() && !node->isArrayChkPrimitiveArray2() && !node->isArrayChkReferenceArray2())
      {
      generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);

      generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));

      // RAM-array flag clear => object1 is not an array => helper (throws).
      loadConstant32(cg, node, (int32_t) J9AccClassRAMArray, tmp2Reg);
      generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);

      snippetLabel = generateLabelSymbol(cg);
      gcPoint = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, tmp2Reg, snippetLabel);

      snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);
      cg->addSnippet(snippet);
      }

   // One of the object is array. Test equality of two objects' classes.
   //
   generateLoadJ9Class(node, tmp2Reg, obj2Reg, cg);
   generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);

   generateCompareInstruction(cg, node, tmp1Reg, tmp2Reg, true);

   // If either object is known to be of primitive component type,
   // we are done: since both of them have to be of equal class.
   if (node->isArrayChkPrimitiveArray1() || node->isArrayChkPrimitiveArray2())
      {
      if (snippetLabel == NULL)
         {
         snippetLabel = generateLabelSymbol(cg);
         gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
         snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);
         cg->addSnippet(snippet);
         }
      else
         generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
      }
   else
      {
      // We have to take care of the un-equal class situation: both of them must be of reference array
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);

      // Object1 must be of reference component type, otherwise throw exception
      if (!node->isArrayChkReferenceArray1())
         {
         // Loading the Class Pointer -> classDepthAndFlags
         generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);

         generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));

         // We already have classDepth&Flags in tmp1Reg. X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift
         generateLogicalShiftRightImmInstruction(cg, node, tmp1Reg, tmp1Reg, J9AccClassRAMShapeShift);

         // We need to perform a X & OBJECT_HEADER_SHAPE_MASK

         loadConstant32(cg, node, OBJECT_HEADER_SHAPE_MASK, tmp2Reg);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);
         generateCompareImmInstruction(cg, node, tmp2Reg, OBJECT_HEADER_SHAPE_POINTERS);

         if (snippetLabel == NULL)
            {
            snippetLabel = generateLabelSymbol(cg);
            gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
            snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);
            cg->addSnippet(snippet);
            }
         else
            generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
         }

      // Object2 must be of reference component type array, otherwise throw exception
      if (!node->isArrayChkReferenceArray2())
         {
         generateLoadJ9Class(node, tmp1Reg, obj2Reg, cg);
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));

         // First make sure object2 is an array at all ...
         loadConstant32(cg, node, (int32_t) J9AccClassRAMArray, tmp2Reg);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);

         if (snippetLabel == NULL)
            {
            snippetLabel = generateLabelSymbol(cg);
            gcPoint = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, tmp2Reg, snippetLabel);
            snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);
            cg->addSnippet(snippet);
            }
         else
            generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, tmp2Reg, snippetLabel);

         // We already have classDepth&Flags in tmp1Reg. X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift
         generateLogicalShiftRightImmInstruction(cg, node, tmp1Reg, tmp1Reg, J9AccClassRAMShapeShift);

         // We need to perform a X & OBJECT_HEADER_SHAPE_MASK

         loadConstant32(cg, node, OBJECT_HEADER_SHAPE_MASK, tmp2Reg);
         generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);
         generateCompareImmInstruction(cg, node, tmp2Reg, OBJECT_HEADER_SHAPE_POINTERS);
         generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);
         }
      }

   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
   if (snippetLabel != NULL)
      {
      // gcPoint/snippet were created together with snippetLabel, so they are
      // valid exactly when snippetLabel is non-NULL.
      gcPoint->ARM64NeedsGCMap(cg, 0x0);
      snippet->gcMap().setGCRegisterMask(0x0);
      }

   cg->stopUsingRegister(tmp1Reg);
   cg->stopUsingRegister(tmp2Reg);

   cg->decReferenceCount(node->getFirstChild());
   cg->decReferenceCount(node->getSecondChild());
   return NULL;
   }
4289
4290
/**
 * @brief Evaluator for an ArrayCHK node.
 *
 * ArrayCHK is handled entirely by the shared inline array-compatibility
 * check generator; this entry point simply delegates to it.
 *
 * @param[in] node : the ArrayCHK node
 * @param[in] cg   : CodeGenerator
 * @return NULL (no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::ArrayCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *result = VMarrayCheckEvaluator(node, cg);
   return result;
   }
4295
4296
/**
 * @brief Generates the GC write barrier required after a reference arraycopy:
 *        a batch-store (generational) barrier when the write-barrier mode
 *        needs an age check, and/or a card-marking barrier.
 *
 * For the age-check modes other than gc_modron_wrtbar_always, the barrier
 * helper call is skipped when the destination object lies outside
 * [heapBaseForBarrierRange0, heapBaseForBarrierRange0 + heapSizeForBarrierRange0),
 * i.e. when it is not in the tenured area.
 *
 * @param[in] node      : the arraycopy node
 * @param[in] srcObjReg : register holding the source array object (unused in
 *                        the generated sequence; kept for the interface)
 * @param[in] dstObjReg : register holding the destination array object
 * @param[in] cg        : CodeGenerator
 */
void
J9::ARM64::TreeEvaluator::genWrtbarForArrayCopy(TR::Node *node, TR::Register *srcObjReg, TR::Register *dstObjReg, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   bool ageCheckIsNeeded;
   bool cardMarkIsNeeded;
   auto gcMode = TR::Compiler->om.writeBarrierType();

   ageCheckIsNeeded = (gcMode == gc_modron_wrtbar_oldcheck ||
                       gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||
                       gcMode == gc_modron_wrtbar_always);
   cardMarkIsNeeded = (gcMode == gc_modron_wrtbar_cardmark ||
                       gcMode == gc_modron_wrtbar_cardmark_incremental);

   // No barrier of either kind required for this GC mode.
   if (!ageCheckIsNeeded && !cardMarkIsNeeded)
      return;

   if (ageCheckIsNeeded)
      {
      TR::Register *tmp1Reg = NULL;
      TR::Register *tmp2Reg = NULL;
      TR::RegisterDependencyConditions *deps;
      TR::Instruction *gcPoint;
      TR::LabelSymbol *doneLabel;

      // Scratch registers are only needed for the tenured-range test, which
      // gc_modron_wrtbar_always skips (it always calls the helper).
      if (gcMode != gc_modron_wrtbar_always)
         {
         tmp1Reg = cg->allocateRegister();
         tmp2Reg = cg->allocateRegister();
         deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(3, 3, cg->trMemory());
         TR::addDependency(deps, tmp1Reg, TR::RealRegister::NoReg, TR_GPR, cg);
         TR::addDependency(deps, tmp2Reg, TR::RealRegister::NoReg, TR_GPR, cg);
         }
      else
         {
         deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());
         }

      // The batch-store helper takes the destination object in x0.
      TR::addDependency(deps, dstObjReg, TR::RealRegister::x0, TR_GPR, cg);

      TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierBatchStoreSymbolRef(comp->getMethodSymbol());

      if (gcMode != gc_modron_wrtbar_always)
         {
         doneLabel = generateLabelSymbol(cg);

         TR::Register *metaReg = cg->getMethodMetaDataRegister();

         // tmp1Reg = dstObjReg - heapBaseForBarrierRange0
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tmp1Reg,
                                    TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));
         generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, tmp1Reg, dstObjReg, tmp1Reg);

         // if (tmp1Reg >= heapSizeForBarrierRange0), object not in the tenured area
         generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tmp2Reg,
                                    TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));
         generateCompareInstruction(cg, node, tmp1Reg, tmp2Reg, true);
         generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS); // greater or equal (unsigned)
         }

      gcPoint = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(wbRef->getSymbol()->castToMethodSymbol()->getMethodAddress()),
                                          new (cg->trHeapMemory()) TR::RegisterDependencyConditions((uint8_t) 0, 0, cg->trMemory()), wbRef, NULL);
      cg->machine()->setLinkRegisterKilled(true);

      if (gcMode != gc_modron_wrtbar_always)
         generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);

      gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);

      if (tmp1Reg)
         cg->stopUsingRegister(tmp1Reg);
      if (tmp2Reg)
         cg->stopUsingRegister(tmp2Reg);
      }

   // Card marking is only generated when the age-check path was not taken
   // (the cardmark_and_oldcheck mode is covered by the helper above).
   if (!ageCheckIsNeeded && cardMarkIsNeeded)
      {
      if (!comp->getOptions()->realTimeGC())
         {
         TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

         TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();

         TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());
         TR::addDependency(deps, dstObjReg, TR::RealRegister::NoReg, TR_GPR, cg);
         srm->addScratchRegistersToDependencyList(deps);
         VMCardCheckEvaluator(node, dstObjReg, srm, doneLabel, cg);
         generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);
         srm->stopUsingRegisters();
         }
      else
         {
         TR_ASSERT(0, "genWrtbarForArrayCopy card marking not supported for RT");
         }
      }
   }
4392
4393
/**
 * @brief Evaluator for an arraycopy node.
 *
 * Handles only the special case of a reference arraycopy that needs read
 * barriers (Concurrent Scavenge) and no array store check, by calling the VM's
 * referenceArrayCopy C helper; all other cases are delegated to the common OMR
 * evaluator. After the copy, the required write barrier is generated.
 *
 * @param[in] node : the arraycopy node (5 children, see layout below)
 * @param[in] cg   : CodeGenerator
 * @return NULL (no result register)
 */
TR::Register *
J9::ARM64::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
#ifdef OMR_GC_CONCURRENT_SCAVENGER
   /*
    * This version of arraycopyEvaluator is designed to handle the special case where read barriers are
    * needed for field loads. At the time of writing, read barriers are used for Concurrent Scavenge GC.
    * If there are no read barriers then the original implementation of arraycopyEvaluator can be used.
    */
   if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none ||
       !node->chkNoArrayStoreCheckArrayCopy() ||
       !node->isReferenceArrayCopy())
      {
      return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);
      }

   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());

   // child 0 ------ Source array object
   // child 1 ------ Destination array object
   // child 2 ------ Source byte address
   // child 3 ------ Destination byte address
   // child 4 ------ Copy length in bytes
   TR::Node *srcObjNode = node->getFirstChild();
   TR::Node *dstObjNode = node->getSecondChild();
   TR::Node *srcAddrNode = node->getChild(2);
   TR::Node *dstAddrNode = node->getChild(3);
   TR::Node *lengthNode = node->getChild(4);
   TR::Register *srcObjReg, *dstObjReg, *srcAddrReg, *dstAddrReg, *lengthReg;
   // Note: only stopUsingCopyReg5 needs the initializer; 1-4 are assigned
   // unconditionally just below.
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4, stopUsingCopyReg5 = false;

   // stopUsingCopyReg makes a clobberable copy of each child's register when
   // the original cannot be clobbered; the bool says a copy was made.
   stopUsingCopyReg1 = stopUsingCopyReg(srcObjNode, srcObjReg, cg);
   stopUsingCopyReg2 = stopUsingCopyReg(dstObjNode, dstObjReg, cg);
   stopUsingCopyReg3 = stopUsingCopyReg(srcAddrNode, srcAddrReg, cg);
   stopUsingCopyReg4 = stopUsingCopyReg(dstAddrNode, dstAddrReg, cg);

   lengthReg = cg->evaluate(lengthNode);
   if (!cg->canClobberNodesRegister(lengthNode))
      {
      TR::Register *lenCopyReg = cg->allocateRegister();
      generateMovInstruction(cg, lengthNode, lenCopyReg, lengthReg);
      lengthReg = lenCopyReg;
      stopUsingCopyReg5 = true;
      }

   TR::Register *metaReg = cg->getMethodMetaDataRegister();
   TR::Register *x0Reg = cg->allocateRegister();
   TR::Register *tmp1Reg = cg->allocateRegister();
   TR::Register *tmp2Reg = cg->allocateRegister();
   TR::Register *tmp3Reg = cg->allocateRegister();

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(17, 17, cg->trMemory());

   // Pin the helper's arguments to the AArch64 argument registers x0-x5 and
   // kill the remaining caller-volatile registers across the call.
   TR::addDependency(deps, x0Reg, TR::RealRegister::x0, TR_GPR, cg); // copy of metaReg
   TR::addDependency(deps, tmp1Reg, TR::RealRegister::x1, TR_GPR, cg); // copy of srcObjReg
   TR::addDependency(deps, tmp2Reg, TR::RealRegister::x2, TR_GPR, cg); // copy of dstObjReg
   TR::addDependency(deps, srcAddrReg, TR::RealRegister::x3, TR_GPR, cg);
   TR::addDependency(deps, dstAddrReg, TR::RealRegister::x4, TR_GPR, cg);
   TR::addDependency(deps, lengthReg, TR::RealRegister::x5, TR_GPR, cg);
   TR::addDependency(deps, tmp3Reg, TR::RealRegister::x6, TR_GPR, cg); // this is not an argument
   for (int32_t i = (int32_t)TR::RealRegister::x7; i <= (int32_t)TR::RealRegister::x15; i++)
      {
      TR::addDependency(deps, NULL, (TR::RealRegister::RegNum)i, TR_GPR, cg);
      }
   // x16 and x17 are reserved registers
   TR::addDependency(deps, NULL, TR::RealRegister::x18, TR_GPR, cg);

   generateMovInstruction(cg, node, x0Reg, metaReg);
   generateMovInstruction(cg, node, tmp1Reg, srcObjReg);
   generateMovInstruction(cg, node, tmp2Reg, dstObjReg);

   // The C routine expects length measured by slots
   int32_t elementSize = comp->useCompressedPointers() ?
      TR::Compiler->om.sizeofReferenceField() : TR::Compiler->om.sizeofReferenceAddress();
   generateLogicalShiftRightImmInstruction(cg, node, lengthReg, lengthReg, trailingZeroes(elementSize));

   intptr_t *funcdescrptr = (intptr_t *)fej9->getReferenceArrayCopyHelperAddress();
   loadAddressConstant(cg, node, (intptr_t)funcdescrptr, tmp3Reg, NULL, false, TR_ArrayCopyHelper);

   // call the C routine
   TR::Instruction *gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, node, tmp3Reg, deps);
   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);

   TR::TreeEvaluator::genWrtbarForArrayCopy(node, srcObjReg, dstObjReg, cg);

   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);

   cg->decReferenceCount(srcObjNode);
   cg->decReferenceCount(dstObjNode);
   cg->decReferenceCount(srcAddrNode);
   cg->decReferenceCount(dstAddrNode);
   cg->decReferenceCount(lengthNode);

   if (stopUsingCopyReg1)
      cg->stopUsingRegister(srcObjReg);
   if (stopUsingCopyReg2)
      cg->stopUsingRegister(dstObjReg);

   // Registers that were NOT copies still belong to their nodes and must
   // survive stopUsingDepRegs; collect them as "return" registers.
   TR::Register *retRegisters[3];
   int retRegCount = 0;
   if (!stopUsingCopyReg3)
      retRegisters[retRegCount++] = srcAddrReg;
   if (!stopUsingCopyReg4)
      retRegisters[retRegCount++] = dstAddrReg;
   if (!stopUsingCopyReg5)
      retRegisters[retRegCount++] = lengthReg;

   deps->stopUsingDepRegs(cg, retRegCount, retRegisters);

   return NULL;
#else /* OMR_GC_CONCURRENT_SCAVENGER */
   return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);
#endif /* OMR_GC_CONCURRENT_SCAVENGER */
   }
4509
4510
/**
 * @brief Generates a reference arraycopy that performs the array store check:
 *        calls the VM's referenceArrayCopy C helper and throws
 *        ArrayStoreException (via a helper-call snippet) unless the helper
 *        returns -1 (success).
 *
 * @param[in] node : the arraycopy node (5 children, see layout below)
 * @param[in] cg   : CodeGenerator
 */
void
J9::ARM64::TreeEvaluator::genArrayCopyWithArrayStoreCHK(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());

   // child 0 ------ Source array object
   // child 1 ------ Destination array object
   // child 2 ------ Source byte address
   // child 3 ------ Destination byte address
   // child 4 ------ Copy length in bytes
   TR::Node *srcObjNode = node->getFirstChild();
   TR::Node *dstObjNode = node->getSecondChild();
   TR::Node *srcAddrNode = node->getChild(2);
   TR::Node *dstAddrNode = node->getChild(3);
   TR::Node *lengthNode = node->getChild(4);
   TR::Register *srcObjReg, *dstObjReg, *srcAddrReg, *dstAddrReg, *lengthReg;
   // Note: only stopUsingCopyReg5 needs the initializer; 1-4 are assigned
   // unconditionally just below.
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4, stopUsingCopyReg5 = false;

   // stopUsingCopyReg makes a clobberable copy of each child's register when
   // the original cannot be clobbered; the bool says a copy was made.
   stopUsingCopyReg1 = stopUsingCopyReg(srcObjNode, srcObjReg, cg);
   stopUsingCopyReg2 = stopUsingCopyReg(dstObjNode, dstObjReg, cg);
   stopUsingCopyReg3 = stopUsingCopyReg(srcAddrNode, srcAddrReg, cg);
   stopUsingCopyReg4 = stopUsingCopyReg(dstAddrNode, dstAddrReg, cg);

   lengthReg = cg->evaluate(lengthNode);
   if (!cg->canClobberNodesRegister(lengthNode))
      {
      TR::Register *lenCopyReg = cg->allocateRegister();
      generateMovInstruction(cg, lengthNode, lenCopyReg, lengthReg);
      lengthReg = lenCopyReg;
      stopUsingCopyReg5 = true;
      }

   // the C routine expects length measured by slots
   int32_t elementSize = comp->useCompressedPointers() ?
      TR::Compiler->om.sizeofReferenceField() : TR::Compiler->om.sizeofReferenceAddress();
   generateLogicalShiftRightImmInstruction(cg, node, lengthReg, lengthReg, trailingZeroes(elementSize), true);

   // pass vmThread as the first parameter
   TR::Register *x0Reg = cg->allocateRegister();
   TR::Register *metaReg = cg->getMethodMetaDataRegister();
   generateMovInstruction(cg, node, x0Reg, metaReg);

   TR::Register *tmpReg = cg->allocateRegister();

   // I_32 referenceArrayCopy(J9VMThread *vmThread,
   //                         J9IndexableObjectContiguous *srcObject,
   //                         J9IndexableObjectContiguous *destObject,
   //                         U_8 *srcAddress,
   //                         U_8 *destAddress,
   //                         I_32 lengthInSlots)
   // Pin the arguments to x0-x5 and kill the remaining caller-volatile
   // registers across the call.
   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(17, 17, cg->trMemory());
   TR::addDependency(deps, x0Reg, TR::RealRegister::x0, TR_GPR, cg);
   TR::addDependency(deps, srcObjReg, TR::RealRegister::x1, TR_GPR, cg);
   TR::addDependency(deps, dstObjReg, TR::RealRegister::x2, TR_GPR, cg);
   TR::addDependency(deps, srcAddrReg, TR::RealRegister::x3, TR_GPR, cg);
   TR::addDependency(deps, dstAddrReg, TR::RealRegister::x4, TR_GPR, cg);
   TR::addDependency(deps, lengthReg, TR::RealRegister::x5, TR_GPR, cg);
   TR::addDependency(deps, tmpReg, TR::RealRegister::x6, TR_GPR, cg); // this is not an argument
   for (int32_t i = (int32_t)TR::RealRegister::x7; i <= (int32_t)TR::RealRegister::x15; i++)
      {
      TR::addDependency(deps, NULL, (TR::RealRegister::RegNum)i, TR_GPR, cg);
      }
   // x16 and x17 are reserved registers
   TR::addDependency(deps, NULL, TR::RealRegister::x18, TR_GPR, cg);

   intptr_t *funcdescrptr = (intptr_t *)fej9->getReferenceArrayCopyHelperAddress();
   loadAddressConstant(cg, node, (intptr_t)funcdescrptr, tmpReg, NULL, false, TR_ArrayCopyHelper);

   // call the C routine
   TR::Instruction *gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, node, tmpReg, deps);
   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);
   // check return value (-1 on success)
   generateCompareImmInstruction(cg, node, x0Reg, -1); // 32-bit compare
   // throw exception if needed
   TR::SymbolReference *throwSymRef = comp->getSymRefTab()->findOrCreateArrayStoreExceptionSymbolRef(comp->getJittedMethodSymbol());
   TR::LabelSymbol *exceptionSnippetLabel = cg->lookUpSnippet(TR::Snippet::IsHelperCall, throwSymRef);
   if (exceptionSnippetLabel == NULL)
      {
      // No existing throw snippet for this helper in the method yet; create
      // one so it can be shared by later call sites.
      exceptionSnippetLabel = generateLabelSymbol(cg);
      TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, exceptionSnippetLabel, throwSymRef);
      cg->addSnippet(snippet);
      snippet->gcMap().setGCRegisterMask(0xFFFFFFFF);
      }

   gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, exceptionSnippetLabel, TR::CC_NE);
   gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);

   // ARM64HelperCallSnippet generates "bl" instruction
   cg->machine()->setLinkRegisterKilled(true);

   // Registers that were NOT copies still belong to their nodes and must
   // survive stopUsingDepRegs; collect them as "return" registers.
   TR::Register *retRegisters[5];
   int retRegCount = 0;
   if (!stopUsingCopyReg1)
      retRegisters[retRegCount++] = srcObjReg;
   if (!stopUsingCopyReg2)
      retRegisters[retRegCount++] = dstObjReg;
   if (!stopUsingCopyReg3)
      retRegisters[retRegCount++] = srcAddrReg;
   if (!stopUsingCopyReg4)
      retRegisters[retRegCount++] = dstAddrReg;
   if (!stopUsingCopyReg5)
      retRegisters[retRegCount++] = lengthReg;

   deps->stopUsingDepRegs(cg, retRegCount, retRegisters);

   cg->decReferenceCount(srcObjNode);
   cg->decReferenceCount(dstObjNode);
   cg->decReferenceCount(srcAddrNode);
   cg->decReferenceCount(dstAddrNode);
   cg->decReferenceCount(lengthNode);
   }
4622
4623
/**
 * @brief Generates a compare-and-swap sequence on the word at
 *        [objReg + offset] (offset taken from offsetReg when offsetInReg).
 *
 * When the CPU supports LSE (and TR_aarch64DisableLSE is unset) a single
 * cas/casal instruction is used; otherwise a ldxr/stlxr (or ldxr/stxr when
 * casWithoutSync) retry loop is generated. The returned result register holds
 * 1 if the swap succeeded and 0 if it failed.
 *
 * @param[in] node           : the node
 * @param[in] cg             : CodeGenerator
 * @param[in] srm            : scratch register manager (supplies the address register)
 * @param[in] objReg         : base object register
 * @param[in] offsetReg      : offset register (used when offsetInReg)
 * @param[in] offset         : immediate offset (used when !offsetInReg)
 * @param[in] offsetInReg    : selects offsetReg vs immediate offset
 * @param[in] oldVReg        : expected-value register (used when oldValueInReg)
 * @param[in] newVReg        : replacement-value register
 * @param[in] doneLabel      : label to branch to on failure; if NULL a local
 *                             done label is created and handled internally
 * @param[in] oldValue       : expected value as immediate (used when !oldValueInReg)
 * @param[in] oldValueInReg  : selects oldVReg vs immediate oldValue
 * @param[in] is64bit        : true for 64-bit CAS, false for 32-bit
 * @param[in] casWithoutSync : omit acquire/release ordering and the trailing barrier
 * @return the allocated result register (also set as the node's register)
 */
static TR::Register *
genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *srm, TR::Register *objReg, TR::Register *offsetReg, intptr_t offset, bool offsetInReg, TR::Register *oldVReg, TR::Register *newVReg,
      TR::LabelSymbol *doneLabel, int32_t oldValue, bool oldValueInReg, bool is64bit, bool casWithoutSync = false)
   {
   TR::Compilation * comp = cg->comp();
   TR::Register *addrReg = srm->findOrCreateScratchRegister();
   TR::Register *resultReg = cg->allocateRegister();
   TR::InstOpCode::Mnemonic op;


   // Form the absolute element address up front: the exclusive/atomic
   // instructions below take no displacement.
   if (offsetInReg)
      {
      generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, offsetReg); // ldxr/stxr instructions does not take offset
      }
   else
      {
      generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, offset); // ldxr/stxr instructions does not take offset
      }

   const bool createDoneLabel = (doneLabel == NULL);

   static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;
   if (comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE))
      {
      TR_ASSERT_FATAL(oldValueInReg, "Expecting oldValue is in register if LSE is enabled");
      /*
       * movx   resultReg, oldVReg
       * casal  resultReg, newVReg, [addrReg]
       * cmp    resultReg, oldReg
       * cset   resultReg, eq
       */
      generateMovInstruction(cg, node, resultReg, oldVReg, is64bit);
      // casWithoutSync selects the plain cas (no acquire/release semantics).
      op = casWithoutSync ? (is64bit ? TR::InstOpCode::casx : TR::InstOpCode::casw) : (is64bit ? TR::InstOpCode::casalx : TR::InstOpCode::casalw);
      generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);
      // cas leaves the value read from memory in resultReg; equal to the
      // expected value means the swap happened.
      generateCompareInstruction(cg, node, resultReg, oldVReg, is64bit);
      generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
      if (!createDoneLabel)
         {
         // Caller-supplied doneLabel: branch there on failure.
         generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);
         }
      }
   else
      {
      TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
      generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
      if (createDoneLabel)
         {
         doneLabel = generateLabelSymbol(cg);
         }
      /*
       * Generating the same instruction sequence as __sync_bool_compare_and_swap
       *
       * loop:
       *    ldxrx   resultReg, [addrReg]
       *    cmpx    resultReg, oldVReg
       *    bne     done
       *    stlxrx  resultReg, newVReg, [addrReg]
       *    cbnz    resultReg, loop
       *    dmb     ish
       * done:
       *    cset    resultReg, eq
       *
       */
      op = is64bit ? TR::InstOpCode::ldxrx : TR::InstOpCode::ldxrw;
      generateTrg1MemInstruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));
      if (oldValueInReg)
         generateCompareInstruction(cg, node, resultReg, oldVReg, is64bit);
      else
         generateCompareImmInstruction(cg, node, resultReg, oldValue, is64bit);
      if (!createDoneLabel)
         // Pre-set the failure result before the (flag-preserving) branch to
         // the caller's doneLabel.
         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0); // failure
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);

      if (casWithoutSync)
         {
         op = is64bit ? TR::InstOpCode::stxrx : TR::InstOpCode::stxrw;
         }
      else
         {
         op = is64bit ? TR::InstOpCode::stlxrx : TR::InstOpCode::stlxrw;
         }
      generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);
      // stxr/stlxr writes 0 on success; retry the whole sequence on failure.
      generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, resultReg, loopLabel);

      if (!casWithoutSync)
         generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)

      if (createDoneLabel)
         {
         // Local done label: materialize success/failure from the flags set
         // by the compare above (still valid on the fall-through path).
         generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
         generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);
         }
      else
         {
         generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success
         }
      }
   srm->reclaimScratchRegister(addrReg);

   node->setRegister(resultReg);
   return resultReg;
   }
4725
4726
/**
 * @brief Inlines \c sun.misc.Unsafe.compareAndSwapInt/Long as an atomic CAS sequence.
 *
 * Children of the call node: [0] unused receiver, [1] object, [2] offset,
 * [3] expected old value, [4] new value.
 *
 * @param[in] node:   the call node
 * @param[in] cg:     the code generator
 * @param[in] isLong: true for the 64-bit (Long) variant, false for the 32-bit (Int) variant
 * @return register holding the boolean result of the CAS
 */
static TR::Register *
VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)
   {
   TR::Compilation * comp = cg->comp();
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();
   TR::Node *thirdChild = node->getChild(2);
   TR::Node *fourthChild = node->getChild(3);
   TR::Node *fifthChild = node->getChild(4);
   TR::Register *offsetReg = NULL;
   TR::Register *oldVReg = NULL;
   TR::Register *newVReg = NULL;
   TR::Register *resultReg = NULL;
   TR::Register *objReg = cg->evaluate(secondChild);
   TR::RegisterDependencyConditions *conditions = NULL;
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   intptr_t oldValue = 0;
   bool oldValueInReg = true;
   // Initialized so the value forwarded to genCAS is never indeterminate,
   // even when the offset is not a compile-time constant (offsetInReg case).
   intptr_t offset = 0;
   bool offsetInReg = true;

   // Use the constant offset directly when it fits an unsigned 12-bit immediate.
   if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)
      {
      offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();
      offsetInReg = !constantIsUnsignedImm12(offset);
      }
   if (offsetInReg)
      offsetReg = cg->evaluate(thirdChild);

   static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;
   static const bool useLSE = comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE);
   // Obtain values to be checked for, and swapped in.
   // A constant expected value is only usable with the LL/SC sequence (not LSE CAS),
   // and only if it fits the compare-immediate encoding.
   if ((!useLSE) && fourthChild->getOpCode().isLoadConst() && fourthChild->getRegister() == NULL)
      {
      if (isLong)
         oldValue = fourthChild->getLongInt();
      else
         oldValue = fourthChild->getInt();
      if (constantIsUnsignedImm12(oldValue))
         oldValueInReg = false;
      }
   if (oldValueInReg)
      oldVReg = cg->evaluate(fourthChild);
   newVReg = cg->evaluate(fifthChild);

   // Determine if synchronization is needed: the weakCompareAndSet family
   // permits a CAS without memory-ordering barriers.
   bool casWithoutSync = false;
   TR_OpaqueMethodBlock *caller = node->getOwningMethod();
   if (caller)
      {
      TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());
      TR_ResolvedMethod *m = fej9->createResolvedMethod(cg->trMemory(), caller, node->getSymbolReference()->getOwningMethod(comp));
      if ((m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicInteger_weakCompareAndSet)
            || (m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicLong_weakCompareAndSet)
            || (m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicReference_weakCompareAndSet))
         {
         casWithoutSync = true;
         }
      }
   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();

   // Emit the compare-and-swap loop (or LSE CAS) itself.
   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oldVReg, newVReg, NULL, oldValue, oldValueInReg, isLong, casWithoutSync);

   // obj + result + newValue, plus the optional offset/oldValue registers and scratches.
   const int regnum = 3 + (oldValueInReg ? 1 : 0) + (offsetInReg ? 1 : 0) + srm->numAvailableRegisters();
   conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, regnum, cg->trMemory());
   conditions->addPostCondition(objReg, TR::RealRegister::NoReg);
   if (offsetInReg)
      conditions->addPostCondition(offsetReg, TR::RealRegister::NoReg);
   conditions->addPostCondition(resultReg, TR::RealRegister::NoReg);
   conditions->addPostCondition(newVReg, TR::RealRegister::NoReg);

   if (oldValueInReg)
      conditions->addPostCondition(oldVReg, TR::RealRegister::NoReg);

   srm->addScratchRegistersToDependencyList(conditions);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);

   srm->stopUsingRegisters();

   // First child (the Unsafe receiver) is never evaluated; children whose
   // constants were folded are decremented recursively.
   cg->recursivelyDecReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   if (offsetInReg)
      cg->decReferenceCount(thirdChild);
   else
      cg->recursivelyDecReferenceCount(thirdChild);

   if (oldValueInReg)
      cg->decReferenceCount(fourthChild);
   else
      cg->recursivelyDecReferenceCount(fourthChild);
   cg->decReferenceCount(fifthChild);
   return resultReg;
   }
/**
 * @brief Inlines \c sun.misc.Unsafe.compareAndSwapObject as an atomic CAS sequence,
 *        including read-barrier (concurrent scavenger) and write-barrier handling.
 *
 * @param[in] node: the call node
 * @param[in] cg:   the code generator
 * @return register holding the boolean result of the CAS
 */
static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR::Register *objReg;
   TR::Register *offsetReg = NULL; // initialized: only valid when offsetInReg is true
   TR::Register *resultReg;
   TR::Node *firstChild, *secondChild, *thirdChild, *fourthChild, *fifthChild;
   TR::LabelSymbol *doneLabel;
   bool offsetInReg = true;
   intptr_t offset = 0; // initialized: only meaningful when offsetInReg is false

   auto gcMode = TR::Compiler->om.writeBarrierType();
   const bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
   const bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental);

   /**
    * icall  jdk/internal/misc/Unsafe.compareAndSetObject
    *   aload  java/lang/invoke/VarHandle._unsafe
    *   aload  (objNode)
    *   lconst (offset)
    *   aload  (oldValueNode)
    *   aload  (newValueNode)
    */
   firstChild = node->getFirstChild();
   secondChild = node->getSecondChild();
   thirdChild = node->getChild(2);
   fourthChild = node->getChild(3);
   fifthChild = node->getChild(4);

   objReg = cg->evaluate(secondChild);

   // Use the constant offset directly when it fits an unsigned 12-bit immediate.
   if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)
      {
      offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();
      offsetInReg = !constantIsUnsignedImm12(offset);
      }
   if (offsetInReg)
      offsetReg = cg->evaluate(thirdChild);

   TR::Register *oldVReg = cg->evaluate(fourthChild);
   TR::Register *newVReg = cg->evaluate(fifthChild);
   doneLabel = generateLabelSymbol(cg);

   TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();

#ifdef OMR_GC_CONCURRENT_SCAVENGER
   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
      {
      // Pre-load the field and, if it points into the evacuate region, call the
      // software read-barrier helper so the CAS operates on an up-to-date value.
      TR::Register *tempReg = srm->findOrCreateScratchRegister();
      TR::Register *locationReg = cg->allocateRegister();
      TR::Register *evacuateReg = srm->findOrCreateScratchRegister();
      TR::Register *x0Reg = cg->allocateRegister();
      TR::Register *vmThreadReg = cg->getMethodMetaDataRegister();

      TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *endLabel = generateLabelSymbol(cg);
      startLabel->setStartInternalControlFlow();
      endLabel->setEndInternalControlFlow();

      TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());
      deps->addPostCondition(tempReg, TR::RealRegister::NoReg);
      deps->addPostCondition(locationReg, TR::RealRegister::x1); // TR_softwareReadBarrier helper needs this in x1.
      deps->addPostCondition(evacuateReg, TR::RealRegister::NoReg);
      deps->addPostCondition(x0Reg, TR::RealRegister::x0);

      // locationReg = address of the field being CASed.
      if (offsetInReg)
         {
         generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, locationReg, objReg, offsetReg);
         }
      else
         {
         generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, locationReg, objReg, offset);
         }
      TR::InstOpCode::Mnemonic loadOp = comp->useCompressedPointers() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx;

      auto faultingInstruction = generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));

      // InstructionDelegate::setupImplicitNullPointerException checks if the memory reference uses the nullcheck reference register.
      // In this case, the nullcheck reference register is objReg, but the memory reference does not use it,
      // thus we need to explicitly set the implicit exception point here.
      if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck())
         {
         if (cg->getImplicitExceptionPoint() == NULL)
            {
            if (comp->getOption(TR_TraceCG))
               {
               traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, secondChild);
               }
            cg->setImplicitExceptionPoint(faultingInstruction);
            }
         }

      if (node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())
         TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);

      // Skip the helper when the loaded value lies outside [evacuateBase, evacuateTop].
      generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,
            TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateBaseAddressOffset()));
      generateCompareInstruction(cg, node, tempReg, evacuateReg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_LT);

      generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,
            TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateTopAddressOffset()));
      generateCompareInstruction(cg, node, tempReg, evacuateReg, true);
      generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_GT);

      // TR_softwareReadBarrier helper expects the vmThread in x0.
      generateMovInstruction(cg, node, x0Reg, vmThreadReg);

      TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_softwareReadBarrier);
      generateImmSymInstruction(cg, TR::InstOpCode::bl, node, (uintptr_t)helperSym->getMethodAddress(), deps, helperSym, NULL);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps);

      srm->reclaimScratchRegister(tempReg);
      srm->reclaimScratchRegister(evacuateReg);

      cg->stopUsingRegister(locationReg);
      cg->stopUsingRegister(x0Reg);

      cg->machine()->setLinkRegisterKilled(true);
      }
#endif //OMR_GC_CONCURRENT_SCAVENGER

   // With compressed refs and a non-zero shift, CAS must operate on the
   // compressed (shifted) forms of the expected and new values.
   TR::Register *oReg = oldVReg;
   TR::Register *nReg = newVReg;
   bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);
   if (useShiftedOffsets)
      {
      if (!fourthChild->isNull())
         {
         oReg = srm->findOrCreateScratchRegister();
         generateLogicalShiftRightImmInstruction(cg, node, oReg, oldVReg, TR::Compiler->om.compressedReferenceShiftOffset());
         }
      if (!fifthChild->isNull())
         {
         nReg = srm->findOrCreateScratchRegister();
         generateLogicalShiftRightImmInstruction(cg, node, nReg, newVReg, TR::Compiler->om.compressedReferenceShiftOffset());
         }
      }
   resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oReg, nReg, doneLabel, 0, true, !comp->useCompressedPointers());

   if (useShiftedOffsets)
      {
      srm->reclaimScratchRegister(oReg);
      srm->reclaimScratchRegister(nReg);
      }

   const bool skipWrtBar = (gcMode == gc_modron_wrtbar_none) || (fifthChild->isNull() && (gcMode != gc_modron_wrtbar_always));
   if (!skipWrtBar)
      {
      TR::Register *wrtBarSrcReg = newVReg;

      if (objReg == wrtBarSrcReg)
         {
         // The write barrier helper requires that dstObject and srcObject are in different registers.
         // Because wrtBarSrcReg will be dead as soon as writeBarrier is done (which is not a GC safe point),
         // it is not required to be a collected reference register.
         wrtBarSrcReg = srm->findOrCreateScratchRegister();
         generateMovInstruction(cg, node, wrtBarSrcReg, objReg, true);
         }

      const bool srcNonNull = fifthChild->isNonNull();

      if (doWrtBar) // generational or gencon
         {
         TR::SymbolReference *wbRef = (gcMode == gc_modron_wrtbar_always) ?
            comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef() :
            // use jitWriteBarrierStoreGenerational for both generational and gencon, because we inline card marking.
            comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();

         if (!srcNonNull)
            {
            // If object is NULL, done
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);
            generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, wrtBarSrcReg, doneLabel);
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);
            }
         // Inlines cardmarking and remembered bit check for gencon.
         VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, wrtBarSrcReg, srm, doneLabel, wbRef, cg);

         }
      else if (doCrdMrk)
         {
         TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
         if (!srcNonNull)
            {
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);
            generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, wrtBarSrcReg, doneLabel);
            cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);
            }
         VMCardCheckEvaluator(node, objReg, srm, doneLabel, cg);
         }

      TR_ARM64ScratchRegisterDependencyConditions scratchDeps;
      scratchDeps.addDependency(cg, objReg, doWrtBar ? TR::RealRegister::x0 : TR::RealRegister::NoReg);
      scratchDeps.addDependency(cg, wrtBarSrcReg, doWrtBar ? TR::RealRegister::x1 : TR::RealRegister::NoReg);
      if (offsetInReg)
         {
         scratchDeps.addDependency(cg, offsetReg, TR::RealRegister::NoReg);
         }
      scratchDeps.unionDependency(cg, oldVReg, TR::RealRegister::NoReg);
      scratchDeps.unionDependency(cg, newVReg, TR::RealRegister::NoReg);
      scratchDeps.addDependency(cg, resultReg, TR::RealRegister::NoReg);
      scratchDeps.addScratchRegisters(cg, srm);
      TR::RegisterDependencyConditions *conditions = TR_ARM64ScratchRegisterDependencyConditions::createDependencyConditions(cg, NULL, &scratchDeps);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);
      }
   else
      {
      TR_ARM64ScratchRegisterDependencyConditions scratchDeps;
      scratchDeps.addDependency(cg, objReg, TR::RealRegister::NoReg);
      if (offsetInReg)
         {
         scratchDeps.addDependency(cg, offsetReg, TR::RealRegister::NoReg);
         }
      scratchDeps.unionDependency(cg, oldVReg, TR::RealRegister::NoReg);
      scratchDeps.unionDependency(cg, newVReg, TR::RealRegister::NoReg);
      scratchDeps.addDependency(cg, resultReg, TR::RealRegister::NoReg);
      scratchDeps.addScratchRegisters(cg, srm);
      TR::RegisterDependencyConditions *conditions = TR_ARM64ScratchRegisterDependencyConditions::createDependencyConditions(cg, NULL, &scratchDeps);

      generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);
      }

   srm->stopUsingRegisters();

   // First child (the Unsafe receiver) is never evaluated; a folded constant
   // offset child is decremented recursively.
   cg->recursivelyDecReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   if (offsetInReg)
      {
      cg->decReferenceCount(thirdChild);
      }
   else
      {
      cg->recursivelyDecReferenceCount(thirdChild);
      }

   cg->decReferenceCount(fourthChild);
   cg->decReferenceCount(fifthChild);

   return resultReg;
   }
bool
5068
J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&resultReg)
5069
{
5070
TR::CodeGenerator *cg = self();
5071
TR::MethodSymbol * methodSymbol = node->getSymbol()->getMethodSymbol();
5072
5073
if (OMR::CodeGeneratorConnector::inlineDirectCall(node, resultReg))
5074
{
5075
return true;
5076
}
5077
if (methodSymbol)
5078
{
5079
switch (methodSymbol->getRecognizedMethod())
5080
{
5081
case TR::java_nio_Bits_keepAlive:
5082
case TR::java_lang_ref_Reference_reachabilityFence:
5083
{
5084
5085
// The only purpose of these functions is to prevent an otherwise
5086
// unreachable object from being garbage collected, because we don't
5087
// want its finalizer to be called too early. There's no need to
5088
// generate a full-blown call site just for this purpose.
5089
5090
TR::Node *paramNode = node->getFirstChild();
5091
TR::Register *paramReg = cg->evaluate(paramNode);
5092
5093
// In theory, a value could be kept alive on the stack, rather than in
5094
// a register. It is unfortunate that the following deps will force
5095
// the value into a register for no reason. However, in many common
5096
// cases, this label will have no effect on the generated code, and
5097
// will only affect GC maps.
5098
//
5099
TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());
5100
TR::addDependency(conditions, paramReg, TR::RealRegister::NoReg, TR_GPR, cg);
5101
TR::LabelSymbol *label = generateLabelSymbol(cg);
5102
generateLabelInstruction(cg, TR::InstOpCode::label, node, label, conditions);
5103
cg->decReferenceCount(paramNode);
5104
resultReg = NULL;
5105
return true;
5106
}
5107
5108
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
5109
{
5110
// In Java9 and newer this can be either the jdk.internal JNI method or the sun.misc Java wrapper.
5111
// In Java8 it will be sun.misc which will contain the JNI directly.
5112
// We only want to inline the JNI methods, so add an explicit test for isNative().
5113
if (!methodSymbol->isNative())
5114
break;
5115
5116
if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
5117
{
5118
resultReg = VMinlineCompareAndSwap(node, cg, false);
5119
return true;
5120
}
5121
break;
5122
}
5123
5124
case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
5125
{
5126
// As above, we only want to inline the JNI methods, so add an explicit test for isNative()
5127
if (!methodSymbol->isNative())
5128
break;
5129
5130
if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
5131
{
5132
resultReg = VMinlineCompareAndSwap(node, cg, true);
5133
return true;
5134
}
5135
break;
5136
}
5137
5138
case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
5139
{
5140
if (!methodSymbol->isNative())
5141
break;
5142
5143
if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())
5144
{
5145
resultReg = VMinlineCompareAndSwapObject(node, cg);
5146
return true;
5147
}
5148
break;
5149
}
5150
default:
5151
break;
5152
}
5153
}
5154
5155
// Nothing was done
5156
resultReg = NULL;
5157
return false;
5158
}
5159
5160
/**
 * @brief Generates the instruction (if any) that masks flag bits out of a loaded
 *        VFT (class pointer) field.
 *
 * @param[in] cg:     the code generator
 * @param[in] node:   the node the instruction is generated for
 * @param[in] dstReg: register receiving the masked value
 * @param[in] srcReg: register holding the raw VFT field
 * @param[in] preced: preceding instruction to append to (may be NULL)
 * @return the last generated instruction, or \p preced when no masking is required
 */
TR::Instruction *J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(TR::CodeGenerator *cg, TR::Node *node, TR::Register *dstReg, TR::Register *srcReg, TR::Instruction *preced)
   {
   uintptr_t mask = TR::Compiler->om.maskOfObjectVftField();
   bool isCompressed = TR::Compiler->om.compressObjectReferences();

   if (~mask == 0)
      {
      // no mask instruction required
      return preced;
      }
   else if (~mask == 0xFF)
      {
      TR::InstOpCode::Mnemonic op = isCompressed ? TR::InstOpCode::andimmw : TR::InstOpCode::andimmx;
      uint32_t imm = isCompressed ? 0x617 : 0xE37; // logical-immediate encoding for ~0xFF
      return generateLogicalImmInstruction(cg, op, node, dstReg, srcReg, !isCompressed, imm, preced);
      }
   else
      {
      TR_UNIMPLEMENTED();
      return NULL; // unreachable; keeps all control paths returning a value
      }
   }
/**
 * @brief In-place variant of VFT masking: masks flag bits out of \p reg itself.
 *
 * @param[in] cg:     the code generator
 * @param[in] node:   the node the instruction is generated for
 * @param[in] reg:    register holding the raw VFT field; receives the masked value
 * @param[in] preced: preceding instruction to append to (may be NULL)
 * @return the last generated instruction, or \p preced when no masking is required
 */
TR::Instruction *J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(TR::CodeGenerator *cg, TR::Node *node, TR::Register *reg, TR::Instruction *preced)
   {
   // Delegate to the two-register form with source and destination aliased.
   return J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(cg, node, reg, reg, preced);
   }
/**
 * @brief Evaluates a \c loadaddr node: materializes the address described by the
 *        node's symbol reference into a register.
 *
 * @param[in] node: the loadaddr node
 * @param[in] cg:   the code generator
 * @return register holding the computed address
 */
TR::Register *
J9::ARM64::TreeEvaluator::loadaddrEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *resultReg;
   TR::Symbol *sym = node->getSymbol();
   TR::MemoryReference *mref = TR::MemoryReference::createWithSymRef(cg, node, node->getSymbolReference());

   if (mref->getUnresolvedSnippet() != NULL)
      {
      // Address is not known yet; the unresolved snippet patches the add.
      resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();
      if (mref->useIndexedForm())
         {
         TR_ASSERT(false, "Unresolved indexed snippet is not supported");
         }
      else
         {
         generateTrg1MemInstruction(cg, TR::InstOpCode::addx, node, resultReg, mref);
         }
      }
   else
      {
      if (mref->useIndexedForm())
         {
         // address = base + index
         resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();
         generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, resultReg, mref->getBaseRegister(), mref->getIndexRegister());
         }
      else
         {
         int32_t offset = mref->getOffset();
         if (mref->hasDelayedOffset() || offset != 0)
            {
            resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();
            if (mref->hasDelayedOffset())
               {
               // Offset is resolved later; emit the add through the memory reference.
               generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, resultReg, mref);
               }
            else
               {
               if (offset >= 0 && constantIsUnsignedImm12(offset))
                  {
                  // Offset fits the add-immediate encoding.
                  generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, resultReg, mref->getBaseRegister(), offset);
                  }
               else
                  {
                  // Materialize the offset, then add it to the base.
                  loadConstant64(cg, node, offset, resultReg);
                  generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, resultReg, mref->getBaseRegister(), resultReg);
                  }
               }
            }
         else
            {
            // Zero offset: reuse the base register directly, except for the
            // vmThread register which must never be handed out as a result.
            resultReg = mref->getBaseRegister();
            if (resultReg == cg->getMethodMetaDataRegister())
               {
               resultReg = cg->allocateRegister();
               generateMovInstruction(cg, node, resultReg, mref->getBaseRegister());
               }
            }
         }
      }
   node->setRegister(resultReg);
   mref->decNodeReferenceCounts(cg);
   return resultReg;
   }
/**
 * @brief Shared implementation of \c frem / \c drem: calls the floating-point
 *        remainder runtime helper with full volatile-register dependencies.
 *
 * @param[in] node:              the frem/drem node
 * @param[in] cg:                the code generator
 * @param[in] isSinglePrecision: true for float remainder, false for double
 * @return register holding the remainder
 */
TR::Register *J9::ARM64::TreeEvaluator::fremHelper(TR::Node *node, TR::CodeGenerator *cg, bool isSinglePrecision)
   {
   TR::Register *trgReg = isSinglePrecision ? cg->allocateSinglePrecisionRegister() : cg->allocateRegister(TR_FPR);
   TR::Node *dividendNode = node->getFirstChild();
   TR::Node *divisorNode = node->getSecondChild();
   TR::Register *dividendReg = cg->evaluate(dividendNode);
   TR::Register *divisorReg = cg->evaluate(divisorNode);

   // The helper call may clobber its argument registers, so copy any operand
   // register we are not allowed to clobber.
   auto copyIfNotClobberable = [cg, node, isSinglePrecision](TR::Node *child, TR::Register *srcReg) -> TR::Register *
      {
      if (cg->canClobberNodesRegister(child))
         return srcReg;
      TR::Register *copyReg = isSinglePrecision ? cg->allocateSinglePrecisionRegister() : cg->allocateRegister(TR_FPR);
      generateTrg1Src1Instruction(cg, isSinglePrecision ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd, node, copyReg, srcReg);
      return copyReg;
      };
   dividendReg = copyIfNotClobberable(dividendNode, dividendReg);
   divisorReg = copyIfNotClobberable(divisorNode, divisorReg);

   // The remainder helper follows the system linkage, so every volatile
   // register must appear in the dependency conditions. Count them first.
   TR::Linkage *linkage = cg->createLinkage(TR_System);
   auto linkageProp = linkage->getProperties();
   int volatileCount = 0;
   for (int32_t regIndex = TR::RealRegister::FirstGPR; regIndex <= TR::RealRegister::LastAssignableFPR; regIndex++)
      {
      if ((linkageProp._registerFlags[regIndex] != ARM64_Reserved) && (linkageProp._registerFlags[regIndex] != Preserved))
         {
         volatileCount++;
         }
      }

   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(volatileCount, volatileCount, cg->trMemory());

   // Add every volatile GPR, then every volatile FPR except v0 and v1
   // (those two carry the arguments/result and are handled below).
   for (int32_t regIndex = TR::RealRegister::FirstGPR; regIndex <= TR::RealRegister::LastAssignableGPR; regIndex++)
      {
      if ((linkageProp._registerFlags[regIndex] != ARM64_Reserved) && (linkageProp._registerFlags[regIndex] != Preserved))
         {
         auto placeholderReg = cg->allocateRegister(TR_GPR);
         TR::addDependency(deps, placeholderReg, static_cast<TR::RealRegister::RegNum>(regIndex), TR_GPR, cg);
         cg->stopUsingRegister(placeholderReg);
         }
      }
   for (int32_t regIndex = TR::RealRegister::v2; regIndex <= TR::RealRegister::LastAssignableFPR; regIndex++)
      {
      if ((linkageProp._registerFlags[regIndex] != ARM64_Reserved) && (linkageProp._registerFlags[regIndex] != Preserved))
         {
         auto placeholderReg = cg->allocateRegister(TR_FPR);
         TR::addDependency(deps, placeholderReg, static_cast<TR::RealRegister::RegNum>(regIndex), TR_FPR, cg);
         cg->stopUsingRegister(placeholderReg);
         }
      }

   // v0: dividend in, result out. v1: divisor in, scratch out.
   deps->addPreCondition(dividendReg, TR::RealRegister::v0);
   deps->addPostCondition(trgReg, TR::RealRegister::v0);
   deps->addPreCondition(divisorReg, TR::RealRegister::v1);
   auto placeholderReg = cg->allocateRegister(TR_FPR);
   deps->addPostCondition(placeholderReg, TR::RealRegister::v1);
   cg->stopUsingRegister(placeholderReg);

   TR::SymbolReference *helper = cg->symRefTab()->findOrCreateRuntimeHelper(isSinglePrecision ? TR_ARM64floatRemainder : TR_ARM64doubleRemainder,
                                                                           false, false, false);
   generateImmSymInstruction(cg, TR::InstOpCode::bl, node,
                             (uintptr_t)helper->getMethodAddress(),
                             deps, helper, NULL);
   cg->stopUsingRegister(dividendReg);
   cg->stopUsingRegister(divisorReg);
   cg->decReferenceCount(dividendNode);
   cg->decReferenceCount(divisorNode);
   node->setRegister(trgReg);
   cg->machine()->setLinkRegisterKilled(true);

   return trgReg;
   }
/**
 * @brief Evaluates a \c frem node (single-precision floating-point remainder).
 */
TR::Register *J9::ARM64::TreeEvaluator::fremEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Delegate to the shared helper in single-precision mode.
   return fremHelper(node, cg, true);
   }
/**
 * @brief Evaluates a \c drem node (double-precision floating-point remainder).
 */
TR::Register *J9::ARM64::TreeEvaluator::dremEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Delegate to the shared helper in double-precision mode.
   return fremHelper(node, cg, false);
   }
/**
 * @brief Evaluates a \c NULLCHK node (no resolution required).
 */
TR::Register *
J9::ARM64::TreeEvaluator::NULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Shared implementation; false = no symbol resolution needed.
   return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, false, cg);
   }
/**
 * @brief Evaluates a \c ResolveAndNULLCHK node (null check plus symbol resolution).
 */
TR::Register *
J9::ARM64::TreeEvaluator::resolveAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Shared implementation; true = symbol resolution is also required.
   return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, true, cg);
   }
TR::Register *
5355
J9::ARM64::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(TR::Node *node, bool needsResolve, TR::CodeGenerator *cg)
5356
{
5357
// NOTE:
5358
// If no code is generated for the null check, just evaluate the
5359
// child and decrement its use count UNLESS the child is a pass-through node
5360
// in which case some kind of explicit test or indirect load must be generated
5361
// to force the null check at this point.
5362
TR::Node * const firstChild = node->getFirstChild();
5363
TR::ILOpCode &opCode = firstChild->getOpCode();
5364
TR::Node *reference = NULL;
5365
TR::Compilation *comp = cg->comp();
5366
TR::Node *n = firstChild;
5367
bool hasCompressedPointers = false;
5368
5369
// NULLCHK has a special case with compressed pointers.
5370
// In the scenario where the first child is TR::l2a, the
5371
// node to be null checked is not the iloadi, but its child.
5372
// i.e. aload, aRegLoad, etc.
5373
if (comp->useCompressedPointers() && firstChild->getOpCodeValue() == TR::l2a)
5374
{
5375
// pattern match the sequence under the l2a
5376
// NULLCHK NULLCHK <- node
5377
// aloadi f l2a
5378
// aload O lshl
5379
// iu2l
5380
// iloadi/irdbari f <- n
5381
// aload O <- reference
5382
// iconst shftKonst
5383
//
5384
hasCompressedPointers = true;
5385
TR::ILOpCodes loadOp = cg->comp()->il.opCodeForIndirectLoad(TR::Int32);
5386
TR::ILOpCodes rdbarOp = cg->comp()->il.opCodeForIndirectReadBarrier(TR::Int32);
5387
while ((n->getOpCodeValue() != loadOp) && (n->getOpCodeValue() != rdbarOp))
5388
n = n->getFirstChild();
5389
reference = n->getFirstChild();
5390
}
5391
else
5392
reference = node->getNullCheckReference();
5393
5394
// Skip the NULLCHK for TR::loadaddr nodes.
5395
//
5396
if (cg->getHasResumableTrapHandler()
5397
&& reference->getOpCodeValue() == TR::loadaddr)
5398
{
5399
cg->evaluate(firstChild);
5400
cg->decReferenceCount(firstChild);
5401
return NULL;
5402
}
5403
5404
bool needExplicitCheck = true;
5405
bool needLateEvaluation = true;
5406
bool firstChildEvaluated = false;
5407
5408
// Add the explicit check after this instruction
5409
//
5410
TR::Instruction *appendTo = NULL;
5411
5412
// determine if an explicit check is needed
5413
if (cg->getHasResumableTrapHandler())
5414
{
5415
if (n->getOpCode().isLoadVar()
5416
|| (opCode.getOpCodeValue() == TR::l2i))
5417
{
5418
TR::SymbolReference *symRef = NULL;
5419
5420
if (opCode.getOpCodeValue() == TR::l2i)
5421
symRef = n->getFirstChild()->getSymbolReference();
5422
else
5423
symRef = n->getSymbolReference();
5424
5425
// We prefer to generate an explicit NULLCHK vs an implicit one
5426
// to prevent potential costs of a cache miss on an unnecessary load.
5427
if (n->getReferenceCount() == 1
5428
&& !n->getSymbolReference()->isUnresolved())
5429
{
5430
// If the child is only used here, we don't need to evaluate it
5431
// since all we need is the grandchild which will be evaluated by
5432
// the generation of the explicit check below.
5433
needLateEvaluation = false;
5434
5435
// at this point, n is the raw iloadi (created by lowerTrees) and
5436
// reference is the aload of the object. node->getFirstChild is the
5437
// l2a sequence; as a result, n's refCount will always be 1.
5438
//
5439
if (hasCompressedPointers
5440
&& node->getFirstChild()->getReferenceCount() >= 2)
5441
{
5442
// In this case, the result of load is used in other places, so we need to evaluate it here
5443
//
5444
needLateEvaluation = true;
5445
5446
// Check if offset from a NULL reference will fall into the inaccessible bytes,
5447
// resulting in an implicit trap being raised.
5448
if (symRef
5449
&& ((symRef->getSymbol()->getOffset() + symRef->getOffset()) < cg->getNumberBytesReadInaccessible()))
5450
{
5451
needExplicitCheck = false;
5452
}
5453
}
5454
}
5455
5456
// Check if offset from a NULL reference will fall into the inaccessible bytes,
5457
// resulting in an implicit trap being raised.
5458
else if (symRef
5459
&& ((symRef->getSymbol()->getOffset() + symRef->getOffset()) < cg->getNumberBytesReadInaccessible()))
5460
{
5461
needExplicitCheck = false;
5462
5463
// If the child is an arraylength which has been reduced to an iiload,
5464
// and is only going to be used immediately in a BNDCHK, combine the checks.
5465
//
5466
TR::TreeTop *nextTreeTop = cg->getCurrentEvaluationTreeTop()->getNextTreeTop();
5467
if (n->getReferenceCount() == 2 && nextTreeTop)
5468
{
5469
TR::Node *nextTopNode = nextTreeTop->getNode();
5470
5471
if (nextTopNode)
5472
{
5473
if (nextTopNode->getOpCode().isBndCheck())
5474
{
5475
if ((nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getChild(2) == n))
5476
|| (!nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getFirstChild() == n)))
5477
{
5478
needLateEvaluation = false;
5479
nextTopNode->setHasFoldedImplicitNULLCHK(true);
5480
if (comp->getOption(TR_TraceCG))
5481
{
5482
traceMsg(comp, "\nMerging NULLCHK [%p] and BNDCHK [%p] of load child [%p]\n", node, nextTopNode, n);
5483
}
5484
}
5485
}
5486
else if (nextTopNode->getOpCode().isIf()
5487
&& nextTopNode->isNonoverriddenGuard()
5488
&& nextTopNode->getFirstChild() == firstChild)
5489
{
5490
needLateEvaluation = false;
5491
needExplicitCheck = true;
5492
}
5493
}
5494
}
5495
}
5496
}
5497
else if (opCode.isStore())
5498
{
5499
TR::SymbolReference *symRef = n->getSymbolReference();
5500
if (n->getOpCode().hasSymbolReference()
5501
&& (symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesWriteInaccessible()))
5502
{
5503
needExplicitCheck = false;
5504
}
5505
}
5506
else if (opCode.isCall()
5507
&& opCode.isIndirect()
5508
&& (cg->getNumberBytesReadInaccessible() > TR::Compiler->om.offsetOfObjectVftField()))
5509
{
5510
needExplicitCheck = false;
5511
}
5512
else if (opCode.getOpCodeValue() == TR::iushr
5513
&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))
5514
{
5515
// If the child is an arraylength which has been reduced to an iushr,
5516
// we must evaluate it here so that the implicit exception will occur
5517
// at the right point in the program.
5518
//
5519
// This can occur when the array length is represented in bytes, not elements.
5520
// The optimizer must intervene for this to happen.
5521
//
5522
cg->evaluate(n->getFirstChild());
5523
needExplicitCheck = false;
5524
}
5525
else if (opCode.getOpCodeValue() == TR::monent
5526
|| opCode.getOpCodeValue() == TR::monexit)
5527
{
5528
// The child may generate inline code that provides an implicit null check
5529
// but we won't know until the child is evaluated.
5530
//
5531
needLateEvaluation = false;
5532
cg->evaluate(reference);
5533
appendTo = cg->getAppendInstruction();
5534
cg->evaluate(firstChild);
5535
firstChildEvaluated = true;
5536
if (cg->getImplicitExceptionPoint()
5537
&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))
5538
{
5539
needExplicitCheck = false;
5540
}
5541
}
5542
}
5543
5544
// Generate the code for the null check
5545
//
5546
if(needExplicitCheck)
5547
{
5548
TR::Register * targetRegister = NULL;
5549
/* TODO: Resolution */
5550
/* if(needsResolve) ... */
5551
5552
TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
5553
TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), NULL);
5554
cg->addSnippet(snippet);
5555
TR::Register *referenceReg = cg->evaluate(reference);
5556
TR::Instruction *cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, referenceReg, snippetLabel, appendTo);
5557
cbzInstruction->setNeedsGCMap(0xffffffff);
5558
snippet->gcMap().setGCRegisterMask(0xffffffff);
5559
// ARM64HelperCallSnippet generates "bl" instruction
5560
cg->machine()->setLinkRegisterKilled(true);
5561
}
5562
5563
// If we need to evaluate the child, do so. Otherwise, if we have
5564
// evaluated the reference node, then decrement its use count.
5565
// The use count of the child is decremented when we are done
5566
// evaluating the NULLCHK.
5567
//
5568
if (needLateEvaluation)
5569
{
5570
cg->evaluate(firstChild);
5571
firstChildEvaluated = true;
5572
}
5573
// If the firstChild is evaluated, we simply call decReferenceCount.
5574
// Otherwise, we need to call recursivelyDecReferenceCount so that the ref count of
5575
// child nodes of the firstChild is properly decremented when the ref count of the firstChild is 1.
5576
if (firstChildEvaluated)
5577
{
5578
cg->decReferenceCount(firstChild);
5579
}
5580
else
5581
{
5582
cg->recursivelyDecReferenceCount(firstChild);
5583
}
5584
5585
// If an explicit check has not been generated for the null check, there is
5586
// an instruction that will cause a hardware trap if the exception is to be
5587
// taken. If this method may catch the exception, a GC stack map must be
5588
// created for this instruction. All registers are valid at this GC point
5589
// TODO - if the method may not catch the exception we still need to note
5590
// that the GC point exists, since maps before this point and after it cannot
5591
// be merged.
5592
//
5593
if (cg->getHasResumableTrapHandler() && !needExplicitCheck)
5594
{
5595
TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
5596
if (faultingInstruction)
5597
{
5598
faultingInstruction->setNeedsGCMap(0xffffffff);
5599
cg->machine()->setLinkRegisterKilled(true);
5600
5601
TR_Debug * debugObj = cg->getDebug();
5602
if (debugObj)
5603
{
5604
debugObj->addInstructionComment(faultingInstruction, "Throws Implicit Null Pointer Exception");
5605
}
5606
}
5607
}
5608
5609
if (comp->useCompressedPointers()
5610
&& reference->getOpCodeValue() == TR::l2a)
5611
{
5612
TR::Node *n = reference->getFirstChild();
5613
reference->setIsNonNull(true);
5614
TR::ILOpCodes loadOp = cg->comp()->il.opCodeForIndirectLoad(TR::Int32);
5615
TR::ILOpCodes rdbarOp = cg->comp()->il.opCodeForIndirectReadBarrier(TR::Int32);
5616
while ((n->getOpCodeValue() != loadOp) && (n->getOpCodeValue() != rdbarOp))
5617
{
5618
n->setIsNonZero(true);
5619
n = n->getFirstChild();
5620
}
5621
n->setIsNonZero(true);
5622
}
5623
5624
reference->setIsNonNull(true);
5625
5626
return NULL;
5627
}
5628
5629
static void
5630
genBoundCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *indexReg, int32_t indexVal, TR::Register *arrayLengthReg, int32_t arrayLengthVal)
5631
{
5632
TR::Instruction *gcPoint;
5633
5634
TR::LabelSymbol *boundCheckFailSnippetLabel = cg->lookUpSnippet(TR::Snippet::IsHelperCall, node->getSymbolReference());
5635
if (!boundCheckFailSnippetLabel)
5636
{
5637
boundCheckFailSnippetLabel = generateLabelSymbol(cg);
5638
cg->addSnippet(new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, boundCheckFailSnippetLabel, node->getSymbolReference()));
5639
}
5640
5641
if (indexReg)
5642
generateCompareInstruction(cg, node, arrayLengthReg, indexReg, false); // 32-bit compare
5643
else
5644
generateCompareImmInstruction(cg, node, arrayLengthReg, indexVal, false); // 32-bit compare
5645
5646
gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, boundCheckFailSnippetLabel, TR::CC_LS);
5647
5648
// Exception edges don't have any live regs
5649
gcPoint->ARM64NeedsGCMap(cg, 0);
5650
5651
// ARM64HelperCallSnippet generates "bl" instruction
5652
cg->machine()->setLinkRegisterKilled(true);
5653
}
5654
5655
/**
 * @brief Generates a spine check on an already-loaded contiguous array length:
 *        a zero length indicates a discontiguous (arraylet) array.
 *
 * @param[in] cg                      : CodeGenerator
 * @param[in] node                    : node for instruction association
 * @param[in] arrayLengthReg          : register holding the contiguous array length
 * @param[in] discontiguousArrayLabel : label branched to when the length is zero
 *
 * @return the generated compare-and-branch (cbzw) instruction
 */
static TR::Instruction *
genSpineCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *arrayLengthReg, TR::LabelSymbol *discontiguousArrayLabel)
   {
   return generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, arrayLengthReg, discontiguousArrayLabel);
   }
/**
 * @brief Loads the contiguous array size field from the array header and generates
 *        a spine check on it (zero length => discontiguous array).
 *
 * @param[in] cg                      : CodeGenerator
 * @param[in] node                    : node for instruction association
 * @param[in] baseArrayReg            : register holding the array base address
 * @param[out] arrayLengthReg         : register receiving the contiguous array length
 * @param[in] discontiguousArrayLabel : label branched to when the length is zero
 *
 * @return the generated compare-and-branch instruction
 */
static TR::Instruction *
genSpineCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *baseArrayReg, TR::Register *arrayLengthReg, TR::LabelSymbol *discontiguousArrayLabel)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR::MemoryReference *contiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, fej9->getOffsetOfContiguousArraySizeField());
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, arrayLengthReg, contiguousArraySizeMR);
   return genSpineCheck(cg, node, arrayLengthReg, discontiguousArrayLabel);
   }
static void
5671
genArrayletAccessAddr(TR::CodeGenerator *cg, TR::Node *node, int32_t elementSize,
5672
// Inputs:
5673
TR::Register *baseArrayReg, TR::Register *indexReg, int32_t indexVal,
5674
// Outputs:
5675
TR::Register *arrayletReg, TR::Register *offsetReg, int32_t& offsetVal)
5676
{
5677
TR::Compilation* comp = cg->comp();
5678
TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());
5679
TR_ASSERT(offsetReg || !indexReg, "Expecting valid offset reg when index reg is passed");
5680
5681
uintptr_t arrayHeaderSize = TR::Compiler->om.discontiguousArrayHeaderSizeInBytes();
5682
int32_t spinePointerSize = TR::Compiler->om.sizeofReferenceField();
5683
int32_t spinePointerSizeShift = spinePointerSize == 8 ? 3 : 2;
5684
5685
TR::MemoryReference *spineMR;
5686
TR::InstOpCode::Mnemonic loadOp;
5687
5688
// Calculate the spine offset.
5689
//
5690
if (indexReg)
5691
{
5692
int32_t spineShift = fej9->getArraySpineShift(elementSize);
5693
5694
// spineOffset = (index >> spineShift) * spinePtrSize
5695
// = (index >> spineShift) << spinePtrSizeShift
5696
// spineOffset += arrayHeaderSize
5697
//
5698
TR_ASSERT(spineShift >= spinePointerSizeShift, "Unexpected spine shift value");
5699
generateLogicalShiftRightImmInstruction(cg, node, arrayletReg, indexReg, spineShift);
5700
generateLogicalShiftLeftImmInstruction(cg, node, arrayletReg, arrayletReg, spinePointerSizeShift);
5701
generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, arrayletReg, arrayletReg, arrayHeaderSize);
5702
5703
spineMR = TR::MemoryReference::createWithIndexReg(cg, baseArrayReg, arrayletReg);
5704
loadOp = spinePointerSize == 8 ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldroffw;
5705
}
5706
else
5707
{
5708
int32_t spineIndex = fej9->getArrayletLeafIndex(indexVal, elementSize);
5709
int32_t spineDisp32 = spineIndex * spinePointerSize + arrayHeaderSize;
5710
5711
spineMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, spineDisp32);
5712
loadOp = spinePointerSize == 8 ? TR::InstOpCode::ldrimmx : TR::InstOpCode::ldrimmw;
5713
}
5714
5715
// Load the arraylet from the spine.
5716
//
5717
generateTrg1MemInstruction(cg, loadOp, node, arrayletReg, spineMR);
5718
5719
// Calculate the arraylet offset.
5720
//
5721
if (indexReg)
5722
{
5723
int32_t arrayletMask = fej9->getArrayletMask(elementSize);
5724
5725
loadConstant64(cg, node, arrayletMask, offsetReg);
5726
generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, offsetReg, indexReg, offsetReg);
5727
if (elementSize > 1)
5728
{
5729
int32_t elementSizeShift = CHAR_BIT * sizeof(int32_t) - leadingZeroes(elementSize - 1);
5730
generateLogicalShiftLeftImmInstruction(cg, node, offsetReg, offsetReg, elementSizeShift);
5731
}
5732
}
5733
else
5734
offsetVal = (fej9->getLeafElementIndex(indexVal, elementSize) * elementSize);
5735
}
5736
5737
static void
5738
genDecompressPointer(TR::CodeGenerator *cg, TR::Node *node, TR::Register *ptrReg)
5739
{
5740
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
5741
5742
if (shiftAmount != 0)
5743
generateLogicalShiftLeftImmInstruction(cg, node, ptrReg, ptrReg, shiftAmount);
5744
}
5745
5746
/**
 * @brief Selects the ARM64 load mnemonic for an array element of the given data type.
 *
 * @param[in] cg          : CodeGenerator (unused; kept for interface symmetry with the store variant)
 * @param[in] dt          : element data type
 * @param[in] elementSize : element size in bytes (distinguishes compressed vs full-width refs)
 * @param[in] isUnsigned  : whether a sub-word integer load should zero-extend
 * @param[in] useIdxReg   : true selects the register-offset form, false the unsigned-immediate form
 *
 * @return the load mnemonic, or TR::InstOpCode::bad for an unexpected type
 */
static TR::InstOpCode::Mnemonic
getLoadOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool isUnsigned, bool useIdxReg)
   {
   switch (dt)
      {
      case TR::Int8:
         if (isUnsigned)
            return useIdxReg ? TR::InstOpCode::ldrboff : TR::InstOpCode::ldrbimm;
         else
            return useIdxReg ? TR::InstOpCode::ldrsboffw : TR::InstOpCode::ldrsbimmw;
      case TR::Int16:
         if (isUnsigned)
            return useIdxReg ? TR::InstOpCode::ldrhoff : TR::InstOpCode::ldrhimm;
         else
            return useIdxReg ? TR::InstOpCode::ldrshoffw : TR::InstOpCode::ldrshimmw;
      case TR::Int32:
         return useIdxReg ? TR::InstOpCode::ldroffw : TR::InstOpCode::ldrimmw;
      case TR::Int64:
         return useIdxReg ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldrimmx;
      case TR::Float:
         // Immediate forms must be loads (vldrimm*), not stores (vstrimm*).
         return useIdxReg ? TR::InstOpCode::vldroffs : TR::InstOpCode::vldrimms;
      case TR::Double:
         return useIdxReg ? TR::InstOpCode::vldroffd : TR::InstOpCode::vldrimmd;
      case TR::Address:
         // Compressed references are loaded as 32-bit values.
         if (elementSize == 8)
            return useIdxReg ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldrimmx;
         else
            return useIdxReg ? TR::InstOpCode::ldroffw : TR::InstOpCode::ldrimmw;
      default:
         TR_ASSERT(false, "Unexpected array data type");
         return TR::InstOpCode::bad;
      }
   }
/**
 * @brief Selects the ARM64 store mnemonic for an array element of the given data type.
 *
 * @param[in] cg          : CodeGenerator (unused; kept for interface symmetry with the load variant)
 * @param[in] dt          : element data type
 * @param[in] elementSize : element size in bytes (distinguishes compressed vs full-width refs)
 * @param[in] useIdxReg   : true selects the register-offset form, false the unsigned-immediate form
 *
 * @return the store mnemonic, or TR::InstOpCode::bad for an unexpected type
 */
static TR::InstOpCode::Mnemonic
getStoreOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool useIdxReg)
   {
   switch (dt)
      {
      case TR::Int8:
         return useIdxReg ? TR::InstOpCode::strboff : TR::InstOpCode::strbimm;
      case TR::Int16:
         return useIdxReg ? TR::InstOpCode::strhoff : TR::InstOpCode::strhimm;
      case TR::Int32:
         return useIdxReg ? TR::InstOpCode::stroffw : TR::InstOpCode::strimmw;
      case TR::Int64:
         return useIdxReg ? TR::InstOpCode::stroffx : TR::InstOpCode::strimmx;
      case TR::Float:
         return useIdxReg ? TR::InstOpCode::vstroffs : TR::InstOpCode::vstrimms;
      case TR::Double:
         return useIdxReg ? TR::InstOpCode::vstroffd : TR::InstOpCode::vstrimmd;
      case TR::Address:
         // Compressed references are stored as 32-bit values.
         if (elementSize == 8)
            return useIdxReg ? TR::InstOpCode::stroffx : TR::InstOpCode::strimmx;
         else
            return useIdxReg ? TR::InstOpCode::stroffw : TR::InstOpCode::strimmw;
      default:
         TR_ASSERT(false, "Unexpected array data type");
         return TR::InstOpCode::bad;
      }
   }
// Handles both BNDCHKwithSpineCHK and SpineCHK nodes.
5809
//
5810
TR::Register *
5811
J9::ARM64::TreeEvaluator::BNDCHKwithSpineCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
5812
{
5813
TR::Compilation *comp = cg->comp();
5814
TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());
5815
bool needsBoundCheck = node->getOpCodeValue() == TR::BNDCHKwithSpineCHK;
5816
bool needsBoundCheckOOL;
5817
5818
TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
5819
5820
TR::Node *loadOrStoreChild = node->getFirstChild();
5821
TR::Node *baseArrayChild = node->getSecondChild();
5822
TR::Node *arrayLengthChild;
5823
TR::Node *indexChild;
5824
5825
if (needsBoundCheck)
5826
{
5827
arrayLengthChild = node->getChild(2);
5828
indexChild = node->getChild(3);
5829
}
5830
else
5831
indexChild = node->getChild(2);
5832
5833
TR::Register *baseArrayReg = cg->evaluate(baseArrayChild);
5834
TR::Register *indexReg;
5835
TR::Register *loadOrStoreReg;
5836
TR::Register *arrayLengthReg;
5837
5838
// If the index is too large to be an immediate load it in a register
5839
if (!indexChild->getOpCode().isLoadConst() || !constantIsUnsignedImm12(indexChild->getInt()))
5840
indexReg = cg->evaluate(indexChild);
5841
else
5842
indexReg = NULL;
5843
5844
// For primitive stores anchored under the check node, we must evaluate the source node
5845
// before the bound check branch so that its available to the snippet.
5846
//
5847
if (loadOrStoreChild->getOpCode().isStore() && !loadOrStoreChild->getRegister())
5848
{
5849
TR::Node *valueChild = loadOrStoreChild->getSecondChild();
5850
cg->evaluate(valueChild);
5851
}
5852
5853
// Evaluate any escaping nodes before the OOL branch since they won't be evaluated in the OOL path.
5854
preEvaluateEscapingNodesForSpineCheck(node, cg);
5855
5856
// Label to the OOL code that will perform the load/store/agen for discontiguous arrays (and the bound check if needed).
5857
TR::LabelSymbol *discontiguousArrayLabel = generateLabelSymbol(cg);
5858
5859
// Label back to main-line that the OOL code will branch to when done.
5860
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
5861
doneLabel->setEndInternalControlFlow();
5862
5863
TR_ARM64OutOfLineCodeSection *discontiguousArrayOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(discontiguousArrayLabel, doneLabel, cg);
5864
cg->getARM64OutOfLineCodeSectionList().push_front(discontiguousArrayOOL);
5865
5866
TR::Instruction *OOLBranchInstr;
5867
5868
if (needsBoundCheck)
5869
{
5870
TR_ASSERT(arrayLengthChild, "Expecting to have an array length child for BNDCHKwithSpineCHK node");
5871
TR_ASSERT(
5872
arrayLengthChild->getOpCode().isConversion() || arrayLengthChild->getOpCodeValue() == TR::iloadi || arrayLengthChild->getOpCodeValue() == TR::iload
5873
|| arrayLengthChild->getOpCodeValue() == TR::iRegLoad || arrayLengthChild->getOpCode().isLoadConst(),
5874
"Expecting array length child under BNDCHKwithSpineCHK to be a conversion, iiload, iload, iRegLoad or iconst");
5875
5876
arrayLengthReg = arrayLengthChild->getRegister();
5877
5878
if (arrayLengthReg)
5879
{
5880
OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, arrayLengthReg, discontiguousArrayLabel);
5881
needsBoundCheckOOL = true;
5882
genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());
5883
}
5884
else if (arrayLengthChild->getOpCode().isLoadConst())
5885
{
5886
// If the constant arraylength is non-zero then it will pass the spine check (hence its
5887
// a contiguous array) and the BNDCHK can be done inline with no OOL path.
5888
//
5889
// If the constant arraylength is zero then we will always go OOL.
5890
//
5891
// TODO: in the future there shouldn't be an OOL path because any valid access must be
5892
// on a discontiguous array.
5893
//
5894
if (arrayLengthChild->getInt() != 0)
5895
{
5896
// The array must be contiguous.
5897
//
5898
5899
// If the array length is too large to be an immediate load it in a register for the bound check
5900
if (!constantIsUnsignedImm12(arrayLengthChild->getInt()))
5901
arrayLengthReg = cg->evaluate(arrayLengthChild);
5902
5903
// Do the bound check first.
5904
genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());
5905
needsBoundCheckOOL = false;
5906
TR::Register *scratchArrayLengthReg = srm->findOrCreateScratchRegister();
5907
OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, scratchArrayLengthReg, discontiguousArrayLabel);
5908
srm->reclaimScratchRegister(scratchArrayLengthReg);
5909
}
5910
else
5911
{
5912
// Zero length array or discontiguous array. Unconditionally branch to the OOL path
5913
// to find out which.
5914
//
5915
OOLBranchInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, discontiguousArrayLabel);
5916
needsBoundCheckOOL = true;
5917
}
5918
}
5919
else
5920
{
5921
// Load the contiguous array length.
5922
arrayLengthReg = cg->evaluate(arrayLengthChild);
5923
// If the array length is 0, this is a discontiguous array and the bound check will be handled OOL.
5924
OOLBranchInstr = genSpineCheck(cg, node, arrayLengthReg, discontiguousArrayLabel);
5925
needsBoundCheckOOL = true;
5926
// Do the bound check using the contiguous array length.
5927
genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());
5928
}
5929
5930
cg->decReferenceCount(arrayLengthChild);
5931
}
5932
else
5933
{
5934
// Spine check only; load the contiguous length, check for 0, branch OOL if discontiguous.
5935
needsBoundCheckOOL = false;
5936
5937
arrayLengthReg = srm->findOrCreateScratchRegister();
5938
OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, arrayLengthReg, discontiguousArrayLabel);
5939
srm->reclaimScratchRegister(arrayLengthReg);
5940
}
5941
5942
// For reference stores, only evaluate the array element address because the store cannot
5943
// happen here (it must be done via the array store check).
5944
//
5945
// For primitive stores, evaluate them now.
5946
// For loads, evaluate them now.
5947
// For address calculations (aladd/aiadd), evaluate them now.
5948
//
5949
bool doLoadOrStore;
5950
bool doLoadDecompress = false;
5951
bool doAddressComputation;
5952
5953
if (loadOrStoreChild->getOpCode().isStore() && loadOrStoreChild->getReferenceCount() > 1)
5954
{
5955
TR_ASSERT(loadOrStoreChild->getOpCode().isWrtBar(), "Opcode must be wrtbar");
5956
loadOrStoreReg = cg->evaluate(loadOrStoreChild->getFirstChild());
5957
cg->decReferenceCount(loadOrStoreChild->getFirstChild());
5958
doLoadOrStore = false;
5959
doAddressComputation = true;
5960
}
5961
else
5962
{
5963
// If it's a store and not commoned then it must be a primitive store.
5964
// If it's an address load it may need decompression in the OOL path.
5965
5966
// Top-level check whether a decompression sequence is necessary, because the first child
5967
// may have been created by a PRE temp.
5968
//
5969
if ((loadOrStoreChild->getOpCodeValue() == TR::aload || loadOrStoreChild->getOpCodeValue() == TR::aRegLoad)
5970
&& node->isSpineCheckWithArrayElementChild()
5971
&& comp->useCompressedPointers())
5972
{
5973
doLoadDecompress = true;
5974
}
5975
5976
TR::Node *actualLoadOrStoreChild = loadOrStoreChild;
5977
while (actualLoadOrStoreChild->getOpCode().isConversion() || actualLoadOrStoreChild->containsCompressionSequence())
5978
{
5979
if (actualLoadOrStoreChild->containsCompressionSequence())
5980
doLoadDecompress = true;
5981
actualLoadOrStoreChild = actualLoadOrStoreChild->getFirstChild();
5982
}
5983
5984
doLoadOrStore = actualLoadOrStoreChild->getOpCode().hasSymbolReference()
5985
&& (actualLoadOrStoreChild->getSymbolReference()->getSymbol()->isArrayShadowSymbol()
5986
|| actualLoadOrStoreChild->getSymbolReference()->getSymbol()->isArrayletShadowSymbol()) && node->isSpineCheckWithArrayElementChild();
5987
5988
// If the 1st child is not a load/store/aladd/aiadd it's probably a nop (e.g. const) at this point due to commoning
5989
//
5990
doAddressComputation = !doLoadOrStore && actualLoadOrStoreChild->getOpCode().isArrayRef() && !node->isSpineCheckWithArrayElementChild();
5991
5992
if (doLoadOrStore || doAddressComputation || !loadOrStoreChild->getOpCode().isLoadConst())
5993
loadOrStoreReg = cg->evaluate(loadOrStoreChild);
5994
else
5995
loadOrStoreReg = NULL;
5996
}
5997
5998
generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
5999
TR::LabelSymbol *doneMainlineLabel = generateLabelSymbol(cg);
6000
generateLabelInstruction(cg, TR::InstOpCode::label, node, doneMainlineLabel);
6001
6002
// start of OOL
6003
//
6004
discontiguousArrayOOL->swapInstructionListsWithCompilation();
6005
{
6006
TR::Instruction *OOLLabelInstr = generateLabelInstruction(cg, TR::InstOpCode::label, node, discontiguousArrayLabel);
6007
// XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.
6008
TR_ASSERT(!OOLLabelInstr->getLiveLocals() && !OOLLabelInstr->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");
6009
OOLLabelInstr->setLiveLocals(OOLBranchInstr->getLiveLocals());
6010
OOLLabelInstr->setLiveMonitors(OOLBranchInstr->getLiveMonitors());
6011
6012
if (needsBoundCheckOOL)
6013
{
6014
TR_ASSERT(needsBoundCheck, "Inconsistent state, needs bound check OOL but doesn't need bound check");
6015
6016
TR::MemoryReference *discontiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, fej9->getOffsetOfDiscontiguousArraySizeField());
6017
TR::Register *arrayLengthScratchReg = srm->findOrCreateScratchRegister();
6018
6019
generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, arrayLengthScratchReg, discontiguousArraySizeMR);
6020
6021
// Do the bound check using the discontiguous array length.
6022
genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthScratchReg, arrayLengthChild->getInt());
6023
6024
srm->reclaimScratchRegister(arrayLengthScratchReg);
6025
}
6026
6027
TR_ASSERT(!(doLoadOrStore && doAddressComputation), "Unexpected condition");
6028
6029
TR::Register *arrayletReg = NULL;
6030
TR::DataType dt = loadOrStoreChild->getDataType();
6031
6032
if (doLoadOrStore || doAddressComputation)
6033
{
6034
arrayletReg = doAddressComputation ? loadOrStoreReg : cg->allocateRegister();
6035
6036
// Generate the base+offset address pair into the arraylet.
6037
//
6038
int32_t elementSize = (dt == TR::Address) ? TR::Compiler->om.sizeofReferenceField() : TR::Symbol::convertTypeToSize(dt);
6039
TR::Register *arrayletOffsetReg;
6040
int32_t arrayletOffsetVal;
6041
6042
if (indexReg)
6043
arrayletOffsetReg = srm->findOrCreateScratchRegister();
6044
6045
genArrayletAccessAddr(cg, node, elementSize, baseArrayReg, indexReg, indexChild->getInt(), arrayletReg, arrayletOffsetReg, arrayletOffsetVal);
6046
6047
// Decompress the arraylet pointer if necessary.
6048
//
6049
genDecompressPointer(cg, node, arrayletReg);
6050
6051
if (doLoadOrStore)
6052
{
6053
// Generate the load or store.
6054
//
6055
if (loadOrStoreChild->getOpCode().isStore())
6056
{
6057
TR::InstOpCode::Mnemonic storeOp = getStoreOpCodeFromDataType(cg, dt, elementSize, indexReg != NULL);
6058
6059
TR::MemoryReference *arrayletMR = indexReg ?
6060
TR::MemoryReference::createWithIndexReg(cg, arrayletReg, arrayletOffsetReg) :
6061
TR::MemoryReference::createWithDisplacement(cg, arrayletReg, arrayletOffsetVal);
6062
generateMemSrc1Instruction(cg, storeOp, node, arrayletMR, loadOrStoreChild->getSecondChild()->getRegister());
6063
}
6064
else
6065
{
6066
TR_ASSERT(loadOrStoreChild->getOpCode().isConversion() || loadOrStoreChild->getOpCode().isLoad(), "Unexpected op");
6067
6068
bool isUnsigned = loadOrStoreChild->getOpCode().isUnsigned();
6069
TR::InstOpCode::Mnemonic loadOp = getLoadOpCodeFromDataType(cg, dt, elementSize, isUnsigned, indexReg != NULL);
6070
6071
TR::MemoryReference *arrayletMR = indexReg ?
6072
TR::MemoryReference::createWithIndexReg(cg, arrayletReg, arrayletOffsetReg) :
6073
TR::MemoryReference::createWithDisplacement(cg, arrayletReg, arrayletOffsetVal);
6074
generateTrg1MemInstruction(cg, loadOp, node, loadOrStoreReg, arrayletMR);
6075
6076
if (doLoadDecompress)
6077
{
6078
TR_ASSERT(dt == TR::Address, "Expecting loads with decompression trees to have data type TR::Address");
6079
genDecompressPointer(cg, node, loadOrStoreReg);
6080
}
6081
}
6082
6083
cg->stopUsingRegister(arrayletReg);
6084
}
6085
else
6086
{
6087
if (indexReg)
6088
generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, loadOrStoreReg, loadOrStoreReg, arrayletOffsetReg);
6089
else
6090
addConstant32(cg, node, loadOrStoreReg, loadOrStoreReg, arrayletOffsetVal);
6091
}
6092
6093
if (indexReg)
6094
srm->reclaimScratchRegister(arrayletOffsetReg);
6095
}
6096
6097
const uint32_t numOOLDeps = 1 + (doLoadOrStore ? 1 : 0) + (needsBoundCheck && arrayLengthReg ? 1 : 0) + (loadOrStoreReg ? 1 : 0)
6098
+ (indexReg ? 1 : 0) + srm->numAvailableRegisters();
6099
TR::RegisterDependencyConditions *OOLDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numOOLDeps, cg->trMemory());
6100
OOLDeps->addPostCondition(baseArrayReg, TR::RealRegister::NoReg);
6101
TR_ASSERT(OOLDeps->getPostConditions()->getRegisterDependency(0)->getRegister() == baseArrayReg, "Unexpected register");
6102
if (doLoadOrStore)
6103
{
6104
OOLDeps->addPostCondition(arrayletReg, TR::RealRegister::NoReg);
6105
TR_ASSERT(OOLDeps->getPostConditions()->getRegisterDependency(1)->getRegister() == arrayletReg, "Unexpected register");
6106
}
6107
if (indexReg)
6108
OOLDeps->addPostCondition(indexReg, TR::RealRegister::NoReg);
6109
if (loadOrStoreReg)
6110
OOLDeps->addPostCondition(loadOrStoreReg, TR::RealRegister::NoReg);
6111
if (needsBoundCheck && arrayLengthReg)
6112
OOLDeps->addPostCondition(arrayLengthReg, TR::RealRegister::NoReg);
6113
srm->addScratchRegistersToDependencyList(OOLDeps);
6114
6115
srm->stopUsingRegisters();
6116
6117
TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg);
6118
generateLabelInstruction(cg, TR::InstOpCode::label, node, doneOOLLabel, OOLDeps);
6119
generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);
6120
}
6121
discontiguousArrayOOL->swapInstructionListsWithCompilation();
6122
//
6123
// end of OOL
6124
6125
cg->decReferenceCount(loadOrStoreChild);
6126
cg->decReferenceCount(baseArrayChild);
6127
cg->decReferenceCount(indexChild);
6128
6129
return NULL;
6130
}
6131
6132
/**
 * @brief Evaluates a direct call node: first attempts to inline the call; otherwise
 *        dispatches through the appropriate linkage (JNI linkage for direct-JNI-eligible
 *        native callees, the callee's own linkage convention for everything else).
 *
 * @param[in] node : the direct call node
 * @param[in] cg   : CodeGenerator
 *
 * @return register holding the call's return value (set by inlineDirectCall or buildDirectDispatch)
 */
TR::Register *J9::ARM64::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *returnRegister;

   if (!cg->inlineDirectCall(node, returnRegister))
      {
      TR::SymbolReference *symRef = node->getSymbolReference();
      TR::MethodSymbol *callee = symRef->getSymbol()->castToMethodSymbol();
      TR::Linkage *linkage;

      if (callee->isJNI() && (node->isPreparedForDirectJNI() || callee->getResolvedMethodSymbol()->canDirectNativeCall()))
         {
         linkage = cg->getLinkage(TR_J9JNILinkage);
         }
      else
         {
         linkage = cg->getLinkage(callee->getLinkageConvention());
         }
      returnRegister = linkage->buildDirectDispatch(node);
      }

   return returnRegister;
   }