Path: blob/master/runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2019, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include <cmath>23#include <iterator>24#include "codegen/ARM64Instruction.hpp"25#include "codegen/ARM64JNILinkage.hpp"26#include "codegen/ARM64OutOfLineCodeSection.hpp"27#include "codegen/ARM64PrivateLinkage.hpp"28#include "codegen/CodeGenerator.hpp"29#include "codegen/CodeGenerator_inlines.hpp"30#include "codegen/CodeGeneratorUtils.hpp"31#include "codegen/GenerateInstructions.hpp"32#include "codegen/ARM64Instruction.hpp"33#include "codegen/J9ARM64Snippet.hpp"34#include "codegen/J9WatchedInstanceFieldSnippet.hpp"35#include "codegen/J9WatchedStaticFieldSnippet.hpp"36#include "codegen/OMRCodeGenerator.hpp"37#include "codegen/RegisterDependency.hpp"38#include "codegen/Relocation.hpp"39#include "codegen/TreeEvaluator.hpp"40#include "compile/VirtualGuard.hpp"41#include "il/AutomaticSymbol.hpp"42#include "il/DataTypes.hpp"43#include "il/LabelSymbol.hpp"44#include "il/Node.hpp"45#include "il/Node_inlines.hpp"46#include "il/OMRDataTypes_inlines.hpp"47#include "il/StaticSymbol.hpp"48#include "OMR/Bytes.hpp"4950/*51* J9 ARM64 specific tree evaluator table overrides52*/53extern void TEMPORARY_initJ9ARM64TreeEvaluatorTable(TR::CodeGenerator *cg)54{55TR_TreeEvaluatorFunctionPointer *tet = cg->getTreeEvaluatorTable();5657tet[TR::awrtbar] = TR::TreeEvaluator::awrtbarEvaluator;58tet[TR::awrtbari] = TR::TreeEvaluator::awrtbariEvaluator;59tet[TR::monexit] = TR::TreeEvaluator::monexitEvaluator;60tet[TR::monent] = TR::TreeEvaluator::monentEvaluator;61tet[TR::monexitfence] = TR::TreeEvaluator::monexitfenceEvaluator;62tet[TR::asynccheck] = TR::TreeEvaluator::asynccheckEvaluator;63tet[TR::instanceof] = TR::TreeEvaluator::instanceofEvaluator;64tet[TR::checkcast] = TR::TreeEvaluator::checkcastEvaluator;65tet[TR::checkcastAndNULLCHK] = TR::TreeEvaluator::checkcastAndNULLCHKEvaluator;66tet[TR::New] = TR::TreeEvaluator::newObjectEvaluator;67tet[TR::variableNew] = TR::TreeEvaluator::newObjectEvaluator;68tet[TR::newarray] = TR::TreeEvaluator::newArrayEvaluator;69tet[TR::anewarray] = TR::TreeEvaluator::anewArrayEvaluator;70tet[TR::variableNewArray] = TR::TreeEvaluator::anewArrayEvaluator;71tet[TR::multianewarray] = TR::TreeEvaluator::multianewArrayEvaluator;72tet[TR::arraylength] = TR::TreeEvaluator::arraylengthEvaluator;73tet[TR::ZEROCHK] = TR::TreeEvaluator::ZEROCHKEvaluator;74tet[TR::ResolveCHK] = TR::TreeEvaluator::resolveCHKEvaluator;75tet[TR::DIVCHK] = TR::TreeEvaluator::DIVCHKEvaluator;76tet[TR::BNDCHK] = TR::TreeEvaluator::BNDCHKEvaluator;77// TODO:ARM64: Enable when Implemented: tet[TR::ArrayCopyBNDCHK] = TR::TreeEvaluator::ArrayCopyBNDCHKEvaluator;78tet[TR::BNDCHKwithSpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;79tet[TR::SpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;80tet[TR::ArrayStoreCHK] = TR::TreeEvaluator::ArrayStoreCHKEvaluator;81tet[TR::ArrayCHK] = TR::TreeEvaluator::ArrayCHKEvaluator;82tet[TR::MethodEnterHook] = TR::TreeEvaluator::conditionalHelperEvaluator;83tet[TR::MethodExitHook] = TR::TreeEvaluator::conditionalHelperEvaluator;84tet[TR::allocationFence] = TR::TreeEvaluator::flushEvaluator;85tet[TR::loadFence] = TR::TreeEvaluator::flushEvaluator;86tet[TR::storeFence] = TR::TreeEvaluator::flushEvaluator;87tet[TR::fullFence] = TR::TreeEvaluator::flushEvaluator;88tet[TR::frem] = TR::TreeEvaluator::fremEvaluator;89tet[TR::drem] = TR::TreeEvaluator::dremEvaluator;90tet[TR::NULLCHK] = TR::TreeEvaluator::NULLCHKEvaluator;91tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;92}9394static TR::InstOpCode::Mnemonic95getStoreOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool useIdxReg);9697void VMgenerateCatchBlockBBStartPrologue(TR::Node *node, TR::Instruction *fenceInstruction, TR::CodeGenerator *cg)98{99TR::Compilation *comp = cg->comp();100TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());101102TR::Block *block = node->getBlock();103104if (fej9->shouldPerformEDO(block, comp))105{106TR_UNIMPLEMENTED();107}108}109110/**111* @brief Generates instructions to load j9class from object pointer112*113* @param[in] node: node114* @param[in] j9classReg: register j9class value is assigned to115* @param[in] objReg: register holding object pointer116* @param[in] cg: code generator117*/118static void119generateLoadJ9Class(TR::Node *node, TR::Register *j9classReg, TR::Register *objReg, TR::CodeGenerator *cg)120{121generateTrg1MemInstruction(cg, TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx, node, j9classReg,122TR::MemoryReference::createWithDisplacement(cg, objReg, static_cast<int32_t>(TR::Compiler->om.offsetOfObjectVftField())));123TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, j9classReg);124}125126/*127* Generate the reporting field access helper call with required arguments128*129* jitReportInstanceFieldRead130* arg1 pointer to static data block131* arg2 object being read132*133* jitReportInstanceFieldWrite134* arg1 pointer to static data block135* arg2 object being written to136* arg3 pointer to value being written137*138* jitReportStaticFieldRead139* arg1 pointer to static data block140*141* jitReportStaticFieldWrite142* arg1 pointer to static data block143* arg2 pointer to value being written144*145*/146void generateReportFieldAccessOutlinedInstructions(TR::Node *node, TR::LabelSymbol *endLabel, TR::Register *dataBlockReg, bool isWrite, TR::CodeGenerator *cg, TR::Register *sideEffectRegister, TR::Register *valueReg)147{148TR::Compilation *comp = cg->comp();149bool isInstanceField = node->getOpCode().isIndirect();150151TR_RuntimeHelper helperIndex = isWrite ? (isInstanceField ? TR_jitReportInstanceFieldWrite: TR_jitReportStaticFieldWrite):152(isInstanceField ? TR_jitReportInstanceFieldRead: TR_jitReportStaticFieldRead);153154TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));155auto linkageProperties = linkage->getProperties();156TR::Register *valueReferenceReg = NULL;157158// Figure out the number of dependencies needed to make the VM Helper call.159// numOfConditions is equal to the number of arguments required by the VM Helper.160uint8_t numOfConditions = 1; // All helpers need at least one parameter.161if (isWrite)162{163numOfConditions += 2;164}165if (isInstanceField)166{167numOfConditions += 1;168}169170TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory())TR::RegisterDependencyConditions(numOfConditions, numOfConditions, cg->trMemory());171172/*173* For reporting field write, reference to the valueNode is needed so we need to store174* the value on to a stack location first and pass the stack location address as an arguement175* to the VM helper176*/177if (isWrite)178{179TR::DataType dt = node->getDataType();180int32_t elementSize = TR::Symbol::convertTypeToSize(dt);181TR::InstOpCode::Mnemonic storeOp = getStoreOpCodeFromDataType(cg, dt, elementSize, false);182TR::SymbolReference *location = cg->allocateLocalTemp(dt);183TR::MemoryReference *valueMR = TR::MemoryReference::createWithSymRef(cg, node, location);184185generateMemSrc1Instruction(cg, storeOp, node, valueMR, valueReg);186deps->addPreCondition(valueReg, TR::RealRegister::NoReg);187deps->addPostCondition(valueReg, TR::RealRegister::NoReg);188valueReferenceReg = cg->allocateRegister();189190// store the stack location into a register191generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, valueReferenceReg, valueMR);192}193194// First Argument - DataBlock195deps->addPreCondition(dataBlockReg, TR::RealRegister::x0);196deps->addPostCondition(dataBlockReg, TR::RealRegister::x0);197198// Second Argument199if (isInstanceField)200{201deps->addPreCondition(sideEffectRegister, TR::RealRegister::x1);202deps->addPostCondition(sideEffectRegister, TR::RealRegister::x1);203}204else if (isWrite)205{206deps->addPreCondition(valueReferenceReg, TR::RealRegister::x1);207deps->addPostCondition(valueReferenceReg, TR::RealRegister::x1);208}209210// Third Argument211if (isInstanceField && isWrite)212{213deps->addPreCondition(valueReferenceReg, TR::RealRegister::x2);214deps->addPostCondition(valueReferenceReg, TR::RealRegister::x2);215}216217// Generate branch instruction to jump into helper218TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(helperIndex);219TR::Instruction *call = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(helperSym->getMethodAddress()), deps, helperSym, NULL);220call->ARM64NeedsGCMap(cg, linkageProperties.getPreservedRegisterMapForGC());221cg->machine()->setLinkRegisterKilled(true);222223generateLabelInstruction(cg, TR::InstOpCode::b, node, endLabel);224225if (valueReferenceReg != NULL)226{227cg->stopUsingRegister(valueReferenceReg);228}229}230231void232J9::ARM64::TreeEvaluator::generateTestAndReportFieldWatchInstructions(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *valueReg, TR::Register *dataSnippetRegister)233{234bool isInstanceField = node->getOpCode().isIndirect();235TR_J9VMBase *fej9 = reinterpret_cast<TR_J9VMBase *>(cg->fe());236237TR::Register *scratchReg = cg->allocateRegister();238239TR::LabelSymbol* startLabel = generateLabelSymbol(cg);240TR::LabelSymbol* endLabel = generateLabelSymbol(cg);241TR::LabelSymbol* fieldReportLabel = generateLabelSymbol(cg);242startLabel->setStartInternalControlFlow();243endLabel->setEndInternalControlFlow();244245generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);246generateTrg1ImmSymInstruction(cg, TR::InstOpCode::adr, node, dataSnippetRegister, 0, dataSnippet->getSnippetLabel());247248TR_ARM64OutOfLineCodeSection *generateReportOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(fieldReportLabel, endLabel, cg);249cg->getARM64OutOfLineCodeSectionList().push_front(generateReportOOL);250251TR::Register *fieldClassReg = NULL;252bool isSideEffectReg = false;253// Load fieldClass254if (isInstanceField)255{256fieldClassReg = cg->allocateRegister();257generateLoadJ9Class(node, fieldClassReg, sideEffectRegister, cg);258}259else if (!(node->getSymbolReference()->isUnresolved()))260{261fieldClassReg = cg->allocateRegister();262if (cg->needClassAndMethodPointerRelocations())263{264// If this is an AOT compile, we generate instructions to load the fieldClass directly from the snippet because the fieldClass will be invalid265// if we load using the dataSnippet's helper query at compile time.266TR::MemoryReference *fieldClassMemRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetof(J9JITWatchedStaticFieldData, fieldClass));267generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg, fieldClassMemRef);268}269else270{271// For non-AOT compiles we don't need to use sideEffectRegister here as the class information is available to us at compile time.272J9Class * fieldClass = static_cast<TR::J9WatchedStaticFieldSnippet *>(dataSnippet)->getFieldClass();273loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(fieldClass), fieldClassReg);274}275}276else277{278// Unresolved279if (isWrite)280{281fieldClassReg = cg->allocateRegister();282generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg,283TR::MemoryReference::createWithDisplacement(cg, sideEffectRegister, fej9->getOffsetOfClassFromJavaLangClassField()));284}285else286{287isSideEffectReg = true;288fieldClassReg = sideEffectRegister;289}290}291292TR::MemoryReference *classFlagsMemRef = TR::MemoryReference::createWithDisplacement(cg, fieldClassReg, static_cast<int32_t>(fej9->getOffsetOfClassFlags()));293294generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, classFlagsMemRef);295static_assert(J9ClassHasWatchedFields == 0x100, "We assume that J9ClassHasWatchedFields is 0x100");296generateTestImmInstruction(cg, node, scratchReg, 0x600); // 0x600 is immr:imms for 0x100297generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, fieldReportLabel, TR::CC_NE);298299generateReportOOL->swapInstructionListsWithCompilation();300301generateLabelInstruction(cg, TR::InstOpCode::label, node, fieldReportLabel);302generateReportFieldAccessOutlinedInstructions(node, endLabel, dataSnippetRegister, isWrite, cg, sideEffectRegister, valueReg);303304generateReportOOL->swapInstructionListsWithCompilation();305306generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel);307308cg->stopUsingRegister(scratchReg);309if (!isSideEffectReg)310cg->stopUsingRegister(fieldClassReg);311312}313314void315J9::ARM64::TreeEvaluator::generateFillInDataBlockSequenceForUnresolvedField(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister)316{317TR::Compilation *comp = cg->comp();318TR::SymbolReference *symRef = node->getSymbolReference();319bool is64Bit = comp->target().is64Bit();320bool isStatic = symRef->getSymbol()->getKind() == TR::Symbol::IsStatic;321322TR_RuntimeHelper helperIndex = isWrite? (isStatic ? TR_jitResolveStaticFieldSetterDirect: TR_jitResolveFieldSetterDirect):323(isStatic ? TR_jitResolveStaticFieldDirect: TR_jitResolveFieldDirect);324325TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));326auto linkageProperties = linkage->getProperties();327intptr_t offsetInDataBlock = isStatic ? offsetof(J9JITWatchedStaticFieldData, fieldAddress): offsetof(J9JITWatchedInstanceFieldData, offset);328329330TR::LabelSymbol* startLabel = generateLabelSymbol(cg);331TR::LabelSymbol* endLabel = generateLabelSymbol(cg);332TR::LabelSymbol* unresolvedLabel = generateLabelSymbol(cg);333startLabel->setStartInternalControlFlow();334endLabel->setEndInternalControlFlow();335336TR::Register *cpIndexReg = cg->allocateRegister();337TR::Register *resultReg = cg->allocateRegister();338TR::Register *scratchReg = cg->allocateRegister();339340// Setup Dependencies341// Requires two argument registers: resultReg and cpIndexReg.342uint8_t numOfConditions = 2;343TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(numOfConditions, numOfConditions, cg->trMemory());344345TR_ARM64OutOfLineCodeSection *generateReportOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(unresolvedLabel, endLabel, cg);346cg->getARM64OutOfLineCodeSectionList().push_front(generateReportOOL);347348generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);349350// Compare J9JITWatchedInstanceFieldData.offset or J9JITWatchedStaticFieldData.fieldAddress (Depending on Instance or Static)351// Load value from dataSnippet + offsetInDataBlock then compare and branch352generateTrg1ImmSymInstruction(cg, TR::InstOpCode::adr, node, dataSnippetRegister, 0, dataSnippet->getSnippetLabel());353TR::MemoryReference *fieldMemRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetInDataBlock);354generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, fieldMemRef);355generateCompareImmInstruction(cg, node, scratchReg, -1, true);356generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, unresolvedLabel, TR::CC_EQ);357358generateReportOOL->swapInstructionListsWithCompilation();359360generateLabelInstruction(cg, TR::InstOpCode::label, node, unresolvedLabel);361362bool isSideEffectReg = false;363if (isStatic)364{365// Fill in J9JITWatchedStaticFieldData.fieldClass366TR::Register *fieldClassReg = NULL;367368if (isWrite)369{370fieldClassReg = cg->allocateRegister();371generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, fieldClassReg,372TR::MemoryReference::createWithDisplacement(cg, sideEffectRegister, static_cast<int32_t>(comp->fej9()->getOffsetOfClassFromJavaLangClassField())));373}374else375{376isSideEffectReg = true;377fieldClassReg = sideEffectRegister;378}379TR::MemoryReference *memRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetof(J9JITWatchedStaticFieldData, fieldClass));380381// Store value to fieldClass member of the snippet382generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, memRef, fieldClassReg);383384if (!isSideEffectReg)385cg->stopUsingRegister(fieldClassReg);386}387388TR::ResolvedMethodSymbol *methodSymbol = node->getByteCodeInfo().getCallerIndex() == -1 ? comp->getMethodSymbol(): comp->getInlinedResolvedMethodSymbol(node->getByteCodeInfo().getCallerIndex());389390uintptr_t constantPool = reinterpret_cast<uintptr_t>(methodSymbol->getResolvedMethod()->constantPool());391if (cg->needClassAndMethodPointerRelocations())392{393loadAddressConstantInSnippet(cg, node, constantPool, resultReg, TR_ConstantPool);394}395else396{397loadAddressConstant(cg, node, constantPool, resultReg);398}399loadConstant32(cg, node, symRef->getCPIndex(), cpIndexReg);400401// cpAddress is the first argument of VMHelper402deps->addPreCondition(resultReg, TR::RealRegister::x0);403deps->addPostCondition(resultReg, TR::RealRegister::x0);404// cpIndexReg is the second argument405deps->addPreCondition(cpIndexReg, TR::RealRegister::x1);406deps->addPostCondition(cpIndexReg, TR::RealRegister::x1);407408// Generate helper address and branch409TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(helperIndex);410TR::Instruction *call = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(helperSym->getMethodAddress()), deps, helperSym, NULL);411call->ARM64NeedsGCMap(cg, linkageProperties.getPreservedRegisterMapForGC());412cg->machine()->setLinkRegisterKilled(true);413414/*415* For instance field offset, the result returned by the vmhelper includes header size.416* subtract the header size to get the offset needed by field watch helpers417*/418if (!isStatic)419{420generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, resultReg, resultReg, TR::Compiler->om.objectHeaderSizeInBytes());421}422423// store result into J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset424TR::MemoryReference *dataRef = TR::MemoryReference::createWithDisplacement(cg, dataSnippetRegister, offsetInDataBlock);425generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, dataRef, resultReg);426427generateLabelInstruction(cg, TR::InstOpCode::b, node, endLabel);428429generateReportOOL->swapInstructionListsWithCompilation();430431generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel);432433cg->stopUsingRegister(scratchReg);434cg->stopUsingRegister(cpIndexReg);435cg->stopUsingRegister(resultReg);436}437438static TR::Register *439generateSoftwareReadBarrier(TR::Node *node, TR::CodeGenerator *cg, bool isArdbari)440{441#ifndef OMR_GC_CONCURRENT_SCAVENGER442TR_ASSERT_FATAL(false, "Concurrent Scavenger not supported.");443#else444TR::Compilation *comp = cg->comp();445TR::MemoryReference *tempMR = NULL;446447TR::Register *tempReg;448TR::Register *locationReg = cg->allocateRegister();449TR::Register *evacuateReg = cg->allocateRegister();450TR::Register *x0Reg = cg->allocateRegister();451TR::Register *vmThreadReg = cg->getMethodMetaDataRegister();452453if (!node->getSymbolReference()->getSymbol()->isInternalPointer())454{455if (node->getSymbolReference()->getSymbol()->isNotCollected())456tempReg = cg->allocateRegister();457else458tempReg = cg->allocateCollectedReferenceRegister();459}460else461{462tempReg = cg->allocateRegister();463tempReg->setPinningArrayPointer(node->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->getPinningArrayPointer());464tempReg->setContainsInternalPointer();465}466467TR::LabelSymbol *startLabel = generateLabelSymbol(cg);468TR::LabelSymbol *endLabel = generateLabelSymbol(cg);469startLabel->setStartInternalControlFlow();470endLabel->setEndInternalControlFlow();471472TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());473deps->addPostCondition(tempReg, TR::RealRegister::NoReg);474deps->addPostCondition(locationReg, TR::RealRegister::x1); // TR_softwareReadBarrier helper needs this in x1.475deps->addPostCondition(evacuateReg, TR::RealRegister::NoReg);476deps->addPostCondition(x0Reg, TR::RealRegister::x0);477478node->setRegister(tempReg);479480tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);481if (tempMR->getUnresolvedSnippet() != NULL)482{483generateTrg1MemInstruction(cg, TR::InstOpCode::addx, node, locationReg, tempMR);484}485else486{487if (tempMR->useIndexedForm())488generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, locationReg, tempMR->getBaseRegister(), tempMR->getIndexRegister());489else490generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, locationReg, tempMR);491}492493TR::InstOpCode::Mnemonic loadOp = isArdbari ? TR::InstOpCode::ldrimmx : TR::InstOpCode::ldrimmw;494495auto faultingInstruction = generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));496497// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.498// In this case, nullcheck reference register is base register of tempMR, but the memory reference of load instruction does not use it,499// thus we need to explicitly set implicit exception point here.500if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck())501{502if (cg->getImplicitExceptionPoint() == NULL)503{504if (comp->getOption(TR_TraceCG))505{506traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, node->getFirstChild());507}508cg->setImplicitExceptionPoint(faultingInstruction);509}510}511512if (isArdbari && node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())513TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);514515generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);516517generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,518TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateBaseAddressOffset()));519generateCompareInstruction(cg, node, tempReg, evacuateReg, isArdbari); // 64-bit compare in ardbari520generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_LT);521522generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,523TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateTopAddressOffset()));524generateCompareInstruction(cg, node, tempReg, evacuateReg, isArdbari); // 64-bit compare in ardbari525generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_GT);526527// TR_softwareReadBarrier helper expects the vmThread in x0.528generateMovInstruction(cg, node, x0Reg, vmThreadReg);529530TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_softwareReadBarrier);531generateImmSymInstruction(cg, TR::InstOpCode::bl, node, (uintptr_t)helperSym->getMethodAddress(), deps, helperSym, NULL);532533generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));534535if (isArdbari && node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())536TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);537538generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps);539540bool needSync = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && comp->target().isSMP());541if (needSync)542{543// Issue an Acquire barrier after volatile load544// dmb ishld545generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0x9);546}547548tempMR->decNodeReferenceCounts(cg);549550cg->stopUsingRegister(evacuateReg);551cg->stopUsingRegister(locationReg);552cg->stopUsingRegister(x0Reg);553554cg->machine()->setLinkRegisterKilled(true);555556return tempReg;557#endif // OMR_GC_CONCURRENT_SCAVENGER558}559560TR::Register *561J9::ARM64::TreeEvaluator::irdbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)562{563// For rdbar and wrtbar nodes we first evaluate the children we need to564// handle the side effects. Then we delegate the evaluation of the remaining565// children and the load/store operation to the appropriate load/store evaluator.566TR::Node *sideEffectNode = node->getFirstChild();567TR::Register * sideEffectRegister = cg->evaluate(sideEffectNode);568if (cg->comp()->getOption(TR_EnableFieldWatch))569{570TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);571}572cg->decReferenceCount(sideEffectNode);573return TR::TreeEvaluator::iloadEvaluator(node, cg);574}575576TR::Register *577J9::ARM64::TreeEvaluator::irdbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)578{579// For rdbar and wrtbar nodes we first evaluate the children we need to580// handle the side effects. Then we delegate the evaluation of the remaining581// children and the load/store operation to the appropriate load/store evaluator.582TR::Node *sideEffectNode = node->getFirstChild();583TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);584if (cg->comp()->getOption(TR_EnableFieldWatch))585{586TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);587}588589// Note: For indirect rdbar nodes, the first child (sideEffectNode) is also used by the590// load evaluator. The load evaluator will also evaluate+decrement it. In order to avoid double591// decrementing the node we skip doing it here and let the load evaluator do it.592if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none &&593cg->comp()->useCompressedPointers() &&594(node->getOpCode().hasSymbolReference() &&595node->getSymbolReference()->getSymbol()->getDataType() == TR::Address))596{597return generateSoftwareReadBarrier(node, cg, false);598}599else600return TR::TreeEvaluator::iloadEvaluator(node, cg);601}602603TR::Register *604J9::ARM64::TreeEvaluator::ardbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)605{606// For rdbar and wrtbar nodes we first evaluate the children we need to607// handle the side effects. Then we delegate the evaluation of the remaining608// children and the load/store operation to the appropriate load/store evaluator.609TR::Node *sideEffectNode = node->getFirstChild();610TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);611if (cg->comp()->getOption(TR_EnableFieldWatch))612{613TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);614}615cg->decReferenceCount(sideEffectNode);616return TR::TreeEvaluator::aloadEvaluator(node, cg);617}618619TR::Register *620J9::ARM64::TreeEvaluator::ardbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)621{622// For rdbar and wrtbar nodes we first evaluate the children we need to623// handle the side effects. Then we delegate the evaluation of the remaining624// children and the load/store operation to the appropriate load/store evaluator.625TR::Register *sideEffectRegister = cg->evaluate(node->getFirstChild());626if (cg->comp()->getOption(TR_EnableFieldWatch))627{628TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, NULL);629}630// Note: For indirect rdbar nodes, the first child (sideEffectNode) is also used by the631// load evaluator. The load evaluator will also evaluate+decrement it. In order to avoid double632// decrementing the node we skip doing it here and let the load evaluator do it.633if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)634return TR::TreeEvaluator::aloadEvaluator(node, cg);635else636return generateSoftwareReadBarrier(node, cg, true);637}638639TR::Register *640J9::ARM64::TreeEvaluator::fwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)641{642// For rdbar and wrtbar nodes we first evaluate the children we need to643// handle the side effects. Then we delegate the evaluation of the remaining644// children and the load/store operation to the appropriate load/store evaluator.645TR::Node *sideEffectNode = node->getSecondChild();646TR::Register *valueReg = cg->evaluate(node->getFirstChild());647TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);648if (cg->comp()->getOption(TR_EnableFieldWatch))649{650TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);651}652// Note: The reference count for valueReg's node is not decremented here because the653// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here654// to avoid double decrementing.655cg->decReferenceCount(sideEffectNode);656return TR::TreeEvaluator::fstoreEvaluator(node, cg);657}658659TR::Register *660J9::ARM64::TreeEvaluator::fwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)661{662// For rdbar and wrtbar nodes we first evaluate the children we need to663// handle the side effects. Then we delegate the evaluation of the remaining664// children and the load/store operation to the appropriate load/store evaluator.665TR::Node *sideEffectNode = node->getThirdChild();666TR::Register *valueReg = cg->evaluate(node->getSecondChild());667TR::Register *sideEffectRegister = cg->evaluate(sideEffectNode);668if (cg->comp()->getOption(TR_EnableFieldWatch))669{670TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);671}672// Note: The reference count for valueReg's node is not decremented here because the673// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here674// to avoid double decrementing.675cg->decReferenceCount(sideEffectNode);676return TR::TreeEvaluator::fstoreEvaluator(node, cg);677}678679TR::Register *680J9::ARM64::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)681{682// For rdbar and wrtbar nodes we first evaluate the children we need to683// handle the side effects. Then we delegate the evaluation of the remaining684// children and the load/store operation to the appropriate load/store evaluator.685TR::Node *sideEffectNode = node->getSecondChild();686TR::Register *valueReg = cg->evaluate(node->getFirstChild());687TR::Register *sideEffectRegister = cg->evaluate(node->getSecondChild());688if (cg->comp()->getOption(TR_EnableFieldWatch))689{690TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);691}692// Note: The reference count for valueReg's node is not decremented here because the693// store evaluator also uses it and so it will evaluate+decrement it. Thus we must skip decrementing here694// to avoid double decrementing.695cg->decReferenceCount(sideEffectNode);696return TR::TreeEvaluator::dstoreEvaluator(node, cg);697}698699TR::Register *700J9::ARM64::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)701{702// For rdbar and wrtbar nodes we first evaluate the children we need to703// handle the side effects. Then we delegate the evaluation of the remaining704// children and the load/store operation to the appropriate load/store evaluator.705TR::Node *sideEffectNode = node->getThirdChild();706TR::Register *valueReg = cg->evaluate(node->getSecondChild());707TR::Register *sideEffectRegister = cg->evaluate(node->getThirdChild());708if (cg->comp()->getOption(TR_EnableFieldWatch))709{710TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);711}712// The Value Node, or the second child is not decremented here. The store evaluator also uses it, and decrements it.713cg->decReferenceCount(sideEffectNode);714return TR::TreeEvaluator::dstoreEvaluator(node, cg);715}716717/**718* @brief Generates inlined code for card marking and branch to wrbar helper719* @details720* This method generates code for write barrier for generational GC policies.721* It generates inlined code for722* - checking whether the destination object is tenured723* - checking if concurrent mark thread is active (for gc_modron_wrtbar_cardmark_and_oldcheck)724* - card marking (for gc_modron_wrtbar_cardmark_and_oldcheck)725* - checking if source object is in new space726* - checking if remembered bit is set in object header727*728* @param node: node729* @param dstReg: register holding owning object730* @param srcReg: register holding source object731* @param srm: scratch register manager732* @param doneLabel: done label733* @param wbRef: symbol reference for write barrier helper734* @param cg: code generator735*/736static void737VMnonNullSrcWrtBarCardCheckEvaluator(738TR::Node *node,739TR::Register *dstReg,740TR::Register *srcReg,741TR_ARM64ScratchRegisterManager *srm,742TR::LabelSymbol *doneLabel,743TR::SymbolReference *wbRef ,744TR::CodeGenerator *cg)745{746TR::Compilation *comp = cg->comp();747TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());748auto gcMode = TR::Compiler->om.writeBarrierType();749bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);750bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck);751//We need to do a runtime check on cardmarking for gencon policy if our dstReg is in tenure752753if (gcMode != gc_modron_wrtbar_always)754{755/*756* Generating code checking whether an object is tenured757*758* movzx temp1Reg, #heapBase759* subx temp1Reg, dstReg, temp1Reg760* movzx temp2Reg, #heapSize761* cmpx temp1Reg, temp2Reg762* b.cs doneLabel763*764*/765TR::Register *temp1Reg = srm->findOrCreateScratchRegister();766TR::Register *temp2Reg = srm->findOrCreateScratchRegister();767TR::Register *metaReg = cg->getMethodMetaDataRegister();768769cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator"), *srm);770771if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())772{773generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));774}775else776{777uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();778loadAddressConstant(cg, node, heapBase, temp1Reg);779}780generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, dstReg, temp1Reg);781782if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())783{784generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));785}786else787{788uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();789loadConstant64(cg, node, heapSize, temp2Reg);790}791generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);792generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS);793cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:01oldCheckDone"), *srm);794795TR::LabelSymbol *noChkLabel = generateLabelSymbol(cg);796if (doCrdMrk)797{798/*799* Check if J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE flag is set.800* If not, skip card dirtying.801*802* ldrimmx temp2Reg, [vmThread, #privateFlag]803* tbz temp2Reg, #20, crdMrkDoneLabel804* ldrimmx temp2Reg, [vmThread, #activeCardTableBase]805* addx temp2Reg, temp2Reg, temp1Reg, LSR #card_size_shift ; At this moment, temp1Reg contains (dstReg - #heapBase)806* movzx temp1Reg, 1807* strbimm temp1Reg, [temp2Reg, 0]808*809* crdMrkDoneLabel:810*/811cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:02cardmark"), *srm);812813static_assert(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE == (1 << 20), "We assume that J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE is 0x100000");814TR::LabelSymbol *crdMrkDoneLabel = generateLabelSymbol(cg);815generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, privateFlags)));816generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, temp2Reg, 20, crdMrkDoneLabel);817818cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:03markThreadActiveCheckDone"), *srm);819820uintptr_t card_size_shift = trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize());821if (comp->getOptions()->isVariableActiveCardTableBase() || comp->compileRelocatableCode())822{823generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, activeCardTableBase)));824}825else826{827uintptr_t activeCardTableBase = comp->getOptions()->getActiveCardTableBase();828loadAddressConstant(cg, node, activeCardTableBase, temp2Reg);829}830generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, temp2Reg, temp2Reg, temp1Reg, TR::SH_LSR, card_size_shift);831generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, temp1Reg, 1);832generateMemSrc1Instruction(cg, TR::InstOpCode::strbimm, node, TR::MemoryReference::createWithDisplacement(cg, temp2Reg, 0), temp1Reg);833834generateLabelInstruction(cg, TR::InstOpCode::label, node, crdMrkDoneLabel);835836cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:04cardmarkDone"), *srm);837}838839/*840* Generating code checking whether the src is in new space841*842* movzx temp1Reg, #heapBase843* subx temp1Reg, srcReg, temp1Reg844* movzx temp2Reg, #heapSize845* cmpx temp1Reg, temp2Reg846* b.cc doneLabel847*/848if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())849{850generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));851}852else853{854uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();855loadAddressConstant(cg, node, heapBase, temp1Reg);856}857generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, srcReg, temp1Reg);858859// If doCrdMrk is false, then temp2Reg still contains heapSize860if (doCrdMrk)861{862if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())863{864generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));865}866else867{868uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();869loadConstant64(cg, node, heapSize, temp2Reg);870}871}872873generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);874generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CC);875876cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:05sourceCheckDone"), *srm);877878/*879* Generating code checking whether the remembered bit is set880*881* ldrimmx temp1Reg, [dstReg, #offsetOfHeaderFlags]882* tstimmw temp1Reg, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST883* b.ne doneLabel884* bl jitWriteBarrierGenerational885*/886static_assert(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST == 0xf0, "We assume that J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST is 0xf0");887generateTrg1MemInstruction(cg, (TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx), node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, dstReg, TR::Compiler->om.offsetOfHeaderFlags()));888generateTestImmInstruction(cg, node, temp1Reg, 0x703, false); // 0x703 is immr:imms for 0xf0889generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);890891cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:010VMnonNullSrcWrtBarCardCheckEvaluator:06rememberedBitCheckDone"), *srm);892893srm->reclaimScratchRegister(temp1Reg);894srm->reclaimScratchRegister(temp2Reg);895}896generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(wbRef->getMethodAddress()), NULL, wbRef, NULL);897cg->machine()->setLinkRegisterKilled(true);898}899900/**901* @brief Generates inlined code for card marking902* @details903* This method generates code for write barrier for optavgpause/balanced GC policies.904* It generates inlined code for905* - checking if concurrent mark thread is active (for optavgpause)906* - checking whether the destination object is in heap907* - card marking908*909* @param node: node910* @param dstReg: register holding owning object911* @param srm: scratch register manager912* @param doneLabel: done label913* @param cg: code generator914*/915static void916VMCardCheckEvaluator(917TR::Node *node,918TR::Register *dstReg,919TR_ARM64ScratchRegisterManager *srm,920TR::LabelSymbol *doneLabel,921TR::CodeGenerator *cg)922{923TR::Compilation *comp = cg->comp();924925auto gcMode = TR::Compiler->om.writeBarrierType();926TR::Register *temp1Reg = srm->findOrCreateScratchRegister();927TR::Register *metaReg = cg->getMethodMetaDataRegister();928929cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator"), *srm);930// If gcpolicy is balanced, we must always do card marking931if (gcMode != gc_modron_wrtbar_cardmark_incremental)932{933/*934* Check if J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE flag is set.935* If not, skip card dirtying.936*937* ldrimmx temp1Reg, [vmThread, #privateFlag]938* tbz temp1Reg, #20, doneLabel939*/940941static_assert(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE == (1 << 20), "We assume that J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE is 0x100000");942generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, privateFlags)));943generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, temp1Reg, 20, doneLabel);944945cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:01markThreadActiveCheckDone"), *srm);946}947948TR::Register *temp2Reg = srm->findOrCreateScratchRegister();949/*950* Generating code checking whether an object is in heap951*952* movzx temp1Reg, #heapBase953* subx temp1Reg, dstReg, temp1Reg954* movzx temp2Reg, #heapSize955* cmpx temp1Reg, temp2Reg956* b.cs doneLabel957*958*/959960cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:020heapCheck"), *srm);961962if (comp->getOptions()->isVariableHeapBaseForBarrierRange0() || comp->compileRelocatableCode())963{964generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp1Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));965}966else967{968uintptr_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();969loadAddressConstant(cg, node, heapBase, temp1Reg);970}971generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, temp1Reg, dstReg, temp1Reg);972973// If we know the object is definitely in heap, then we skip the check.974if (!node->isHeapObjectWrtBar())975{976if (comp->getOptions()->isVariableHeapSizeForBarrierRange0() || comp->compileRelocatableCode())977{978generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));979}980else981{982uintptr_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();983loadConstant64(cg, node, heapSize, temp2Reg);984}985generateCompareInstruction(cg, node, temp1Reg, temp2Reg, true);986generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS);987cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:03heapCheckDone"), *srm);988}989990/*991* Generating card dirtying sequence.992* We don't call out to VM helpers.993*994* ldrimmx temp2Reg, [vmThread, #activeCardTableBase]995* addx temp2Reg, temp2Reg, temp1Reg, LSR #card_size_shift ; At this moment, temp1Reg contains (dstReg - #heapBase)996* movzx temp1Reg, 1997* strbimm temp1Reg, [temp2Reg, 0]998*999*/1000uintptr_t card_size_shift = trailingZeroes((uint64_t)comp->getOptions()->getGcCardSize());1001if (comp->getOptions()->isVariableActiveCardTableBase() || comp->compileRelocatableCode())1002{1003generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, temp2Reg, TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, activeCardTableBase)));1004}1005else1006{1007uintptr_t activeCardTableBase = comp->getOptions()->getActiveCardTableBase();1008loadAddressConstant(cg, node, activeCardTableBase, temp2Reg);1009}1010generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, temp2Reg, temp2Reg, temp1Reg, TR::SH_LSR, card_size_shift);1011generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, temp1Reg, 1);1012generateMemSrc1Instruction(cg, TR::InstOpCode::strbimm, node, TR::MemoryReference::createWithDisplacement(cg, temp2Reg, 0), temp1Reg);10131014cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:020VMCardCheckEvaluator:04cardmarkDone"), *srm);1015}10161017static void wrtbarEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, bool srcNonNull, TR::CodeGenerator *cg)1018{1019TR::Compilation *comp = cg->comp();1020TR::Instruction * cursor;1021auto gcMode = TR::Compiler->om.writeBarrierType();1022bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);1023bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark ||gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental);10241025if ((node->getOpCode().isWrtBar() && node->skipWrtBar()) || node->isNonHeapObjectWrtBar())1026return;10271028TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();1029TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);10301031cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator"), *srm);10321033if (doWrtBar) // generational or gencon1034{1035TR::SymbolReference *wbRef = (gcMode == gc_modron_wrtbar_always) ?1036comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef() :1037// use jitWriteBarrierStoreGenerational for both generational and gencon, because we inline card marking.1038comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();10391040if (!srcNonNull)1041{1042// If object is NULL, done1043cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);1044generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, doneLabel);1045cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);1046}1047// Inlines cardmarking and remembered bit check for gencon.1048VMnonNullSrcWrtBarCardCheckEvaluator(node, dstReg, srcReg, srm, doneLabel, wbRef, cg);10491050}1051else if (doCrdMrk)1052{1053TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();1054if (!srcNonNull)1055{1056cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);1057generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, doneLabel);1058cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);1059}1060VMCardCheckEvaluator(node, dstReg, srm, doneLabel, cg);1061}10621063TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());1064conditions->addPostCondition(dstReg, doWrtBar ? TR::RealRegister::x0 : TR::RealRegister::NoReg);1065conditions->addPostCondition(srcReg, doWrtBar ? TR::RealRegister::x1 : TR::RealRegister::NoReg);1066srm->addScratchRegistersToDependencyList(conditions);1067generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);10681069srm->stopUsingRegisters();1070}10711072TR::Register *1073J9::ARM64::TreeEvaluator::conditionalHelperEvaluator(TR::Node *node, TR::CodeGenerator *cg)1074{1075TR::Node *testNode = node->getFirstChild();1076TR::Node *callNode = node->getSecondChild();1077TR::Node *firstChild = testNode->getFirstChild();1078TR::Node *secondChild = testNode->getSecondChild();1079TR::Register *jumpReg = cg->evaluate(firstChild);1080TR::Register *valReg = NULL;1081int32_t i, numArgs = callNode->getNumChildren();1082TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(3, 3, cg->trMemory());10831084TR_ASSERT(numArgs <= 2, "Unexpected number of arguments for helper.");10851086// Helper arguments are in reversed order of the private linkage1087// Argument registers are not needed to be split since the helper will1088// preserve all of them.1089int32_t iArgIndex = 0, fArgIndex = 0;1090TR::Linkage *linkage = cg->createLinkage(TR_Private);1091for (i = numArgs - 1; i >= 0; i--)1092{1093TR::Register *argReg = cg->evaluate(callNode->getChild(i));1094TR::addDependency(conditions, argReg, (argReg->getKind() == TR_GPR) ? // Didn't consider Long here1095linkage->getProperties().getIntegerArgumentRegister(iArgIndex++) : linkage->getProperties().getFloatArgumentRegister(fArgIndex++), argReg->getKind(), cg);1096}10971098TR::addDependency(conditions, jumpReg, TR::RealRegister::x8, TR_GPR, cg);1099bool is64Bit = node->getSecondChild()->getType().isInt64();1100int64_t value = is64Bit ? secondChild->getLongInt() : secondChild->getInt();1101if (secondChild->getOpCode().isLoadConst() && constantIsUnsignedImm12(value))1102{1103generateCompareImmInstruction(cg, testNode, jumpReg, value);1104}1105else1106{1107valReg = cg->evaluate(secondChild);1108generateCompareInstruction(cg, testNode, jumpReg, valReg);1109}11101111TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1112TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());1113cg->addSnippet(snippet);1114TR::ARM64ConditionCode cc = (testNode->getOpCodeValue() == TR::icmpeq) ? TR::CC_EQ : TR::CC_NE;1115TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, cc, conditions);1116gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);1117snippet->gcMap().setGCRegisterMask(0xffffffff);1118// ARM64HelperCallSnippet generates "bl" instruction1119cg->machine()->setLinkRegisterKilled(true);11201121for (i = numArgs - 1; i >= 0; i--)1122cg->decReferenceCount(callNode->getChild(i));1123cg->decReferenceCount(firstChild);1124cg->decReferenceCount(secondChild);1125cg->decReferenceCount(testNode);1126cg->decReferenceCount(callNode);1127return NULL;1128}11291130TR::Register *1131J9::ARM64::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)1132{1133TR::Compilation *comp = cg->comp();1134TR::Node *firstChild = node->getFirstChild();1135TR::Register *valueReg = cg->evaluate(firstChild);11361137TR::Register *destinationRegister = cg->evaluate(node->getSecondChild());1138TR::Register *sideEffectRegister = destinationRegister;11391140if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isShadow())1141{1142TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectRegister, valueReg);1143}11441145TR::Register *sourceRegister;1146bool killSource = false;1147bool isVolatileMode = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && cg->comp()->target().isSMP());1148bool isOrderedMode = (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && cg->comp()->target().isSMP());11491150if (firstChild->getReferenceCount() > 1 && firstChild->getRegister() != NULL)1151{1152if (!firstChild->getRegister()->containsInternalPointer())1153sourceRegister = cg->allocateCollectedReferenceRegister();1154else1155{1156sourceRegister = cg->allocateRegister();1157sourceRegister->setPinningArrayPointer(firstChild->getRegister()->getPinningArrayPointer());1158sourceRegister->setContainsInternalPointer();1159}1160generateMovInstruction(cg, node, sourceRegister, firstChild->getRegister());1161killSource = true;1162}1163else1164sourceRegister = valueReg;11651166TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);11671168// Issue a StoreStore barrier before each volatile store.1169// dmb ishst1170if (isVolatileMode || isOrderedMode)1171generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);11721173generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, tempMR, sourceRegister, NULL);11741175// Issue a StoreLoad barrier after each volatile store.1176// dmb ish1177if (isVolatileMode)1178generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB);11791180wrtbarEvaluator(node, sourceRegister, destinationRegister, firstChild->isNonNull(), cg);11811182if (killSource)1183cg->stopUsingRegister(sourceRegister);11841185cg->decReferenceCount(node->getFirstChild());1186cg->decReferenceCount(node->getSecondChild());1187tempMR->decNodeReferenceCounts(cg);11881189return NULL;1190}11911192TR::Register *1193J9::ARM64::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)1194{1195TR::Compilation *comp = cg->comp();11961197TR::Register *destinationRegister = cg->evaluate(node->getChild(2));1198TR::Node *secondChild = node->getSecondChild();1199TR::Register *sourceRegister;1200bool killSource = false;1201bool usingCompressedPointers = TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, secondChild, true);1202bool isVolatileMode = (node->getSymbolReference()->getSymbol()->isSyncVolatile() && cg->comp()->target().isSMP());1203bool isOrderedMode = (node->getSymbolReference()->getSymbol()->isShadow() && node->getSymbolReference()->getSymbol()->isOrdered() && cg->comp()->target().isSMP());12041205if (secondChild->getReferenceCount() > 1 && secondChild->getRegister() != NULL)1206{1207if (!secondChild->getRegister()->containsInternalPointer())1208sourceRegister = cg->allocateCollectedReferenceRegister();1209else1210{1211sourceRegister = cg->allocateRegister();1212sourceRegister->setPinningArrayPointer(secondChild->getRegister()->getPinningArrayPointer());1213sourceRegister->setContainsInternalPointer();1214}1215generateMovInstruction(cg, node, sourceRegister, secondChild->getRegister());1216killSource = true;1217}1218else1219{1220sourceRegister = cg->evaluate(secondChild);1221}12221223// Handle fieldwatch side effect first if it's enabled.1224if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())1225{1226// The Third child (sideEffectNode) and valueReg's node is also used by the store evaluator below.1227// The store evaluator will also evaluate+decrement it. In order to avoid double1228// decrementing the node we skip doing it here and let the store evaluator do it.1229TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, destinationRegister /* sideEffectRegister */, sourceRegister /* valueReg */);1230}12311232TR::InstOpCode::Mnemonic storeOp = usingCompressedPointers ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx;1233TR::Register *translatedSrcReg = usingCompressedPointers ? cg->evaluate(node->getSecondChild()) : sourceRegister;12341235TR::MemoryReference *tempMR = TR::MemoryReference::createWithRootLoadOrStore(cg, node);12361237// Issue a StoreStore barrier before each volatile store.1238// dmb ishst1239if (isVolatileMode || isOrderedMode)1240generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);12411242generateMemSrc1Instruction(cg, storeOp, node, tempMR, translatedSrcReg);12431244// Issue a StoreLoad barrier after each volatile store.1245// dmb ish1246if (isVolatileMode)1247generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB);12481249wrtbarEvaluator(node, sourceRegister, destinationRegister, secondChild->isNonNull(), cg);12501251if (killSource)1252cg->stopUsingRegister(sourceRegister);12531254if (usingCompressedPointers)1255{1256// The reference count of secondChild has been bumped up.1257cg->decReferenceCount(secondChild);1258}1259cg->decReferenceCount(node->getSecondChild());1260cg->decReferenceCount(node->getChild(2));1261tempMR->decNodeReferenceCounts(cg);12621263if (comp->useCompressedPointers())1264node->setStoreAlreadyEvaluated(true);12651266return NULL;1267}12681269TR::Register *1270J9::ARM64::TreeEvaluator::DIVCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)1271{1272TR::Node *divisor = node->getFirstChild()->getSecondChild();1273bool is64Bit = node->getFirstChild()->getType().isInt64();1274bool isConstDivisor = divisor->getOpCode().isLoadConst();12751276if (!isConstDivisor || (!is64Bit && divisor->getInt() == 0) || (is64Bit && divisor->getLongInt() == 0))1277{1278TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1279TR::Instruction *gcPoint;1280TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());1281cg->addSnippet(snippet);12821283if (isConstDivisor)1284{1285// No explicit check required1286gcPoint = generateLabelInstruction(cg, TR::InstOpCode::b, node, snippetLabel);1287}1288else1289{1290TR::Register *divisorReg = cg->evaluate(divisor);1291TR::InstOpCode::Mnemonic compareOp = is64Bit ? TR::InstOpCode::cbzx : TR::InstOpCode::cbzw;1292gcPoint = generateCompareBranchInstruction(cg, compareOp, node, divisorReg, snippetLabel);1293}1294gcPoint->ARM64NeedsGCMap(cg, 0xffffffff);1295snippet->gcMap().setGCRegisterMask(0xffffffff);1296}12971298cg->evaluate(node->getFirstChild());1299cg->decReferenceCount(node->getFirstChild());1300// ARM64HelperCallSnippet generates "bl" instruction1301cg->machine()->setLinkRegisterKilled(true);1302return NULL;1303}13041305void1306J9::ARM64::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(TR::Node *node, TR::LabelSymbol *mergeLabel, TR::LabelSymbol *helperCallLabel, TR::Register *objReg, TR::Register *temp1Reg, TR::Register *temp2Reg, TR::CodeGenerator *cg, int32_t classFlag)1307{1308// get class of object1309generateLoadJ9Class(node, temp1Reg, objReg, cg);13101311// get memory reference to class flags1312TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1313TR::MemoryReference *classFlagsMemRef = TR::MemoryReference::createWithDisplacement(cg, temp1Reg, static_cast<uintptr_t>(fej9->getOffsetOfClassFlags()));13141315generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, temp1Reg, classFlagsMemRef);1316loadConstant32(cg, node, classFlag, temp2Reg);1317generateTrg1Src2Instruction(cg, TR::InstOpCode::andsw, node, temp1Reg, temp1Reg, temp2Reg);13181319bool generateOOLSection = helperCallLabel == NULL;1320if (generateOOLSection)1321helperCallLabel = generateLabelSymbol(cg);13221323// If obj is value type or value based class instance, call VM helper and throw IllegalMonitorState exception, else continue as usual1324generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, helperCallLabel, TR::CC_NE);13251326// TODO: There is now the possibility of multiple distinct OOL sections with helper calls to be generated when1327// evaluating the TR::monent or TR::monexit nodes:1328//1329// 1. Monitor cache lookup OOL (AArch64 does not use OOL for monitor cache lookup at the moment)1330// 2. Lock reservation OOL (AArch64 does not implement lock reservation yet)1331// 3. Value types or value based object OOL1332// 4. Recursive CAS sequence for Locking1333//1334// These distinct OOL sections may perform non-trivial logic but what they all have in common is they all have a1335// call to the same JIT helper which acts as a fall back. This complexity exists because of the way the evaluators1336// are currently architected and due to the restriction that we cannot have nested OOL code sections. Whenever1337// making future changes to these evaluators we should consider refactoring them to reduce the complexity and1338// attempt to consolidate the calls to the JIT helper so as to not have multiple copies.1339if (generateOOLSection)1340{1341TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, helperCallLabel, mergeLabel, cg);1342cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);1343}1344}13451346/**1347* @brief Generates instruction sequence for looking up the address of lockword of the object1348*1349* @param[in] cg: Code Generator1350* @param[in] node: node1351* @param[in] objReg: register holding object pointer1352* @param[in] addrReg: register for assigning address of the lockword1353* @param[in] metaReg: register holding vmthread struct pointer1354* @param[in] srm: scratch register manager1355* @param[in] callLabel: label for slow path1356*/1357static void1358generateLockwordAddressLookup(TR::CodeGenerator *cg, TR::Node *node, TR::Register *objReg, TR::Register *addrReg, TR::Register *metaReg,1359TR_ARM64ScratchRegisterManager *srm, TR::LabelSymbol *callLabel)1360{1361/*1362* Generating following intruction sequence.1363*1364* ldrimmw objectClassReg, [objReg, #0] ; throws an implicit NPE1365* andimmw objectClassReg, 0xffffff001366* ldrimmx tempReg, [objectClassReg, offsetOfLockOffset]1367* cmpimmx tempReg, #01368* b.le monitorLookupCacheLabel1369* addx addrReg, objReg, tempReg1370* b fallThruFromMonitorLookupCacheLabel1371* monitorLookupCacheLabel:1372* ; slot = (object >> objectAlignmentShift) & (J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE-1)1373* ubfx tempReg, objReg, #alignmentBits, #maskWidth ; maskWidth is popcount(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1)1374*1375* ; vmThread->objectMonitorLookupCache[slot]1376* addx tempReg, metaReg, tempReg, lsl #elementWidth ; elementWidth is log2(sizeof(j9objectmonitor_t))1377* ldrimmw monitorReg, [tempReg, offsetOfMonitorLookupCache]1378*1379* cbzx monitorReg, callLabel ; if monitor is not found, then call out to helper1380* ldrimmx tempReg, [monitorReg, offsetOfMonitor]1381* ldrimmx tempReg, [tempReg, offsetOfUserData]1382* cmpx tempReg, objReg1383* b.ne callLabel ; if userData does not match object, then call out to helper1384* addimmx addrReg, monitorReg, offsetOfAlternateLockWord1385*1386* fallThruFromMonitorLookupCacheLabel:1387*1388*/1389TR::Compilation *comp = TR::comp();1390TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1391TR::Register *tempReg = srm->findOrCreateScratchRegister();13921393TR::Register *objectClassReg = srm->findOrCreateScratchRegister();13941395// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.1396// In this case, nullcheck reference register is objReg and the memory reference does use it,1397// so let InstructonDelegate::setupImplicitNullPointerException handle it.1398generateLoadJ9Class(node, objectClassReg, objReg, cg);13991400TR::MemoryReference *lockOffsetMR = TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, lockOffset));1401generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, lockOffsetMR);1402srm->reclaimScratchRegister(objectClassReg);14031404generateCompareImmInstruction(cg, node, tempReg, 0, true);14051406if (comp->getOption(TR_EnableMonitorCacheLookup))1407{1408TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);1409TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);14101411// If the lockword offset in the class pointer <= 0, then lookup monitor from the cache1412auto branchInstrToLookup = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, monitorLookupCacheLabel, TR::CC_LE);1413TR_Debug * debugObj = cg->getDebug();1414if (debugObj)1415{1416debugObj->addInstructionComment(branchInstrToLookup, "Branch to monitor lookup cache label");1417}1418generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, tempReg);1419auto branchInstrToFallThru = generateLabelInstruction(cg, TR::InstOpCode::b, node, fallThruFromMonitorLookupCacheLabel);1420if (debugObj)1421{1422debugObj->addInstructionComment(branchInstrToFallThru, "Branch to fall through label as lockOffset is positive");1423}1424generateLabelInstruction(cg, TR::InstOpCode::label, node, monitorLookupCacheLabel);1425static const uint32_t maskWidth = populationCount(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1);1426uint32_t shiftAmount = trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes()); // shift amount1427generateUBFXInstruction(cg, node, tempReg, objReg, shiftAmount, maskWidth, true);14281429#ifdef OMR_GC_FULL_POINTERS1430// In mixed refs and large heap builds, the element type of monitorLookupCacheLabel is UDATA.1431uint32_t elementWidth = trailingZeroes((uint32_t)sizeof(UDATA));1432#else1433uint32_t elementWidth = trailingZeroes((uint32_t)sizeof(U_32));1434#endif1435generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, tempReg, metaReg, tempReg, TR::ARM64ShiftCode::SH_LSL, elementWidth);14361437int32_t offsetOfObjectMonitorLookpCache = offsetof(J9VMThread, objectMonitorLookupCache);1438TR::MemoryReference *monitorLookupMR = TR::MemoryReference::createWithDisplacement(cg, tempReg, offsetOfObjectMonitorLookpCache);1439TR::Register *monitorReg = srm->findOrCreateScratchRegister();14401441generateTrg1MemInstruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx, node, monitorReg, monitorLookupMR);1442generateCompareBranchInstruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::cbzw : TR::InstOpCode::cbzx, node, monitorReg, callLabel);14431444int32_t offsetOfMonitor = offsetof(J9ObjectMonitor, monitor);1445TR::MemoryReference *monitorMR = TR::MemoryReference::createWithDisplacement(cg, monitorReg, offsetOfMonitor);1446generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, monitorMR);14471448int32_t offsetOfUserData = offsetof(J9ThreadAbstractMonitor, userData);1449TR::MemoryReference *userDataMR = TR::MemoryReference::createWithDisplacement(cg, tempReg, offsetOfUserData);1450generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg, userDataMR);14511452generateCompareInstruction(cg, node, tempReg, objReg, true);1453generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_NE);14541455int32_t offsetOfAlternateLockword = offsetof(J9ObjectMonitor, alternateLockword);1456generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, monitorReg, offsetOfAlternateLockword);14571458srm->reclaimScratchRegister(monitorReg);1459generateLabelInstruction(cg, TR::InstOpCode::label, node, fallThruFromMonitorLookupCacheLabel);1460}1461else1462{1463generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_LE);1464generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, tempReg);1465}14661467srm->reclaimScratchRegister(tempReg);1468}14691470TR::Register *1471J9::ARM64::TreeEvaluator::monexitEvaluator(TR::Node *node, TR::CodeGenerator *cg)1472{1473TR::Compilation *comp = TR::comp();1474TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1475int32_t staticLwOffset = fej9->getByteOffsetToLockword(cg->getMonClass(node));1476TR::InstOpCode::Mnemonic op;1477TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);14781479if (comp->getOption(TR_FullSpeedDebug) ||1480(isMonitorValueBasedOrValueType == TR_yes) ||1481comp->getOption(TR_DisableInlineMonExit))1482{1483TR::ILOpCodes opCode = node->getOpCodeValue();1484TR::Node::recreate(node, TR::call);1485TR::Register *targetRegister = directCallEvaluator(node, cg);1486TR::Node::recreate(node, opCode);1487return targetRegister;1488}14891490TR::Node *objNode = node->getFirstChild();1491TR::Register *objReg = cg->evaluate(objNode);1492TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();1493TR::Register *metaReg = cg->getMethodMetaDataRegister();14941495TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);1496TR::LabelSymbol *startLabel = generateLabelSymbol(cg);1497TR::LabelSymbol *OOLLabel = generateLabelSymbol(cg);149814991500generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);1501startLabel->setStartInternalControlFlow();15021503const bool isImplicitNullChkIsDoneAtLoadJ9Class = (isMonitorValueBasedOrValueType == TR_maybe) || (staticLwOffset <= 0);1504// If lockword offset is not known at compile time, we need to jump into the OOL code section for helper call if monitor lookup fails.1505// In that case, we cannot have inline recursive code in the OOL code section.1506const bool inlineRecursive = staticLwOffset > 0;15071508// If object is not known to be value type or value based class at compile time, check at run time1509if (isMonitorValueBasedOrValueType == TR_maybe)1510{1511TR::Register *temp1Reg = srm->findOrCreateScratchRegister();1512TR::Register *temp2Reg = srm->findOrCreateScratchRegister();15131514// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.1515// In this case, nullcheck reference register is objReg and the memory reference does use it,1516// so let InstructonDelegate::setupImplicitNullPointerException handle it.1517//1518// If we are generating code for MonitorCacheLookup then we will not have a separate OOL for inlineRecursive, and OOLLabel points1519// to the OOL Containing only helper call. Otherwise, OOL will have other code apart from helper call which we do not want to execute1520// for ValueType or ValueBased object and in that scenario we will need to generate another OOL that just contains helper call.1521generateCheckForValueMonitorEnterOrExit(node, doneLabel, inlineRecursive ? NULL : OOLLabel, objReg, temp1Reg, temp2Reg, cg, J9_CLASS_DISALLOWS_LOCKING_FLAGS);15221523srm->reclaimScratchRegister(temp1Reg);1524srm->reclaimScratchRegister(temp2Reg);1525}15261527TR::Register *addrReg = srm->findOrCreateScratchRegister();15281529// If we do not know the lockword offset at compile time, obtrain it from the class pointer of the object being locked1530if (staticLwOffset <= 0)1531{1532generateLockwordAddressLookup(cg, node, objReg, addrReg, metaReg, srm, OOLLabel);1533}1534else1535{1536generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, staticLwOffset); // stlr instructions does not take immediate offset1537}1538TR::Register *dataReg = srm->findOrCreateScratchRegister();15391540op = fej9->generateCompressedLockWord() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx;1541auto faultingInstruction = generateTrg1MemInstruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));15421543// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.1544// In this case, nullcheck reference register is objReg, but the memory reference does not use it,1545// thus we need to explicitly set implicit exception point here.1546if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck() && (!isImplicitNullChkIsDoneAtLoadJ9Class))1547{1548if (cg->getImplicitExceptionPoint() == NULL)1549{1550if (comp->getOption(TR_TraceCG))1551{1552traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, objNode);1553}1554cg->setImplicitExceptionPoint(faultingInstruction);1555}1556}15571558generateCompareInstruction(cg, node, dataReg, metaReg, true);15591560generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, OOLLabel, TR::CC_NE);15611562static const bool useMemoryBarrierForMonitorExit = feGetEnv("TR_aarch64UseMemoryBarrierForMonitorExit") != NULL;1563if (useMemoryBarrierForMonitorExit)1564{1565generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)1566op = fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx;1567}1568else1569{1570op = fej9->generateCompressedLockWord() ? TR::InstOpCode::stlrw : TR::InstOpCode::stlrx;1571}15721573// Avoid zeroReg from being reused by scratch register manager1574TR::Register *zeroReg = cg->allocateRegister();15751576generateMemSrc1Instruction(cg, op, node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), zeroReg);15771578if (inlineRecursive)1579{1580/*1581* OOLLabel:1582* subimmx dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT1583* andimmx tempReg, dataReg, ~OBJECT_HEADER_LOCK_RECURSION_MASK1584* cmpx metaReg, tempReg1585* b.ne snippetLabel1586* strimmx dataReg, [addrReg]1587* OOLEndLabel:1588* b doneLabel1589*1590*/15911592// This register is only required for OOL code section1593// If we obtain this from scratch register manager, then one more register is used in mainline.1594TR::Register *tempReg = cg->allocateRegister();15951596TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1597TR::LabelSymbol *OOLEndLabel = generateLabelSymbol(cg);1598TR_ARM64OutOfLineCodeSection *oolSection = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(OOLLabel, doneLabel, cg);1599cg->getARM64OutOfLineCodeSectionList().push_front(oolSection);1600oolSection->swapInstructionListsWithCompilation();1601generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLLabel);16021603generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);1604// OBJECT_HEADER_LOCK_RECURSION_MASK is 0xF0, immr=0x38, imms=0x3b for ~(0xF0)1605generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, dataReg, true, 0xe3b);1606generateCompareInstruction(cg, node, metaReg, tempReg, true);16071608TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), OOLEndLabel);1609cg->addSnippet(snippet);1610TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);1611gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);1612snippet->gcMap().setGCRegisterMask(0xffffffff);16131614generateMemSrc1Instruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,1615node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), dataReg);16161617TR::RegisterDependencyConditions *ooldeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());1618ooldeps->addPostCondition(objReg, TR::RealRegister::x0);1619ooldeps->addPostCondition(tempReg, TR::RealRegister::NoReg);1620ooldeps->addPostCondition(dataReg, TR::RealRegister::NoReg);1621ooldeps->addPostCondition(addrReg, TR::RealRegister::NoReg);16221623generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLEndLabel, ooldeps);1624generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);16251626cg->stopUsingRegister(tempReg);1627// ARM64HelperCallSnippet generates "bl" instruction1628cg->machine()->setLinkRegisterKilled(true);1629oolSection->swapInstructionListsWithCompilation();1630}1631else1632{1633TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, OOLLabel, doneLabel, cg);1634cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);1635}16361637TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());1638deps->addPostCondition(objReg, TR::RealRegister::NoReg);1639deps->addPostCondition(zeroReg, TR::RealRegister::xzr);1640srm->addScratchRegistersToDependencyList(deps);16411642generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);16431644doneLabel->setEndInternalControlFlow();16451646cg->stopUsingRegister(zeroReg);1647srm->stopUsingRegisters();16481649cg->decReferenceCount(objNode);1650cg->machine()->setLinkRegisterKilled(true);1651return NULL;1652}16531654TR::Register *1655J9::ARM64::TreeEvaluator::asynccheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)1656{1657// The child contains an inline test. If it succeeds, the helper is called.1658// The address of the helper is contained as a long in this node.1659//1660TR::Node *testNode = node->getFirstChild();1661TR::Node *firstChild = testNode->getFirstChild();1662TR::Register *src1Reg = cg->evaluate(firstChild);1663TR::Node *secondChild = testNode->getSecondChild();16641665TR_ASSERT(testNode->getOpCodeValue() == TR::lcmpeq && secondChild->getLongInt() == -1L, "asynccheck bad format");16661667TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);1668TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);1669TR::SymbolReference *asynccheckHelper = node->getSymbolReference();1670TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, asynccheckHelper, doneLabel);1671cg->addSnippet(snippet);16721673generateCompareImmInstruction(cg, node, src1Reg, secondChild->getLongInt(), true); // 64-bit compare16741675TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_EQ);1676gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);1677snippet->gcMap().setGCRegisterMask(0xffffffff);1678generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);16791680// ARM64HelperCallSnippet generates "bl" instruction1681cg->machine()->setLinkRegisterKilled(true);16821683cg->decReferenceCount(firstChild);1684cg->decReferenceCount(secondChild);1685cg->decReferenceCount(testNode);1686return NULL;1687}16881689TR::Register *1690J9::ARM64::TreeEvaluator::instanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)1691{1692return VMinstanceofEvaluator(node, cg);1693}16941695/**1696* @brief Generates Superclass Test for checkcast/instanceof/ArrayStoreCHK nodes.1697* @details1698* It will generate pseudocode as follows.1699* if (objectClassDepth <= castClassDepth) call Helper1700* else1701* load superClassArrReg,superClassOfObjectClass1702* cmp superClassArrReg[castClassDepth], castClass1703* Here It sets up the condition code for callee to react on.1704*1705* @param[in] node: node1706* @param[in] instanceClassReg: register contains instance class1707* @param[in] instanceClassRegCanBeReclaimed: if true, instanceClassReg is reclaimed1708* @param[in] castClassReg: register contains cast class1709* @param[in] castClassDepth: class depth of the cast class. If -1 is passed, depth is loaded at runtime1710* @param[in] falseLabel: label to jump when test fails1711* @param[in] srm: scratch register manager1712* @param[in] cg: code generator1713*/1714static1715void genSuperClassTest(TR::Node *node, TR::Register *instanceClassReg, bool instanceClassRegCanBeReclaimed, TR::Register *castClassReg, int32_t castClassDepth,1716TR::LabelSymbol *falseLabel, TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)1717{1718// Compare the instance class depth to the cast class depth. If the instance class depth is less than or equal to1719// to the cast class depth then the cast class cannot be a superclass of the instance class.1720//1721TR::Register *instanceClassDepthReg = srm->findOrCreateScratchRegister();1722TR::Register *castClassDepthReg = NULL;1723static_assert(J9AccClassDepthMask == 0xffff, "J9_JAVA_CLASS_DEPTH_MASK must be 0xffff");1724// load lower 16bit of classDepthAndFlags1725generateTrg1MemInstruction(cg, TR::InstOpCode::ldrhimm, node, instanceClassDepthReg,1726TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, classDepthAndFlags)));1727if (castClassDepth != -1)1728{1729// castClassDepth is known at compile time1730if (constantIsUnsignedImm12(castClassDepth))1731{1732generateCompareImmInstruction(cg, node, instanceClassDepthReg, castClassDepth);1733}1734else1735{1736castClassDepthReg = srm->findOrCreateScratchRegister();1737loadConstant32(cg, node, castClassDepth, castClassDepthReg);1738generateCompareInstruction(cg, node, instanceClassDepthReg, castClassDepthReg);1739}1740}1741else1742{1743// castClassDepth needs to be loaded from castClass1744castClassDepthReg = srm->findOrCreateScratchRegister();1745// load lower 16bit of classDepthAndFlags1746generateTrg1MemInstruction(cg, TR::InstOpCode::ldrhimm, node, castClassDepthReg,1747TR::MemoryReference::createWithDisplacement(cg, castClassReg, offsetof(J9Class, classDepthAndFlags)));1748generateCompareInstruction(cg, node, instanceClassDepthReg, castClassDepthReg);1749}1750srm->reclaimScratchRegister(instanceClassDepthReg);1751instanceClassDepthReg = NULL; // prevent re-using this register by error17521753// if objectClassDepth is less than or equal to castClassDepth, then call Helper1754generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, falseLabel, TR::CC_LE);17551756// Load the superclasses array of the instance class and check if the superclass that appears at the depth of the cast class is in fact the cast class.1757// If not, the instance class and cast class are not in the same hierarchy.1758//1759TR::Register *instanceClassSuperClassesArrayReg = srm->findOrCreateScratchRegister();17601761generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, instanceClassSuperClassesArrayReg,1762TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, superclasses)));17631764if (instanceClassRegCanBeReclaimed)1765{1766srm->reclaimScratchRegister(instanceClassReg);1767instanceClassReg = NULL; // prevent re-using this register by error1768}17691770TR::Register *instanceClassSuperClassReg = srm->findOrCreateScratchRegister();17711772int32_t castClassDepthOffset = castClassDepth * TR::Compiler->om.sizeofReferenceAddress();1773if ((castClassDepth != -1) && constantIsUnsignedImm12(castClassDepthOffset))1774{1775generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, instanceClassSuperClassReg,1776TR::MemoryReference::createWithDisplacement(cg, instanceClassSuperClassesArrayReg, castClassDepthOffset));1777}1778else1779{1780if (!castClassDepthReg)1781{1782castClassDepthReg = srm->findOrCreateScratchRegister();1783loadConstant32(cg, node, castClassDepth, castClassDepthReg);1784}1785generateLogicalShiftLeftImmInstruction(cg, node, castClassDepthReg, castClassDepthReg, 3, false);1786generateTrg1MemInstruction(cg, TR::InstOpCode::ldroffx, node, instanceClassSuperClassReg, TR::MemoryReference::createWithIndexReg(cg, instanceClassSuperClassesArrayReg, castClassDepthReg));1787}1788generateCompareInstruction(cg, node, instanceClassSuperClassReg, castClassReg, true);17891790if (castClassDepthReg)1791srm->reclaimScratchRegister(castClassDepthReg);1792srm->reclaimScratchRegister(instanceClassSuperClassesArrayReg);1793srm->reclaimScratchRegister(instanceClassSuperClassReg);17941795// At this point EQ flag will be set if the cast class is a superclass of the instance class. Caller is responsible for acting on the result.1796}17971798/**1799* @brief Generates Arbitrary Class Test for instanceOf or checkCast node1800*/1801static1802void genInstanceOfOrCheckCastArbitraryClassTest(TR::Node *node, TR::Register *instanceClassReg, TR_OpaqueClassBlock *arbitraryClass,1803TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)1804{1805TR::Compilation *comp = cg->comp();1806TR::Register *arbitraryClassReg = srm->findOrCreateScratchRegister();1807TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(comp->fe());18081809if (comp->compileRelocatableCode())1810{1811loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, TR_ClassPointer);1812}1813else1814{1815bool isUnloadAssumptionRequired = fej9->isUnloadAssumptionRequired(arbitraryClass, comp->getCurrentMethod());18161817if (isUnloadAssumptionRequired)1818{1819loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, TR_NoRelocation, true);1820}1821else1822{1823loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(arbitraryClass), arbitraryClassReg, NULL, true);1824}1825}1826generateCompareInstruction(cg, node, instanceClassReg, arbitraryClassReg, true);18271828srm->reclaimScratchRegister(arbitraryClassReg);18291830// At this point EQ flag will be set if the cast class matches the arbitrary class. Caller is responsible for acting on the result.1831}18321833/**1834* @brief Generates ArrayOfJavaLangObjectTest (object class is reference array) for instanceOf or checkCast node1835* @details1836* scratchReg1 = load (objectClassReg+offset_romClass)1837* scratchReg1 = load (ROMClass+J9ROMClass+modifiers)1838* tstImmediate with J9AccClassArray(0x10000)1839* If not Array -> Branch to Fail Label1840* testerReg = load (objectClassReg + leafcomponent_offset)1841* testerReg = load (objectClassReg + offset_romClass)1842* testerReg = load (objectClassReg + offset_modifiers)1843* tstImmediate with J9AccClassInternalPrimitiveType(0x20000)1844* // if branchOnPrimitiveTypeCheck is true1845* If arrays of primitive -> Branch to Fail Label1846* // else1847* if not arrays of primitive set condition code to Zero indicating true result1848*/1849static1850void genInstanceOfOrCheckCastObjectArrayTest(TR::Node *node, TR::Register *instanceClassReg, TR::LabelSymbol *falseLabel, bool useTBZ,1851TR_ARM64ScratchRegisterManager *srm, TR::CodeGenerator *cg)1852{1853// Load the object ROM class and test the modifiers to see if this is an array.1854//1855TR::Register *scratchReg = srm->findOrCreateScratchRegister();1856generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9Class, romClass)));1857generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9ROMClass, modifiers)));1858static_assert(J9AccClassArray == 0x10000, "J9AccClassArray must be 0x10000");1859// If not array, branch to falseLabel1860if (useTBZ)1861{1862generateTestBitBranchInstruction(cg, TR::InstOpCode::tbz, node, scratchReg, 16, falseLabel);1863}1864else1865{1866generateTestImmInstruction(cg, node, scratchReg, 0x400); // 0x400 is immr:imms for 0x100001867generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, falseLabel, TR::CC_EQ);1868}18691870// If it's an array, load the component ROM class and test the modifiers to see if this is a primitive array.1871//1872generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, instanceClassReg, offsetof(J9ArrayClass, componentType)));1873generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9Class, romClass)));1874generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, scratchReg, TR::MemoryReference::createWithDisplacement(cg, scratchReg, offsetof(J9ROMClass, modifiers)));18751876static_assert(J9AccClassInternalPrimitiveType == 0x20000, "J9AccClassInternalPrimitiveType must be 0x20000");1877generateTestImmInstruction(cg, node, scratchReg, 0x3c0); // 0x3c0 is immr:imms for 0x2000018781879srm->reclaimScratchRegister(scratchReg);18801881// At this point EQ flag will be set if this is not a primitive array. Caller is responsible acting on the result.1882}18831884template<class It>1885bool1886isTerminalSequence(It it, It itEnd)1887{1888return (it + 1) == itEnd;1889}18901891template<class It>1892bool1893isNextItemGoToTrue(It it, It itEnd)1894{1895return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::GoToTrue;1896}18971898template<class It>1899bool1900isNextItemGoToFalse(It it, It itEnd)1901{1902return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::GoToFalse;1903}19041905template<class It>1906bool1907isNextItemHelperCall(It it, It itEnd)1908{1909return (!isTerminalSequence(it, itEnd)) && *(it + 1) == J9::TreeEvaluator::HelperCall;1910}19111912TR::Register *1913J9::ARM64::TreeEvaluator::VMinstanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)1914{1915TR::Compilation *comp = cg->comp();1916TR_OpaqueClassBlock *compileTimeGuessClass;1917int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();1918if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);1919TR_ASSERT_FATAL(maxProfiledClasses <= 4, "Maximum 4 profiled classes per site allowed because we use a fixed stack allocated buffer for profiled classes\n");1920InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];1921bool topClassWasCastClass = false;1922float topClassProbability = 0.0;19231924bool profiledClassIsInstanceOf;1925InstanceOfOrCheckCastProfiledClasses profiledClassesList[4];1926uint32_t numberOfProfiledClass;1927uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);192819291930TR::Node *objectNode = node->getFirstChild();1931TR::Node *castClassNode = node->getSecondChild();1932TR::Register *objectReg = cg->evaluate(objectNode);1933TR::Register *castClassReg = NULL;1934TR::Register *resultReg = cg->allocateRegister();19351936TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);1937TR::LabelSymbol *callHelperLabel = generateLabelSymbol(cg);1938TR::LabelSymbol *nextSequenceLabel = generateLabelSymbol(cg);19391940TR::Instruction *gcPoint;19411942TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();1943TR::Register *objectClassReg = NULL;19441945// initial result is false1946generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0);19471948auto itBegin = std::begin(sequences);1949const auto itEnd = std::next(itBegin, numSequencesRemaining);19501951for (auto it = itBegin; it != itEnd; it++)1952{1953auto current = *it;1954switch (current)1955{1956case EvaluateCastClass:1957TR_ASSERT(!castClassReg, "Cast class already evaluated");1958castClassReg = cg->gprClobberEvaluate(castClassNode);1959break;1960case LoadObjectClass:1961TR_ASSERT(!objectClassReg, "Object class already loaded");1962objectClassReg = srm->findOrCreateScratchRegister();1963generateLoadJ9Class(node, objectClassReg, objectReg, cg);1964break;1965case NullTest:1966if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());1967TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");1968if (isNextItemGoToTrue(it, itEnd))1969{1970generateCompareImmInstruction(cg, node, objectReg, 0, true);1971generateCSetInstruction(cg, node, resultReg, TR::CC_NE);1972// consume GoToTrue1973it++;1974}1975else1976{1977auto nullLabel = isNextItemHelperCall(it, itEnd) ? callHelperLabel : doneLabel;1978// branch to doneLabel to return false1979generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objectReg, nullLabel);1980}1981break;1982case GoToTrue:1983TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToTrue should be the terminal sequence");1984if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToTrue\n", node->getOpCode().getName());1985generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1);1986break;1987case GoToFalse:1988TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToFalse should be the terminal sequence");1989if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToFalse\n", node->getOpCode().getName());1990break;1991case ClassEqualityTest:1992if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ClassEqualityTest\n", node->getOpCode().getName());1993cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Equality", comp->signature()),1,TR::DebugCounter::Undetermined);19941995generateCompareInstruction(cg, node, objectClassReg, castClassReg, true);1996generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);1997break;1998case SuperClassTest:1999{2000if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting SuperClassTest\n", node->getOpCode().getName());2001cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/SuperClassTest", comp->signature()),1,TR::DebugCounter::Undetermined);20022003int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);2004auto falseLabel = isNextItemGoToFalse(it, itEnd) ? doneLabel : (isNextItemHelperCall(it, itEnd) ? callHelperLabel : nextSequenceLabel);2005genSuperClassTest(node, objectClassReg, false, castClassReg, castClassDepth, falseLabel, srm, cg);2006generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);2007}2008break;2009case ProfiledClassTest:2010{2011if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ProfiledClassTest\n", node->getOpCode().getName());2012cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Profile", comp->signature()),1,TR::DebugCounter::Undetermined);20132014auto profiledClassesIt = std::begin(profiledClassesList);2015auto profiledClassesItEnd = std::next(profiledClassesIt, numberOfProfiledClass);2016while (profiledClassesIt != profiledClassesItEnd)2017{2018if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: ProfiledClassTest: profiledClass = %p, isProfiledClassInstanceOfCastClass = %s\n",2019node->getOpCode().getName(), profiledClassesIt->profiledClass,2020(profiledClassesIt->isProfiledClassInstanceOfCastClass) ? "true" : "false");20212022genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, profiledClassesIt->profiledClass, srm, cg);2023/**2024* At this point EQ flag will be set if the profiledClass matches the cast class.2025* Set resultReg to 1 if isProfiledClassInstanceOfCastClass is true2026*/2027if (profiledClassesIt->isProfiledClassInstanceOfCastClass)2028{2029generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);2030}2031profiledClassesIt++;2032if (profiledClassesIt != profiledClassesItEnd)2033{2034generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);2035}2036}2037}2038break;2039case CompileTimeGuessClassTest:2040if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CompileTimeGuessClassTest\n", node->getOpCode().getName());2041cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/compTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);20422043genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, compileTimeGuessClass, srm, cg);2044generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);20452046break;2047case CastClassCacheTest:2048{2049if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CastClassCacheTest\n", node->getOpCode().getName());2050cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/CastClassCache", comp->signature()),1,TR::DebugCounter::Undetermined);20512052/**2053* Compare the cast class against the cache on the instance class.2054* If they are the same the cast is successful.2055* If not it's either because the cache class does not match the cast class,2056* or it does match except the cache class has the low bit set, which means the cast is not successful.2057*/2058TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();2059generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,2060TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, castClassCache)));2061generateTrg1Src2Instruction(cg, TR::InstOpCode::eorx, node, castClassCacheReg, castClassCacheReg, castClassReg);2062generateCompareImmInstruction(cg, node, castClassCacheReg, 1, true);20632064/**2065* At this point LT flag will be set if the cast is successful, EQ flag will be set if the cast is unsuccessful,2066* and GT flag will be set if the cache class did not match the cast class.2067*/2068generateCSetInstruction(cg, node, resultReg, TR::CC_LT);2069srm->reclaimScratchRegister(castClassCacheReg);2070}2071break;2072case ArrayOfJavaLangObjectTest:2073{2074TR_ASSERT_FATAL(isNextItemGoToFalse(it, itEnd), "ArrayOfJavaLangObjectTest is always followed by GoToFalse");2075if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ArrayOfJavaLangObjectTest\n", node->getOpCode().getName());2076cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);2077genInstanceOfOrCheckCastObjectArrayTest(node, objectClassReg, doneLabel, true, srm, cg);2078generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);2079}2080break;2081case DynamicCacheObjectClassTest:2082TR_ASSERT_FATAL(false, "%s: DynamicCacheObjectClassTest is not implemented on aarch64\n", node->getOpCode().getName());2083break;2084case DynamicCacheDynamicCastClassTest:2085TR_ASSERT_FATAL(false, "%s: DynamicCacheDynamicCastClassTest is not implemented on aarch64\n", node->getOpCode().getName());2086break;2087case HelperCall:2088{2089TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "HelperCall should be the terminal sequence");2090if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting HelperCall\n", node->getOpCode().getName());2091TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::icall, resultReg, callHelperLabel, doneLabel, cg);20922093cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);20942095if (it == itBegin)2096{2097// If HelperCall is only the item in the sequence, branch to OOL2098generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);2099}2100}2101break;2102}21032104switch (current)2105{2106case ClassEqualityTest:2107case SuperClassTest:2108case ProfiledClassTest:2109case CompileTimeGuessClassTest:2110case ArrayOfJavaLangObjectTest:2111/**2112* For those tests, EQ flag is set if the cache hit2113*/2114if (isNextItemHelperCall(it, itEnd))2115{2116generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_NE);2117}2118else if (!isNextItemGoToFalse(it, itEnd))2119{2120// If other tests follow, branch to doneLabel2121generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);2122}2123break;2124case CastClassCacheTest:2125if (isNextItemHelperCall(it, itEnd))2126{2127generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_GT);2128}2129else if (!isNextItemGoToFalse(it, itEnd))2130{2131generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_LE);2132}2133break;2134case NullTest:2135break;2136default:2137if (isNextItemHelperCall(it, itEnd))2138{2139generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);2140}2141break;2142}21432144if (!isTerminalSequence(it, itEnd))2145{2146generateLabelInstruction(cg, TR::InstOpCode::label, node, nextSequenceLabel);2147nextSequenceLabel = generateLabelSymbol(cg);2148}21492150}21512152if (objectClassReg)2153srm->reclaimScratchRegister(objectClassReg);21542155TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3 + srm->numAvailableRegisters(), cg->trMemory());2156srm->addScratchRegistersToDependencyList(deps);21572158deps->addPostCondition(resultReg, TR::RealRegister::NoReg);2159deps->addPostCondition(objectReg, TR::RealRegister::NoReg);21602161if (castClassReg)2162{2163deps->addPostCondition(castClassReg, TR::RealRegister::NoReg);2164}21652166generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);21672168cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast/%s/fastPath",2169node->getOpCode().getName()),2170*srm);2171cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast.perMethod/%s/(%s)/%d/%d/fastPath",2172node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),2173*srm);217421752176cg->decReferenceCount(objectNode);2177cg->decReferenceCount(castClassNode);2178// Stop using every reg in the deps except these ones.2179//2180deps->stopUsingDepRegs(cg, objectReg, resultReg);21812182node->setRegister(resultReg);21832184return resultReg;2185}21862187/**2188* @brief Generates null test instructions2189*2190* @param[in] cg: code generator2191* @param[in] objReg: register holding object2192* @param[in] node: null check node2193* @param[in] nullSymRef: symbol reference of null check2194*2195*/2196static2197void generateNullTest(TR::CodeGenerator *cg, TR::Register *objReg, TR::Node *node, TR::SymbolReference *nullSymRef = NULL)2198{2199TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);2200TR::Compilation *comp = cg->comp();2201if (nullSymRef == NULL)2202{2203nullSymRef = comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol());2204}2205TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, nullSymRef, NULL);2206cg->addSnippet(snippet);22072208TR::Instruction *cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objReg, snippetLabel);2209cbzInstruction->setNeedsGCMap(0xffffffff);2210snippet->gcMap().setGCRegisterMask(0xffffffff);2211// ARM64HelperCallSnippet generates "bl" instruction2212cg->machine()->setLinkRegisterKilled(true);2213}22142215TR::Register *2216J9::ARM64::TreeEvaluator::VMcheckcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)2217{2218TR::Compilation *comp = cg->comp();2219TR_OpaqueClassBlock *compileTimeGuessClass;2220int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();2221if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);2222TR_ASSERT_FATAL(maxProfiledClasses <= 4, "Maximum 4 profiled classes per site allowed because we use a fixed stack allocated buffer for profiled classes\n");2223InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];2224bool topClassWasCastClass = false;2225float topClassProbability = 0.0;22262227bool profiledClassIsInstanceOf;2228InstanceOfOrCheckCastProfiledClasses profiledClassesList[4];2229uint32_t numberOfProfiledClass;2230uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);223122322233TR::Node *objectNode = node->getFirstChild();2234TR::Node *castClassNode = node->getSecondChild();2235TR::Register *objectReg = cg->evaluate(objectNode);2236TR::Register *castClassReg = NULL;22372238TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);2239TR::LabelSymbol *callHelperLabel = generateLabelSymbol(cg);2240TR::LabelSymbol *nextSequenceLabel = generateLabelSymbol(cg);22412242TR::Instruction *gcPoint;22432244TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();2245TR::Register *objectClassReg = NULL;22462247auto itBegin = std::begin(sequences);2248const auto itEnd = std::next(itBegin, numSequencesRemaining);22492250for (auto it = itBegin; it != itEnd; it++)2251{2252auto current = *it;2253switch (current)2254{2255case EvaluateCastClass:2256TR_ASSERT(!castClassReg, "Cast class already evaluated");2257castClassReg = cg->gprClobberEvaluate(castClassNode);2258break;2259case LoadObjectClass:2260TR_ASSERT(!objectClassReg, "Object class already loaded");2261objectClassReg = srm->findOrCreateScratchRegister();2262generateLoadJ9Class(node, objectClassReg, objectReg, cg);2263break;2264case NullTest:2265if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());2266TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");2267if (node->getOpCodeValue() == TR::checkcastAndNULLCHK)2268{2269TR::Node *nullChkInfo = comp->findNullChkInfo(node);2270generateNullTest(cg, objectReg, nullChkInfo);2271}2272else2273{2274if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))2275{2276generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, objectReg, callHelperLabel);2277}2278else2279{2280// branch to doneLabel if object is null2281generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, objectReg, doneLabel);2282}2283}2284break;2285case GoToTrue:2286TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "GoToTrue should be the terminal sequence");2287if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting GoToTrue\n", node->getOpCode().getName());2288break;2289case ClassEqualityTest:2290if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ClassEqualityTest\n", node->getOpCode().getName());2291cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Equality", comp->signature()),1,TR::DebugCounter::Undetermined);22922293generateCompareInstruction(cg, node, objectClassReg, castClassReg, true);2294break;2295case SuperClassTest:2296{2297if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting SuperClassTest\n", node->getOpCode().getName());2298cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/SuperClassTest", comp->signature()),1,TR::DebugCounter::Undetermined);22992300int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);2301auto falseLabel = (isNextItemGoToFalse(it, itEnd) || isNextItemHelperCall(it, itEnd)) ? callHelperLabel : nextSequenceLabel;2302genSuperClassTest(node, objectClassReg, false, castClassReg, castClassDepth, falseLabel, srm, cg);2303}2304break;2305/**2306* Following switch case generates sequence of instructions for profiled class test for this checkCast node2307* arbitraryClassReg1 <= profiledClass2308* if (arbitraryClassReg1 == objClassReg)2309* JMP DoneLabel2310* else2311* continue to NextTest2312*/2313case ProfiledClassTest:2314{2315if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ProfiledClassTest\n", node->getOpCode().getName());2316cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Profile", comp->signature()),1,TR::DebugCounter::Undetermined);23172318auto profiledClassesIt = std::begin(profiledClassesList);2319auto profiledClassesItEnd = std::next(profiledClassesIt, numberOfProfiledClass);2320while (profiledClassesIt != profiledClassesItEnd)2321{2322if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: ProfiledClassTest: profiledClass = %p, isProfiledClassInstanceOfCastClass = %s\n",2323node->getOpCode().getName(), profiledClassesIt->profiledClass,2324(profiledClassesIt->isProfiledClassInstanceOfCastClass) ? "true" : "false");23252326genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, profiledClassesIt->profiledClass, srm, cg);2327/**2328* At this point EQ flag will be set if the profiledClass matches the cast class.2329*/2330profiledClassesIt++;2331if (profiledClassesIt != profiledClassesItEnd)2332{2333generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);2334}2335}2336}2337break;2338case CompileTimeGuessClassTest:2339if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CompileTimeGuessClassTest\n", node->getOpCode().getName());2340cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/compTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);23412342genInstanceOfOrCheckCastArbitraryClassTest(node, objectClassReg, compileTimeGuessClass, srm, cg);2343break;2344/**2345* Following switch case generates sequence of instructions for cast class cache test for this checkCast node2346* Load castClassCacheReg, offsetOf(J9Class,castClassCache)2347* if castClassCacheReg == castClassReg2348* JMP DoneLabel2349* else2350* continue to NextTest2351*/2352case CastClassCacheTest:2353{2354if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting CastClassCacheTest\n", node->getOpCode().getName());2355cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/CastClassCache", comp->signature()),1,TR::DebugCounter::Undetermined);23562357/**2358* Compare the cast class against the cache on the instance class.2359* If they are the same the cast is successful.2360* If not it's either because the cache class does not match the cast class,2361* or it does match except the cache class has the low bit set, which means the cast is not successful.2362* In those cases, we need to call out to helper.2363*/2364TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();2365generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,2366TR::MemoryReference::createWithDisplacement(cg, objectClassReg, offsetof(J9Class, castClassCache)));2367generateCompareInstruction(cg, node, castClassCacheReg, castClassReg, true);2368/**2369* At this point, EQ flag will be set if the cast is successful.2370*/2371srm->reclaimScratchRegister(castClassCacheReg);2372}2373break;2374case ArrayOfJavaLangObjectTest:2375{2376TR_ASSERT_FATAL(isNextItemGoToFalse(it, itEnd), "ArrayOfJavaLangObjectTest is always followed by GoToFalse");2377if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting ArrayOfJavaLangObjectTest\n", node->getOpCode().getName());2378cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);23792380/*2381* In this case, the false label is in the OOLCodeSection, and it can be placed far away from here.2382* The offset of tbz/tbnz instruction must be within +-32KB range, so we do not use tbz/tbnz.2383*/2384genInstanceOfOrCheckCastObjectArrayTest(node, objectClassReg, callHelperLabel, false, srm, cg);2385}2386break;2387case DynamicCacheObjectClassTest:2388TR_ASSERT_FATAL(false, "%s: DynamicCacheObjectClassTest is not implemented on aarch64\n", node->getOpCode().getName());2389break;2390case DynamicCacheDynamicCastClassTest:2391TR_ASSERT_FATAL(false, "%s: DynamicCacheDynamicCastClassTest is not implemented on aarch64\n", node->getOpCode().getName());2392break;2393case GoToFalse:2394case HelperCall:2395{2396auto seq = (current == GoToFalse) ? "GoToFalse" : "HelperCall";2397TR_ASSERT_FATAL(isTerminalSequence(it, itEnd), "%s should be the terminal sequence", seq);2398if (comp->getOption(TR_TraceCG)) traceMsg(comp, "%s: Emitting %s\n", node->getOpCode().getName(), seq);2399TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, callHelperLabel, doneLabel, cg);24002401cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);24022403if (it == itBegin)2404{2405// If HelperCall or GoToFalse is the only item in the sequence, branch to OOL2406generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);2407}2408}2409break;2410}24112412switch (current)2413{2414case ClassEqualityTest:2415case SuperClassTest:2416case ProfiledClassTest:2417case CompileTimeGuessClassTest:2418case CastClassCacheTest:2419case ArrayOfJavaLangObjectTest:2420/**2421* For those tests, EQ flag is set if the cast is successful2422*/2423if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))2424{2425generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callHelperLabel, TR::CC_NE);2426}2427else2428{2429// When other tests follow, branch to doneLabel if EQ flag is set2430generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);2431}2432break;2433case NullTest:2434break;2435default:2436if (isNextItemHelperCall(it, itEnd) || isNextItemGoToFalse(it, itEnd))2437{2438generateLabelInstruction(cg, TR::InstOpCode::b, node, callHelperLabel);2439}2440}24412442if (!isTerminalSequence(it, itEnd))2443{2444generateLabelInstruction(cg, TR::InstOpCode::label, node, nextSequenceLabel);2445nextSequenceLabel = generateLabelSymbol(cg);2446}24472448}24492450if (objectClassReg)2451srm->reclaimScratchRegister(objectClassReg);24522453TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3 + srm->numAvailableRegisters(), cg->trMemory());2454srm->addScratchRegistersToDependencyList(deps);24552456deps->addPostCondition(objectReg, TR::RealRegister::NoReg);24572458if (castClassReg)2459{2460deps->addPostCondition(castClassReg, TR::RealRegister::NoReg);2461}24622463generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);24642465cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast/%s/fastPath",2466node->getOpCode().getName()),2467*srm);2468cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfOrCheckCast.perMethod/%s/(%s)/%d/%d/fastPath",2469node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),2470*srm);247124722473cg->decReferenceCount(objectNode);2474cg->decReferenceCount(castClassNode);2475// Stop using every reg in the deps except objectReg2476//2477deps->stopUsingDepRegs(cg, objectReg);24782479node->setRegister(NULL);24802481return NULL;2482}24832484TR::Register *2485J9::ARM64::TreeEvaluator::checkcastAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)2486{2487return VMcheckcastEvaluator(node, cg);2488}24892490TR::Register *2491J9::ARM64::TreeEvaluator::checkcastEvaluator(TR::Node *node, TR::CodeGenerator *cg)2492{2493return VMcheckcastEvaluator(node, cg);2494}24952496TR::Register *2497J9::ARM64::TreeEvaluator::flushEvaluator(TR::Node *node, TR::CodeGenerator *cg)2498{2499TR::ILOpCodes op = node->getOpCodeValue();25002501if (op == TR::allocationFence)2502{2503if (!node->canOmitSync())2504{2505// StoreStore barrier is required after publishing new object reference to other threads.2506// dmb ishst (Inner Shareable store barrier)2507generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xA);2508}2509}2510else2511{2512uint32_t imm;2513if (op == TR::loadFence)2514{2515// TR::loadFence is used for both loadLoadFence and acquireFence.2516// Loads before the barrier are ordered before loads/stores after the barrier.2517// dmb ishld (Inner Shareable load barrier)2518imm = 0x9;2519}2520else if (op == TR::storeFence)2521{2522// TR::storeFence is used for both storeStoreFence and releaseFence.2523// Loads/Stores before the barrier are ordered before stores after the barrier.2524// dmb ish (Inner Shareable full barrier)2525imm = 0xB;2526}2527else2528{2529// TR::fullFence is used for fullFence.2530// dmb ish (Inner Shareable full barrier)2531imm = 0xB;2532}2533generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, imm);2534}25352536return NULL;2537}25382539/**2540* Helper template function to get value clamped between low and high.2541* std::clamp is unavailable for C++11.2542*/2543template<typename T>2544const T clamp(const T& value, const T& low, const T& high)2545{2546return std::min(std::max(value, low), high);2547}25482549template<typename T>2550const T clamp(const int& value, const T& low, const T& high)2551{2552return static_cast<T>(std::min(std::max(value, static_cast<int>(low)), static_cast<int>(high)));2553}25542555/**2556* @brief Generates instructions for allocating heap for new/newarray/anewarray2557* The limitation of the current implementation:2558* - supports `new` only2559* - does not support dual TLH2560* - does not support realtimeGC2561*2562* @param[in] node: node2563* @param[in] cg: code generator2564* @param[in] isVariableLen: true if allocating variable length array2565* @param[in] allocSize: size to allocate on heap if isVariableLen is false. offset to data start if isVariableLen is true.2566* @param[in] elementSize: size of array elements. Used if isVariableLen is true.2567* @param[in] resultReg: the register that contains allocated heap address2568* @param[in] lengthReg: the register that contains array length (number of elements). Used if isVariableLen is true.2569* @param[in] heapTopReg: temporary register 12570* @param[in] tempReg: temporary register 22571* @param[in] dataSizeReg: temporary register 3, this register contains the number of allocated bytes if isVariableLen is true.2572* @param[in] conditions: dependency conditions2573* @param[in] callLabel: label to call when allocation fails2574*/2575static void2576genHeapAlloc(TR::Node *node, TR::CodeGenerator *cg, bool isVariableLen, uint32_t allocSize, int32_t elementSize, TR::Register *resultReg,2577TR::Register *lengthReg, TR::Register *heapTopReg, TR::Register *tempReg, TR::Register *dataSizeReg, TR::RegisterDependencyConditions *conditions,2578TR::LabelSymbol *callLabel)2579{2580static const char *pTLHPrefetchThresholdSize = feGetEnv("TR_AArch64PrefetchThresholdSize");2581static const char *pTLHPrefetchArrayLineCount = feGetEnv("TR_AArch64PrefetchArrayLineCount");2582static const char *pTLHPrefetchType = feGetEnv("TR_AArch64PrefetchType");2583static const char *pTLHPrefetchTarget = feGetEnv("TR_AArch64PrefetchTarget");2584static const char *pTLHPrefetchPolicy = feGetEnv("TR_AArch64PrefetchPolicy");2585static const int cacheLineSize = (TR::Options::_TLHPrefetchLineSize > 0) ? TR::Options::_TLHPrefetchLineSize : 64;2586static const int tlhPrefetchLineCount = (TR::Options::_TLHPrefetchLineCount > 0) ? TR::Options::_TLHPrefetchLineCount : 1;2587static const int tlhPrefetchStaggeredLineCount = (TR::Options::_TLHPrefetchStaggeredLineCount > 0) ? TR::Options::_TLHPrefetchStaggeredLineCount : 4;2588static const int tlhPrefetchThresholdSize = (pTLHPrefetchThresholdSize) ? atoi(pTLHPrefetchThresholdSize) : 64;2589static const int tlhPrefetchArrayLineCount = (pTLHPrefetchArrayLineCount) ? atoi(pTLHPrefetchArrayLineCount) : 4;2590static const ARM64PrefetchType tlhPrefetchType = (pTLHPrefetchType) ? clamp(atoi(pTLHPrefetchType), ARM64PrefetchType::LOAD, ARM64PrefetchType::STORE)2591: ARM64PrefetchType::STORE;2592static const ARM64PrefetchTarget tlhPrefetchTarget = (pTLHPrefetchTarget) ? clamp(atoi(pTLHPrefetchTarget), ARM64PrefetchTarget::L1, ARM64PrefetchTarget::L3)2593: ARM64PrefetchTarget::L3;2594static const ARM64PrefetchPolicy tlhPrefetchPolicy = (pTLHPrefetchPolicy) ? clamp(atoi(pTLHPrefetchPolicy), ARM64PrefetchPolicy::KEEP, ARM64PrefetchPolicy::STRM)2595: ARM64PrefetchPolicy::STRM;25962597TR::Compilation *comp = cg->comp();2598TR::Register *metaReg = cg->getMethodMetaDataRegister();25992600uint32_t maxSafeSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();2601bool isTooSmallToPrefetch = false;26022603static_assert(offsetof(J9VMThread, heapAlloc) < 32760, "Expecting offset to heapAlloc fits in imm12");2604static_assert(offsetof(J9VMThread, heapTop) < 32760, "Expecting offset to heapTop fits in imm12");26052606if (isVariableLen)2607{2608/*2609* Instructions for allocating heap for variable length `newarray/anewarray`.2610*2611* cmp lengthReg, #maxObjectSizeInElements2612* b.hi callLabel2613*2614* uxtw tempReg, lengthReg2615* ldrimmx resultReg, [metaReg, offsetToHeapAlloc]2616* lsl tempReg, lengthReg, #shiftValue2617* addimmx tempReg, tempReg, #headerSize+round-12618* cmpimmw lengthReg, 0; # of array elements2619* andimmx tempReg, tempReg, #-round2620* movzx tempReg2, aligned(#sizeOfDiscontiguousArrayHeader)2621* cselx dataSizeReg, tempReg, tempReg2, ne2622* ldrimmx heapTopReg, [metaReg, offsetToHeapTop]2623* addimmx tempReg, resultReg, dataSizeReg2624*2625* # check for overflow2626* cmp tempReg, heapTopReg2627* b.gt callLabel2628* # write back heapAlloc2629* strimmx tempReg, [metaReg, offsetToHeapAlloc]2630*2631*/2632// Detect large or negative number of elements in case addr wrap-around2633//2634// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes2635// of slush will exist between the top of the heap and the end of the address space.2636//2637uint32_t maxObjectSizeInElements = maxSafeSize / elementSize;2638if (constantIsUnsignedImm12(maxObjectSizeInElements))2639{2640generateCompareImmInstruction(cg, node, lengthReg, maxObjectSizeInElements, false);2641}2642else2643{2644loadConstant32(cg, node, maxObjectSizeInElements, tempReg);2645generateCompareInstruction(cg, node, lengthReg, tempReg, false);2646}2647// Must be an unsigned comparison on sizes.2648//2649generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_HI, conditions);26502651// At this point, lengthReg must contain non-negative value.2652generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::ubfmx, node, tempReg, lengthReg, 31); // uxtw26532654// Load the base of the next available heap storage.2655generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, resultReg,2656TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)));26572658// calculate variable size, rounding up if necessary to a intptr_t multiple boundary2659//2660static const int32_t objectAlignmentInBytes = TR::Compiler->om.getObjectAlignmentInBytes();2661bool headerAligned = (allocSize % objectAlignmentInBytes) == 0;2662// zero indicates no rounding is necessary2663const int32_t round = ((elementSize >= objectAlignmentInBytes) && headerAligned) ? 0 : objectAlignmentInBytes;26642665// If the array is zero length, the array is a discontiguous.2666// Large heap builds do not need to care about this because the2667// contiguous and discontiguous array headers are the same size.2668//2669auto shiftAmount = trailingZeroes(elementSize);2670auto displacement = (round > 0) ? round - 1 : 0;2671uint32_t alignmentMaskEncoding;2672bool maskN;26732674if (round != 0)2675{2676if (round == 8)2677{2678maskN = true;2679alignmentMaskEncoding = 0xf7c;2680}2681else2682{2683bool canBeEncoded = logicImmediateHelper(-round, true, maskN, alignmentMaskEncoding);2684TR_ASSERT_FATAL(canBeEncoded, "mask for andimmx (%d) cannnot be encoded", (-round));2685}2686}2687if (comp->useCompressedPointers())2688{2689if (shiftAmount > 0)2690{2691generateLogicalShiftLeftImmInstruction(cg, node, tempReg, tempReg, shiftAmount, true);2692}2693generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, tempReg, tempReg, (allocSize + displacement));2694generateCompareImmInstruction(cg, node, lengthReg, 0, false); // lengthReg is 32bit2695if (round != 0)2696{2697generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, tempReg, maskN, alignmentMaskEncoding);2698}2699static const int32_t zeroArraySizeAligned = OMR::align(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), objectAlignmentInBytes);2700loadConstant64(cg, node, zeroArraySizeAligned, heapTopReg);27012702generateCondTrg1Src2Instruction(cg, TR::InstOpCode::cselx, node, dataSizeReg, tempReg, heapTopReg, TR::CC_NE);2703}2704else2705{2706if (shiftAmount > 0)2707{2708generateLogicalShiftLeftImmInstruction(cg, node, tempReg, tempReg, shiftAmount, false);2709}2710generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, dataSizeReg, tempReg, (allocSize + displacement));2711if (round != 0)2712{2713generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, dataSizeReg, dataSizeReg, maskN, alignmentMaskEncoding);2714}2715}27162717// Load the heap top2718generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, heapTopReg,2719TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapTop)));2720generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, tempReg, resultReg, dataSizeReg);27212722}2723else2724{2725isTooSmallToPrefetch = allocSize < tlhPrefetchThresholdSize;2726/*2727* Instructions for allocating heap for fixed length `new/newarray/anewarray`.2728*2729* ldrimmx resultReg, [metaReg, offsetToHeapAlloc]2730* ldrimmx heapTopReg, [metaReg, offsetToHeapTop]2731* addsimmx tempReg, resultReg, #allocSize2732* # check for address wrap-around if necessary2733* b.cc callLabel2734* # check for overflow2735* cmp tempReg, heapTopReg2736* b.gt callLabel2737* # write back heapAlloc2738* strimmx tempReg, [metaReg, offsetToHeapAlloc]2739*2740*/27412742// Load the base of the next available heap storage.2743generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, resultReg,2744TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)));2745// Load the heap top2746generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmx, node, heapTopReg,2747TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapTop)));27482749// Calculate the after-allocation heapAlloc: if the size is huge,2750// we need to check address wrap-around also. This is unsigned2751// integer arithmetic, checking carry bit is enough to detect it.2752const bool isAllocSizeInReg = !constantIsUnsignedImm12(allocSize);2753const bool isWithinMaxSafeSize = allocSize <= maxSafeSize;2754if (isAllocSizeInReg)2755{2756loadConstant64(cg, node, allocSize, tempReg);2757generateTrg1Src2Instruction(cg, isWithinMaxSafeSize ? TR::InstOpCode::addx : TR::InstOpCode::addsx,2758node, tempReg, resultReg, tempReg);2759}2760else2761{2762generateTrg1Src1ImmInstruction(cg, isWithinMaxSafeSize ? TR::InstOpCode::addimmx : TR::InstOpCode::addsimmx,2763node, tempReg, resultReg, allocSize);2764}2765if (!isWithinMaxSafeSize)2766{2767generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_CC, conditions);2768}27692770}27712772// Ok, tempReg now points to where the object will end on the TLH.2773// resultReg will contain the start of the object where we'll write out our2774// J9Class*. Should look like this in memory:2775// [heapAlloc == resultReg] ... tempReg ...//... heapTopReg.27762777//Here we check if we overflow the TLH Heap Top2778//branch to heapAlloc Snippet if we overflow (ie callLabel).2779generateCompareInstruction(cg, node, tempReg, heapTopReg, true);2780generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, callLabel, TR::CC_GT, conditions);27812782if (comp->getOption(TR_TLHPrefetch) && (!isTooSmallToPrefetch))2783{2784int offset = tlhPrefetchStaggeredLineCount * cacheLineSize;2785int loopCount = (node->getOpCodeValue() == TR::New) ? tlhPrefetchLineCount : tlhPrefetchArrayLineCount;27862787for (int i = 0; i < loopCount; i++)2788{2789generateMemImmInstruction(cg, TR::InstOpCode::prfmimm, node,2790TR::MemoryReference::createWithDisplacement(cg, tempReg, offset), toPrefetchOp(tlhPrefetchType, tlhPrefetchTarget, tlhPrefetchPolicy));2791offset += cacheLineSize;2792}2793}2794//Done, write back to heapAlloc here.2795generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,2796TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapAlloc)), tempReg);27972798}27992800/**2801* @brief Generates instructions for initializing allocated memory for new/newarray/anewarray2802*2803* @param[in] node: node2804* @param[in] cg: code generator2805* @param[in] isVariableLen: true if allocating variable length array2806* @param[in] objectSize: size of the object2807* @param[in] headerSize: header size of the object2808* @param[in] objectReg: the register that holds object address2809* @param[in] dataSizeReg: the register that holds the number of allocated bytes if isVariableLength is true2810* @param[in] zeroReg: the register whose value is zero2811* @param[in] tempReg1: temporary register 12812* @param[in] tempReg2: temporary register 22813*/2814static void2815genZeroInitObject(TR::Node *node, TR::CodeGenerator *cg, bool isVariableLen, uint32_t objectSize, uint32_t headerSize, TR::Register *objectReg,2816TR::Register *dataSizeReg, TR::Register *zeroReg, TR::Register *tempReg1, TR::Register *tempReg2)2817{28182819if (isVariableLen)2820{2821/*2822* Instructions for clearing allocated memory for variable length2823* We assume that the objectSize is multiple of 8.2824* Because the size of the header of contiguous arrays are multiple of 8,2825* the data size to clear is also multiple of 8.2826*2827* subimmx dataSizeReg, dataSizeReg, #headerSize2828* cbz dataSizeReg, zeroinitdone2829* // Adjust tempReg1 so that (tempReg1 + 16) points to2830* // the memory area beyond the object header2831* subimmx tempReg1, objectReg, (16 - #headerSize)2832* cmp dataSizeReg, #642833* b.lt medium2834* large: // dataSizeReg >= 642835* lsr tempReg2, dataSizeReg, #6 // loopCount = dataSize / 642836* and dataSizeReg, dataSizeReg, #632837* loopStart:2838* stpimmx xzr, xzr, [tempReg1, #16]2839* stpimmx xzr, xzr, [tempReg1, #32]2840* stpimmx xzr, xzr, [tempReg1, #48]2841* stpimmx xzr, xzr, [tempReg1, #64]! // pre index2842* subsimmx tempReg2, tempReg2, #12843* b.ne loopStart2844* cbz dataSizeReg, zeroinitdone2845* medium:2846* addx tempReg2, tempReg1, dataSizeReg // tempReg2 points to 16bytes before the end of the buffer2847* // write residues. We have at least 8bytes before (tempReg1 + 16)2848* cmpimmx dataSizeReg, #162849* b.le write162850* cmpimmx dataSizeReg, #322851* b.le write322852* cmpimmx dataSizeReg, #482853* b.le write482854* write64: // 56 bytes2855* stpimmx xzr, xzr, [tempReg2, #-48]2856* write48: // 40, 48 bytes2857* stpimmx xzr, xzr, [tempReg2, #-32]2858* write32: // 24, 32 bytes2859* stpimmx xzr, xzr, [tempReg2, #-16]2860* write16: // 8, 16 bytes2861* stpimmx xzr, xzr, [tempReg2]2862* zeroinitdone:2863*/2864TR::LabelSymbol *zeroInitDoneLabel = generateLabelSymbol(cg);2865TR::LabelSymbol *mediumLabel = generateLabelSymbol(cg);2866TR::LabelSymbol *loopStartLabel = generateLabelSymbol(cg);2867TR::LabelSymbol *write16Label = generateLabelSymbol(cg);2868TR::LabelSymbol *write32Label = generateLabelSymbol(cg);2869TR::LabelSymbol *write48Label = generateLabelSymbol(cg);28702871generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subimmx, node, dataSizeReg, dataSizeReg, headerSize);2872if (!TR::Compiler->om.generateCompressedObjectHeaders())2873{2874// Array Header is smaller than the minimum data size in compressedrefs build, so this check is not necessary.2875// This check is necessary in large heap build.2876generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, dataSizeReg, zeroInitDoneLabel);2877}2878generateTrg1Src1ImmInstruction(cg, (headerSize > 16) ? TR::InstOpCode::addimmx : TR::InstOpCode::subimmx,2879node, tempReg1, objectReg, std::abs(static_cast<int>(headerSize - 16)));28802881generateCompareImmInstruction(cg, node, dataSizeReg, 64, true);2882generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, mediumLabel, TR::CC_LT);2883generateLogicalShiftRightImmInstruction(cg, node, tempReg2, dataSizeReg, 6, true);2884generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, dataSizeReg, dataSizeReg, true, 5); // N = true, immr:imms = 528852886generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStartLabel);2887generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 16), zeroReg, zeroReg);2888generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 32), zeroReg, zeroReg);2889generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 48), zeroReg, zeroReg);2890generateMemSrc2Instruction(cg, TR::InstOpCode::stpprex, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, 64), zeroReg, zeroReg);2891generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmx, node, tempReg2, tempReg2, 1);2892generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loopStartLabel, TR::CC_NE);28932894generateLabelInstruction(cg, TR::InstOpCode::label, node, mediumLabel);2895generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, tempReg2, tempReg1, dataSizeReg);2896generateCompareImmInstruction(cg, node, dataSizeReg, 16, true);2897generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write16Label, TR::CC_LE);2898generateCompareImmInstruction(cg, node, dataSizeReg, 32, true);2899generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write32Label, TR::CC_LE);2900generateCompareImmInstruction(cg, node, dataSizeReg, 48, true);2901generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, write48Label, TR::CC_LE);29022903generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -48), zeroReg, zeroReg);2904generateLabelInstruction(cg, TR::InstOpCode::label, node, write48Label);2905generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -32), zeroReg, zeroReg);2906generateLabelInstruction(cg, TR::InstOpCode::label, node, write32Label);2907generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, -16), zeroReg, zeroReg);2908generateLabelInstruction(cg, TR::InstOpCode::label, node, write16Label);2909generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg2, 0), zeroReg, zeroReg);2910generateLabelInstruction(cg, TR::InstOpCode::label, node, zeroInitDoneLabel);2911}2912else2913{2914/*2915* Instructions for clearing allocated memory for fixed length2916* We assume that the objectSize is multiple of 4.2917*2918* // Adjust tempReg1 so that (tempReg1 + 16) points to2919* // the memory area beyond the object header2920* subimmx tempReg1, objectReg, (16 - #headerSize)2921* movzx tempReg2, loopCount2922* loop:2923* stpimmx xzr, xzr, [tempReg1, #16]2924* stpimmx xzr, xzr, [tempReg1, #32]2925* stpimmx xzr, xzr, [tempReg1, #48]2926* stpimmx xzr, xzr, [tempReg1, #64]! // pre index2927* subsimmx tempReg2, tempReg2, #12928* b.ne loop2929* // write residues2930* stpimmx xzr, xzr [tempReg1, #16]2931* stpimmx xzr, xzr [tempReg1, #32]2932* stpimmx xzr, xzr [tempReg1, #48]2933* strimmx xzr, [tempReg1, #64]2934* strimmw xzr, [tempReg1, #72]2935*2936*/2937// TODO align tempReg1 to 16-byte boundary if objectSize is large2938// TODO use vector register2939// TODO use dc zva2940const int32_t unrollFactor = 4;2941const int32_t width = 16; // use stp to clear 16 bytes2942const int32_t loopCount = (objectSize - headerSize) / (unrollFactor * width);2943const int32_t res1 = (objectSize - headerSize) % (unrollFactor * width);2944const int32_t residueCount = res1 / width;2945const int32_t res2 = res1 % width;2946TR::LabelSymbol *loopStart = generateLabelSymbol(cg);29472948generateTrg1Src1ImmInstruction(cg, (headerSize > 16) ? TR::InstOpCode::addimmx : TR::InstOpCode::subimmx,2949node, tempReg1, objectReg, std::abs(static_cast<int>(headerSize - 16)));29502951if (loopCount > 0)2952{2953if (loopCount > 1)2954{2955loadConstant64(cg, node, loopCount, tempReg2);2956generateLabelInstruction(cg, TR::InstOpCode::label, node, loopStart);2957}2958for (int i = 1; i < unrollFactor; i++)2959{2960generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, i * width), zeroReg, zeroReg);2961}2962generateMemSrc2Instruction(cg, TR::InstOpCode::stpprex, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, unrollFactor * width), zeroReg, zeroReg);2963if (loopCount > 1)2964{2965generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmx, node, tempReg2, tempReg2, 1);2966generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loopStart, TR::CC_NE);2967}2968}2969for (int i = 0; i < residueCount; i++)2970{2971generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, (i + 1) * width), zeroReg, zeroReg);2972}2973int offset = (residueCount + 1) * width;2974if (res2 >= 8)2975{2976generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, offset), zeroReg);2977offset += 8;2978}2979if ((res2 & 4) > 0)2980{2981generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node, TR::MemoryReference::createWithDisplacement(cg, tempReg1, offset), zeroReg);2982}29832984}2985}29862987/**2988* @brief Generates instructions for initializing Object header for new/newarray/anewarray2989*2990* @param[in] node: node2991* @param[in] cg: code generator2992* @param[in] clazz: class pointer to store in the object header2993* @param[in] objectReg: the register that holds object address2994* @param[in] classReg: the register that holds class2995* @param[in] zeroReg: the register whose value is zero2996* @param[in] tempReg1: temporary register 12997* @param[in] isTLHHasNotBeenCleared: true if TLH has not been cleared2998*/2999static void3000genInitObjectHeader(TR::Node *node, TR::CodeGenerator *cg, TR_OpaqueClassBlock *clazz, TR::Register *objectReg, TR::Register *classReg, TR::Register *zeroReg, TR::Register *tempReg1, bool isTLHHasNotBeenCleared)3001{3002TR_ASSERT(clazz, "Cannot have a null OpaqueClassBlock\n");3003TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());3004TR::Compilation *comp = cg->comp();3005TR::Register * clzReg = classReg;3006TR::Register *metaReg = cg->getMethodMetaDataRegister();30073008// For newarray/anewarray, classReg holds the class pointer of array elements3009// Prepare valid class pointer for arrays3010if (node->getOpCodeValue() != TR::New)3011{3012if (cg->needClassAndMethodPointerRelocations())3013{3014if (comp->getOption(TR_UseSymbolValidationManager))3015{3016loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(clazz), tempReg1, TR_ClassPointer);3017}3018else3019{3020if (node->getOpCodeValue() == TR::newarray)3021{3022generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,3023TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, javaVM)));3024generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,3025TR::MemoryReference::createWithDisplacement(cg, tempReg1,3026fej9->getPrimitiveArrayOffsetInJavaVM(node->getSecondChild()->getInt())));3027}3028else3029{3030generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tempReg1,3031TR::MemoryReference::createWithDisplacement(cg, classReg, offsetof(J9Class, arrayClass)));3032}3033}3034}3035else3036{3037loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(clazz), tempReg1);3038}3039clzReg = tempReg1;3040}30413042// Store the class3043generateMemSrc1Instruction(cg, TR::Compiler->om.generateCompressedObjectHeaders() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,3044node, TR::MemoryReference::createWithDisplacement(cg, objectReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField()), clzReg);30453046int32_t lwOffset = fej9->getByteOffsetToLockword(clazz);3047if (clazz && (lwOffset > 0))3048{3049int32_t lwInitialValue = fej9->getInitialLockword(clazz);30503051if ((0 != lwInitialValue) || isTLHHasNotBeenCleared)3052{3053bool isCompressedLockWord = fej9->generateCompressedLockWord();3054if (0 != lwInitialValue)3055{3056loadConstant64(cg, node, lwInitialValue, tempReg1);3057generateMemSrc1Instruction(cg, isCompressedLockWord ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,3058node, TR::MemoryReference::createWithDisplacement(cg, objectReg, lwOffset), tempReg1);3059}3060else3061{3062generateMemSrc1Instruction(cg, isCompressedLockWord ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,3063node, TR::MemoryReference::createWithDisplacement(cg, objectReg, lwOffset), zeroReg);3064}3065}3066}3067}30683069/**3070* @brief Generates instructions for initializing array header for newarray/anewarray3071*3072* @param[in] node: node3073* @param[in] cg: code generator3074* @param[in] clazz: class pointer to store in the object header3075* @param[in] objectReg: the register that holds object address3076* @param[in] classReg: the register that holds class3077* @param[in] sizeReg: the register that holds array length.3078* @param[in] zeroReg: the register whose value is zero3079* @param[in] tempReg1: temporary register 13080* @param[in] isBatchClearTLHEnabled: true if BatchClearTLH is enabled3081* @param[in] isTLHHasNotBeenCleared: true if TLH has not been cleared3082*/3083static void3084genInitArrayHeader(TR::Node *node, TR::CodeGenerator *cg, TR_OpaqueClassBlock *clazz, TR::Register *objectReg, TR::Register *classReg, TR::Register *sizeReg, TR::Register *zeroReg, TR::Register *tempReg1,3085bool isBatchClearTLHEnabled, bool isTLHHasNotBeenCleared)3086{3087TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());30883089genInitObjectHeader(node, cg, clazz, objectReg, classReg, zeroReg, tempReg1, isTLHHasNotBeenCleared);3090if (node->getFirstChild()->getOpCode().isLoadConst() && (node->getFirstChild()->getInt() == 0))3091{3092// If BatchClearTLH is enabled, we do not need to write 0 into the header.3093if (!isBatchClearTLHEnabled)3094{3095// constant zero length array3096// Zero length arrays are discontiguous (i.e. they also need the discontiguous length field to be 0) because3097// they are indistinguishable from non-zero length discontiguous arrays3098if (TR::Compiler->om.generateCompressedObjectHeaders())3099{3100// `mustBeZero` and `size` field of J9IndexableObjectDiscontiguousCompressed must be cleared.3101// We cannot use `strimmx` in this case because offset would be 4 bytes, which cannot be encoded as imm12 of `strimmx`.3102generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,3103TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField() - 4),3104zeroReg);3105generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,3106TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField()),3107zeroReg);3108}3109else3110{3111// `strimmx` can be used as offset is 8 bytes.3112generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,3113TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField() - 4),3114zeroReg);3115}3116}3117}3118else3119{3120// Store the array size3121// If the size field of contiguous array header is 0, the array is discontiguous and3122// the size of discontiguous array must be in the size field of discontiguous array header.3123// For now, we do not create non-zero length discontigous array,3124// so it is safe to write 0 into the size field of discontiguous array header.3125//3126// In the compressedrefs build, the size field of discontigous array header is cleared by instructions generated by genZeroInit().3127// In the large heap build, we must clear size and mustBeZero field here3128if (TR::Compiler->om.generateCompressedObjectHeaders())3129{3130generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,3131TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField()),3132sizeReg);3133if (!isTLHHasNotBeenCleared)3134{3135// If BatchClearTLH is not enabled and TLH has not been cleared, write 0 into the size field of J9IndexableObjectDiscontiguousCompressed.3136generateMemSrc1Instruction(cg, TR::InstOpCode::strimmw, node,3137TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField()),3138zeroReg);3139}3140}3141else3142{3143generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::ubfmx, node, tempReg1, sizeReg, 31); // uxtw3144generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node,3145TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField()),3146tempReg1);3147}3148}3149}31503151/**3152* @brief Generates instructions for inlining new/newarray/anewarray3153* The limitation of the current implementation:3154* - supports `new` only3155* - does not support dual TLH3156* - does not support realtimeGC3157*3158* @param node: node3159* @param cg: code generator3160*3161* @return register containing allocated object, NULL if inlining is not possible3162*/3163TR::Register *3164J9::ARM64::TreeEvaluator::VMnewEvaluator(TR::Node *node, TR::CodeGenerator *cg)3165{3166TR::Compilation * comp = cg->comp();3167TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());31683169bool generateArraylets = comp->generateArraylets();31703171if (comp->suppressAllocationInlining() || TR::TreeEvaluator::requireHelperCallValueTypeAllocation(node, cg))3172return NULL;31733174if (comp->getOption(TR_DisableTarokInlineArrayletAllocation) && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))3175return NULL;31763177// Currently, we do not support realtime GC.3178if (comp->getOptions()->realTimeGC())3179return NULL;31803181TR_OpaqueClassBlock *clazz = NULL;31823183// --------------------------------------------------------------------------------3184//3185// Find the class info and allocation size depending on the node type.3186//3187// Returns:3188// size of object includes the size of the array header3189// -1 cannot allocate inline3190// 0 variable sized allocation3191//3192// --------------------------------------------------------------------------------31933194int32_t objectSize = comp->canAllocateInline(node, clazz);3195if (objectSize < 0)3196return NULL;3197const bool isVariableLength = (objectSize == 0);31983199static long count = 0;3200if (!performTransformation(comp, "O^O <%3d> Inlining Allocation of %s [0x%p].\n", count++, node->getOpCode().getName(), node))3201return NULL;320232033204// 1. Evaluate children3205int32_t headerSize;3206TR::Node *firstChild = node->getFirstChild();3207TR::Node *secondChild = NULL;3208int32_t elementSize = 0;3209bool isArrayNew = false;3210TR::Register *classReg = NULL;3211TR::Register *lengthReg = NULL;3212TR::ILOpCodes opCode = node->getOpCodeValue();3213if (opCode == TR::New)3214{3215// classReg is passed to the VM helper on the slow path and subsequently clobbered; copy it for later nodes if necessary3216classReg = cg->gprClobberEvaluate(firstChild);3217headerSize = TR::Compiler->om.objectHeaderSizeInBytes();3218lengthReg = cg->allocateRegister();3219}3220else3221{3222if (generateArraylets || TR::Compiler->om.useHybridArraylets())3223{3224if (node->getOpCodeValue() == TR::newarray)3225elementSize = TR::Compiler->om.getSizeOfArrayElement(node);3226else if (comp->useCompressedPointers())3227elementSize = TR::Compiler->om.sizeofReferenceField();3228else3229elementSize = TR::Compiler->om.sizeofReferenceAddress();32303231if (generateArraylets)3232headerSize = fej9->getArrayletFirstElementOffset(elementSize, comp);3233else3234headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();3235}3236else3237{3238elementSize = TR::Compiler->om.getSizeOfArrayElement(node);3239headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();3240}32413242// If the array cannot be allocated as a contiguous array, then comp->canAllocateInline should have returned -1.3243// The only exception is when the array length is 0.3244isArrayNew = true;32453246lengthReg = cg->evaluate(firstChild);3247secondChild = node->getSecondChild();3248// classReg is passed to the VM helper on the slow path and subsequently clobbered; copy it for later nodes if necessary3249classReg = cg->gprClobberEvaluate(secondChild);3250}32513252TR::Instruction *firstInstructionAfterClassAndLengthRegsAreReady = cg->getAppendInstruction();3253// 2. Calculate allocation size3254int32_t allocateSize = isVariableLength ? headerSize : (objectSize + TR::Compiler->om.getObjectAlignmentInBytes() - 1) & (-TR::Compiler->om.getObjectAlignmentInBytes());32553256// 3. Allocate registers3257TR::Register *resultReg = cg->allocateRegister();3258TR::Register *tempReg1 = cg->allocateRegister();3259TR::Register *tempReg2 = cg->allocateRegister();3260TR::Register *tempReg3 = isVariableLength ? cg->allocateRegister() : NULL;3261TR::Register *zeroReg = cg->allocateRegister();3262TR::LabelSymbol *callLabel = generateLabelSymbol(cg);3263TR::LabelSymbol *callReturnLabel = generateLabelSymbol(cg);3264TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);32653266// 4. Setup register dependencies3267const int numReg = isVariableLength ? 7 : 6;3268TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(numReg, numReg, cg->trMemory());3269TR::addDependency(conditions, classReg, TR::RealRegister::NoReg, TR_GPR, cg);3270TR::addDependency(conditions, resultReg, TR::RealRegister::NoReg, TR_GPR, cg);3271TR::addDependency(conditions, lengthReg, TR::RealRegister::NoReg, TR_GPR, cg);3272TR::addDependency(conditions, zeroReg, TR::RealRegister::xzr, TR_GPR, cg);3273TR::addDependency(conditions, tempReg1, TR::RealRegister::NoReg, TR_GPR, cg);3274TR::addDependency(conditions, tempReg2, TR::RealRegister::NoReg, TR_GPR, cg);3275if (isVariableLength)3276{3277TR::addDependency(conditions, tempReg3, TR::RealRegister::NoReg, TR_GPR, cg);3278}32793280// 5. Allocate object/array on heap3281genHeapAlloc(node, cg, isVariableLength, allocateSize, elementSize, resultReg, lengthReg, tempReg1, tempReg2, tempReg3, conditions, callLabel);32823283// 6. Setup OOL Section for slowpath3284TR::Register *objReg = cg->allocateCollectedReferenceRegister();3285TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::acall, objReg, callLabel, callReturnLabel, cg);3286cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);32873288// 7. Initialize the allocated memory area with zero3289const bool isBatchClearTLHEnabled = fej9->tlhHasBeenCleared();3290if (!isBatchClearTLHEnabled)3291{3292// TODO selectively initialize necessary slots3293if (!node->canSkipZeroInitialization())3294{3295genZeroInitObject(node, cg, isVariableLength, objectSize, headerSize, resultReg, tempReg3, zeroReg, tempReg1, tempReg2);3296}3297}3298const bool tlhHasNotBeenCleared = (!isBatchClearTLHEnabled) && node->canSkipZeroInitialization();32993300// 8. Initialize Object Header3301if (isArrayNew)3302{3303genInitArrayHeader(node, cg, clazz, resultReg, classReg, lengthReg, zeroReg, tempReg1, isBatchClearTLHEnabled, tlhHasNotBeenCleared);33043305/* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are3306* simple as we just need to check first child of the node for array size. For variable length arrays3307* runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.3308*3309* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use3310* discontiguous header layout.3311*/3312TR::Register *offsetReg = tempReg1;3313TR::Register *firstDataElementReg = tempReg2;3314TR::MemoryReference *dataAddrSlotMR = NULL;33153316if (isVariableLength && TR::Compiler->om.compressObjectReferences())3317{3318/* We need to check lengthReg (array size) at runtime to determine correct offset of dataAddr field.3319* Here we deal only with compressed refs because dataAddr offset for discontiguous and contiguous3320* arrays is the same in full refs.3321*/3322if (comp->getOption(TR_TraceCG))3323traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);33243325TR_ASSERT_FATAL_WITH_NODE(node,3326(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,3327"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "3328"But was %d bytes for discontigous and %d bytes for contiguous array.\n",3329fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());33303331// Since array size is capped at 32 bits, we don't need to check all 64 bits of lengthReg.3332generateCompareImmInstruction(cg, node, lengthReg, 0, false);3333generateCSetInstruction(cg, node, offsetReg, TR::CC_EQ);3334// offsetReg at this point is either 1 (if lengthReg == 0) or 0 (otherwise).3335// offsetReg = resultReg + (offsetReg << 3)3336generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, offsetReg, resultReg, offsetReg, TR::SH_LSL, 3);33373338dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, offsetReg, fej9->getOffsetOfContiguousDataAddrField());3339generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, offsetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());3340}3341else if (!isVariableLength && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)3342{3343if (comp->getOption(TR_TraceCG))3344traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);33453346dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getOffsetOfDiscontiguousDataAddrField());3347generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, resultReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes());3348}3349else3350{3351if (comp->getOption(TR_TraceCG))3352{3353traceMsg(comp,3354"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",3355node);3356}33573358if (!TR::Compiler->om.compressObjectReferences())3359{3360TR_ASSERT_FATAL_WITH_NODE(node,3361fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),3362"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "3363"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",3364fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());3365}33663367dataAddrSlotMR = TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getOffsetOfContiguousDataAddrField());3368generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, firstDataElementReg, resultReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());3369}33703371generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, node, dataAddrSlotMR, firstDataElementReg);33723373if (generateArraylets)3374{3375// write arraylet pointer to object header3376generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, tempReg2, resultReg, headerSize);3377if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)3378generateLogicalShiftRightImmInstruction(cg, node, tempReg2, tempReg2, TR::Compiler->om.compressedReferenceShiftOffset());33793380TR::InstOpCode::Mnemonic storeOp = comp->useCompressedPointers() ? TR::InstOpCode::strimmx : TR::InstOpCode::strimmw;3381generateMemSrc1Instruction(cg, storeOp, node,3382TR::MemoryReference::createWithDisplacement(cg, resultReg, fej9->getFirstArrayletPointerOffset(comp)),3383tempReg2);3384}3385}3386else3387{3388genInitObjectHeader(node, cg, clazz, resultReg, classReg, zeroReg, tempReg1, tlhHasNotBeenCleared);3389}33903391// 9. Setup AOT relocation3392if (cg->comp()->compileRelocatableCode() && (opCode == TR::New || opCode == TR::anewarray))3393{3394TR::Instruction *firstInstruction = firstInstructionAfterClassAndLengthRegsAreReady->getNext();3395TR_OpaqueClassBlock *classToValidate = clazz;33963397TR_RelocationRecordInformation *recordInfo = (TR_RelocationRecordInformation *) comp->trMemory()->allocateMemory(sizeof(TR_RelocationRecordInformation), heapAlloc);3398recordInfo->data1 = allocateSize;3399recordInfo->data2 = node->getInlinedSiteIndex();3400recordInfo->data3 = (uintptr_t) callLabel;3401recordInfo->data4 = (uintptr_t) firstInstruction;34023403TR::SymbolReference * classSymRef;3404TR_ExternalRelocationTargetKind reloKind;34053406if (opCode == TR::New)3407{3408classSymRef = node->getFirstChild()->getSymbolReference();3409reloKind = TR_VerifyClassObjectForAlloc;3410}3411else3412{3413classSymRef = node->getSecondChild()->getSymbolReference();3414reloKind = TR_VerifyRefArrayForAlloc;34153416if (comp->getOption(TR_UseSymbolValidationManager))3417classToValidate = comp->fej9()->getComponentClassFromArrayClass(classToValidate);3418}34193420if (comp->getOption(TR_UseSymbolValidationManager))3421{3422TR_ASSERT_FATAL(classToValidate, "classToValidate should not be NULL, clazz=%p\n", clazz);3423recordInfo->data5 = (uintptr_t)classToValidate;3424}34253426cg->addExternalRelocation(new (cg->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(firstInstruction, (uint8_t *) classSymRef, (uint8_t *) recordInfo, reloKind, cg),3427__FILE__, __LINE__, node);3428}34293430generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);34313432// At this point the object is initialized and we can move it to a collected register.3433// The out of line path will do the same.3434generateMovInstruction(cg, node, objReg, resultReg, true);34353436generateLabelInstruction(cg, TR::InstOpCode::label, node, callReturnLabel);34373438// Cleanup registers3439cg->stopUsingRegister(tempReg1);3440cg->stopUsingRegister(tempReg2);3441cg->stopUsingRegister(zeroReg);3442cg->stopUsingRegister(resultReg);3443if (isVariableLength)3444{3445cg->stopUsingRegister(tempReg3);3446}34473448cg->decReferenceCount(firstChild);3449if (opCode == TR::New)3450{3451if (classReg != firstChild->getRegister())3452{3453cg->stopUsingRegister(classReg);3454}3455cg->stopUsingRegister(lengthReg);3456}3457else3458{3459cg->decReferenceCount(secondChild);3460if (classReg != secondChild->getRegister())3461{3462cg->stopUsingRegister(classReg);3463}3464}34653466node->setRegister(objReg);3467return objReg;3468}34693470TR::Register *3471J9::ARM64::TreeEvaluator::multianewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)3472{3473TR::ILOpCodes opCode = node->getOpCodeValue();3474TR::Node::recreate(node, TR::acall);3475TR::Register *targetRegister = directCallEvaluator(node, cg);3476TR::Node::recreate(node, opCode);3477return targetRegister;3478}34793480TR::Register *3481J9::ARM64::TreeEvaluator::newObjectEvaluator(TR::Node *node, TR::CodeGenerator *cg)3482{3483TR::Register *targetRegister = TR::TreeEvaluator::VMnewEvaluator(node, cg);3484if (!targetRegister)3485{3486// Inline object allocation wasn't generated, just generate a call to the helper.3487//3488TR::ILOpCodes opCode = node->getOpCodeValue();3489TR::Node::recreate(node, TR::acall);3490targetRegister = directCallEvaluator(node, cg);3491TR::Node::recreate(node, opCode);3492}3493return targetRegister;3494}34953496TR::Register *3497J9::ARM64::TreeEvaluator::newArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)3498{3499TR::Register *targetRegister = TR::TreeEvaluator::VMnewEvaluator(node, cg);3500if (!targetRegister)3501{3502// Inline array allocation wasn't generated, just generate a call to the helper.3503//3504TR::ILOpCodes opCode = node->getOpCodeValue();3505TR::Node::recreate(node, TR::acall);3506targetRegister = directCallEvaluator(node, cg);3507TR::Node::recreate(node, opCode);3508}3509return targetRegister;3510}35113512TR::Register *3513J9::ARM64::TreeEvaluator::anewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)3514{3515TR::Register *targetRegister = TR::TreeEvaluator::VMnewEvaluator(node, cg);3516if (!targetRegister)3517{3518// Inline array allocation wasn't generated, just generate a call to the helper.3519//3520TR::ILOpCodes opCode = node->getOpCodeValue();3521TR::Node::recreate(node, TR::acall);3522targetRegister = directCallEvaluator(node, cg);3523TR::Node::recreate(node, opCode);3524}3525return targetRegister;3526}35273528TR::Register *3529J9::ARM64::TreeEvaluator::monentEvaluator(TR::Node *node, TR::CodeGenerator *cg)3530{3531TR::Compilation *comp = TR::comp();3532TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());3533const int32_t staticLwOffset = fej9->getByteOffsetToLockword(cg->getMonClass(node));3534TR::InstOpCode::Mnemonic op;3535TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);35363537if (comp->getOption(TR_FullSpeedDebug) ||3538(isMonitorValueBasedOrValueType == TR_yes) ||3539comp->getOption(TR_DisableInlineMonEnt))3540{3541TR::ILOpCodes opCode = node->getOpCodeValue();3542TR::Node::recreate(node, TR::call);3543TR::Register *targetRegister = directCallEvaluator(node, cg);3544TR::Node::recreate(node, opCode);3545return targetRegister;3546}35473548TR::Node *objNode = node->getFirstChild();3549TR::Register *objReg = cg->evaluate(objNode);3550TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();3551TR::Register *metaReg = cg->getMethodMetaDataRegister();35523553TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);3554TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);3555TR::LabelSymbol *OOLLabel = generateLabelSymbol(cg);3556TR::LabelSymbol *startLabel = generateLabelSymbol(cg);35573558generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);3559startLabel->setStartInternalControlFlow();35603561const bool isImplicitNullChkIsDoneAtLoadJ9Class = (isMonitorValueBasedOrValueType == TR_maybe) || (staticLwOffset <= 0);3562const bool inlineRecursive = staticLwOffset > 0;3563// If object is not known to be value type or value based class at compile time, check at run time3564if (isMonitorValueBasedOrValueType == TR_maybe)3565{3566TR::Register *temp1Reg = srm->findOrCreateScratchRegister();3567TR::Register *temp2Reg = srm->findOrCreateScratchRegister();35683569// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.3570// In this case, nullcheck reference register is objReg and the memory reference does use it,3571// so let InstructonDelegate::setupImplicitNullPointerException handle it.3572//3573// If we are generating code for MonitorCacheLookup then we will not have a separate OOL for inlineRecursive, and OOLLabel points3574// to the OOL Containing only helper call. Otherwise, OOL will have other code apart from helper call which we do not want to execute3575// for ValueType or ValueBased object and in that scenario we will need to generate another OOL that just contains helper call.3576generateCheckForValueMonitorEnterOrExit(node, doneLabel, inlineRecursive ? NULL : OOLLabel, objReg, temp1Reg, temp2Reg, cg, J9_CLASS_DISALLOWS_LOCKING_FLAGS);35773578srm->reclaimScratchRegister(temp1Reg);3579srm->reclaimScratchRegister(temp2Reg);3580}35813582TR::Register *addrReg = srm->findOrCreateScratchRegister();35833584// If we do not know the lockword offset at compile time, obtrain it from the class pointer of the object being locked3585if (staticLwOffset <= 0)3586{3587generateLockwordAddressLookup(cg, node, objReg, addrReg, metaReg, srm, OOLLabel);3588}3589else3590{3591generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, staticLwOffset); // ldxr/stxr instructions does not take immediate offset3592}3593TR::Register *dataReg = srm->findOrCreateScratchRegister();35943595TR::Instruction *faultingInstruction;3596static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;3597if (comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE))3598{3599generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, dataReg, 0); // expected value3600/*3601* We need to generate a CASAL, not a CASA because loads/stores before monitor exit can be reordered after a CASA3602* as the store to lockword for monitor exit is a plain store.3603*/3604op = fej9->generateCompressedLockWord() ? TR::InstOpCode::casalw : TR::InstOpCode::casalx;3605/*3606* As Trg1MemSrc1Instruction was introduced to support ldxr/stxr instructions, target and source register convention3607* is somewhat confusing. Its `treg` register actually is a source register and `sreg` register is a target register.3608* This needs to be fixed at some point.3609*/3610faultingInstruction = generateTrg1MemSrc1Instruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), metaReg);3611generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, dataReg, OOLLabel);3612}3613else3614{3615TR::Register *tempReg = srm->findOrCreateScratchRegister();36163617generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);3618op = fej9->generateCompressedLockWord() ? TR::InstOpCode::ldxrw : TR::InstOpCode::ldxrx;3619faultingInstruction = generateTrg1MemInstruction(cg, op, node, dataReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));36203621generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, dataReg, OOLLabel);3622op = fej9->generateCompressedLockWord() ? TR::InstOpCode::stxrw : TR::InstOpCode::stxrx;36233624generateTrg1MemSrc1Instruction(cg, op, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), metaReg);3625generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, tempReg, loopLabel);36263627generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)36283629srm->reclaimScratchRegister(tempReg);3630}36313632// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.3633// In this case, nullcheck reference register is objReg, but the memory reference does not use it,3634// thus we need to explicitly set implicit exception point here.3635if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck() && (!isImplicitNullChkIsDoneAtLoadJ9Class))3636{3637if (cg->getImplicitExceptionPoint() == NULL)3638{3639if (comp->getOption(TR_TraceCG))3640{3641traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, objNode);3642}3643cg->setImplicitExceptionPoint(faultingInstruction);3644}3645}36463647if (inlineRecursive)3648{3649/*3650* OOLLabel:3651* addimmx dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT3652* andimmx tempReg, dataReg, ~(OBJECT_HEADER_LOCK_RECURSION_MASK)3653* cmpx metaReg, tempReg3654* b.ne snippetLabel3655* strimmx dataReg, [addrReg]3656* OOLEndLabel:3657* b doneLabel3658*3659*/3660// This register is only required for OOL code section3661// If we obtain this from scratch register manager, then one more register is used in mainline.3662TR::Register *tempReg = cg->allocateRegister();3663TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);3664TR::LabelSymbol *OOLEndLabel = generateLabelSymbol(cg);3665TR_ARM64OutOfLineCodeSection *oolSection = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(OOLLabel, doneLabel, cg);3666cg->getARM64OutOfLineCodeSectionList().push_front(oolSection);3667oolSection->swapInstructionListsWithCompilation();3668generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLLabel);36693670generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, dataReg, dataReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);3671// OBJECT_HEADER_LOCK_RECURSION_MASK is 0xF0, immr=0x38, imms=0x3b for ~(0xF0)3672generateLogicalImmInstruction(cg, TR::InstOpCode::andimmx, node, tempReg, dataReg, true, 0xe3b);3673generateCompareInstruction(cg, node, metaReg, tempReg, true);36743675TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), OOLEndLabel);3676cg->addSnippet(snippet);3677TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);3678gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);3679snippet->gcMap().setGCRegisterMask(0xffffffff);36803681generateMemSrc1Instruction(cg, fej9->generateCompressedLockWord() ? TR::InstOpCode::strimmw : TR::InstOpCode::strimmx,3682node, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), dataReg);36833684TR::RegisterDependencyConditions *ooldeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());3685ooldeps->addPostCondition(objReg, TR::RealRegister::x0);3686ooldeps->addPostCondition(tempReg, TR::RealRegister::NoReg);3687ooldeps->addPostCondition(dataReg, TR::RealRegister::NoReg);3688ooldeps->addPostCondition(addrReg, TR::RealRegister::NoReg);36893690generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLEndLabel, ooldeps);3691generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);36923693cg->stopUsingRegister(tempReg);3694// ARM64HelperCallSnippet generates "bl" instruction3695cg->machine()->setLinkRegisterKilled(true);3696oolSection->swapInstructionListsWithCompilation();3697}3698else3699{3700TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, OOLLabel, doneLabel, cg);3701cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);3702}37033704TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());3705deps->addPostCondition(objReg, TR::RealRegister::NoReg);3706srm->addScratchRegistersToDependencyList(deps);3707generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);37083709doneLabel->setEndInternalControlFlow();37103711srm->stopUsingRegisters();37123713cg->decReferenceCount(objNode);3714cg->machine()->setLinkRegisterKilled(true);3715return NULL;3716}37173718TR::Register *3719J9::ARM64::TreeEvaluator::arraylengthEvaluator(TR::Node *node, TR::CodeGenerator *cg)3720{3721TR_ASSERT(cg->comp()->requiresSpineChecks(), "TR::arraylength should be lowered when hybrid arraylets are not in use");37223723TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());3724// ldrimmw R1, [B, contiguousSize]3725// cmpimmw R1, 0 ; If 0, must be a discontiguous array3726// ldrimmw R2, [B, discontiguousSize]3727// cselw R1, R1, R2, ne3728//3729TR::Register *objectReg = cg->evaluate(node->getFirstChild());3730TR::Register *lengthReg = cg->allocateRegister();3731TR::Register *discontiguousLengthReg = cg->allocateRegister();37323733TR::MemoryReference *contiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfContiguousArraySizeField());3734TR::MemoryReference *discontiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, objectReg, fej9->getOffsetOfDiscontiguousArraySizeField());37353736generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, lengthReg, contiguousArraySizeMR);3737generateCompareImmInstruction(cg, node, lengthReg, 0);3738generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, discontiguousLengthReg, discontiguousArraySizeMR);3739generateCondTrg1Src2Instruction(cg, TR::InstOpCode::cselw, node, lengthReg, lengthReg, discontiguousLengthReg, TR::CC_NE);37403741cg->stopUsingRegister(discontiguousLengthReg);3742cg->decReferenceCount(node->getFirstChild());3743node->setRegister(lengthReg);37443745return lengthReg;3746}37473748TR::Register *3749J9::ARM64::TreeEvaluator::ZEROCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)3750{3751// NOTE: ZEROCHK is intended to be general and straightforward. If you're3752// thinking of adding special code for specific situations in here, consider3753// whether you want to add your own CHK opcode instead. If you feel the3754// need for special handling here, you may also want special handling in the3755// optimizer, in which case a separate opcode may be more suitable.3756//3757// On the other hand, if the improvements you're adding could benefit other3758// users of ZEROCHK, please go ahead and add them!3759//37603761TR::LabelSymbol *slowPathLabel = generateLabelSymbol(cg);3762TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);3763slowPathLabel->setStartInternalControlFlow();3764restartLabel->setEndInternalControlFlow();37653766// Temporarily hide the first child so it doesn't appear in the outlined call3767//3768node->rotateChildren(node->getNumChildren()-1, 0);3769node->setNumChildren(node->getNumChildren()-1);37703771// Outlined instructions for check failure3772//3773TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(node, TR::call, NULL, slowPathLabel, restartLabel, cg);3774cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);37753776// Restore the first child3777//3778node->setNumChildren(node->getNumChildren()+1);3779node->rotateChildren(0, node->getNumChildren()-1);37803781// Children other than the first are only for the outlined path; we don't need them here3782//3783for (int32_t i = 1; i < node->getNumChildren(); i++)3784cg->recursivelyDecReferenceCount(node->getChild(i));37853786// Instructions for the check3787// ToDo: Optimize isBooleanCompare() case3788//3789TR::Node *valueToCheck = node->getFirstChild();3790TR::Register *value = cg->evaluate(valueToCheck);37913792generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, value, slowPathLabel);37933794cg->decReferenceCount(node->getFirstChild());3795generateLabelInstruction(cg, TR::InstOpCode::label, node, restartLabel);37963797return NULL;3798}37993800TR::Register *3801J9::ARM64::TreeEvaluator::BNDCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)3802{3803TR::Node *secondChild = node->getSecondChild();3804TR::Node *firstChild = node->getFirstChild();3805TR::Register *src1Reg;3806TR::Register *src2Reg = NULL;3807uint64_t value;3808TR::LabelSymbol *snippetLabel;3809TR::Instruction *gcPoint;3810bool reversed = false;38113812if ((firstChild->getOpCode().isLoadConst())3813&& (constantIsUnsignedImm12(firstChild->get64bitIntegralValueAsUnsigned()))3814&& (NULL == firstChild->getRegister()))3815{3816src2Reg = cg->evaluate(secondChild);3817reversed = true;3818}3819else3820{3821src1Reg = cg->evaluate(firstChild);38223823// If this BNDCHK is combined with previous NULLCHK, there is3824// an instruction that will cause a hardware trap if the exception is to be3825// taken. If this method may catch the exception, a GC stack map must be3826// created for this instruction. All registers are valid at this GC point3827// TODO - if the method may not catch the exception we still need to note3828// that the GC point exists, since maps before this point and after it cannot3829// be merged.3830//3831if (cg->getHasResumableTrapHandler() && node->hasFoldedImplicitNULLCHK())3832{3833TR::Compilation *comp = cg->comp();3834TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();3835if (comp->getOption(TR_TraceCG))3836{3837traceMsg(comp, "\nNode %p has foldedimplicitNULLCHK, and a faulting instruction of %p\n", node, faultingInstruction);3838}38393840if (faultingInstruction)3841{3842faultingInstruction->setNeedsGCMap(0xffffffff);3843cg->machine()->setLinkRegisterKilled(true);38443845TR_Debug * debugObj = cg->getDebug();3846if (debugObj)3847{3848debugObj->addInstructionComment(faultingInstruction, "Throws Implicit Null Pointer Exception");3849}3850}3851}3852if ((secondChild->getOpCode().isLoadConst())3853&& (NULL == secondChild->getRegister()))3854{3855value = secondChild->get64bitIntegralValueAsUnsigned();3856if (!constantIsUnsignedImm12(value))3857{3858src2Reg = cg->evaluate(secondChild);3859}3860}3861else3862src2Reg = cg->evaluate(secondChild);3863}38643865if (reversed)3866{3867generateCompareImmInstruction(cg, node, src2Reg, firstChild->get64bitIntegralValueAsUnsigned());3868}3869else3870{3871if (NULL == src2Reg)3872generateCompareImmInstruction(cg, node, src1Reg, value);3873else3874generateCompareInstruction(cg, node, src1Reg, src2Reg);3875}38763877snippetLabel = generateLabelSymbol(cg);3878TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());3879cg->addSnippet(snippet);38803881gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, (reversed ? TR::CC_CS : TR::CC_LS));38823883gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);3884snippet->gcMap().setGCRegisterMask(0xffffffff);38853886cg->decReferenceCount(firstChild);3887cg->decReferenceCount(secondChild);3888secondChild->setIsNonNegative(true);3889// ARM64HelperCallSnippet generates "bl" instruction3890cg->machine()->setLinkRegisterKilled(true);3891return (NULL);3892}38933894/**3895* @brief Generate instruction sequence for array store check3896*3897* @param[in] node: node3898* @param[in] srcReg: register contains source object3899* @param[in] dstReg: register contains destination array3900* @param[in] srm: scratch register manager3901* @param[in] doneLabel: label to jump when check is successful3902* @param[in] helperCallLabel: label to jump when helper call is needed3903* @param[in] cg: code generator3904*/3905static void VMarrayStoreCHKEvaluator(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, TR_ARM64ScratchRegisterManager *srm,3906TR::LabelSymbol *doneLabel, TR::LabelSymbol *helperCallLabel, TR::CodeGenerator *cg)3907{3908TR::Compilation *comp = cg->comp();3909TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());3910TR::Register *sourceClassReg = srm->findOrCreateScratchRegister();3911TR::Register *destArrayClassReg = srm->findOrCreateScratchRegister();39123913cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator"), *srm);39143915generateLoadJ9Class(node, sourceClassReg, srcReg, cg);3916generateLoadJ9Class(node, destArrayClassReg, dstReg, cg);39173918TR::Register *destComponentClassReg = srm->findOrCreateScratchRegister();3919TR_Debug *debugObj = cg->getDebug();39203921auto instr = generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, destComponentClassReg,3922TR::MemoryReference::createWithDisplacement(cg, destArrayClassReg, offsetof(J9ArrayClass, componentType)));3923if (debugObj)3924{3925debugObj->addInstructionComment(instr, "load component type of the destination array");3926}3927srm->reclaimScratchRegister(destArrayClassReg);3928destArrayClassReg = NULL; // prevent re-using this register by error39293930generateCompareInstruction(cg, node, destComponentClassReg, sourceClassReg, true);3931instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);3932if (debugObj)3933{3934debugObj->addInstructionComment(instr, "done if component type of the destination array equals to source object class");3935}3936cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:01ClassEqualityCheckDone"), *srm);39373938TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16, true);3939/*3940* objectClass is used for Object arrays check optimization: when we are storing to Object arrays we can skip all other array store checks3941* However, TR_J9SharedCacheVM::getSystemClassFromClassName can return 0 when it's impossible to relocate j9class later for AOT loads3942* in that case we don't want to generate the Object arrays check3943*/3944bool doObjectArrayCheck = objectClass != NULL;3945if (doObjectArrayCheck)3946{3947cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:02JavaLangObjectCheck"), *srm);39483949TR::Register *javaLangObjectClassReg = srm->findOrCreateScratchRegister();3950if (cg->wantToPatchClassPointer(objectClass, node) || cg->needClassAndMethodPointerRelocations())3951{3952loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(objectClass), javaLangObjectClassReg, TR_ClassPointer);3953}3954else3955{3956loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(objectClass), javaLangObjectClassReg);3957}3958generateCompareInstruction(cg, node, javaLangObjectClassReg, destComponentClassReg, true);3959instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);3960if (debugObj)3961{3962debugObj->addInstructionComment(instr, "done if component type of the destination array equals to java/lang/Object");3963}3964srm->reclaimScratchRegister(javaLangObjectClassReg);39653966cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:03JavaLangObjectCheckDone"), *srm);3967}39683969cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:04CastClassCacheCheck"), *srm);39703971TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();3972generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, castClassCacheReg,3973TR::MemoryReference::createWithDisplacement(cg, sourceClassReg, offsetof(J9Class, castClassCache)));3974generateCompareInstruction(cg, node, castClassCacheReg, destComponentClassReg, true);3975instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);3976if (debugObj)3977{3978debugObj->addInstructionComment(instr, "done if component type of the destination array equals to castClassCache of source object class");3979}3980srm->reclaimScratchRegister(castClassCacheReg);3981castClassCacheReg = NULL; // prevent re-using this register by error39823983cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:05CastClassCacheCheckDone"), *srm);39843985/*3986* If isInstanceOf (objectClass,ArrayComponentClass,true,true) was successful and stored during VP, we need to test again the real arrayComponentClass3987* Need to relocate address of arrayComponentClass under AOT compilation.3988* Need to add PICsite on class constant if the class can be unloaded.3989*/3990if (node->getArrayComponentClassInNode())3991{3992TR::Register *arrayComponentClassReg = srm->findOrCreateScratchRegister();3993TR_OpaqueClassBlock *arrayComponentClass = node->getArrayComponentClassInNode();3994cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:06ArrayComponentClassCheck"), *srm);39953996if (cg->wantToPatchClassPointer(arrayComponentClass, node) || cg->needClassAndMethodPointerRelocations())3997{3998loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, TR_ClassPointer);3999}4000else4001{4002bool isUnloadAssumptionRequired = fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod());40034004if (isUnloadAssumptionRequired)4005{4006loadAddressConstantInSnippet(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, TR_NoRelocation, true);4007}4008else4009{4010loadAddressConstant(cg, node, reinterpret_cast<intptr_t>(arrayComponentClass), arrayComponentClassReg, NULL, true);4011}4012}4013generateCompareInstruction(cg, node, arrayComponentClassReg, destComponentClassReg, true);4014instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);40154016if (debugObj)4017{4018debugObj->addInstructionComment(instr, "done if component type of the destination array equals to arrayComponentClass set in node");4019}4020srm->reclaimScratchRegister(arrayComponentClassReg);40214022cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:06ArrayComponentClassCheckDone"), *srm);4023}40244025genSuperClassTest(node, sourceClassReg, true, destComponentClassReg, -1, helperCallLabel, srm, cg);4026srm->reclaimScratchRegister(destComponentClassReg);40274028// prevent re-using these registers by error4029sourceClassReg = NULL;4030destComponentClassReg = NULL;40314032instr = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, helperCallLabel, TR::CC_NE);4033if (debugObj)4034{4035debugObj->addInstructionComment(instr, "Call helper if super class test fails");4036}4037cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "ArrayStoreCHKEvaluator:010VMarrayStoreCHKEvaluator:07SuperClassTestDone"), *srm);40384039cg->machine()->setLinkRegisterKilled(true);4040}40414042TR::Register *4043J9::ARM64::TreeEvaluator::ArrayStoreCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)4044{4045TR::Compilation *comp = cg->comp();4046TR::Node *firstChild = node->getFirstChild();4047TR::Node *sourceChild = firstChild->getSecondChild();4048TR::Node *dstNode = firstChild->getThirdChild();40494050bool usingCompressedPointers = false;4051if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())4052{4053usingCompressedPointers = true;40544055while ((sourceChild->getNumChildren() > 0) && (sourceChild->getOpCodeValue() != TR::a2l))4056sourceChild = sourceChild->getFirstChild();4057if (sourceChild->getOpCodeValue() == TR::a2l)4058sourceChild = sourceChild->getFirstChild();4059}40604061TR::Register *srcReg = cg->evaluate(sourceChild);4062TR::Register *dstReg = cg->evaluate(dstNode);4063TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();40644065TR::LabelSymbol *wbLabel = generateLabelSymbol(cg);4066TR::LabelSymbol *OOLMergeLabel = generateLabelSymbol(cg);4067TR_Debug * debugObj = cg->getDebug();40684069if (!sourceChild->isNull())4070{4071static const bool disableArrayStoreCHKOpts = comp->getOption(TR_DisableArrayStoreCheckOpts);4072TR_J9VM *fej9 = reinterpret_cast<TR_J9VM *>(cg->fe());4073TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);4074// Since ArrayStoreCHK doesn't have the shape of the corresponding helper call we have to create this tree4075// so we can have it evaluated out of line4076TR::Node *helperCallNode = TR::Node::createWithSymRef(node, TR::call, 2, node->getSymbolReference());4077helperCallNode->setAndIncChild(0, sourceChild);4078helperCallNode->setAndIncChild(1, dstNode);4079if (comp->getOption(TR_TraceCG))4080{4081traceMsg(comp, "%s: Creating and evaluating the following tree to generate the necessary helper call for this node\n", node->getOpCode().getName());4082cg->getDebug()->print(comp->getOutFile(), helperCallNode);4083}40844085bool nopASC = node->getArrayStoreClassInNode() && comp->performVirtualGuardNOPing() &&4086(!fej9->classHasBeenExtended(node->getArrayStoreClassInNode())) && (!disableArrayStoreCHKOpts);4087if (nopASC)4088{4089// Speculatively NOP the array store check if VP is able to prove that the ASC4090// would always succeed given the current state of the class hierarchy.4091//4092TR_VirtualGuard *virtualGuard = TR_VirtualGuard::createArrayStoreCheckGuard(comp, node, node->getArrayStoreClassInNode());4093TR::Instruction *vgnopInstr = generateVirtualGuardNOPInstruction(cg, node, virtualGuard->addNOPSite(), NULL, helperCallLabel);4094}4095else4096{4097// If source is null, we can skip array store check.4098auto cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, srcReg, wbLabel);4099if (debugObj)4100{4101debugObj->addInstructionComment(cbzInstruction, "jump past array store check");4102}4103if (!disableArrayStoreCHKOpts)4104{4105VMarrayStoreCHKEvaluator(node, srcReg, dstReg, srm, wbLabel, helperCallLabel, cg);4106}4107else4108{4109generateLabelInstruction(cg, TR::InstOpCode::b, node, helperCallLabel);4110}4111}41124113TR_ARM64OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(helperCallNode, TR::call, NULL, helperCallLabel, OOLMergeLabel, cg);4114cg->getARM64OutOfLineCodeSectionList().push_front(outlinedHelperCall);4115cg->decReferenceCount(helperCallNode->getFirstChild());4116cg->decReferenceCount(helperCallNode->getSecondChild());4117}4118TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2 + srm->numAvailableRegisters(), cg->trMemory());4119srm->addScratchRegistersToDependencyList(deps);41204121deps->addPostCondition(srcReg, TR::RealRegister::NoReg);4122deps->addPostCondition(dstReg, TR::RealRegister::NoReg);4123auto instr = generateLabelInstruction(cg, TR::InstOpCode::label, node, wbLabel);4124if (debugObj)4125{4126debugObj->addInstructionComment(instr, "ArrayStoreCHK Done");4127}4128instr = generateLabelInstruction(cg, TR::InstOpCode::label, node, OOLMergeLabel, deps);4129if (debugObj)4130{4131debugObj->addInstructionComment(instr, "OOL merge point");4132}41334134srm->stopUsingRegisters();41354136cg->evaluate(firstChild);41374138cg->decReferenceCount(firstChild);41394140return NULL;4141}41424143static TR::Register *4144VMarrayCheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)4145{4146TR::Register *obj1Reg = cg->evaluate(node->getFirstChild());4147TR::Register *obj2Reg = cg->evaluate(node->getSecondChild());4148TR::Register *tmp1Reg = cg->allocateRegister();4149TR::Register *tmp2Reg = cg->allocateRegister();41504151TR::Instruction *gcPoint;4152TR::Snippet *snippet;4153TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(4, 4, cg->trMemory());;41544155TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);41564157TR::addDependency(conditions, obj1Reg, TR::RealRegister::NoReg, TR_GPR, cg);4158TR::addDependency(conditions, obj2Reg, TR::RealRegister::NoReg, TR_GPR, cg);4159TR::addDependency(conditions, tmp1Reg, TR::RealRegister::NoReg, TR_GPR, cg);4160TR::addDependency(conditions, tmp2Reg, TR::RealRegister::NoReg, TR_GPR, cg);41614162// We have a unique snippet sharing arrangement in this code sequence.4163// It is not generally applicable for other situations.4164TR::LabelSymbol *snippetLabel = NULL;41654166// Same array, we are done.4167//4168generateCompareInstruction(cg, node, obj1Reg, obj2Reg, true);4169generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);41704171// If we know nothing about either object, test object1 first. It has to be an array.4172//4173if (!node->isArrayChkPrimitiveArray1() && !node->isArrayChkReferenceArray1() && !node->isArrayChkPrimitiveArray2() && !node->isArrayChkReferenceArray2())4174{4175generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);41764177generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));41784179loadConstant32(cg, node, (int32_t) J9AccClassRAMArray, tmp2Reg);4180generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);41814182snippetLabel = generateLabelSymbol(cg);4183gcPoint = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, tmp2Reg, snippetLabel);41844185snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);4186cg->addSnippet(snippet);4187}41884189// One of the object is array. Test equality of two objects' classes.4190//4191generateLoadJ9Class(node, tmp2Reg, obj2Reg, cg);4192generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);41934194generateCompareInstruction(cg, node, tmp1Reg, tmp2Reg, true);41954196// If either object is known to be of primitive component type,4197// we are done: since both of them have to be of equal class.4198if (node->isArrayChkPrimitiveArray1() || node->isArrayChkPrimitiveArray2())4199{4200if (snippetLabel == NULL)4201{4202snippetLabel = generateLabelSymbol(cg);4203gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);4204snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);4205cg->addSnippet(snippet);4206}4207else4208generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);4209}4210else4211{4212// We have to take care of the un-equal class situation: both of them must be of reference array4213generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);42144215// Object1 must be of reference component type, otherwise throw exception4216if (!node->isArrayChkReferenceArray1())4217{4218// Loading the Class Pointer -> classDepthAndFlags4219generateLoadJ9Class(node, tmp1Reg, obj1Reg, cg);42204221generateTrg1MemInstruction(cg,TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));42224223// We already have classDepth&Flags in tmp1Reg. X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift4224generateLogicalShiftRightImmInstruction(cg, node, tmp1Reg, tmp1Reg, J9AccClassRAMShapeShift);42254226// We need to perform a X & OBJECT_HEADER_SHAPE_MASK42274228loadConstant32(cg, node, OBJECT_HEADER_SHAPE_MASK, tmp2Reg);4229generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);4230generateCompareImmInstruction(cg, node, tmp2Reg, OBJECT_HEADER_SHAPE_POINTERS);42314232if (snippetLabel == NULL)4233{4234snippetLabel = generateLabelSymbol(cg);4235gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);4236snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);4237cg->addSnippet(snippet);4238}4239else4240generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);4241}42424243// Object2 must be of reference component type array, otherwise throw exception4244if (!node->isArrayChkReferenceArray2())4245{4246generateLoadJ9Class(node, tmp1Reg, obj2Reg, cg);4247generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, tmp1Reg, TR::MemoryReference::createWithDisplacement(cg, tmp1Reg, offsetof(J9Class, classDepthAndFlags)));42484249loadConstant32(cg, node, (int32_t) J9AccClassRAMArray, tmp2Reg);4250generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);42514252if (snippetLabel == NULL)4253{4254snippetLabel = generateLabelSymbol(cg);4255gcPoint = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, tmp2Reg, snippetLabel);4256snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), doneLabel);4257cg->addSnippet(snippet);4258}4259else4260generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, tmp2Reg, snippetLabel);42614262// We already have classDepth&Flags in tmp1Reg. X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift4263generateLogicalShiftRightImmInstruction(cg, node, tmp1Reg, tmp1Reg, J9AccClassRAMShapeShift);42644265// We need to perform a X & OBJECT_HEADER_SHAPE_MASK42664267loadConstant32(cg, node, OBJECT_HEADER_SHAPE_MASK, tmp2Reg);4268generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, tmp2Reg, tmp1Reg, tmp2Reg);4269generateCompareImmInstruction(cg, node, tmp2Reg, OBJECT_HEADER_SHAPE_POINTERS);4270generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, snippetLabel, TR::CC_NE);4271}4272}42734274generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);4275if (snippetLabel != NULL)4276{4277gcPoint->ARM64NeedsGCMap(cg, 0x0);4278snippet->gcMap().setGCRegisterMask(0x0);4279}42804281cg->stopUsingRegister(tmp1Reg);4282cg->stopUsingRegister(tmp2Reg);42834284cg->decReferenceCount(node->getFirstChild());4285cg->decReferenceCount(node->getSecondChild());4286return NULL;4287}42884289TR::Register *4290J9::ARM64::TreeEvaluator::ArrayCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)4291{4292return VMarrayCheckEvaluator(node, cg);4293}42944295void4296J9::ARM64::TreeEvaluator::genWrtbarForArrayCopy(TR::Node *node, TR::Register *srcObjReg, TR::Register *dstObjReg, TR::CodeGenerator *cg)4297{4298TR::Compilation *comp = cg->comp();4299bool ageCheckIsNeeded;4300bool cardMarkIsNeeded;4301auto gcMode = TR::Compiler->om.writeBarrierType();43024303ageCheckIsNeeded = (gcMode == gc_modron_wrtbar_oldcheck ||4304gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||4305gcMode == gc_modron_wrtbar_always);4306cardMarkIsNeeded = (gcMode == gc_modron_wrtbar_cardmark ||4307gcMode == gc_modron_wrtbar_cardmark_incremental);43084309if (!ageCheckIsNeeded && !cardMarkIsNeeded)4310return;43114312if (ageCheckIsNeeded)4313{4314TR::Register *tmp1Reg = NULL;4315TR::Register *tmp2Reg = NULL;4316TR::RegisterDependencyConditions *deps;4317TR::Instruction *gcPoint;4318TR::LabelSymbol *doneLabel;43194320if (gcMode != gc_modron_wrtbar_always)4321{4322tmp1Reg = cg->allocateRegister();4323tmp2Reg = cg->allocateRegister();4324deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(3, 3, cg->trMemory());4325TR::addDependency(deps, tmp1Reg, TR::RealRegister::NoReg, TR_GPR, cg);4326TR::addDependency(deps, tmp2Reg, TR::RealRegister::NoReg, TR_GPR, cg);4327}4328else4329{4330deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());4331}43324333TR::addDependency(deps, dstObjReg, TR::RealRegister::x0, TR_GPR, cg);43344335TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierBatchStoreSymbolRef(comp->getMethodSymbol());43364337if (gcMode != gc_modron_wrtbar_always)4338{4339doneLabel = generateLabelSymbol(cg);43404341TR::Register *metaReg = cg->getMethodMetaDataRegister();43424343// tmp1Reg = dstObjReg - heapBaseForBarrierRange04344generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tmp1Reg,4345TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapBaseForBarrierRange0)));4346generateTrg1Src2Instruction(cg, TR::InstOpCode::subx, node, tmp1Reg, dstObjReg, tmp1Reg);43474348// if (tmp1Reg >= heapSizeForBarrierRange0), object not in the tenured area4349generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmx, node, tmp2Reg,4350TR::MemoryReference::createWithDisplacement(cg, metaReg, offsetof(J9VMThread, heapSizeForBarrierRange0)));4351generateCompareInstruction(cg, node, tmp1Reg, tmp2Reg, true);4352generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_CS); // greater or equal (unsigned)4353}43544355gcPoint = generateImmSymInstruction(cg, TR::InstOpCode::bl, node, reinterpret_cast<uintptr_t>(wbRef->getSymbol()->castToMethodSymbol()->getMethodAddress()),4356new (cg->trHeapMemory()) TR::RegisterDependencyConditions((uint8_t) 0, 0, cg->trMemory()), wbRef, NULL);4357cg->machine()->setLinkRegisterKilled(true);43584359if (gcMode != gc_modron_wrtbar_always)4360generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);43614362gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);43634364if (tmp1Reg)4365cg->stopUsingRegister(tmp1Reg);4366if (tmp2Reg)4367cg->stopUsingRegister(tmp2Reg);4368}43694370if (!ageCheckIsNeeded && cardMarkIsNeeded)4371{4372if (!comp->getOptions()->realTimeGC())4373{4374TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);43754376TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();43774378TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());4379TR::addDependency(deps, dstObjReg, TR::RealRegister::NoReg, TR_GPR, cg);4380srm->addScratchRegistersToDependencyList(deps);4381VMCardCheckEvaluator(node, dstObjReg, srm, doneLabel, cg);4382generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);4383srm->stopUsingRegisters();4384}4385else4386{4387TR_ASSERT(0, "genWrtbarForArrayCopy card marking not supported for RT");4388}4389}4390}43914392TR::Register *4393J9::ARM64::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg)4394{4395#ifdef OMR_GC_CONCURRENT_SCAVENGER4396/*4397* This version of arraycopyEvaluator is designed to handle the special case where read barriers are4398* needed for field loads. At the time of writing, read barriers are used for Concurrent Scavenge GC.4399* If there are no read barriers then the original implementation of arraycopyEvaluator can be used.4400*/4401if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none ||4402!node->chkNoArrayStoreCheckArrayCopy() ||4403!node->isReferenceArrayCopy())4404{4405return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);4406}44074408TR::Compilation *comp = cg->comp();4409TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());44104411// child 0 ------ Source array object4412// child 1 ------ Destination array object4413// child 2 ------ Source byte address4414// child 3 ------ Destination byte address4415// child 4 ------ Copy length in bytes4416TR::Node *srcObjNode = node->getFirstChild();4417TR::Node *dstObjNode = node->getSecondChild();4418TR::Node *srcAddrNode = node->getChild(2);4419TR::Node *dstAddrNode = node->getChild(3);4420TR::Node *lengthNode = node->getChild(4);4421TR::Register *srcObjReg, *dstObjReg, *srcAddrReg, *dstAddrReg, *lengthReg;4422bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4, stopUsingCopyReg5 = false;44234424stopUsingCopyReg1 = stopUsingCopyReg(srcObjNode, srcObjReg, cg);4425stopUsingCopyReg2 = stopUsingCopyReg(dstObjNode, dstObjReg, cg);4426stopUsingCopyReg3 = stopUsingCopyReg(srcAddrNode, srcAddrReg, cg);4427stopUsingCopyReg4 = stopUsingCopyReg(dstAddrNode, dstAddrReg, cg);44284429lengthReg = cg->evaluate(lengthNode);4430if (!cg->canClobberNodesRegister(lengthNode))4431{4432TR::Register *lenCopyReg = cg->allocateRegister();4433generateMovInstruction(cg, lengthNode, lenCopyReg, lengthReg);4434lengthReg = lenCopyReg;4435stopUsingCopyReg5 = true;4436}44374438TR::Register *metaReg = cg->getMethodMetaDataRegister();4439TR::Register *x0Reg = cg->allocateRegister();4440TR::Register *tmp1Reg = cg->allocateRegister();4441TR::Register *tmp2Reg = cg->allocateRegister();4442TR::Register *tmp3Reg = cg->allocateRegister();44434444TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(17, 17, cg->trMemory());44454446TR::addDependency(deps, x0Reg, TR::RealRegister::x0, TR_GPR, cg); // copy of metaReg4447TR::addDependency(deps, tmp1Reg, TR::RealRegister::x1, TR_GPR, cg); // copy of srcObjReg4448TR::addDependency(deps, tmp2Reg, TR::RealRegister::x2, TR_GPR, cg); // copy of dstObjReg4449TR::addDependency(deps, srcAddrReg, TR::RealRegister::x3, TR_GPR, cg);4450TR::addDependency(deps, dstAddrReg, TR::RealRegister::x4, TR_GPR, cg);4451TR::addDependency(deps, lengthReg, TR::RealRegister::x5, TR_GPR, cg);4452TR::addDependency(deps, tmp3Reg, TR::RealRegister::x6, TR_GPR, cg); // this is not an argument4453for (int32_t i = (int32_t)TR::RealRegister::x7; i <= (int32_t)TR::RealRegister::x15; i++)4454{4455TR::addDependency(deps, NULL, (TR::RealRegister::RegNum)i, TR_GPR, cg);4456}4457// x16 and x17 are reserved registers4458TR::addDependency(deps, NULL, TR::RealRegister::x18, TR_GPR, cg);44594460generateMovInstruction(cg, node, x0Reg, metaReg);4461generateMovInstruction(cg, node, tmp1Reg, srcObjReg);4462generateMovInstruction(cg, node, tmp2Reg, dstObjReg);44634464// The C routine expects length measured by slots4465int32_t elementSize = comp->useCompressedPointers() ?4466TR::Compiler->om.sizeofReferenceField() : TR::Compiler->om.sizeofReferenceAddress();4467generateLogicalShiftRightImmInstruction(cg, node, lengthReg, lengthReg, trailingZeroes(elementSize));44684469intptr_t *funcdescrptr = (intptr_t *)fej9->getReferenceArrayCopyHelperAddress();4470loadAddressConstant(cg, node, (intptr_t)funcdescrptr, tmp3Reg, NULL, false, TR_ArrayCopyHelper);44714472// call the C routine4473TR::Instruction *gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, node, tmp3Reg, deps);4474gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);44754476TR::TreeEvaluator::genWrtbarForArrayCopy(node, srcObjReg, dstObjReg, cg);44774478// ARM64HelperCallSnippet generates "bl" instruction4479cg->machine()->setLinkRegisterKilled(true);44804481cg->decReferenceCount(srcObjNode);4482cg->decReferenceCount(dstObjNode);4483cg->decReferenceCount(srcAddrNode);4484cg->decReferenceCount(dstAddrNode);4485cg->decReferenceCount(lengthNode);44864487if (stopUsingCopyReg1)4488cg->stopUsingRegister(srcObjReg);4489if (stopUsingCopyReg2)4490cg->stopUsingRegister(dstObjReg);44914492TR::Register *retRegisters[3];4493int retRegCount = 0;4494if (!stopUsingCopyReg3)4495retRegisters[retRegCount++] = srcAddrReg;4496if (!stopUsingCopyReg4)4497retRegisters[retRegCount++] = dstAddrReg;4498if (!stopUsingCopyReg5)4499retRegisters[retRegCount++] = lengthReg;45004501deps->stopUsingDepRegs(cg, retRegCount, retRegisters);45024503return NULL;4504#else /* OMR_GC_CONCURRENT_SCAVENGER */4505return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);4506#endif /* OMR_GC_CONCURRENT_SCAVENGER */4507}45084509void4510J9::ARM64::TreeEvaluator::genArrayCopyWithArrayStoreCHK(TR::Node *node, TR::CodeGenerator *cg)4511{4512TR::Compilation *comp = cg->comp();4513TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());45144515// child 0 ------ Source array object4516// child 1 ------ Destination array object4517// child 2 ------ Source byte address4518// child 3 ------ Destination byte address4519// child 4 ------ Copy length in bytes4520TR::Node *srcObjNode = node->getFirstChild();4521TR::Node *dstObjNode = node->getSecondChild();4522TR::Node *srcAddrNode = node->getChild(2);4523TR::Node *dstAddrNode = node->getChild(3);4524TR::Node *lengthNode = node->getChild(4);4525TR::Register *srcObjReg, *dstObjReg, *srcAddrReg, *dstAddrReg, *lengthReg;4526bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4, stopUsingCopyReg5 = false;45274528stopUsingCopyReg1 = stopUsingCopyReg(srcObjNode, srcObjReg, cg);4529stopUsingCopyReg2 = stopUsingCopyReg(dstObjNode, dstObjReg, cg);4530stopUsingCopyReg3 = stopUsingCopyReg(srcAddrNode, srcAddrReg, cg);4531stopUsingCopyReg4 = stopUsingCopyReg(dstAddrNode, dstAddrReg, cg);45324533lengthReg = cg->evaluate(lengthNode);4534if (!cg->canClobberNodesRegister(lengthNode))4535{4536TR::Register *lenCopyReg = cg->allocateRegister();4537generateMovInstruction(cg, lengthNode, lenCopyReg, lengthReg);4538lengthReg = lenCopyReg;4539stopUsingCopyReg5 = true;4540}45414542// the C routine expects length measured by slots4543int32_t elementSize = comp->useCompressedPointers() ?4544TR::Compiler->om.sizeofReferenceField() : TR::Compiler->om.sizeofReferenceAddress();4545generateLogicalShiftRightImmInstruction(cg, node, lengthReg, lengthReg, trailingZeroes(elementSize), true);45464547// pass vmThread as the first parameter4548TR::Register *x0Reg = cg->allocateRegister();4549TR::Register *metaReg = cg->getMethodMetaDataRegister();4550generateMovInstruction(cg, node, x0Reg, metaReg);45514552TR::Register *tmpReg = cg->allocateRegister();45534554// I_32 referenceArrayCopy(J9VMThread *vmThread,4555// J9IndexableObjectContiguous *srcObject,4556// J9IndexableObjectContiguous *destObject,4557// U_8 *srcAddress,4558// U_8 *destAddress,4559// I_32 lengthInSlots)4560TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(17, 17, cg->trMemory());4561TR::addDependency(deps, x0Reg, TR::RealRegister::x0, TR_GPR, cg);4562TR::addDependency(deps, srcObjReg, TR::RealRegister::x1, TR_GPR, cg);4563TR::addDependency(deps, dstObjReg, TR::RealRegister::x2, TR_GPR, cg);4564TR::addDependency(deps, srcAddrReg, TR::RealRegister::x3, TR_GPR, cg);4565TR::addDependency(deps, dstAddrReg, TR::RealRegister::x4, TR_GPR, cg);4566TR::addDependency(deps, lengthReg, TR::RealRegister::x5, TR_GPR, cg);4567TR::addDependency(deps, tmpReg, TR::RealRegister::x6, TR_GPR, cg); // this is not an argument4568for (int32_t i = (int32_t)TR::RealRegister::x7; i <= (int32_t)TR::RealRegister::x15; i++)4569{4570TR::addDependency(deps, NULL, (TR::RealRegister::RegNum)i, TR_GPR, cg);4571}4572// x16 and x17 are reserved registers4573TR::addDependency(deps, NULL, TR::RealRegister::x18, TR_GPR, cg);45744575intptr_t *funcdescrptr = (intptr_t *)fej9->getReferenceArrayCopyHelperAddress();4576loadAddressConstant(cg, node, (intptr_t)funcdescrptr, tmpReg, NULL, false, TR_ArrayCopyHelper);45774578// call the C routine4579TR::Instruction *gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, node, tmpReg, deps);4580gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);4581// check return value (-1 on success)4582generateCompareImmInstruction(cg, node, x0Reg, -1); // 32-bit compare4583// throw exception if needed4584TR::SymbolReference *throwSymRef = comp->getSymRefTab()->findOrCreateArrayStoreExceptionSymbolRef(comp->getJittedMethodSymbol());4585TR::LabelSymbol *exceptionSnippetLabel = cg->lookUpSnippet(TR::Snippet::IsHelperCall, throwSymRef);4586if (exceptionSnippetLabel == NULL)4587{4588exceptionSnippetLabel = generateLabelSymbol(cg);4589TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, exceptionSnippetLabel, throwSymRef);4590cg->addSnippet(snippet);4591snippet->gcMap().setGCRegisterMask(0xFFFFFFFF);4592}45934594gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, exceptionSnippetLabel, TR::CC_NE);4595gcPoint->ARM64NeedsGCMap(cg, 0xFFFFFFFF);45964597// ARM64HelperCallSnippet generates "bl" instruction4598cg->machine()->setLinkRegisterKilled(true);45994600TR::Register *retRegisters[5];4601int retRegCount = 0;4602if (!stopUsingCopyReg1)4603retRegisters[retRegCount++] = srcObjReg;4604if (!stopUsingCopyReg2)4605retRegisters[retRegCount++] = dstObjReg;4606if (!stopUsingCopyReg3)4607retRegisters[retRegCount++] = srcAddrReg;4608if (!stopUsingCopyReg4)4609retRegisters[retRegCount++] = dstAddrReg;4610if (!stopUsingCopyReg5)4611retRegisters[retRegCount++] = lengthReg;46124613deps->stopUsingDepRegs(cg, retRegCount, retRegisters);46144615cg->decReferenceCount(srcObjNode);4616cg->decReferenceCount(dstObjNode);4617cg->decReferenceCount(srcAddrNode);4618cg->decReferenceCount(dstAddrNode);4619cg->decReferenceCount(lengthNode);4620}46214622static TR::Register *4623genCAS(TR::Node *node, TR::CodeGenerator *cg, TR_ARM64ScratchRegisterManager *srm, TR::Register *objReg, TR::Register *offsetReg, intptr_t offset, bool offsetInReg, TR::Register *oldVReg, TR::Register *newVReg,4624TR::LabelSymbol *doneLabel, int32_t oldValue, bool oldValueInReg, bool is64bit, bool casWithoutSync = false)4625{4626TR::Compilation * comp = cg->comp();4627TR::Register *addrReg = srm->findOrCreateScratchRegister();4628TR::Register *resultReg = cg->allocateRegister();4629TR::InstOpCode::Mnemonic op;463046314632if (offsetInReg)4633{4634generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, addrReg, objReg, offsetReg); // ldxr/stxr instructions does not take offset4635}4636else4637{4638generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, addrReg, objReg, offset); // ldxr/stxr instructions does not take offset4639}46404641const bool createDoneLabel = (doneLabel == NULL);46424643static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;4644if (comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE))4645{4646TR_ASSERT_FATAL(oldValueInReg, "Expecting oldValue is in register if LSE is enabled");4647/*4648* movx resultReg, oldVReg4649* casal resultReg, newVReg, [addrReg]4650* cmp resultReg, oldReg4651* cset resultReg, eq4652*/4653generateMovInstruction(cg, node, resultReg, oldVReg, is64bit);4654op = casWithoutSync ? (is64bit ? TR::InstOpCode::casx : TR::InstOpCode::casw) : (is64bit ? TR::InstOpCode::casalx : TR::InstOpCode::casalw);4655generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);4656generateCompareInstruction(cg, node, resultReg, oldVReg, is64bit);4657generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);4658if (!createDoneLabel)4659{4660generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);4661}4662}4663else4664{4665TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);4666generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);4667if (createDoneLabel)4668{4669doneLabel = generateLabelSymbol(cg);4670}4671/*4672* Generating the same instruction sequence as __sync_bool_compare_and_swap4673*4674* loop:4675* ldxrx resultReg, [addrReg]4676* cmpx resultReg, oldVReg4677* bne done4678* stlxrx resultReg, newVReg, [addrReg]4679* cbnz resultReg, loop4680* dmb ish4681* done:4682* cset resultReg, eq4683*4684*/4685op = is64bit ? TR::InstOpCode::ldxrx : TR::InstOpCode::ldxrw;4686generateTrg1MemInstruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0));4687if (oldValueInReg)4688generateCompareInstruction(cg, node, resultReg, oldVReg, is64bit);4689else4690generateCompareImmInstruction(cg, node, resultReg, oldValue, is64bit);4691if (!createDoneLabel)4692generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 0); // failure4693generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_NE);46944695if (casWithoutSync)4696{4697op = is64bit ? TR::InstOpCode::stxrx : TR::InstOpCode::stxrw;4698}4699else4700{4701op = is64bit ? TR::InstOpCode::stlxrx : TR::InstOpCode::stlxrw;4702}4703generateTrg1MemSrc1Instruction(cg, op, node, resultReg, TR::MemoryReference::createWithDisplacement(cg, addrReg, 0), newVReg);4704generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, resultReg, loopLabel);47054706if (!casWithoutSync)4707generateSynchronizationInstruction(cg, TR::InstOpCode::dmb, node, 0xB); // dmb ish (Inner Shareable full barrier)47084709if (createDoneLabel)4710{4711generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);4712generateCSetInstruction(cg, node, resultReg, TR::CC_EQ);4713}4714else4715{4716generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, node, resultReg, 1); // success4717}4718}4719srm->reclaimScratchRegister(addrReg);47204721node->setRegister(resultReg);4722return resultReg;4723}47244725static TR::Register *4726VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, bool isLong)4727{4728TR::Compilation * comp = cg->comp();4729TR::Node *firstChild = node->getFirstChild();4730TR::Node *secondChild = node->getSecondChild();4731TR::Node *thirdChild = node->getChild(2);4732TR::Node *fourthChild = node->getChild(3);4733TR::Node *fifthChild = node->getChild(4);4734TR::Register *offsetReg = NULL;4735TR::Register *oldVReg = NULL;4736TR::Register *newVReg = NULL;4737TR::Register *resultReg = NULL;4738TR::Register *objReg = cg->evaluate(secondChild);4739TR::RegisterDependencyConditions *conditions = NULL;4740TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);4741intptr_t oldValue = 0;4742bool oldValueInReg = true;4743intptr_t offset;4744bool offsetInReg = true;47454746if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)4747{4748offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();4749offsetInReg = !constantIsUnsignedImm12(offset);4750}4751if (offsetInReg)4752offsetReg = cg->evaluate(thirdChild);47534754static const bool disableLSE = feGetEnv("TR_aarch64DisableLSE") != NULL;4755static const bool useLSE = comp->target().cpu.supportsFeature(OMR_FEATURE_ARM64_LSE) && (!disableLSE);4756// Obtain values to be checked for, and swapped in:4757if ((!useLSE) && fourthChild->getOpCode().isLoadConst() && fourthChild->getRegister() == NULL)4758{4759if (isLong)4760oldValue = fourthChild->getLongInt();4761else4762oldValue = fourthChild->getInt();4763if (constantIsUnsignedImm12(oldValue))4764oldValueInReg = false;4765}4766if (oldValueInReg)4767oldVReg = cg->evaluate(fourthChild);4768newVReg = cg->evaluate(fifthChild);47694770// Determine if synchronization needed:4771bool casWithoutSync = false;4772TR_OpaqueMethodBlock *caller = node->getOwningMethod();4773if (caller)4774{4775TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());4776TR_ResolvedMethod *m = fej9->createResolvedMethod(cg->trMemory(), caller, node->getSymbolReference()->getOwningMethod(comp));4777if ((m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicInteger_weakCompareAndSet)4778|| (m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicLong_weakCompareAndSet)4779|| (m->getRecognizedMethod() == TR::java_util_concurrent_atomic_AtomicReference_weakCompareAndSet))4780{4781casWithoutSync = true;4782}4783}4784TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();47854786// Compare and swap:4787resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oldVReg, newVReg, NULL, oldValue, oldValueInReg, isLong, casWithoutSync);47884789const int regnum = 3 + (oldValueInReg ? 1 : 0) + (offsetInReg ? 1 : 0) + srm->numAvailableRegisters();4790conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, regnum, cg->trMemory());4791conditions->addPostCondition(objReg, TR::RealRegister::NoReg);4792if (offsetInReg)4793conditions->addPostCondition(offsetReg, TR::RealRegister::NoReg);4794conditions->addPostCondition(resultReg, TR::RealRegister::NoReg);4795conditions->addPostCondition(newVReg, TR::RealRegister::NoReg);47964797if (oldValueInReg)4798conditions->addPostCondition(oldVReg, TR::RealRegister::NoReg);47994800srm->addScratchRegistersToDependencyList(conditions);48014802generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);48034804srm->stopUsingRegisters();48054806cg->recursivelyDecReferenceCount(firstChild);4807cg->decReferenceCount(secondChild);4808if (offsetInReg)4809cg->decReferenceCount(thirdChild);4810else4811cg->recursivelyDecReferenceCount(thirdChild);48124813if (oldValueInReg)4814cg->decReferenceCount(fourthChild);4815else4816cg->recursivelyDecReferenceCount(fourthChild);4817cg->decReferenceCount(fifthChild);4818return resultReg;4819}48204821static TR::Register *VMinlineCompareAndSwapObject(TR::Node *node, TR::CodeGenerator *cg)4822{4823TR::Compilation *comp = cg->comp();4824TR_J9VMBase *fej9 = reinterpret_cast<TR_J9VMBase *>(comp->fe());4825TR::Register *objReg, *offsetReg, *resultReg;4826TR::Node *firstChild, *secondChild, *thirdChild, *fourthChild, *fifthChild;4827TR::LabelSymbol *doneLabel;4828bool offsetInReg = true;4829intptr_t offset;48304831auto gcMode = TR::Compiler->om.writeBarrierType();4832const bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);4833const bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental);48344835/**4836* icall jdk/internal/misc/Unsafe.compareAndSetObject4837* aload java/lang/invoke/VarHandle._unsafe4838* aload (objNode)4839* lconst (offset)4840* aload (oldValueNode)4841* aload (newValueNode)4842*/4843firstChild = node->getFirstChild();4844secondChild = node->getSecondChild();4845thirdChild = node->getChild(2);4846fourthChild = node->getChild(3);4847fifthChild = node->getChild(4);48484849objReg = cg->evaluate(secondChild);48504851if (thirdChild->getOpCode().isLoadConst() && thirdChild->getRegister() == NULL)4852{4853offset = (thirdChild->getOpCodeValue() == TR::iconst) ? thirdChild->getInt() : thirdChild->getLongInt();4854offsetInReg = !constantIsUnsignedImm12(offset);4855}4856if (offsetInReg)4857offsetReg = cg->evaluate(thirdChild);48584859TR::Register *oldVReg = cg->evaluate(fourthChild);4860TR::Register *newVReg = cg->evaluate(fifthChild);4861doneLabel = generateLabelSymbol(cg);48624863TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();48644865#ifdef OMR_GC_CONCURRENT_SCAVENGER4866if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)4867{4868TR::Register *tempReg = srm->findOrCreateScratchRegister();4869TR::Register *locationReg = cg->allocateRegister();4870TR::Register *evacuateReg = srm->findOrCreateScratchRegister();4871TR::Register *x0Reg = cg->allocateRegister();4872TR::Register *vmThreadReg = cg->getMethodMetaDataRegister();48734874TR::LabelSymbol *startLabel = generateLabelSymbol(cg);4875TR::LabelSymbol *endLabel = generateLabelSymbol(cg);4876startLabel->setStartInternalControlFlow();4877endLabel->setEndInternalControlFlow();48784879TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg->trMemory());4880deps->addPostCondition(tempReg, TR::RealRegister::NoReg);4881deps->addPostCondition(locationReg, TR::RealRegister::x1); // TR_softwareReadBarrier helper needs this in x1.4882deps->addPostCondition(evacuateReg, TR::RealRegister::NoReg);4883deps->addPostCondition(x0Reg, TR::RealRegister::x0);48844885if (offsetInReg)4886{4887generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, locationReg, objReg, offsetReg);4888}4889else4890{4891generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, locationReg, objReg, offset);4892}4893TR::InstOpCode::Mnemonic loadOp = comp->useCompressedPointers() ? TR::InstOpCode::ldrimmw : TR::InstOpCode::ldrimmx;48944895auto faultingInstruction = generateTrg1MemInstruction(cg, loadOp, node, tempReg, TR::MemoryReference::createWithDisplacement(cg, locationReg, 0));48964897// InstructonDelegate::setupImplicitNullPointerException checks if the memory reference uses nullcheck reference register.4898// In this case, nullcheck reference register is objReg, but the memory reference does not use it,4899// thus we need to explicitly set implicit exception point here.4900if (cg->getHasResumableTrapHandler() && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCode().isNullCheck())4901{4902if (cg->getImplicitExceptionPoint() == NULL)4903{4904if (comp->getOption(TR_TraceCG))4905{4906traceMsg(comp, "Instruction %p throws an implicit NPE, node: %p NPE node: %p\n", faultingInstruction, node, secondChild);4907}4908cg->setImplicitExceptionPoint(faultingInstruction);4909}4910}49114912if (node->getSymbolReference() == comp->getSymRefTab()->findVftSymbolRef())4913TR::TreeEvaluator::generateVFTMaskInstruction(cg, node, tempReg);49144915generateLabelInstruction(cg, TR::InstOpCode::label, node, startLabel);49164917generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,4918TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateBaseAddressOffset()));4919generateCompareInstruction(cg, node, tempReg, evacuateReg, true);4920generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_LT);49214922generateTrg1MemInstruction(cg, loadOp, node, evacuateReg,4923TR::MemoryReference::createWithDisplacement(cg, vmThreadReg, comp->fej9()->thisThreadGetEvacuateTopAddressOffset()));4924generateCompareInstruction(cg, node, tempReg, evacuateReg, true);4925generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, endLabel, TR::CC_GT);49264927// TR_softwareReadBarrier helper expects the vmThread in x0.4928generateMovInstruction(cg, node, x0Reg, vmThreadReg);49294930TR::SymbolReference *helperSym = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_softwareReadBarrier);4931generateImmSymInstruction(cg, TR::InstOpCode::bl, node, (uintptr_t)helperSym->getMethodAddress(), deps, helperSym, NULL);49324933generateLabelInstruction(cg, TR::InstOpCode::label, node, endLabel, deps);49344935srm->reclaimScratchRegister(tempReg);4936srm->reclaimScratchRegister(evacuateReg);49374938cg->stopUsingRegister(locationReg);4939cg->stopUsingRegister(x0Reg);49404941cg->machine()->setLinkRegisterKilled(true);4942}4943#endif //OMR_GC_CONCURRENT_SCAVENGER49444945TR::Register *oReg = oldVReg;4946TR::Register *nReg = newVReg;4947bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);4948if (useShiftedOffsets)4949{4950if (!fourthChild->isNull())4951{4952oReg = srm->findOrCreateScratchRegister();4953generateLogicalShiftRightImmInstruction(cg, node, oReg, oldVReg, TR::Compiler->om.compressedReferenceShiftOffset());4954}4955if (!fifthChild->isNull())4956{4957nReg = srm->findOrCreateScratchRegister();4958generateLogicalShiftRightImmInstruction(cg, node, nReg, newVReg, TR::Compiler->om.compressedReferenceShiftOffset());4959}4960}4961resultReg = genCAS(node, cg, srm, objReg, offsetReg, offset, offsetInReg, oReg, nReg, doneLabel, 0, true, !comp->useCompressedPointers());49624963if (useShiftedOffsets)4964{4965srm->reclaimScratchRegister(oReg);4966srm->reclaimScratchRegister(nReg);4967}49684969const bool skipWrtBar = (gcMode == gc_modron_wrtbar_none) || (fifthChild->isNull() && (gcMode != gc_modron_wrtbar_always));4970if (!skipWrtBar)4971{4972TR::Register *wrtBarSrcReg = newVReg;49734974if (objReg == wrtBarSrcReg)4975{4976// write barrier helper requires that dstObject and srcObject are in the different registers.4977// Because wrtBarSrcReg will be dead as soon as writeBarrier is done (which is not a GC safe point),4978// it is not required to be a collected refernce register.4979wrtBarSrcReg = srm->findOrCreateScratchRegister();4980generateMovInstruction(cg, node, wrtBarSrcReg, objReg, true);4981}49824983const bool srcNonNull = fifthChild->isNonNull();49844985if (doWrtBar) // generational or gencon4986{4987TR::SymbolReference *wbRef = (gcMode == gc_modron_wrtbar_always) ?4988comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef() :4989// use jitWriteBarrierStoreGenerational for both generational and gencon, because we inline card marking.4990comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();49914992if (!srcNonNull)4993{4994// If object is NULL, done4995cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);4996generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, wrtBarSrcReg, doneLabel);4997cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);4998}4999// Inlines cardmarking and remembered bit check for gencon.5000VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, wrtBarSrcReg, srm, doneLabel, wbRef, cg);50015002}5003else if (doCrdMrk)5004{5005TR::SymbolReference *wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();5006if (!srcNonNull)5007{5008cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk"), *srm);5009generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, wrtBarSrcReg, doneLabel);5010cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "wrtbarEvaluator:000srcNullChk:NonNull"), *srm);5011}5012VMCardCheckEvaluator(node, objReg, srm, doneLabel, cg);5013}50145015TR_ARM64ScratchRegisterDependencyConditions scratchDeps;5016scratchDeps.addDependency(cg, objReg, doWrtBar ? TR::RealRegister::x0 : TR::RealRegister::NoReg);5017scratchDeps.addDependency(cg, wrtBarSrcReg, doWrtBar ? TR::RealRegister::x1 : TR::RealRegister::NoReg);5018if (offsetInReg)5019{5020scratchDeps.addDependency(cg, offsetReg, TR::RealRegister::NoReg);5021}5022scratchDeps.unionDependency(cg, oldVReg, TR::RealRegister::NoReg);5023scratchDeps.unionDependency(cg, newVReg, TR::RealRegister::NoReg);5024scratchDeps.addDependency(cg, resultReg, TR::RealRegister::NoReg);5025scratchDeps.addScratchRegisters(cg, srm);5026TR::RegisterDependencyConditions *conditions = TR_ARM64ScratchRegisterDependencyConditions::createDependencyConditions(cg, NULL, &scratchDeps);50275028generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);5029}5030else5031{5032TR_ARM64ScratchRegisterDependencyConditions scratchDeps;5033scratchDeps.addDependency(cg, objReg, TR::RealRegister::NoReg);5034if (offsetInReg)5035{5036scratchDeps.addDependency(cg, offsetReg, TR::RealRegister::NoReg);5037}5038scratchDeps.unionDependency(cg, oldVReg, TR::RealRegister::NoReg);5039scratchDeps.unionDependency(cg, newVReg, TR::RealRegister::NoReg);5040scratchDeps.addDependency(cg, resultReg, TR::RealRegister::NoReg);5041scratchDeps.addScratchRegisters(cg, srm);5042TR::RegisterDependencyConditions *conditions = TR_ARM64ScratchRegisterDependencyConditions::createDependencyConditions(cg, NULL, &scratchDeps);50435044generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions, NULL);5045}50465047srm->stopUsingRegisters();50485049cg->recursivelyDecReferenceCount(firstChild);5050cg->decReferenceCount(secondChild);5051if (offsetInReg)5052{5053cg->decReferenceCount(thirdChild);5054}5055else5056{5057cg->recursivelyDecReferenceCount(thirdChild);5058}50595060cg->decReferenceCount(fourthChild);5061cg->decReferenceCount(fifthChild);50625063return resultReg;5064}50655066bool5067J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&resultReg)5068{5069TR::CodeGenerator *cg = self();5070TR::MethodSymbol * methodSymbol = node->getSymbol()->getMethodSymbol();50715072if (OMR::CodeGeneratorConnector::inlineDirectCall(node, resultReg))5073{5074return true;5075}5076if (methodSymbol)5077{5078switch (methodSymbol->getRecognizedMethod())5079{5080case TR::java_nio_Bits_keepAlive:5081case TR::java_lang_ref_Reference_reachabilityFence:5082{50835084// The only purpose of these functions is to prevent an otherwise5085// unreachable object from being garbage collected, because we don't5086// want its finalizer to be called too early. There's no need to5087// generate a full-blown call site just for this purpose.50885089TR::Node *paramNode = node->getFirstChild();5090TR::Register *paramReg = cg->evaluate(paramNode);50915092// In theory, a value could be kept alive on the stack, rather than in5093// a register. It is unfortunate that the following deps will force5094// the value into a register for no reason. However, in many common5095// cases, this label will have no effect on the generated code, and5096// will only affect GC maps.5097//5098TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg->trMemory());5099TR::addDependency(conditions, paramReg, TR::RealRegister::NoReg, TR_GPR, cg);5100TR::LabelSymbol *label = generateLabelSymbol(cg);5101generateLabelInstruction(cg, TR::InstOpCode::label, node, label, conditions);5102cg->decReferenceCount(paramNode);5103resultReg = NULL;5104return true;5105}51065107case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:5108{5109// In Java9 and newer this can be either the jdk.internal JNI method or the sun.misc Java wrapper.5110// In Java8 it will be sun.misc which will contain the JNI directly.5111// We only want to inline the JNI methods, so add an explicit test for isNative().5112if (!methodSymbol->isNative())5113break;51145115if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())5116{5117resultReg = VMinlineCompareAndSwap(node, cg, false);5118return true;5119}5120break;5121}51225123case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:5124{5125// As above, we only want to inline the JNI methods, so add an explicit test for isNative()5126if (!methodSymbol->isNative())5127break;51285129if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())5130{5131resultReg = VMinlineCompareAndSwap(node, cg, true);5132return true;5133}5134break;5135}51365137case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:5138{5139if (!methodSymbol->isNative())5140break;51415142if ((node->isUnsafeGetPutCASCallOnNonArray() || !TR::Compiler->om.canGenerateArraylets()) && node->isSafeForCGToFastPathUnsafeCall())5143{5144resultReg = VMinlineCompareAndSwapObject(node, cg);5145return true;5146}5147break;5148}5149default:5150break;5151}5152}51535154// Nothing was done5155resultReg = NULL;5156return false;5157}51585159TR::Instruction *J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(TR::CodeGenerator *cg, TR::Node *node, TR::Register *dstReg, TR::Register *srcReg, TR::Instruction *preced)5160{5161TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());5162uintptr_t mask = TR::Compiler->om.maskOfObjectVftField();5163bool isCompressed = TR::Compiler->om.compressObjectReferences();51645165if (~mask == 0)5166{5167// no mask instruction required5168return preced;5169}5170else if (~mask == 0xFF)5171{5172TR::InstOpCode::Mnemonic op = isCompressed ? TR::InstOpCode::andimmw : TR::InstOpCode::andimmx;5173uint32_t imm = isCompressed ? 0x617 : 0xE37; // encoding for ~0xFF5174return generateLogicalImmInstruction(cg, op, node, dstReg, srcReg, !isCompressed, imm, preced);5175}5176else5177{5178TR_UNIMPLEMENTED();5179}5180}51815182TR::Instruction *J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(TR::CodeGenerator *cg, TR::Node *node, TR::Register *reg, TR::Instruction *preced)5183{5184return J9::ARM64::TreeEvaluator::generateVFTMaskInstruction(cg, node, reg, reg, preced);5185}51865187TR::Register *5188J9::ARM64::TreeEvaluator::loadaddrEvaluator(TR::Node *node, TR::CodeGenerator *cg)5189{5190TR::Register *resultReg;5191TR::Symbol *sym = node->getSymbol();5192TR::Compilation *comp = cg->comp();5193TR::MemoryReference *mref = TR::MemoryReference::createWithSymRef(cg, node, node->getSymbolReference());51945195if (mref->getUnresolvedSnippet() != NULL)5196{5197resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();5198if (mref->useIndexedForm())5199{5200TR_ASSERT(false, "Unresolved indexed snippet is not supported");5201}5202else5203{5204generateTrg1MemInstruction(cg, TR::InstOpCode::addx, node, resultReg, mref);5205}5206}5207else5208{5209if (mref->useIndexedForm())5210{5211resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();5212generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, resultReg, mref->getBaseRegister(), mref->getIndexRegister());5213}5214else5215{5216int32_t offset = mref->getOffset();5217if (mref->hasDelayedOffset() || offset != 0)5218{5219resultReg = sym->isLocalObject() ? cg->allocateCollectedReferenceRegister() : cg->allocateRegister();5220if (mref->hasDelayedOffset())5221{5222generateTrg1MemInstruction(cg, TR::InstOpCode::addimmx, node, resultReg, mref);5223}5224else5225{5226if (offset >= 0 && constantIsUnsignedImm12(offset))5227{5228generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, resultReg, mref->getBaseRegister(), offset);5229}5230else5231{5232loadConstant64(cg, node, offset, resultReg);5233generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, resultReg, mref->getBaseRegister(), resultReg);5234}5235}5236}5237else5238{5239resultReg = mref->getBaseRegister();5240if (resultReg == cg->getMethodMetaDataRegister())5241{5242resultReg = cg->allocateRegister();5243generateMovInstruction(cg, node, resultReg, mref->getBaseRegister());5244}5245}5246}5247}5248node->setRegister(resultReg);5249mref->decNodeReferenceCounts(cg);5250return resultReg;5251}52525253TR::Register *J9::ARM64::TreeEvaluator::fremHelper(TR::Node *node, TR::CodeGenerator *cg, bool isSinglePrecision)5254{5255TR::Register *trgReg = isSinglePrecision ? cg->allocateSinglePrecisionRegister() : cg->allocateRegister(TR_FPR);5256TR::Node *child1 = node->getFirstChild();5257TR::Node *child2 = node->getSecondChild();5258TR::Register *source1Reg = cg->evaluate(child1);5259TR::Register *source2Reg = cg->evaluate(child2);52605261if (!cg->canClobberNodesRegister(child1))5262{5263auto copyReg = isSinglePrecision ? cg->allocateSinglePrecisionRegister() : cg->allocateRegister(TR_FPR);5264generateTrg1Src1Instruction(cg, isSinglePrecision ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd, node, copyReg, source1Reg);5265source1Reg = copyReg;5266}5267if (!cg->canClobberNodesRegister(child2))5268{5269auto copyReg = isSinglePrecision ? cg->allocateSinglePrecisionRegister() : cg->allocateRegister(TR_FPR);5270generateTrg1Src1Instruction(cg, isSinglePrecision ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd, node, copyReg, source2Reg);5271source2Reg = copyReg;5272}52735274// We call helperCFloatRemainderFloat, thus we need to set appropriate register dependencies.5275// First, count all volatile registers.5276TR::Linkage *linkage = cg->createLinkage(TR_System);5277auto linkageProp = linkage->getProperties();5278int nregs = 0;5279for (int32_t i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastAssignableFPR; i++)5280{5281if ((linkageProp._registerFlags[i] != ARM64_Reserved) && (linkageProp._registerFlags[i] != Preserved))5282{5283nregs++;5284}5285}52865287TR::RegisterDependencyConditions *dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(nregs, nregs, cg->trMemory());52885289// Then, add all volatile registers to dependencies except for v0 and v1.5290for (int32_t i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastAssignableGPR; i++)5291{5292if ((linkageProp._registerFlags[i] != ARM64_Reserved) && (linkageProp._registerFlags[i] != Preserved))5293{5294auto tmpReg = cg->allocateRegister(TR_GPR);5295TR::addDependency(dependencies, tmpReg, static_cast<TR::RealRegister::RegNum>(i), TR_GPR, cg);5296cg->stopUsingRegister(tmpReg);5297}5298}5299for (int32_t i = TR::RealRegister::v2; i <= TR::RealRegister::LastAssignableFPR; i++)5300{5301if ((linkageProp._registerFlags[i] != ARM64_Reserved) && (linkageProp._registerFlags[i] != Preserved))5302{5303auto tmpReg = cg->allocateRegister(TR_FPR);5304TR::addDependency(dependencies, tmpReg, static_cast<TR::RealRegister::RegNum>(i), TR_FPR, cg);5305cg->stopUsingRegister(tmpReg);5306}5307}53085309// Finally add v0 and v1 to dependencies.5310dependencies->addPreCondition(source1Reg, TR::RealRegister::v0);5311dependencies->addPostCondition(trgReg, TR::RealRegister::v0);5312dependencies->addPreCondition(source2Reg, TR::RealRegister::v1);5313auto tmpReg = cg->allocateRegister(TR_FPR);5314dependencies->addPostCondition(tmpReg, TR::RealRegister::v1);5315cg->stopUsingRegister(tmpReg);53165317TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(isSinglePrecision ? TR_ARM64floatRemainder : TR_ARM64doubleRemainder,5318false, false, false);5319generateImmSymInstruction(cg, TR::InstOpCode::bl, node,5320(uintptr_t)helperSym->getMethodAddress(),5321dependencies, helperSym, NULL);5322cg->stopUsingRegister(source1Reg);5323cg->stopUsingRegister(source2Reg);5324cg->decReferenceCount(child1);5325cg->decReferenceCount(child2);5326node->setRegister(trgReg);5327cg->machine()->setLinkRegisterKilled(true);53285329return trgReg;5330}5331TR::Register *J9::ARM64::TreeEvaluator::fremEvaluator(TR::Node *node, TR::CodeGenerator *cg)5332{5333return fremHelper(node, cg, true);5334}53355336TR::Register *J9::ARM64::TreeEvaluator::dremEvaluator(TR::Node *node, TR::CodeGenerator *cg)5337{5338return fremHelper(node, cg, false);5339}53405341TR::Register *5342J9::ARM64::TreeEvaluator::NULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)5343{5344return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, false, cg);5345}53465347TR::Register *5348J9::ARM64::TreeEvaluator::resolveAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)5349{5350return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, true, cg);5351}53525353TR::Register *5354J9::ARM64::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(TR::Node *node, bool needsResolve, TR::CodeGenerator *cg)5355{5356// NOTE:5357// If no code is generated for the null check, just evaluate the5358// child and decrement its use count UNLESS the child is a pass-through node5359// in which case some kind of explicit test or indirect load must be generated5360// to force the null check at this point.5361TR::Node * const firstChild = node->getFirstChild();5362TR::ILOpCode &opCode = firstChild->getOpCode();5363TR::Node *reference = NULL;5364TR::Compilation *comp = cg->comp();5365TR::Node *n = firstChild;5366bool hasCompressedPointers = false;53675368// NULLCHK has a special case with compressed pointers.5369// In the scenario where the first child is TR::l2a, the5370// node to be null checked is not the iloadi, but its child.5371// i.e. aload, aRegLoad, etc.5372if (comp->useCompressedPointers() && firstChild->getOpCodeValue() == TR::l2a)5373{5374// pattern match the sequence under the l2a5375// NULLCHK NULLCHK <- node5376// aloadi f l2a5377// aload O lshl5378// iu2l5379// iloadi/irdbari f <- n5380// aload O <- reference5381// iconst shftKonst5382//5383hasCompressedPointers = true;5384TR::ILOpCodes loadOp = cg->comp()->il.opCodeForIndirectLoad(TR::Int32);5385TR::ILOpCodes rdbarOp = cg->comp()->il.opCodeForIndirectReadBarrier(TR::Int32);5386while ((n->getOpCodeValue() != loadOp) && (n->getOpCodeValue() != rdbarOp))5387n = n->getFirstChild();5388reference = n->getFirstChild();5389}5390else5391reference = node->getNullCheckReference();53925393// Skip the NULLCHK for TR::loadaddr nodes.5394//5395if (cg->getHasResumableTrapHandler()5396&& reference->getOpCodeValue() == TR::loadaddr)5397{5398cg->evaluate(firstChild);5399cg->decReferenceCount(firstChild);5400return NULL;5401}54025403bool needExplicitCheck = true;5404bool needLateEvaluation = true;5405bool firstChildEvaluated = false;54065407// Add the explicit check after this instruction5408//5409TR::Instruction *appendTo = NULL;54105411// determine if an explicit check is needed5412if (cg->getHasResumableTrapHandler())5413{5414if (n->getOpCode().isLoadVar()5415|| (opCode.getOpCodeValue() == TR::l2i))5416{5417TR::SymbolReference *symRef = NULL;54185419if (opCode.getOpCodeValue() == TR::l2i)5420symRef = n->getFirstChild()->getSymbolReference();5421else5422symRef = n->getSymbolReference();54235424// We prefer to generate an explicit NULLCHK vs an implicit one5425// to prevent potential costs of a cache miss on an unnecessary load.5426if (n->getReferenceCount() == 15427&& !n->getSymbolReference()->isUnresolved())5428{5429// If the child is only used here, we don't need to evaluate it5430// since all we need is the grandchild which will be evaluated by5431// the generation of the explicit check below.5432needLateEvaluation = false;54335434// at this point, n is the raw iloadi (created by lowerTrees) and5435// reference is the aload of the object. node->getFirstChild is the5436// l2a sequence; as a result, n's refCount will always be 1.5437//5438if (hasCompressedPointers5439&& node->getFirstChild()->getReferenceCount() >= 2)5440{5441// In this case, the result of load is used in other places, so we need to evaluate it here5442//5443needLateEvaluation = true;54445445// Check if offset from a NULL reference will fall into the inaccessible bytes,5446// resulting in an implicit trap being raised.5447if (symRef5448&& ((symRef->getSymbol()->getOffset() + symRef->getOffset()) < cg->getNumberBytesReadInaccessible()))5449{5450needExplicitCheck = false;5451}5452}5453}54545455// Check if offset from a NULL reference will fall into the inaccessible bytes,5456// resulting in an implicit trap being raised.5457else if (symRef5458&& ((symRef->getSymbol()->getOffset() + symRef->getOffset()) < cg->getNumberBytesReadInaccessible()))5459{5460needExplicitCheck = false;54615462// If the child is an arraylength which has been reduced to an iiload,5463// and is only going to be used immediately in a BNDCHK, combine the checks.5464//5465TR::TreeTop *nextTreeTop = cg->getCurrentEvaluationTreeTop()->getNextTreeTop();5466if (n->getReferenceCount() == 2 && nextTreeTop)5467{5468TR::Node *nextTopNode = nextTreeTop->getNode();54695470if (nextTopNode)5471{5472if (nextTopNode->getOpCode().isBndCheck())5473{5474if ((nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getChild(2) == n))5475|| (!nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getFirstChild() == n)))5476{5477needLateEvaluation = false;5478nextTopNode->setHasFoldedImplicitNULLCHK(true);5479if (comp->getOption(TR_TraceCG))5480{5481traceMsg(comp, "\nMerging NULLCHK [%p] and BNDCHK [%p] of load child [%p]\n", node, nextTopNode, n);5482}5483}5484}5485else if (nextTopNode->getOpCode().isIf()5486&& nextTopNode->isNonoverriddenGuard()5487&& nextTopNode->getFirstChild() == firstChild)5488{5489needLateEvaluation = false;5490needExplicitCheck = true;5491}5492}5493}5494}5495}5496else if (opCode.isStore())5497{5498TR::SymbolReference *symRef = n->getSymbolReference();5499if (n->getOpCode().hasSymbolReference()5500&& (symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesWriteInaccessible()))5501{5502needExplicitCheck = false;5503}5504}5505else if (opCode.isCall()5506&& opCode.isIndirect()5507&& (cg->getNumberBytesReadInaccessible() > TR::Compiler->om.offsetOfObjectVftField()))5508{5509needExplicitCheck = false;5510}5511else if (opCode.getOpCodeValue() == TR::iushr5512&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))5513{5514// If the child is an arraylength which has been reduced to an iushr,5515// we must evaluate it here so that the implicit exception will occur5516// at the right point in the program.5517//5518// This can occur when the array length is represented in bytes, not elements.5519// The optimizer must intervene for this to happen.5520//5521cg->evaluate(n->getFirstChild());5522needExplicitCheck = false;5523}5524else if (opCode.getOpCodeValue() == TR::monent5525|| opCode.getOpCodeValue() == TR::monexit)5526{5527// The child may generate inline code that provides an implicit null check5528// but we won't know until the child is evaluated.5529//5530needLateEvaluation = false;5531cg->evaluate(reference);5532appendTo = cg->getAppendInstruction();5533cg->evaluate(firstChild);5534firstChildEvaluated = true;5535if (cg->getImplicitExceptionPoint()5536&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))5537{5538needExplicitCheck = false;5539}5540}5541}55425543// Generate the code for the null check5544//5545if(needExplicitCheck)5546{5547TR::Register * targetRegister = NULL;5548/* TODO: Resolution */5549/* if(needsResolve) ... */55505551TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);5552TR::Snippet *snippet = new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), NULL);5553cg->addSnippet(snippet);5554TR::Register *referenceReg = cg->evaluate(reference);5555TR::Instruction *cbzInstruction = generateCompareBranchInstruction(cg, TR::InstOpCode::cbzx, node, referenceReg, snippetLabel, appendTo);5556cbzInstruction->setNeedsGCMap(0xffffffff);5557snippet->gcMap().setGCRegisterMask(0xffffffff);5558// ARM64HelperCallSnippet generates "bl" instruction5559cg->machine()->setLinkRegisterKilled(true);5560}55615562// If we need to evaluate the child, do so. Otherwise, if we have5563// evaluated the reference node, then decrement its use count.5564// The use count of the child is decremented when we are done5565// evaluating the NULLCHK.5566//5567if (needLateEvaluation)5568{5569cg->evaluate(firstChild);5570firstChildEvaluated = true;5571}5572// If the firstChild is evaluated, we simply call decReferenceCount.5573// Otherwise, we need to call recursivelyDecReferenceCount so that the ref count of5574// child nodes of the firstChild is properly decremented when the ref count of the firstChild is 1.5575if (firstChildEvaluated)5576{5577cg->decReferenceCount(firstChild);5578}5579else5580{5581cg->recursivelyDecReferenceCount(firstChild);5582}55835584// If an explicit check has not been generated for the null check, there is5585// an instruction that will cause a hardware trap if the exception is to be5586// taken. If this method may catch the exception, a GC stack map must be5587// created for this instruction. All registers are valid at this GC point5588// TODO - if the method may not catch the exception we still need to note5589// that the GC point exists, since maps before this point and after it cannot5590// be merged.5591//5592if (cg->getHasResumableTrapHandler() && !needExplicitCheck)5593{5594TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();5595if (faultingInstruction)5596{5597faultingInstruction->setNeedsGCMap(0xffffffff);5598cg->machine()->setLinkRegisterKilled(true);55995600TR_Debug * debugObj = cg->getDebug();5601if (debugObj)5602{5603debugObj->addInstructionComment(faultingInstruction, "Throws Implicit Null Pointer Exception");5604}5605}5606}56075608if (comp->useCompressedPointers()5609&& reference->getOpCodeValue() == TR::l2a)5610{5611TR::Node *n = reference->getFirstChild();5612reference->setIsNonNull(true);5613TR::ILOpCodes loadOp = cg->comp()->il.opCodeForIndirectLoad(TR::Int32);5614TR::ILOpCodes rdbarOp = cg->comp()->il.opCodeForIndirectReadBarrier(TR::Int32);5615while ((n->getOpCodeValue() != loadOp) && (n->getOpCodeValue() != rdbarOp))5616{5617n->setIsNonZero(true);5618n = n->getFirstChild();5619}5620n->setIsNonZero(true);5621}56225623reference->setIsNonNull(true);56245625return NULL;5626}56275628static void5629genBoundCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *indexReg, int32_t indexVal, TR::Register *arrayLengthReg, int32_t arrayLengthVal)5630{5631TR::Instruction *gcPoint;56325633TR::LabelSymbol *boundCheckFailSnippetLabel = cg->lookUpSnippet(TR::Snippet::IsHelperCall, node->getSymbolReference());5634if (!boundCheckFailSnippetLabel)5635{5636boundCheckFailSnippetLabel = generateLabelSymbol(cg);5637cg->addSnippet(new (cg->trHeapMemory()) TR::ARM64HelperCallSnippet(cg, node, boundCheckFailSnippetLabel, node->getSymbolReference()));5638}56395640if (indexReg)5641generateCompareInstruction(cg, node, arrayLengthReg, indexReg, false); // 32-bit compare5642else5643generateCompareImmInstruction(cg, node, arrayLengthReg, indexVal, false); // 32-bit compare56445645gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, boundCheckFailSnippetLabel, TR::CC_LS);56465647// Exception edges don't have any live regs5648gcPoint->ARM64NeedsGCMap(cg, 0);56495650// ARM64HelperCallSnippet generates "bl" instruction5651cg->machine()->setLinkRegisterKilled(true);5652}56535654static TR::Instruction *5655genSpineCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *arrayLengthReg, TR::LabelSymbol *discontiguousArrayLabel)5656{5657return generateCompareBranchInstruction(cg, TR::InstOpCode::cbzw, node, arrayLengthReg, discontiguousArrayLabel);5658}56595660static TR::Instruction *5661genSpineCheck(TR::CodeGenerator *cg, TR::Node *node, TR::Register *baseArrayReg, TR::Register *arrayLengthReg, TR::LabelSymbol *discontiguousArrayLabel)5662{5663TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());5664TR::MemoryReference *contiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, fej9->getOffsetOfContiguousArraySizeField());5665generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, arrayLengthReg, contiguousArraySizeMR);5666return genSpineCheck(cg, node, arrayLengthReg, discontiguousArrayLabel);5667}56685669static void5670genArrayletAccessAddr(TR::CodeGenerator *cg, TR::Node *node, int32_t elementSize,5671// Inputs:5672TR::Register *baseArrayReg, TR::Register *indexReg, int32_t indexVal,5673// Outputs:5674TR::Register *arrayletReg, TR::Register *offsetReg, int32_t& offsetVal)5675{5676TR::Compilation* comp = cg->comp();5677TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());5678TR_ASSERT(offsetReg || !indexReg, "Expecting valid offset reg when index reg is passed");56795680uintptr_t arrayHeaderSize = TR::Compiler->om.discontiguousArrayHeaderSizeInBytes();5681int32_t spinePointerSize = TR::Compiler->om.sizeofReferenceField();5682int32_t spinePointerSizeShift = spinePointerSize == 8 ? 3 : 2;56835684TR::MemoryReference *spineMR;5685TR::InstOpCode::Mnemonic loadOp;56865687// Calculate the spine offset.5688//5689if (indexReg)5690{5691int32_t spineShift = fej9->getArraySpineShift(elementSize);56925693// spineOffset = (index >> spineShift) * spinePtrSize5694// = (index >> spineShift) << spinePtrSizeShift5695// spineOffset += arrayHeaderSize5696//5697TR_ASSERT(spineShift >= spinePointerSizeShift, "Unexpected spine shift value");5698generateLogicalShiftRightImmInstruction(cg, node, arrayletReg, indexReg, spineShift);5699generateLogicalShiftLeftImmInstruction(cg, node, arrayletReg, arrayletReg, spinePointerSizeShift);5700generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, arrayletReg, arrayletReg, arrayHeaderSize);57015702spineMR = TR::MemoryReference::createWithIndexReg(cg, baseArrayReg, arrayletReg);5703loadOp = spinePointerSize == 8 ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldroffw;5704}5705else5706{5707int32_t spineIndex = fej9->getArrayletLeafIndex(indexVal, elementSize);5708int32_t spineDisp32 = spineIndex * spinePointerSize + arrayHeaderSize;57095710spineMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, spineDisp32);5711loadOp = spinePointerSize == 8 ? TR::InstOpCode::ldrimmx : TR::InstOpCode::ldrimmw;5712}57135714// Load the arraylet from the spine.5715//5716generateTrg1MemInstruction(cg, loadOp, node, arrayletReg, spineMR);57175718// Calculate the arraylet offset.5719//5720if (indexReg)5721{5722int32_t arrayletMask = fej9->getArrayletMask(elementSize);57235724loadConstant64(cg, node, arrayletMask, offsetReg);5725generateTrg1Src2Instruction(cg, TR::InstOpCode::andx, node, offsetReg, indexReg, offsetReg);5726if (elementSize > 1)5727{5728int32_t elementSizeShift = CHAR_BIT * sizeof(int32_t) - leadingZeroes(elementSize - 1);5729generateLogicalShiftLeftImmInstruction(cg, node, offsetReg, offsetReg, elementSizeShift);5730}5731}5732else5733offsetVal = (fej9->getLeafElementIndex(indexVal, elementSize) * elementSize);5734}57355736static void5737genDecompressPointer(TR::CodeGenerator *cg, TR::Node *node, TR::Register *ptrReg)5738{5739int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();57405741if (shiftAmount != 0)5742generateLogicalShiftLeftImmInstruction(cg, node, ptrReg, ptrReg, shiftAmount);5743}57445745static TR::InstOpCode::Mnemonic5746getLoadOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool isUnsigned, bool useIdxReg)5747{5748switch (dt)5749{5750case TR::Int8:5751if (isUnsigned)5752return useIdxReg ? TR::InstOpCode::ldrboff : TR::InstOpCode::ldrbimm;5753else5754return useIdxReg ? TR::InstOpCode::ldrsboffw : TR::InstOpCode::ldrsbimmw;5755case TR::Int16:5756if (isUnsigned)5757return useIdxReg ? TR::InstOpCode::ldrhoff : TR::InstOpCode::ldrhimm;5758else5759return useIdxReg ? TR::InstOpCode::ldrshoffw : TR::InstOpCode::ldrshimmw;5760case TR::Int32:5761return useIdxReg ? TR::InstOpCode::ldroffw : TR::InstOpCode::ldrimmw;5762case TR::Int64:5763return useIdxReg ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldrimmx;5764case TR::Float:5765return useIdxReg ? TR::InstOpCode::vldroffs : TR::InstOpCode::vstrimms;5766case TR::Double:5767return useIdxReg ? TR::InstOpCode::vldroffd : TR::InstOpCode::vstrimmd;5768case TR::Address:5769if (elementSize == 8)5770return useIdxReg ? TR::InstOpCode::ldroffx : TR::InstOpCode::ldrimmx;5771else5772return useIdxReg ? TR::InstOpCode::ldroffw : TR::InstOpCode::ldrimmw;5773default:5774TR_ASSERT(false, "Unexpected array data type");5775return TR::InstOpCode::bad;5776}5777}57785779static TR::InstOpCode::Mnemonic5780getStoreOpCodeFromDataType(TR::CodeGenerator *cg, TR::DataType dt, int32_t elementSize, bool useIdxReg)5781{5782switch (dt)5783{5784case TR::Int8:5785return useIdxReg ? TR::InstOpCode::strboff : TR::InstOpCode::strbimm;5786case TR::Int16:5787return useIdxReg ? TR::InstOpCode::strhoff : TR::InstOpCode::strhimm;5788case TR::Int32:5789return useIdxReg ? TR::InstOpCode::stroffw : TR::InstOpCode::strimmw;5790case TR::Int64:5791return useIdxReg ? TR::InstOpCode::stroffx : TR::InstOpCode::strimmx;5792case TR::Float:5793return useIdxReg ? TR::InstOpCode::vstroffs : TR::InstOpCode::vstrimms;5794case TR::Double:5795return useIdxReg ? TR::InstOpCode::vstroffd : TR::InstOpCode::vstrimmd;5796case TR::Address:5797if (elementSize == 8)5798return useIdxReg ? TR::InstOpCode::stroffx : TR::InstOpCode::strimmx;5799else5800return useIdxReg ? TR::InstOpCode::stroffw : TR::InstOpCode::strimmw;5801default:5802TR_ASSERT(false, "Unexpected array data type");5803return TR::InstOpCode::bad;5804}5805}58065807// Handles both BNDCHKwithSpineCHK and SpineCHK nodes.5808//5809TR::Register *5810J9::ARM64::TreeEvaluator::BNDCHKwithSpineCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)5811{5812TR::Compilation *comp = cg->comp();5813TR_J9VMBase *fej9 = (TR_J9VMBase *) (cg->fe());5814bool needsBoundCheck = node->getOpCodeValue() == TR::BNDCHKwithSpineCHK;5815bool needsBoundCheckOOL;58165817TR_ARM64ScratchRegisterManager *srm = cg->generateScratchRegisterManager();58185819TR::Node *loadOrStoreChild = node->getFirstChild();5820TR::Node *baseArrayChild = node->getSecondChild();5821TR::Node *arrayLengthChild;5822TR::Node *indexChild;58235824if (needsBoundCheck)5825{5826arrayLengthChild = node->getChild(2);5827indexChild = node->getChild(3);5828}5829else5830indexChild = node->getChild(2);58315832TR::Register *baseArrayReg = cg->evaluate(baseArrayChild);5833TR::Register *indexReg;5834TR::Register *loadOrStoreReg;5835TR::Register *arrayLengthReg;58365837// If the index is too large to be an immediate load it in a register5838if (!indexChild->getOpCode().isLoadConst() || !constantIsUnsignedImm12(indexChild->getInt()))5839indexReg = cg->evaluate(indexChild);5840else5841indexReg = NULL;58425843// For primitive stores anchored under the check node, we must evaluate the source node5844// before the bound check branch so that its available to the snippet.5845//5846if (loadOrStoreChild->getOpCode().isStore() && !loadOrStoreChild->getRegister())5847{5848TR::Node *valueChild = loadOrStoreChild->getSecondChild();5849cg->evaluate(valueChild);5850}58515852// Evaluate any escaping nodes before the OOL branch since they won't be evaluated in the OOL path.5853preEvaluateEscapingNodesForSpineCheck(node, cg);58545855// Label to the OOL code that will perform the load/store/agen for discontiguous arrays (and the bound check if needed).5856TR::LabelSymbol *discontiguousArrayLabel = generateLabelSymbol(cg);58575858// Label back to main-line that the OOL code will branch to when done.5859TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);5860doneLabel->setEndInternalControlFlow();58615862TR_ARM64OutOfLineCodeSection *discontiguousArrayOOL = new (cg->trHeapMemory()) TR_ARM64OutOfLineCodeSection(discontiguousArrayLabel, doneLabel, cg);5863cg->getARM64OutOfLineCodeSectionList().push_front(discontiguousArrayOOL);58645865TR::Instruction *OOLBranchInstr;58665867if (needsBoundCheck)5868{5869TR_ASSERT(arrayLengthChild, "Expecting to have an array length child for BNDCHKwithSpineCHK node");5870TR_ASSERT(5871arrayLengthChild->getOpCode().isConversion() || arrayLengthChild->getOpCodeValue() == TR::iloadi || arrayLengthChild->getOpCodeValue() == TR::iload5872|| arrayLengthChild->getOpCodeValue() == TR::iRegLoad || arrayLengthChild->getOpCode().isLoadConst(),5873"Expecting array length child under BNDCHKwithSpineCHK to be a conversion, iiload, iload, iRegLoad or iconst");58745875arrayLengthReg = arrayLengthChild->getRegister();58765877if (arrayLengthReg)5878{5879OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, arrayLengthReg, discontiguousArrayLabel);5880needsBoundCheckOOL = true;5881genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());5882}5883else if (arrayLengthChild->getOpCode().isLoadConst())5884{5885// If the constant arraylength is non-zero then it will pass the spine check (hence its5886// a contiguous array) and the BNDCHK can be done inline with no OOL path.5887//5888// If the constant arraylength is zero then we will always go OOL.5889//5890// TODO: in the future there shouldn't be an OOL path because any valid access must be5891// on a discontiguous array.5892//5893if (arrayLengthChild->getInt() != 0)5894{5895// The array must be contiguous.5896//58975898// If the array length is too large to be an immediate load it in a register for the bound check5899if (!constantIsUnsignedImm12(arrayLengthChild->getInt()))5900arrayLengthReg = cg->evaluate(arrayLengthChild);59015902// Do the bound check first.5903genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());5904needsBoundCheckOOL = false;5905TR::Register *scratchArrayLengthReg = srm->findOrCreateScratchRegister();5906OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, scratchArrayLengthReg, discontiguousArrayLabel);5907srm->reclaimScratchRegister(scratchArrayLengthReg);5908}5909else5910{5911// Zero length array or discontiguous array. Unconditionally branch to the OOL path5912// to find out which.5913//5914OOLBranchInstr = generateLabelInstruction(cg, TR::InstOpCode::b, node, discontiguousArrayLabel);5915needsBoundCheckOOL = true;5916}5917}5918else5919{5920// Load the contiguous array length.5921arrayLengthReg = cg->evaluate(arrayLengthChild);5922// If the array length is 0, this is a discontiguous array and the bound check will be handled OOL.5923OOLBranchInstr = genSpineCheck(cg, node, arrayLengthReg, discontiguousArrayLabel);5924needsBoundCheckOOL = true;5925// Do the bound check using the contiguous array length.5926genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthReg, arrayLengthChild->getInt());5927}59285929cg->decReferenceCount(arrayLengthChild);5930}5931else5932{5933// Spine check only; load the contiguous length, check for 0, branch OOL if discontiguous.5934needsBoundCheckOOL = false;59355936arrayLengthReg = srm->findOrCreateScratchRegister();5937OOLBranchInstr = genSpineCheck(cg, node, baseArrayReg, arrayLengthReg, discontiguousArrayLabel);5938srm->reclaimScratchRegister(arrayLengthReg);5939}59405941// For reference stores, only evaluate the array element address because the store cannot5942// happen here (it must be done via the array store check).5943//5944// For primitive stores, evaluate them now.5945// For loads, evaluate them now.5946// For address calculations (aladd/aiadd), evaluate them now.5947//5948bool doLoadOrStore;5949bool doLoadDecompress = false;5950bool doAddressComputation;59515952if (loadOrStoreChild->getOpCode().isStore() && loadOrStoreChild->getReferenceCount() > 1)5953{5954TR_ASSERT(loadOrStoreChild->getOpCode().isWrtBar(), "Opcode must be wrtbar");5955loadOrStoreReg = cg->evaluate(loadOrStoreChild->getFirstChild());5956cg->decReferenceCount(loadOrStoreChild->getFirstChild());5957doLoadOrStore = false;5958doAddressComputation = true;5959}5960else5961{5962// If it's a store and not commoned then it must be a primitive store.5963// If it's an address load it may need decompression in the OOL path.59645965// Top-level check whether a decompression sequence is necessary, because the first child5966// may have been created by a PRE temp.5967//5968if ((loadOrStoreChild->getOpCodeValue() == TR::aload || loadOrStoreChild->getOpCodeValue() == TR::aRegLoad)5969&& node->isSpineCheckWithArrayElementChild()5970&& comp->useCompressedPointers())5971{5972doLoadDecompress = true;5973}59745975TR::Node *actualLoadOrStoreChild = loadOrStoreChild;5976while (actualLoadOrStoreChild->getOpCode().isConversion() || actualLoadOrStoreChild->containsCompressionSequence())5977{5978if (actualLoadOrStoreChild->containsCompressionSequence())5979doLoadDecompress = true;5980actualLoadOrStoreChild = actualLoadOrStoreChild->getFirstChild();5981}59825983doLoadOrStore = actualLoadOrStoreChild->getOpCode().hasSymbolReference()5984&& (actualLoadOrStoreChild->getSymbolReference()->getSymbol()->isArrayShadowSymbol()5985|| actualLoadOrStoreChild->getSymbolReference()->getSymbol()->isArrayletShadowSymbol()) && node->isSpineCheckWithArrayElementChild();59865987// If the 1st child is not a load/store/aladd/aiadd it's probably a nop (e.g. const) at this point due to commoning5988//5989doAddressComputation = !doLoadOrStore && actualLoadOrStoreChild->getOpCode().isArrayRef() && !node->isSpineCheckWithArrayElementChild();59905991if (doLoadOrStore || doAddressComputation || !loadOrStoreChild->getOpCode().isLoadConst())5992loadOrStoreReg = cg->evaluate(loadOrStoreChild);5993else5994loadOrStoreReg = NULL;5995}59965997generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);5998TR::LabelSymbol *doneMainlineLabel = generateLabelSymbol(cg);5999generateLabelInstruction(cg, TR::InstOpCode::label, node, doneMainlineLabel);60006001// start of OOL6002//6003discontiguousArrayOOL->swapInstructionListsWithCompilation();6004{6005TR::Instruction *OOLLabelInstr = generateLabelInstruction(cg, TR::InstOpCode::label, node, discontiguousArrayLabel);6006// XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.6007TR_ASSERT(!OOLLabelInstr->getLiveLocals() && !OOLLabelInstr->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");6008OOLLabelInstr->setLiveLocals(OOLBranchInstr->getLiveLocals());6009OOLLabelInstr->setLiveMonitors(OOLBranchInstr->getLiveMonitors());60106011if (needsBoundCheckOOL)6012{6013TR_ASSERT(needsBoundCheck, "Inconsistent state, needs bound check OOL but doesn't need bound check");60146015TR::MemoryReference *discontiguousArraySizeMR = TR::MemoryReference::createWithDisplacement(cg, baseArrayReg, fej9->getOffsetOfDiscontiguousArraySizeField());6016TR::Register *arrayLengthScratchReg = srm->findOrCreateScratchRegister();60176018generateTrg1MemInstruction(cg, TR::InstOpCode::ldrimmw, node, arrayLengthScratchReg, discontiguousArraySizeMR);60196020// Do the bound check using the discontiguous array length.6021genBoundCheck(cg, node, indexReg, indexChild->getInt(), arrayLengthScratchReg, arrayLengthChild->getInt());60226023srm->reclaimScratchRegister(arrayLengthScratchReg);6024}60256026TR_ASSERT(!(doLoadOrStore && doAddressComputation), "Unexpected condition");60276028TR::Register *arrayletReg = NULL;6029TR::DataType dt = loadOrStoreChild->getDataType();60306031if (doLoadOrStore || doAddressComputation)6032{6033arrayletReg = doAddressComputation ? loadOrStoreReg : cg->allocateRegister();60346035// Generate the base+offset address pair into the arraylet.6036//6037int32_t elementSize = (dt == TR::Address) ? TR::Compiler->om.sizeofReferenceField() : TR::Symbol::convertTypeToSize(dt);6038TR::Register *arrayletOffsetReg;6039int32_t arrayletOffsetVal;60406041if (indexReg)6042arrayletOffsetReg = srm->findOrCreateScratchRegister();60436044genArrayletAccessAddr(cg, node, elementSize, baseArrayReg, indexReg, indexChild->getInt(), arrayletReg, arrayletOffsetReg, arrayletOffsetVal);60456046// Decompress the arraylet pointer if necessary.6047//6048genDecompressPointer(cg, node, arrayletReg);60496050if (doLoadOrStore)6051{6052// Generate the load or store.6053//6054if (loadOrStoreChild->getOpCode().isStore())6055{6056TR::InstOpCode::Mnemonic storeOp = getStoreOpCodeFromDataType(cg, dt, elementSize, indexReg != NULL);60576058TR::MemoryReference *arrayletMR = indexReg ?6059TR::MemoryReference::createWithIndexReg(cg, arrayletReg, arrayletOffsetReg) :6060TR::MemoryReference::createWithDisplacement(cg, arrayletReg, arrayletOffsetVal);6061generateMemSrc1Instruction(cg, storeOp, node, arrayletMR, loadOrStoreChild->getSecondChild()->getRegister());6062}6063else6064{6065TR_ASSERT(loadOrStoreChild->getOpCode().isConversion() || loadOrStoreChild->getOpCode().isLoad(), "Unexpected op");60666067bool isUnsigned = loadOrStoreChild->getOpCode().isUnsigned();6068TR::InstOpCode::Mnemonic loadOp = getLoadOpCodeFromDataType(cg, dt, elementSize, isUnsigned, indexReg != NULL);60696070TR::MemoryReference *arrayletMR = indexReg ?6071TR::MemoryReference::createWithIndexReg(cg, arrayletReg, arrayletOffsetReg) :6072TR::MemoryReference::createWithDisplacement(cg, arrayletReg, arrayletOffsetVal);6073generateTrg1MemInstruction(cg, loadOp, node, loadOrStoreReg, arrayletMR);60746075if (doLoadDecompress)6076{6077TR_ASSERT(dt == TR::Address, "Expecting loads with decompression trees to have data type TR::Address");6078genDecompressPointer(cg, node, loadOrStoreReg);6079}6080}60816082cg->stopUsingRegister(arrayletReg);6083}6084else6085{6086if (indexReg)6087generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, loadOrStoreReg, loadOrStoreReg, arrayletOffsetReg);6088else6089addConstant32(cg, node, loadOrStoreReg, loadOrStoreReg, arrayletOffsetVal);6090}60916092if (indexReg)6093srm->reclaimScratchRegister(arrayletOffsetReg);6094}60956096const uint32_t numOOLDeps = 1 + (doLoadOrStore ? 1 : 0) + (needsBoundCheck && arrayLengthReg ? 1 : 0) + (loadOrStoreReg ? 1 : 0)6097+ (indexReg ? 1 : 0) + srm->numAvailableRegisters();6098TR::RegisterDependencyConditions *OOLDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numOOLDeps, cg->trMemory());6099OOLDeps->addPostCondition(baseArrayReg, TR::RealRegister::NoReg);6100TR_ASSERT(OOLDeps->getPostConditions()->getRegisterDependency(0)->getRegister() == baseArrayReg, "Unexpected register");6101if (doLoadOrStore)6102{6103OOLDeps->addPostCondition(arrayletReg, TR::RealRegister::NoReg);6104TR_ASSERT(OOLDeps->getPostConditions()->getRegisterDependency(1)->getRegister() == arrayletReg, "Unexpected register");6105}6106if (indexReg)6107OOLDeps->addPostCondition(indexReg, TR::RealRegister::NoReg);6108if (loadOrStoreReg)6109OOLDeps->addPostCondition(loadOrStoreReg, TR::RealRegister::NoReg);6110if (needsBoundCheck && arrayLengthReg)6111OOLDeps->addPostCondition(arrayLengthReg, TR::RealRegister::NoReg);6112srm->addScratchRegistersToDependencyList(OOLDeps);61136114srm->stopUsingRegisters();61156116TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg);6117generateLabelInstruction(cg, TR::InstOpCode::label, node, doneOOLLabel, OOLDeps);6118generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);6119}6120discontiguousArrayOOL->swapInstructionListsWithCompilation();6121//6122// end of OOL61236124cg->decReferenceCount(loadOrStoreChild);6125cg->decReferenceCount(baseArrayChild);6126cg->decReferenceCount(indexChild);61276128return NULL;6129}61306131TR::Register *J9::ARM64::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *cg)6132{6133TR::Register *returnRegister;61346135if (!cg->inlineDirectCall(node, returnRegister))6136{6137TR::SymbolReference *symRef = node->getSymbolReference();6138TR::MethodSymbol *callee = symRef->getSymbol()->castToMethodSymbol();6139TR::Linkage *linkage;61406141if (callee->isJNI() && (node->isPreparedForDirectJNI() || callee->getResolvedMethodSymbol()->canDirectNativeCall()))6142{6143linkage = cg->getLinkage(TR_J9JNILinkage);6144}6145else6146{6147linkage = cg->getLinkage(callee->getLinkageConvention());6148}6149returnRegister = linkage->buildDirectDispatch(node);6150}61516152return returnRegister;6153}615461556156