Path: blob/master/runtime/compiler/aarch64/codegen/ARM64PrivateLinkage.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2019, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include <algorithm>23#include <iterator>2425#include "codegen/ARM64Instruction.hpp"26#include "codegen/ARM64OutOfLineCodeSection.hpp"27#include "codegen/ARM64PrivateLinkage.hpp"28#include "codegen/CallSnippet.hpp"29#include "codegen/CodeGenerator.hpp"30#include "codegen/CodeGeneratorUtils.hpp"31#include "codegen/ConstantDataSnippet.hpp"32#include "codegen/GCStackAtlas.hpp"33#include "codegen/GenerateInstructions.hpp"34#include "codegen/Linkage_inlines.hpp"35#include "codegen/Machine.hpp"36#include "codegen/MemoryReference.hpp"37#include "codegen/RealRegister.hpp"38#include "codegen/Register.hpp"39#include "codegen/StackCheckFailureSnippet.hpp"40#include "compile/Compilation.hpp"41#include "env/CompilerEnv.hpp"42#include "env/J2IThunk.hpp"43#include "env/PersistentCHTable.hpp"44#include "env/StackMemoryRegion.hpp"45#include "il/Node_inlines.hpp"46#include "il/ParameterSymbol.hpp"47#include "il/ResolvedMethodSymbol.hpp"48#include "il/SymbolReference.hpp"49#include "infra/Assert.hpp"50#include "infra/List.hpp"51#include "runtime/Runtime.hpp"5253#define MIN_PROFILED_CALL_FREQUENCY (.075f)54#define MAX_PROFILED_CALL_FREQUENCY (.90f)5556uint32_t J9::ARM64::PrivateLinkage::_globalRegisterNumberToRealRegisterMap[] =57{58// GPRs59TR::RealRegister::x15,60TR::RealRegister::x14,61TR::RealRegister::x13,62TR::RealRegister::x12,63TR::RealRegister::x11,64TR::RealRegister::x10,65TR::RealRegister::x9,66TR::RealRegister::x8, // indirect result location register67TR::RealRegister::x18, // platform register68// callee-saved registers69TR::RealRegister::x28,70TR::RealRegister::x27,71TR::RealRegister::x26,72TR::RealRegister::x25,73TR::RealRegister::x24,74TR::RealRegister::x23,75TR::RealRegister::x22,76TR::RealRegister::x21,77// parameter registers78TR::RealRegister::x7,79TR::RealRegister::x6,80TR::RealRegister::x5,81TR::RealRegister::x4,82TR::RealRegister::x3,83TR::RealRegister::x2,84TR::RealRegister::x1,85TR::RealRegister::x0,8687// FPRs88TR::RealRegister::v31,89TR::RealRegister::v30,90TR::RealRegister::v29,91TR::RealRegister::v28,92TR::RealRegister::v27,93TR::RealRegister::v26,94TR::RealRegister::v25,95TR::RealRegister::v24,96TR::RealRegister::v23,97TR::RealRegister::v22,98TR::RealRegister::v21,99TR::RealRegister::v20,100TR::RealRegister::v19,101TR::RealRegister::v18,102TR::RealRegister::v17,103TR::RealRegister::v16,104// callee-saved registers105TR::RealRegister::v15,106TR::RealRegister::v14,107TR::RealRegister::v13,108TR::RealRegister::v12,109TR::RealRegister::v11,110TR::RealRegister::v10,111TR::RealRegister::v9,112TR::RealRegister::v8,113// parameter registers114TR::RealRegister::v7,115TR::RealRegister::v6,116TR::RealRegister::v5,117TR::RealRegister::v4,118TR::RealRegister::v3,119TR::RealRegister::v2,120TR::RealRegister::v1,121TR::RealRegister::v0122};123124J9::ARM64::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg)125: J9::PrivateLinkage(cg),126_interpretedMethodEntryPoint(NULL),127_jittedMethodEntryPoint(NULL)128{129int32_t i;130131_properties._properties = 0;132133_properties._registerFlags[TR::RealRegister::NoReg] = 0;134_properties._registerFlags[TR::RealRegister::x0] = IntegerArgument|IntegerReturn;135_properties._registerFlags[TR::RealRegister::x1] = IntegerArgument;136_properties._registerFlags[TR::RealRegister::x2] = IntegerArgument;137_properties._registerFlags[TR::RealRegister::x3] = IntegerArgument;138_properties._registerFlags[TR::RealRegister::x4] = IntegerArgument;139_properties._registerFlags[TR::RealRegister::x5] = IntegerArgument;140_properties._registerFlags[TR::RealRegister::x6] = IntegerArgument;141_properties._registerFlags[TR::RealRegister::x7] = IntegerArgument;142143for (i = TR::RealRegister::x8; i <= TR::RealRegister::x15; i++)144_properties._registerFlags[i] = 0; // x8 - x15 volatile145146_properties._registerFlags[TR::RealRegister::x16] = ARM64_Reserved; // IP0147_properties._registerFlags[TR::RealRegister::x17] = ARM64_Reserved; // IP1148149_properties._registerFlags[TR::RealRegister::x18] = 0;150151_properties._registerFlags[TR::RealRegister::x19] = Preserved|ARM64_Reserved; // vmThread152_properties._registerFlags[TR::RealRegister::x20] = Preserved|ARM64_Reserved; // Java SP153154for (i = TR::RealRegister::x21; i <= TR::RealRegister::x28; i++)155_properties._registerFlags[i] = Preserved; // x21 - x28 Preserved156157_properties._registerFlags[TR::RealRegister::x29] = ARM64_Reserved; // FP158_properties._registerFlags[TR::RealRegister::lr] = ARM64_Reserved; // LR159_properties._registerFlags[TR::RealRegister::sp] = ARM64_Reserved;160_properties._registerFlags[TR::RealRegister::xzr] = ARM64_Reserved;161162_properties._registerFlags[TR::RealRegister::v0] = FloatArgument|FloatReturn;163_properties._registerFlags[TR::RealRegister::v1] = FloatArgument;164_properties._registerFlags[TR::RealRegister::v2] = FloatArgument;165_properties._registerFlags[TR::RealRegister::v3] = FloatArgument;166_properties._registerFlags[TR::RealRegister::v4] = FloatArgument;167_properties._registerFlags[TR::RealRegister::v5] = FloatArgument;168_properties._registerFlags[TR::RealRegister::v6] = FloatArgument;169_properties._registerFlags[TR::RealRegister::v7] = FloatArgument;170171for (i = TR::RealRegister::v8; i <= TR::RealRegister::LastFPR; i++)172_properties._registerFlags[i] = 0; // v8 - v31 volatile173174_properties._numIntegerArgumentRegisters = 8;175_properties._firstIntegerArgumentRegister = 0;176_properties._numFloatArgumentRegisters = 8;177_properties._firstFloatArgumentRegister = 8;178179_properties._argumentRegisters[0] = TR::RealRegister::x0;180_properties._argumentRegisters[1] = TR::RealRegister::x1;181_properties._argumentRegisters[2] = TR::RealRegister::x2;182_properties._argumentRegisters[3] = TR::RealRegister::x3;183_properties._argumentRegisters[4] = TR::RealRegister::x4;184_properties._argumentRegisters[5] = TR::RealRegister::x5;185_properties._argumentRegisters[6] = TR::RealRegister::x6;186_properties._argumentRegisters[7] = TR::RealRegister::x7;187_properties._argumentRegisters[8] = TR::RealRegister::v0;188_properties._argumentRegisters[9] = TR::RealRegister::v1;189_properties._argumentRegisters[10] = TR::RealRegister::v2;190_properties._argumentRegisters[11] = TR::RealRegister::v3;191_properties._argumentRegisters[12] = TR::RealRegister::v4;192_properties._argumentRegisters[13] = TR::RealRegister::v5;193_properties._argumentRegisters[14] = TR::RealRegister::v6;194_properties._argumentRegisters[15] = TR::RealRegister::v7;195196std::copy(std::begin(_globalRegisterNumberToRealRegisterMap), std::end(_globalRegisterNumberToRealRegisterMap), std::begin(_properties._allocationOrder));197198_properties._firstIntegerReturnRegister = 0;199_properties._firstFloatReturnRegister = 1;200201_properties._returnRegisters[0] = TR::RealRegister::x0;202_properties._returnRegisters[1] = TR::RealRegister::v0;203204_properties._numAllocatableIntegerRegisters = 25;205_properties._numAllocatableFloatRegisters = 32;206207_properties._preservedRegisterMapForGC = 0x1fe40000;208_properties._methodMetaDataRegister = TR::RealRegister::x19;209_properties._stackPointerRegister = TR::RealRegister::x20;210_properties._framePointerRegister = TR::RealRegister::x29;211_properties._computedCallTargetRegister = TR::RealRegister::x8;212_properties._vtableIndexArgumentRegister = TR::RealRegister::x9;213_properties._j9methodArgumentRegister = TR::RealRegister::x0;214215// Volatile GPR (0-15, 18) + FPR (0-31) + VFT Reg216_properties._numberOfDependencyGPRegisters = 17 + 32 + 1;217setOffsetToFirstParm(0);218_properties._offsetToFirstLocal = -8;219}220221TR::ARM64LinkageProperties& J9::ARM64::PrivateLinkage::getProperties()222{223return _properties;224}225226uint32_t J9::ARM64::PrivateLinkage::getRightToLeft()227{228return getProperties().getRightToLeft();229}230231intptr_t232J9::ARM64::PrivateLinkage::entryPointFromCompiledMethod()233{234return reinterpret_cast<intptr_t>(getJittedMethodEntryPoint()->getBinaryEncoding());235}236237intptr_t238J9::ARM64::PrivateLinkage::entryPointFromInterpretedMethod()239{240return reinterpret_cast<intptr_t>(getInterpretedMethodEntryPoint()->getBinaryEncoding());241}242243void J9::ARM64::PrivateLinkage::alignLocalReferences(uint32_t &stackIndex)244{245TR::Compilation *comp = self()->comp();246TR::GCStackAtlas *atlas = self()->cg()->getStackAtlas();247const int32_t localObjectAlignment = TR::Compiler->om.getObjectAlignmentInBytes();248const uint8_t pointerSize = TR::Compiler->om.sizeofReferenceAddress();249250if (comp->useCompressedPointers())251{252if (comp->getOption(TR_TraceCG))253{254traceMsg(comp,"\nLOCAL OBJECT ALIGNMENT: stack offset before alignment: %d,", stackIndex);255}256257// stackIndex in mapCompactedStack is calculated using only local reference sizes and does not include the padding258stackIndex -= pointerSize * atlas->getNumberOfPaddingSlots();259260if (comp->getOption(TR_TraceCG))261{262traceMsg(comp," with padding: %d,", stackIndex);263}264// If there are any local objects we have to make sure they are aligned properly265// when compressed pointers are used. Otherwise, pointer compression may clobber266// part of the pointer.267//268// Each auto's GC index will have already been aligned, so just the starting stack269// offset needs to be aligned.270//271uint32_t unalignedStackIndex = stackIndex;272stackIndex &= ~(localObjectAlignment - 1);273uint32_t paddingBytes = unalignedStackIndex - stackIndex;274if (paddingBytes > 0)275{276TR_ASSERT_FATAL((paddingBytes & (pointerSize - 1)) == 0, "Padding bytes should be a multiple of the slot/pointer size");277uint32_t paddingSlots = paddingBytes / pointerSize;278atlas->setNumberOfSlotsMapped(atlas->getNumberOfSlotsMapped() + paddingSlots);279}280}281}282283void J9::ARM64::PrivateLinkage::mapStack(TR::ResolvedMethodSymbol *method)284{285if (self()->cg()->getLocalsIG() && self()->cg()->getSupportsCompactedLocals())286{287mapCompactedStack(method);288return;289}290291const TR::ARM64LinkageProperties& linkageProperties = getProperties();292int32_t firstLocalOffset = linkageProperties.getOffsetToFirstLocal();293uint32_t stackIndex = firstLocalOffset;294int32_t lowGCOffset = stackIndex;295296TR::GCStackAtlas *atlas = cg()->getStackAtlas();297298// Map all garbage collected references together so can concisely represent299// stack maps. They must be mapped so that the GC map index in each local300// symbol is honoured.301//302uint32_t numberOfLocalSlotsMapped = atlas->getNumberOfSlotsMapped() - atlas->getNumberOfParmSlotsMapped();303304stackIndex -= numberOfLocalSlotsMapped * TR::Compiler->om.sizeofReferenceAddress();305306if (comp()->useCompressedPointers())307{308// If there are any local objects we have to make sure they are aligned properly309// when compressed pointers are used. Otherwise, pointer compression may clobber310// part of the pointer.311//312// Each auto's GC index will have already been aligned, so just the starting stack313// offset needs to be aligned.314//315uint32_t unalignedStackIndex = stackIndex;316stackIndex &= ~(TR::Compiler->om.getObjectAlignmentInBytes() - 1);317uint32_t paddingBytes = unalignedStackIndex - stackIndex;318if (paddingBytes > 0)319{320TR_ASSERT((paddingBytes & (TR::Compiler->om.sizeofReferenceAddress() - 1)) == 0, "Padding bytes should be a multiple of the slot/pointer size");321uint32_t paddingSlots = paddingBytes / TR::Compiler->om.sizeofReferenceAddress();322atlas->setNumberOfSlotsMapped(atlas->getNumberOfSlotsMapped() + paddingSlots);323}324}325326ListIterator<TR::AutomaticSymbol> automaticIterator(&method->getAutomaticList());327TR::AutomaticSymbol *localCursor;328int32_t firstLocalGCIndex = atlas->getNumberOfParmSlotsMapped();329330// Map local references to set the stack position correct according to the GC map index331//332for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())333{334if (localCursor->getGCMapIndex() >= 0)335{336localCursor->setOffset(stackIndex + TR::Compiler->om.sizeofReferenceAddress() * (localCursor->getGCMapIndex() - firstLocalGCIndex));337if (localCursor->getGCMapIndex() == atlas->getIndexOfFirstInternalPointer())338{339atlas->setOffsetOfFirstInternalPointer(localCursor->getOffset() - firstLocalOffset);340}341}342}343344method->setObjectTempSlots((lowGCOffset - stackIndex) / TR::Compiler->om.sizeofReferenceAddress());345lowGCOffset = stackIndex;346347// Now map the rest of the locals348//349automaticIterator.reset();350localCursor = automaticIterator.getFirst();351352while (localCursor != NULL)353{354if (localCursor->getGCMapIndex() < 0 &&355localCursor->getSize() != 8)356{357mapSingleAutomatic(localCursor, stackIndex);358}359360localCursor = automaticIterator.getNext();361}362363automaticIterator.reset();364localCursor = automaticIterator.getFirst();365366while (localCursor != NULL)367{368if (localCursor->getGCMapIndex() < 0 &&369localCursor->getSize() == 8)370{371stackIndex -= (stackIndex & 0x4)?4:0;372mapSingleAutomatic(localCursor, stackIndex);373}374375localCursor = automaticIterator.getNext();376}377378method->setLocalMappingCursor(stackIndex);379380mapIncomingParms(method);381382atlas->setLocalBaseOffset(lowGCOffset - firstLocalOffset);383atlas->setParmBaseOffset(atlas->getParmBaseOffset() + getOffsetToFirstParm() - firstLocalOffset);384}385386void J9::ARM64::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol *p, uint32_t &stackIndex)387{388mapSingleAutomatic(p, p->getRoundedSize(), stackIndex);389}390391void J9::ARM64::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol *p, uint32_t size, uint32_t &stackIndex)392{393/*394* Align stack-allocated objects that don't have GC map index > 0.395*/396if (comp()->useCompressedPointers() && p->isLocalObject() && (p->getGCMapIndex() == -1))397{398int32_t roundup = TR::Compiler->om.getObjectAlignmentInBytes() - 1;399400size = (size + roundup) & (~roundup);401}402403p->setOffset(stackIndex -= size);404}405406static void lockRegister(TR::RealRegister *regToAssign)407{408regToAssign->setState(TR::RealRegister::Locked);409regToAssign->setAssignedRegister(regToAssign);410}411412void J9::ARM64::PrivateLinkage::initARM64RealRegisterLinkage()413{414TR::Machine *machine = cg()->machine();415TR::RealRegister *reg;416int icount;417418reg = machine->getRealRegister(TR::RealRegister::RegNum::x16); // IP0419lockRegister(reg);420421reg = machine->getRealRegister(TR::RealRegister::RegNum::x17); // IP1422lockRegister(reg);423424reg = machine->getRealRegister(TR::RealRegister::RegNum::x19); // vmThread425lockRegister(reg);426427reg = machine->getRealRegister(TR::RealRegister::RegNum::x20); // Java SP428lockRegister(reg);429430reg = machine->getRealRegister(TR::RealRegister::RegNum::x29); // FP431lockRegister(reg);432433reg = machine->getRealRegister(TR::RealRegister::RegNum::lr); // LR434lockRegister(reg);435436reg = machine->getRealRegister(TR::RealRegister::RegNum::sp); // SP437lockRegister(reg);438439// assign "maximum" weight to registers x0-x15440for (icount = TR::RealRegister::x0; icount <= TR::RealRegister::x15; icount++)441machine->getRealRegister((TR::RealRegister::RegNum)icount)->setWeight(0xf000);442443// assign "maximum" weight to registers x21-x28444for (icount = TR::RealRegister::x21; icount <= TR::RealRegister::x28; icount++)445machine->getRealRegister((TR::RealRegister::RegNum)icount)->setWeight(0xf000);446447// assign "maximum" weight to registers v0-v31448for (icount = TR::RealRegister::v0; icount <= TR::RealRegister::v31; icount++)449machine->getRealRegister((TR::RealRegister::RegNum)icount)->setWeight(0xf000);450}451452453void454J9::ARM64::PrivateLinkage::setParameterLinkageRegisterIndex(TR::ResolvedMethodSymbol *method)455{456ListIterator<TR::ParameterSymbol> paramIterator(&(method->getParameterList()));457TR::ParameterSymbol *paramCursor = paramIterator.getFirst();458int32_t numIntArgs = 0, numFloatArgs = 0;459const TR::ARM64LinkageProperties& properties = getProperties();460461while ( (paramCursor!=NULL) &&462( (numIntArgs < properties.getNumIntArgRegs()) ||463(numFloatArgs < properties.getNumFloatArgRegs()) ) )464{465int32_t index = -1;466467switch (paramCursor->getDataType())468{469case TR::Int8:470case TR::Int16:471case TR::Int32:472case TR::Int64:473case TR::Address:474if (numIntArgs < properties.getNumIntArgRegs())475{476index = numIntArgs;477}478numIntArgs++;479break;480481case TR::Float:482case TR::Double:483if (numFloatArgs < properties.getNumFloatArgRegs())484{485index = numFloatArgs;486}487numFloatArgs++;488break;489}490491paramCursor->setLinkageRegisterIndex(index);492paramCursor = paramIterator.getNext();493}494}495496497int32_t498J9::ARM64::PrivateLinkage::calculatePreservedRegisterSaveSize(499uint32_t ®isterSaveDescription,500uint32_t &numGPRsSaved)501{502TR::Machine *machine = cg()->machine();503504TR::RealRegister::RegNum firstPreservedGPR = TR::RealRegister::x21;505TR::RealRegister::RegNum lastPreservedGPR = TR::RealRegister::x28;506507// Create a bit vector of preserved registers that have been modified508// in this method.509//510for (int32_t i = firstPreservedGPR; i <= lastPreservedGPR; i++)511{512if (machine->getRealRegister((TR::RealRegister::RegNum)i)->getHasBeenAssignedInMethod())513{514registerSaveDescription |= 1 << (i-1);515numGPRsSaved++;516}517}518519return numGPRsSaved*8;520}521522/**523* @brief Generates instructions for initializing local variable and internal pointer slots in prologue524*525* @param[in] cursor : instruction cursor526* @param[in] numSlotsToBeInitialized : number of slots to be initialized527* @param[in] offsetToFirstSlotFromAdjustedSP : offset to first slot from adjusted Java SP528* @param[in] zeroReg : zero register (x31)529* @param[in] baseReg : base register (x10)530* @param[in] javaSP : Java SP register (x20)531* @param[in] cg : Code Generator532*533* @return instruction cursor534*/535static TR::Instruction* initializeLocals(TR::Instruction *cursor, uint32_t numSlotsToBeInitialized, int32_t offsetToFirstSlotFromAdjustedSP,536TR::RealRegister *zeroReg, TR::RealRegister *baseReg, TR::RealRegister *javaSP, TR::CodeGenerator *cg)537{538auto loopCount = numSlotsToBeInitialized / 2;539// stp instruction has 7bit immediate offset which is scaled by 8 for 64bit registers.540// If the offset to the last 2 slots cleared by stp instruction does not fit in imm7,541// we use x10 as base register.542const bool isImm7OffsetOverflow = (loopCount > 0) &&543!constantIsImm7((offsetToFirstSlotFromAdjustedSP + (loopCount - 1) * 2 * TR::Compiler->om.sizeofReferenceAddress()) >> 3);544545auto offset = offsetToFirstSlotFromAdjustedSP;546if (isImm7OffsetOverflow)547{548if (!constantIsImm7(offset >> 3))549{550// If offset does not fit in imm7, update baseReg and reset offset to 0551if (constantIsUnsignedImm12(offset))552{553cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, NULL, baseReg, javaSP, offset, cursor);554}555else556{557cursor = loadConstant32(cg, NULL, offset, baseReg, cursor);558cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, NULL, baseReg, javaSP, baseReg, cursor);559}560offset = 0;561}562else563{564// mov baseReg, javaSP565cursor = generateTrg1Src2Instruction(cg, TR::InstOpCode::orrx, NULL, baseReg, zeroReg, javaSP, cursor);566}567568569for (int32_t i = 0; i < loopCount; i++)570{571if (!constantIsImm7(offset >> 3))572{573// If offset does not fit in imm7, update baseReg and reset offset to 0574cursor = generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, NULL, baseReg, baseReg, offset, cursor);575offset = 0;576}577TR::MemoryReference *localMR = TR::MemoryReference::createWithDisplacement(cg, baseReg, offset);578cursor = generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, NULL, localMR, zeroReg, zeroReg, cursor);579offset += (TR::Compiler->om.sizeofReferenceAddress() * 2);580}581if (numSlotsToBeInitialized % 2)582{583// clear residue584TR::MemoryReference *localMR = TR::MemoryReference::createWithDisplacement(cg, baseReg, offset);585cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, NULL, localMR, zeroReg, cursor);586}587}588else589{590for (int32_t i = 0; i < loopCount; i++, offset += (TR::Compiler->om.sizeofReferenceAddress() * 2))591{592TR::MemoryReference *localMR = TR::MemoryReference::createWithDisplacement(cg, javaSP, offset);593cursor = generateMemSrc2Instruction(cg, TR::InstOpCode::stpoffx, NULL, localMR, zeroReg, zeroReg, cursor);594}595if (numSlotsToBeInitialized % 2)596{597// clear residue598TR::MemoryReference *localMR = TR::MemoryReference::createWithDisplacement(cg, javaSP, offset);599cursor = generateMemSrc1Instruction(cg, TR::InstOpCode::strimmx, NULL, localMR, zeroReg, cursor);600}601}602603return cursor;604}605606void J9::ARM64::PrivateLinkage::createPrologue(TR::Instruction *cursor)607{608609// Prologues are emitted post-RA so it is fine to use real registers directly610// in instructions611//612TR::ARM64LinkageProperties& properties = getProperties();613TR::Machine *machine = cg()->machine();614TR::RealRegister *vmThread = machine->getRealRegister(properties.getMethodMetaDataRegister()); // x19615TR::RealRegister *javaSP = machine->getRealRegister(properties.getStackPointerRegister()); // x20616617TR::Instruction *beforeInterpreterMethodEntryPointInstruction = cursor;618619// --------------------------------------------------------------------------620// Create the entry point when transitioning from an interpreted method.621// Parameters are passed on the stack, so load them into the appropriate622// linkage registers expected by the JITed method entry point.623//624cursor = loadStackParametersToLinkageRegisters(cursor);625626TR::Instruction *beforeJittedMethodEntryPointInstruction = cursor;627628// Entry breakpoint629//630if (comp()->getOption(TR_EntryBreakPoints))631{632cursor = generateExceptionInstruction(cg(), TR::InstOpCode::brkarm64, NULL, 0, cursor);633}634635// --------------------------------------------------------------------------636// Determine the bitvector of registers to preserve in the prologue637//638uint32_t registerSaveDescription = 0;639uint32_t numGPRsSaved = 0;640641uint32_t preservedRegisterSaveSize = calculatePreservedRegisterSaveSize(registerSaveDescription, numGPRsSaved);642643// Offset between the entry JavaSP of a method and the first mapped local. This covers644// the space needed to preserve the RA. It is a negative (or zero) offset.645//646int32_t firstLocalOffset = properties.getOffsetToFirstLocal();647648// The localMappingCursor is a negative-offset mapping of locals (autos and spills) to649// the stack relative to the entry JavaSP of a method. It includes the offset to the650// first mapped local.651//652TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();653int32_t localsSize = -(int32_t)(bodySymbol->getLocalMappingCursor());654655// Size of the frame needed to handle the argument storage requirements of any method656// call in the current method.657//658// The offset to the first parm is the offset between the entry JavaSP and the first659// mapped parameter. It is a positive (or zero) offset.660//661int32_t outgoingArgsSize = cg()->getLargestOutgoingArgSize() + getOffsetToFirstParm();662663int32_t frameSizeIncludingReturnAddress = preservedRegisterSaveSize + localsSize + outgoingArgsSize;664665// Align the frame to 16 bytes666//667int32_t alignedFrameSizeIncludingReturnAddress = (frameSizeIncludingReturnAddress + 15) & ~15;668669// The frame size maintained by the code generator does not include the RA670//671cg()->setFrameSizeInBytes(alignedFrameSizeIncludingReturnAddress + firstLocalOffset);672673// --------------------------------------------------------------------------674// Encode register save description (RSD)675//676int32_t preservedRegisterOffsetFromJavaBP = (alignedFrameSizeIncludingReturnAddress - outgoingArgsSize + firstLocalOffset);677678TR_ASSERT_FATAL(preservedRegisterOffsetFromJavaBP >= 0, "expecting a positive preserved register area offset");679680// Frame size is too large for the RSD word in the metadata681//682if (preservedRegisterOffsetFromJavaBP > 0xffff)683{684comp()->failCompilation<TR::CompilationInterrupted>("Overflowed or underflowed bounds of regSaveOffset in calculateFrameSize.");685}686687registerSaveDescription |= (preservedRegisterOffsetFromJavaBP & 0xffff);688689cg()->setRegisterSaveDescription(registerSaveDescription);690691// In FSD, we must save linkage regs to the incoming argument area because692// the stack overflow check doesn't preserve them.693bool parmsHaveBeenStored = false;694if (comp()->getOption(TR_FullSpeedDebug))695{696cursor = saveParametersToStack(cursor);697parmsHaveBeenStored = true;698}699700// --------------------------------------------------------------------------701// Store return address (RA)702//703TR::MemoryReference *returnAddressMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, firstLocalOffset);704cursor = generateMemSrc1Instruction(cg(), TR::InstOpCode::sturx, NULL, returnAddressMR, machine->getRealRegister(TR::RealRegister::lr), cursor);705706// --------------------------------------------------------------------------707// Speculatively adjust Java SP with the needed frame size.708// This includes the preserved RA slot.709//710if (constantIsUnsignedImm12(alignedFrameSizeIncludingReturnAddress))711{712cursor = generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::subimmx, NULL, javaSP, javaSP, alignedFrameSizeIncludingReturnAddress, cursor);713}714else715{716TR::RealRegister *x9Reg = machine->getRealRegister(TR::RealRegister::RegNum::x9);717718if (constantIsUnsignedImm16(alignedFrameSizeIncludingReturnAddress))719{720// x9 will contain the aligned frame size721//722cursor = loadConstant32(cg(), NULL, alignedFrameSizeIncludingReturnAddress, x9Reg, cursor);723cursor = generateTrg1Src2Instruction(cg(), TR::InstOpCode::subx, NULL, javaSP, javaSP, x9Reg, cursor);724}725else726{727TR_ASSERT_FATAL(0, "Large frame size not supported in prologue yet");728}729}730731// --------------------------------------------------------------------------732// Perform javaSP overflow check733//734if (!comp()->isDLT())735{736// if (javaSP < vmThread->SOM)737// goto stackOverflowSnippetLabel738//739// stackOverflowRestartLabel:740//741TR::MemoryReference *somMR = TR::MemoryReference::createWithDisplacement(cg(), vmThread, cg()->getStackLimitOffset());742TR::RealRegister *somReg = machine->getRealRegister(TR::RealRegister::RegNum::x10);743cursor = generateTrg1MemInstruction(cg(), TR::InstOpCode::ldrimmx, NULL, somReg, somMR, cursor);744745TR::RealRegister *zeroReg = machine->getRealRegister(TR::RealRegister::xzr);746cursor = generateTrg1Src2Instruction(cg(), TR::InstOpCode::subsx, NULL, zeroReg, javaSP, somReg, cursor);747748TR::LabelSymbol *stackOverflowSnippetLabel = generateLabelSymbol(cg());749cursor = generateConditionalBranchInstruction(cg(), TR::InstOpCode::b_cond, NULL, stackOverflowSnippetLabel, TR::CC_LS, cursor);750751TR::LabelSymbol *stackOverflowRestartLabel = generateLabelSymbol(cg());752cursor = generateLabelInstruction(cg(), TR::InstOpCode::label, NULL, stackOverflowRestartLabel, cursor);753754cg()->addSnippet(new (cg()->trHeapMemory()) TR::ARM64StackCheckFailureSnippet(cg(), NULL, stackOverflowRestartLabel, stackOverflowSnippetLabel));755}756else757{758// If StackCheckFailureSnippet is not added to the end of the snippet list and no data snippets exist,759// we might have a HelperCallSnippet at the end of the method.760// HelperCallSnippets add a GCMap to the instruction next to the `bl` instruction to the helper,761// and if a HelperCallSnippet is at the end of the method, GCMap is added to the address beyond the range of the method.762// To avoid that, we add a dummy ConstantDataSnippet. (Data snippets are emitted after normal snippets.)763if (!cg()->hasDataSnippets())764{765auto snippet = cg()->findOrCreate4ByteConstant(NULL, 0);766snippet->setReloType(TR_NoRelocation);767}768}769770// --------------------------------------------------------------------------771// Preserve GPRs772//773// javaSP has been adjusted, so preservedRegs start at offset outgoingArgSize774// relative to the javaSP775//776// Registers are preserved in order from x21 (low memory) -> x28 (high memory)777//778if (numGPRsSaved)779{780TR::RealRegister::RegNum firstPreservedGPR = TR::RealRegister::x21;781TR::RealRegister::RegNum lastPreservedGPR = TR::RealRegister::x28;782783int32_t preservedRegisterOffsetFromJavaSP = outgoingArgsSize;784785for (TR::RealRegister::RegNum regIndex = firstPreservedGPR; regIndex <= lastPreservedGPR; regIndex=(TR::RealRegister::RegNum)((uint32_t)regIndex+1))786{787TR::RealRegister *preservedRealReg = machine->getRealRegister(regIndex);788if (preservedRealReg->getHasBeenAssignedInMethod())789{790TR::MemoryReference *preservedRegMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, preservedRegisterOffsetFromJavaSP);791cursor = generateMemSrc1Instruction(cg(), TR::InstOpCode::strimmx, NULL, preservedRegMR, preservedRealReg, cursor);792preservedRegisterOffsetFromJavaSP += 8;793numGPRsSaved--;794}795}796797TR_ASSERT_FATAL(numGPRsSaved == 0, "preserved register mismatch in prologue");798}799800// --------------------------------------------------------------------------801// Initialize locals802//803TR::GCStackAtlas *atlas = cg()->getStackAtlas();804if (atlas)805{806// The GC stack maps are conservative in that they all say that807// collectable locals are live. This means that these locals must be808// cleared out in case a GC happens before they are allocated a valid809// value.810// The atlas contains the number of locals that need to be cleared. They811// are all mapped together starting at GC index 0.812//813uint32_t numLocalsToBeInitialized = atlas->getNumberOfSlotsToBeInitialized();814if (numLocalsToBeInitialized > 0 || atlas->getInternalPointerMap())815{816// The LocalBaseOffset and firstLocalOffset are either negative or zero values817//818int32_t initializedLocalsOffsetFromAdjustedJavaSP = alignedFrameSizeIncludingReturnAddress + atlas->getLocalBaseOffset() + firstLocalOffset;819820TR::RealRegister *zeroReg = machine->getRealRegister(TR::RealRegister::RegNum::xzr);821TR::RealRegister *baseReg = machine->getRealRegister(TR::RealRegister::RegNum::x10);822823cursor = initializeLocals(cursor, numLocalsToBeInitialized, initializedLocalsOffsetFromAdjustedJavaSP,824zeroReg, baseReg, javaSP, cg());825826if (atlas->getInternalPointerMap())827{828// Total number of slots to be initialized is number of pinning arrays +829// number of derived internal pointer stack slots830//831int32_t numSlotsToBeInitialized = atlas->getNumberOfDistinctPinningArrays() + atlas->getInternalPointerMap()->getNumInternalPointers();832int32_t offsetToFirstInternalPointerFromAdjustedJavaSP = alignedFrameSizeIncludingReturnAddress + atlas->getOffsetOfFirstInternalPointer() + firstLocalOffset;833834cursor = initializeLocals(cursor, numSlotsToBeInitialized, offsetToFirstInternalPointerFromAdjustedJavaSP,835zeroReg, baseReg, javaSP, cg());836}837}838}839840// Adjust final offsets on locals and parm symbols now that the frame size is known.841// These offsets are relative to the javaSP which has been adjusted downward to842// accommodate the frame of this method.843//844ListIterator<TR::AutomaticSymbol> automaticIterator(&bodySymbol->getAutomaticList());845TR::AutomaticSymbol *localCursor = automaticIterator.getFirst();846847while (localCursor != NULL)848{849localCursor->setOffset(localCursor->getOffset() + alignedFrameSizeIncludingReturnAddress);850localCursor = automaticIterator.getNext();851}852853ListIterator<TR::ParameterSymbol> parameterIterator(&bodySymbol->getParameterList());854TR::ParameterSymbol *parmCursor = parameterIterator.getFirst();855while (parmCursor != NULL)856{857parmCursor->setParameterOffset(parmCursor->getParameterOffset() + alignedFrameSizeIncludingReturnAddress);858parmCursor = parameterIterator.getNext();859}860861// Ensure arguments reside where the method body expects them to be (either in registers or862// on the stack). This state is influenced by global register assignment.863//864cursor = copyParametersToHomeLocation(cursor, parmsHaveBeenStored);865866// Set the instructions for method entry points867setInterpretedMethodEntryPoint(beforeInterpreterMethodEntryPointInstruction->getNext());868setJittedMethodEntryPoint(beforeJittedMethodEntryPointInstruction->getNext());869}870871void J9::ARM64::PrivateLinkage::createEpilogue(TR::Instruction *cursor)872{873const TR::ARM64LinkageProperties& properties = getProperties();874TR::Machine *machine = cg()->machine();875TR::Node *lastNode = cursor->getNode();876TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();877TR::RealRegister *javaSP = machine->getRealRegister(properties.getStackPointerRegister()); // x20878879// restore preserved GPRs880int32_t preservedRegisterOffsetFromJavaSP = cg()->getLargestOutgoingArgSize() + getOffsetToFirstParm(); // outgoingArgsSize881TR::RealRegister::RegNum firstPreservedGPR = TR::RealRegister::x21;882TR::RealRegister::RegNum lastPreservedGPR = TR::RealRegister::x28;883for (TR::RealRegister::RegNum r = firstPreservedGPR; r <= lastPreservedGPR; r = (TR::RealRegister::RegNum)((uint32_t)r+1))884{885TR::RealRegister *rr = machine->getRealRegister(r);886if (rr->getHasBeenAssignedInMethod())887{888TR::MemoryReference *preservedRegMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, preservedRegisterOffsetFromJavaSP);889cursor = generateTrg1MemInstruction(cg(), TR::InstOpCode::ldrimmx, lastNode, rr, preservedRegMR, cursor);890preservedRegisterOffsetFromJavaSP += 8;891}892}893894// remove space for preserved registers895int32_t firstLocalOffset = properties.getOffsetToFirstLocal();896897uint32_t alignedFrameSizeIncludingReturnAddress = cg()->getFrameSizeInBytes() - firstLocalOffset;898if (constantIsUnsignedImm12(alignedFrameSizeIncludingReturnAddress))899{900cursor = generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::addimmx, lastNode, javaSP, javaSP, alignedFrameSizeIncludingReturnAddress, cursor);901}902else903{904TR::RealRegister *x9Reg = machine->getRealRegister(TR::RealRegister::RegNum::x9);905cursor = loadConstant32(cg(), lastNode, alignedFrameSizeIncludingReturnAddress, x9Reg, cursor);906cursor = generateTrg1Src2Instruction(cg(), TR::InstOpCode::addx, lastNode, javaSP, javaSP, x9Reg, cursor);907}908909// restore return address910TR::RealRegister *lr = machine->getRealRegister(TR::RealRegister::lr);911if (machine->getLinkRegisterKilled())912{913TR::MemoryReference *returnAddressMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, firstLocalOffset);914cursor = generateTrg1MemInstruction(cg(), TR::InstOpCode::ldurx, lastNode, lr, returnAddressMR, cursor);915}916917// return918generateRegBranchInstruction(cg(), TR::InstOpCode::ret, lastNode, lr, cursor);919}920921void J9::ARM64::PrivateLinkage::pushOutgoingMemArgument(TR::Register *argReg, int32_t offset, TR::InstOpCode::Mnemonic opCode, TR::ARM64MemoryArgument &memArg)922{923const TR::ARM64LinkageProperties& properties = self()->getProperties();924TR::RealRegister *javaSP = cg()->machine()->getRealRegister(properties.getStackPointerRegister()); // x20925926TR::MemoryReference *result = TR::MemoryReference::createWithDisplacement(cg(), javaSP, offset);927memArg.argRegister = argReg;928memArg.argMemory = result;929memArg.opCode = opCode;930}931932int32_t J9::ARM64::PrivateLinkage::buildArgs(TR::Node *callNode,933TR::RegisterDependencyConditions *dependencies)934{935return buildPrivateLinkageArgs(callNode, dependencies, TR_Private);936}937938int32_t J9::ARM64::PrivateLinkage::buildPrivateLinkageArgs(TR::Node *callNode,939TR::RegisterDependencyConditions *dependencies,940TR_LinkageConventions linkage)941{942TR_ASSERT(linkage == TR_Private || linkage == TR_Helper || linkage == TR_CHelper, "Unexpected linkage convention");943944const TR::ARM64LinkageProperties& properties = getProperties();945TR::ARM64MemoryArgument *pushToMemory = NULL;946TR::Register *tempReg;947int32_t argIndex = 0;948int32_t numMemArgs = 0;949int32_t memArgSize = 0;950int32_t firstExplicitArg = 0;951int32_t from, to, step;952int32_t argSize = -getOffsetToFirstParm();953int32_t totalSize = 0;954int32_t multiplier;955956uint32_t numIntegerArgs = 0;957uint32_t numFloatArgs = 0;958959TR::Node *child;960TR::DataType childType;961TR::DataType resType = callNode->getType();962963uint32_t firstArgumentChild = callNode->getFirstArgumentIndex();964965TR::MethodSymbol *callSymbol = callNode->getSymbol()->castToMethodSymbol();966967bool isHelperCall = linkage == TR_Helper || linkage == TR_CHelper;968bool rightToLeft = isHelperCall &&969//we want the arguments for induceOSR to be passed from left to right as in any other non-helper call970!callNode->getSymbolReference()->isOSRInductionHelper();971972if (rightToLeft)973{974from = callNode->getNumChildren() - 1;975to = firstArgumentChild;976step = -1;977}978else979{980from = firstArgumentChild;981to = callNode->getNumChildren() - 1;982step = 1;983}984985uint32_t numIntArgRegs = properties.getNumIntArgRegs();986uint32_t numFloatArgRegs = properties.getNumFloatArgRegs();987988TR::RealRegister::RegNum specialArgReg = TR::RealRegister::NoReg;989switch (callSymbol->getMandatoryRecognizedMethod())990{991// Node: special long args are still only passed in one GPR992case TR::java_lang_invoke_ComputedCalls_dispatchJ9Method:993specialArgReg = getProperties().getJ9MethodArgumentRegister();994// Other args go in memory995numIntArgRegs = 0;996numFloatArgRegs = 0;997break;998case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:999case TR::com_ibm_jit_JITHelpers_dispatchVirtual:1000specialArgReg = getProperties().getVTableIndexArgumentRegister();1001break;1002}1003if (specialArgReg != TR::RealRegister::NoReg)1004{1005if (comp()->getOption(TR_TraceCG))1006{1007traceMsg(comp(), "Special arg %s in %s\n",1008comp()->getDebug()->getName(callNode->getChild(from)),1009comp()->getDebug()->getName(cg()->machine()->getRealRegister(specialArgReg)));1010}1011// Skip the special arg in the first loop1012from += step;1013}10141015// C helpers have an implicit first argument (the VM thread) that we have to account for1016if (linkage == TR_CHelper)1017{1018TR_ASSERT(numIntArgRegs > 0, "This code doesn't handle passing this implicit arg on the stack");1019numIntegerArgs++;1020totalSize += TR::Compiler->om.sizeofReferenceAddress();1021}10221023for (int32_t i = from; (rightToLeft && i >= to) || (!rightToLeft && i <= to); i += step)1024{1025child = callNode->getChild(i);1026childType = child->getDataType();10271028switch (childType)1029{1030case TR::Int8:1031case TR::Int16:1032case TR::Int32:1033case TR::Int64:1034case TR::Address:1035multiplier = (childType == TR::Int64) ? 2 : 1;1036if (numIntegerArgs >= numIntArgRegs)1037{1038numMemArgs++;1039memArgSize += TR::Compiler->om.sizeofReferenceAddress() * multiplier;1040}1041numIntegerArgs++;1042totalSize += TR::Compiler->om.sizeofReferenceAddress() * multiplier;1043break;1044case TR::Float:1045case TR::Double:1046multiplier = (childType == TR::Double) ? 2 : 1;1047if (numFloatArgs >= numFloatArgRegs)1048{1049numMemArgs++;1050memArgSize += TR::Compiler->om.sizeofReferenceAddress() * multiplier;1051}1052numFloatArgs++;1053totalSize += TR::Compiler->om.sizeofReferenceAddress() * multiplier;1054break;1055default:1056TR_ASSERT(false, "Argument type %s is not supported\n", childType.toString());1057}1058}10591060// From here, down, any new stack allocations will expire / die when the function returns1061TR::StackMemoryRegion stackMemoryRegion(*trMemory());10621063if (numMemArgs > 0)1064{1065pushToMemory = new (trStackMemory()) TR::ARM64MemoryArgument[numMemArgs];1066}10671068if (specialArgReg)1069from -= step; // we do want to process special args in the following loop10701071numIntegerArgs = 0;1072numFloatArgs = 0;10731074// C helpers have an implicit first argument (the VM thread) that we have to account for1075if (linkage == TR_CHelper)1076{1077TR_ASSERT(numIntArgRegs > 0, "This code doesn't handle passing this implicit arg on the stack");1078TR::Register *vmThreadArgRegister = cg()->allocateRegister();1079generateMovInstruction(cg(), callNode, vmThreadArgRegister, cg()->getMethodMetaDataRegister());1080dependencies->addPreCondition(vmThreadArgRegister, properties.getIntegerArgumentRegister(numIntegerArgs));1081if (resType.getDataType() == TR::NoType)1082dependencies->addPostCondition(vmThreadArgRegister, properties.getIntegerArgumentRegister(numIntegerArgs));1083numIntegerArgs++;1084firstExplicitArg = 1;1085}10861087// Helper linkage preserves all argument registers except the return register1088// TODO: C helper linkage does not, this code needs to make sure argument registers are killed in post dependencies1089for (int32_t i = from; (rightToLeft && i >= to) || (!rightToLeft && i <= to); i += step)1090{1091TR::Register *argRegister;1092TR::InstOpCode::Mnemonic op;1093bool isSpecialArg = (i == from && specialArgReg != TR::RealRegister::NoReg);10941095child = callNode->getChild(i);1096childType = child->getDataType();10971098switch (childType)1099{1100case TR::Int8:1101case TR::Int16:1102case TR::Int32:1103case TR::Int64:1104case TR::Address:1105if (childType == TR::Address)1106{1107argRegister = pushAddressArg(child);1108}1109else if (childType == TR::Int64)1110{1111argRegister = pushLongArg(child);1112}1113else1114{1115argRegister = pushIntegerWordArg(child);1116}1117if (isSpecialArg)1118{1119if (specialArgReg == properties.getIntegerReturnRegister(0))1120{1121TR::Register *resultReg;1122if (resType.isAddress())1123resultReg = cg()->allocateCollectedReferenceRegister();1124else1125resultReg = cg()->allocateRegister();1126dependencies->addPreCondition(argRegister, specialArgReg);1127dependencies->addPostCondition(resultReg, properties.getIntegerReturnRegister(0));1128}1129else1130{1131TR::addDependency(dependencies, argRegister, specialArgReg, TR_GPR, cg());1132}1133}1134else1135{1136argSize += TR::Compiler->om.sizeofReferenceAddress() * ((childType == TR::Int64) ? 2 : 1);1137if (numIntegerArgs < numIntArgRegs)1138{1139if (!cg()->canClobberNodesRegister(child, 0))1140{1141if (argRegister->containsCollectedReference())1142tempReg = cg()->allocateCollectedReferenceRegister();1143else1144tempReg = cg()->allocateRegister();1145generateMovInstruction(cg(), callNode, tempReg, argRegister);1146argRegister = tempReg;1147}1148if (numIntegerArgs == firstExplicitArg)1149{1150// the first integer argument1151TR::Register *resultReg;1152if (resType.isAddress())1153resultReg = cg()->allocateCollectedReferenceRegister();1154else1155resultReg = cg()->allocateRegister();1156dependencies->addPreCondition(argRegister, properties.getIntegerArgumentRegister(numIntegerArgs));1157dependencies->addPostCondition(resultReg, TR::RealRegister::x0);1158if (firstExplicitArg == 1)1159dependencies->addPostCondition(argRegister, properties.getIntegerArgumentRegister(numIntegerArgs));1160}1161else1162{1163TR::addDependency(dependencies, argRegister, properties.getIntegerArgumentRegister(numIntegerArgs), TR_GPR, cg());1164}1165}1166else // numIntegerArgs >= numIntArgRegs1167{1168op = ((childType == TR::Address) || (childType == TR::Int64)) ? TR::InstOpCode::strimmx : TR::InstOpCode::strimmw;1169pushOutgoingMemArgument(argRegister, totalSize - argSize, op, pushToMemory[argIndex++]);1170}1171numIntegerArgs++;1172}1173break;1174case TR::Float:1175case TR::Double:1176if (childType == TR::Float)1177{1178argSize += TR::Compiler->om.sizeofReferenceAddress();1179argRegister = pushFloatArg(child);1180}1181else1182{1183argSize += TR::Compiler->om.sizeofReferenceAddress() * 2;1184argRegister = pushDoubleArg(child);1185}1186if (numFloatArgs < numFloatArgRegs)1187{1188if (!cg()->canClobberNodesRegister(child, 0))1189{1190tempReg = cg()->allocateRegister(TR_FPR);1191op = (childType == TR::Float) ? TR::InstOpCode::fmovs : TR::InstOpCode::fmovd;1192generateTrg1Src1Instruction(cg(), op, callNode, tempReg, argRegister);1193argRegister = tempReg;1194}1195if (numFloatArgs == 0 && resType.isFloatingPoint())1196{1197TR::Register *resultReg;1198if (resType.getDataType() == TR::Float)1199resultReg = cg()->allocateSinglePrecisionRegister();1200else1201resultReg = cg()->allocateRegister(TR_FPR);1202dependencies->addPreCondition(argRegister, TR::RealRegister::v0);1203dependencies->addPostCondition(resultReg, TR::RealRegister::v0);1204}1205else1206TR::addDependency(dependencies, argRegister, properties.getFloatArgumentRegister(numFloatArgs), TR_FPR, cg());1207}1208else // numFloatArgs >= numFloatArgRegs1209{1210op = (childType == TR::Float) ? TR::InstOpCode::vstrimms : TR::InstOpCode::vstrimmd;1211pushOutgoingMemArgument(argRegister, totalSize - argSize, op, pushToMemory[argIndex++]);1212}1213numFloatArgs++;1214break;1215}1216}12171218for (int32_t i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastGPR; ++i)1219{1220TR::RealRegister::RegNum realReg = (TR::RealRegister::RegNum)i;1221if (properties.getPreserved(realReg) || (properties.getRegisterFlags(realReg) & ARM64_Reserved))1222continue;1223if (realReg == specialArgReg)1224continue; // already added deps above. No need to add them here.1225if (callSymbol->isComputed() && i == getProperties().getComputedCallTargetRegister())1226continue;1227if (!dependencies->searchPreConditionRegister(realReg))1228{1229if (realReg == properties.getIntegerArgumentRegister(0) && callNode->getDataType() == TR::Address)1230{1231dependencies->addPreCondition(cg()->allocateRegister(), TR::RealRegister::x0);1232dependencies->addPostCondition(cg()->allocateCollectedReferenceRegister(), TR::RealRegister::x0);1233}1234else1235{1236// Helper linkage preserves all registers that are not argument registers, so we don't need to spill them.1237if (linkage != TR_Helper)1238TR::addDependency(dependencies, NULL, realReg, TR_GPR, cg());1239}1240}1241}12421243if (callNode->getType().isFloatingPoint() && numFloatArgs == 0)1244{1245//add return floating-point register dependency1246TR::addDependency(dependencies, NULL, (TR::RealRegister::RegNum)getProperties().getFloatReturnRegister(), TR_FPR, cg());1247}12481249// Helper linkage preserves all registers that are not argument registers, so we don't need to spill them.1250if (linkage != TR_Helper)1251{1252for (int32_t i = TR::RealRegister::FirstFPR; i <= TR::RealRegister::LastFPR; ++i)1253{1254TR::RealRegister::RegNum realReg = (TR::RealRegister::RegNum)i;1255if (properties.getPreserved(realReg))1256continue;1257if (!dependencies->searchPreConditionRegister(realReg))1258{1259TR::addDependency(dependencies, NULL, realReg, TR_FPR, cg());1260}1261}1262}12631264/* Spills all vector registers */1265if ((linkage != TR_Helper) && killsVectorRegisters())1266{1267TR::Register *tmpReg = cg()->allocateRegister();1268dependencies->addPostCondition(tmpReg, TR::RealRegister::KillVectorRegs);1269cg()->stopUsingRegister(tmpReg);1270}12711272if (numMemArgs > 0)1273{1274for (argIndex = 0; argIndex < numMemArgs; argIndex++)1275{1276TR::Register *aReg = pushToMemory[argIndex].argRegister;1277generateMemSrc1Instruction(cg(), pushToMemory[argIndex].opCode, callNode, pushToMemory[argIndex].argMemory, aReg);1278cg()->stopUsingRegister(aReg);1279}1280}12811282return totalSize;1283}12841285void J9::ARM64::PrivateLinkage::buildDirectCall(TR::Node *callNode,1286TR::SymbolReference *callSymRef,1287TR::RegisterDependencyConditions *dependencies,1288const TR::ARM64LinkageProperties &pp,1289uint32_t argSize)1290{1291TR::Instruction *gcPoint;1292TR::MethodSymbol *callSymbol = callSymRef->getSymbol()->castToMethodSymbol();12931294TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());12951296if (callSymRef->getReferenceNumber() >= TR_ARM64numRuntimeHelpers)1297fej9->reserveTrampolineIfNecessary(comp(), callSymRef, false);12981299bool forceUnresolvedDispatch = !fej9->isResolvedDirectDispatchGuaranteed(comp());13001301if (callSymbol->isJITInternalNative() ||1302(!callSymRef->isUnresolved() && !callSymbol->isInterpreted() &&1303((forceUnresolvedDispatch && callSymbol->isHelper()) || !forceUnresolvedDispatch)))1304{1305bool isMyself = comp()->isRecursiveMethodTarget(callSymbol);13061307gcPoint = generateImmSymInstruction(cg(), TR::InstOpCode::bl, callNode,1308isMyself ? 0 : (uintptr_t)callSymbol->getMethodAddress(),1309dependencies,1310callSymRef ? callSymRef : callNode->getSymbolReference(),1311NULL);1312}1313else1314{1315TR::LabelSymbol *label = generateLabelSymbol(cg());1316TR::Snippet *snippet;13171318if (callSymRef->isUnresolved() || comp()->compileRelocatableCode())1319{1320snippet = new (trHeapMemory()) TR::ARM64UnresolvedCallSnippet(cg(), callNode, label, argSize);1321}1322else1323{1324snippet = new (trHeapMemory()) TR::ARM64CallSnippet(cg(), callNode, label, argSize);1325snippet->gcMap().setGCRegisterMask(pp.getPreservedRegisterMapForGC());1326}13271328cg()->addSnippet(snippet);1329gcPoint = generateImmSymInstruction(cg(), TR::InstOpCode::bl, callNode,13300, dependencies,1331new (trHeapMemory()) TR::SymbolReference(comp()->getSymRefTab(), label),1332snippet);13331334// Nop is necessary due to confusion when resolving shared slots at a transition1335if (callSymRef->isOSRInductionHelper())1336cg()->generateNop(callNode);13371338}13391340gcPoint->ARM64NeedsGCMap(cg(), callSymbol->getLinkageConvention() == TR_Helper ? 0xffffffff : pp.getPreservedRegisterMapForGC());1341}13421343TR::Register *J9::ARM64::PrivateLinkage::buildDirectDispatch(TR::Node *callNode)1344{1345TR::SymbolReference *callSymRef = callNode->getSymbolReference();1346const TR::ARM64LinkageProperties &pp = getProperties();1347// Extra post dependency for killing vector registers (see KillVectorRegs)1348const int extraPostReg = killsVectorRegisters() ? 1 : 0;1349TR::RegisterDependencyConditions *dependencies =1350new (trHeapMemory()) TR::RegisterDependencyConditions(1351pp.getNumberOfDependencyGPRegisters(),1352pp.getNumberOfDependencyGPRegisters() + extraPostReg, trMemory());13531354int32_t argSize = buildArgs(callNode, dependencies);13551356buildDirectCall(callNode, callSymRef, dependencies, pp, argSize);1357cg()->machine()->setLinkRegisterKilled(true);13581359TR::Register *retReg;1360switch(callNode->getOpCodeValue())1361{1362case TR::icall:1363retReg = dependencies->searchPostConditionRegister(1364pp.getIntegerReturnRegister());1365break;1366case TR::lcall:1367case TR::acall:1368retReg = dependencies->searchPostConditionRegister(1369pp.getLongReturnRegister());1370break;1371case TR::fcall:1372case TR::dcall:1373retReg = dependencies->searchPostConditionRegister(1374pp.getFloatReturnRegister());1375break;1376case TR::call:1377retReg = NULL;1378break;1379default:1380retReg = NULL;1381TR_ASSERT_FATAL(false, "Unsupported direct call Opcode.");1382}13831384callNode->setRegister(retReg);13851386dependencies->stopUsingDepRegs(cg(), retReg);1387return retReg;1388}13891390/**1391* @brief Gets profiled call site information1392*1393* @param[in] cg: code generator1394* @param[in] callNode: node for call1395* @param[in] maxStaticPIC: maximum number of static PICs1396* @param[out] values: list of PIC items1397* @returns true if any call site information is returned1398*/1399static bool getProfiledCallSiteInfo(TR::CodeGenerator *cg, TR::Node *callNode, uint32_t maxStaticPICs, TR_ScratchList<J9::ARM64PICItem> &values)1400{1401TR::Compilation *comp = cg->comp();1402TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1403if (comp->compileRelocatableCode())1404return false;14051406TR::SymbolReference *methodSymRef = callNode->getSymbolReference();1407TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();14081409if (!methodSymbol->isVirtual() && !methodSymbol->isInterface())1410return false;14111412TR_AddressInfo *info = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp, AddressInfo));1413if (!info)1414{1415if (comp->getOption(TR_TraceCG))1416{1417traceMsg(comp, "Profiled target not found for node %p\n", callNode);1418}1419return false;1420}1421static const bool tracePIC = feGetEnv("TR_TracePIC") != NULL;1422if (tracePIC)1423{1424traceMsg(comp, "Value profile info for callNode %p in %s\n", callNode, comp->signature());1425info->getProfiler()->dumpInfo(comp->getOutFile());1426traceMsg(comp, "\n");1427}1428uint32_t totalFreq = info->getTotalFrequency();1429if (totalFreq == 0 || info->getTopProbability() < MIN_PROFILED_CALL_FREQUENCY)1430{1431if (comp->getOption(TR_TraceCG))1432{1433traceMsg(comp, "Profiled target with enough frequency not found for node %p\n", callNode);1434}1435return false;1436}14371438TR_ScratchList<TR_ExtraAddressInfo> allValues(comp->trMemory());1439info->getSortedList(comp, &allValues);14401441TR_ResolvedMethod *owningMethod = methodSymRef->getOwningMethod(comp);1442TR_OpaqueClassBlock *callSiteMethodClass;14431444if (methodSymbol->isVirtual())1445callSiteMethodClass = methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->classOfMethod();14461447ListIterator<TR_ExtraAddressInfo> valuesIt(&allValues);14481449uint32_t numStaticPics = 0;1450TR_ExtraAddressInfo *profiledInfo;1451for (profiledInfo = valuesIt.getFirst(); numStaticPics < maxStaticPICs && profiledInfo != NULL; profiledInfo = valuesIt.getNext())1452{1453float freq = (float)profiledInfo->_frequency / totalFreq;1454if (freq < MIN_PROFILED_CALL_FREQUENCY)1455break;14561457TR_OpaqueClassBlock *clazz = (TR_OpaqueClassBlock *)profiledInfo->_value;1458if (comp->getPersistentInfo()->isObsoleteClass(clazz, fej9))1459continue;14601461TR_ResolvedMethod *method;14621463if (methodSymbol->isVirtual())1464{1465TR_ASSERT_FATAL(callSiteMethodClass, "Expecting valid callSiteMethodClass for virtual call");1466if (!cg->isProfiledClassAndCallSiteCompatible(clazz, callSiteMethodClass))1467continue;14681469method = owningMethod->getResolvedVirtualMethod(comp, clazz, methodSymRef->getOffset());1470}1471else1472{1473method = owningMethod->getResolvedInterfaceMethod(comp, clazz, methodSymRef->getCPIndex());1474}14751476if (!method || method->isInterpreted())1477continue;14781479values.add(new (comp->trStackMemory()) J9::ARM64PICItem(clazz, method, freq));1480++numStaticPics;1481}14821483return numStaticPics > 0;1484}14851486/**1487* @brief Generates instruction sequence for static PIC call1488*1489* @param[in] cg: code generator1490* @param[in] callNode: node for call1491* @param[in] profiledClass: class suggested by interpreter profiler1492* @param[in] profiledMethod: method suggested by interpreter profiler1493* @param[in] vftReg: register containing VFT1494* @param[in] tempReg: temporary register1495* @param[in] missLabel: label for cache miss1496* @param[in] regMapForGC: register map for GC1497* @returns instruction making direct call to the method1498*/1499static TR::Instruction* buildStaticPICCall(TR::CodeGenerator *cg, TR::Node *callNode, TR_OpaqueClassBlock *profiledClass, TR_ResolvedMethod *profiledMethod,1500TR::Register *vftReg, TR::Register *tempReg, TR::LabelSymbol *missLabel, uint32_t regMapForGC)1501{1502TR::Compilation *comp = cg->comp();1503TR::SymbolReference *methodSymRef = callNode->getSymbolReference();1504TR::SymbolReference *profiledMethodSymRef = comp->getSymRefTab()->findOrCreateMethodSymbol(methodSymRef->getOwningMethodIndex(),1505-1,1506profiledMethod,1507TR::MethodSymbol::Virtual);15081509TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());1510if (comp->compileRelocatableCode())1511{1512loadAddressConstantInSnippet(cg, callNode, reinterpret_cast<intptr_t>(profiledClass), tempReg, TR_ClassPointer);1513}1514else1515{1516bool isUnloadAssumptionRequired = fej9->isUnloadAssumptionRequired(profiledClass, comp->getCurrentMethod());15171518if (isUnloadAssumptionRequired)1519{1520loadAddressConstantInSnippet(cg, callNode, reinterpret_cast<intptr_t>(profiledClass), tempReg, TR_NoRelocation, true);1521}1522else1523{1524loadAddressConstant(cg, callNode, reinterpret_cast<intptr_t>(profiledClass), tempReg, NULL, true);1525}1526}1527generateCompareInstruction(cg, callNode, vftReg, tempReg, true);15281529generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, callNode, missLabel, TR::CC_NE);15301531TR::Instruction *gcPoint = generateImmSymInstruction(cg, TR::InstOpCode::bl, callNode, (uintptr_t)profiledMethod->startAddressForJittedMethod(),1532NULL, profiledMethodSymRef, NULL);1533gcPoint->ARM64NeedsGCMap(cg, regMapForGC);1534fej9->reserveTrampolineIfNecessary(comp, profiledMethodSymRef, false);1535return gcPoint;1536}15371538/**1539* @brief Generates instruction sequence for virtual call1540*1541* @param[in] cg: code generator1542* @param[in] callNode: node for the virtual call1543* @param[in] vftReg: register containing VFT1544* @param[in] x9: x9 register1545* @param[in] regMapForGC: register map for GC1546*/1547static void buildVirtualCall(TR::CodeGenerator *cg, TR::Node *callNode, TR::Register *vftReg, TR::Register *x9, uint32_t regMapForGC)1548{1549int32_t offset = callNode->getSymbolReference()->getOffset();1550TR_ASSERT_FATAL(offset < 0, "Unexpected positive offset for virtual call");15511552// jitVTableIndex() in oti/JITInterface.hpp assumes the instruction sequence below1553if (offset >= -65536)1554{1555generateTrg1ImmInstruction(cg, TR::InstOpCode::movnx, callNode, x9, ~offset & 0xFFFF);1556}1557else1558{1559generateTrg1ImmInstruction(cg, TR::InstOpCode::movzx, callNode, x9, offset & 0xFFFF);1560generateTrg1ImmInstruction(cg, TR::InstOpCode::movkx, callNode, x9,1561(((offset >> 16) & 0xFFFF) | TR::MOV_LSL16));1562generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::sbfmx, callNode, x9, x9, 0x1F); // sxtw x9, w91563}1564TR::MemoryReference *tempMR = TR::MemoryReference::createWithIndexReg(cg, vftReg, x9);1565generateTrg1MemInstruction(cg, TR::InstOpCode::ldroffx, callNode, x9, tempMR);1566TR::Instruction *gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, callNode, x9);1567gcPoint->ARM64NeedsGCMap(cg, regMapForGC);1568}15691570/**1571* @brief Generates instruction sequence for interface call1572*1573* @param[in] cg: code generator1574* @param[in] callNode: node for the interface call1575* @param[in] vftReg: vft register1576* @param[in] tmpReg: temporary register1577* @param[in] ifcSnippet: interface call snippet1578* @param[in] regMapForGC: register map for GC1579*/1580static void buildInterfaceCall(TR::CodeGenerator *cg, TR::Node *callNode, TR::Register *vftReg, TR::Register *tmpReg, TR::ARM64InterfaceCallSnippet *ifcSnippet, uint32_t regMapForGC)1581{1582/*1583* Generating following instruction sequence.1584*1585* ldrx tmpReg, L_firstClassCacheSlot1586* cmpx vftReg, tmpReg1587* ldrx tmpReg, L_firstBranchAddressCacheSlot1588* beq hitLabel1589* ldrx tmpReg, L_secondClassCacheSlot1590* cmpx vftReg, tmpReg1591* bne snippetLabel1592* ldrx tmpReg, L_secondBranchAddressCacheSlot1593* hitLabel:1594* blr tmpReg1595* doneLabel:1596*/15971598TR::LabelSymbol *ifcSnippetLabel = ifcSnippet->getSnippetLabel();1599TR::LabelSymbol *firstClassCacheSlotLabel = ifcSnippet->getFirstClassCacheSlotLabel();1600generateTrg1ImmSymInstruction(cg, TR::InstOpCode::ldrx, callNode, tmpReg, 0, firstClassCacheSlotLabel);16011602TR::LabelSymbol *hitLabel = generateLabelSymbol(cg);1603generateCompareInstruction(cg, callNode, vftReg, tmpReg, true);1604TR::LabelSymbol *firstBranchAddressCacheSlotLabel = ifcSnippet->getFirstBranchAddressCacheSlotLabel();16051606generateTrg1ImmSymInstruction(cg, TR::InstOpCode::ldrx, callNode, tmpReg, 0, firstBranchAddressCacheSlotLabel);1607generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, callNode, hitLabel, TR::CC_EQ);16081609TR::LabelSymbol *secondClassCacheSlotLabel = ifcSnippet->getSecondClassCacheSlotLabel();16101611generateTrg1ImmSymInstruction(cg, TR::InstOpCode::ldrx, callNode, tmpReg, 0, secondClassCacheSlotLabel);1612generateCompareInstruction(cg, callNode, vftReg, tmpReg, true);1613TR::Instruction *gcPoint = generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, callNode, ifcSnippetLabel, TR::CC_NE);1614gcPoint->ARM64NeedsGCMap(cg, regMapForGC);1615TR::LabelSymbol *secondBranchAddressCacheSlotLabel = ifcSnippet->getSecondBranchAddressCacheSlotLabel();16161617generateTrg1ImmSymInstruction(cg, TR::InstOpCode::ldrx, callNode, tmpReg, 0, secondBranchAddressCacheSlotLabel);1618generateLabelInstruction(cg, TR::InstOpCode::label, callNode, hitLabel);1619gcPoint = generateRegBranchInstruction(cg, TR::InstOpCode::blr, callNode, tmpReg);1620gcPoint->ARM64NeedsGCMap(cg, regMapForGC);1621}16221623static TR::Register *evaluateUpToVftChild(TR::Node *callNode, TR::CodeGenerator *cg)1624{1625TR::Register *vftReg = NULL;1626if (callNode->getFirstArgumentIndex() == 1)1627{1628TR::Node *child = callNode->getFirstChild();1629vftReg = cg->evaluate(child);1630cg->decReferenceCount(child);1631}1632TR_ASSERT_FATAL(vftReg != NULL, "Failed to find vft child.");1633return vftReg;1634}16351636void J9::ARM64::PrivateLinkage::buildVirtualDispatch(TR::Node *callNode,1637TR::RegisterDependencyConditions *dependencies,1638uint32_t argSize)1639{1640TR::Register *x0 = dependencies->searchPreConditionRegister(TR::RealRegister::x0);1641TR::Register *x9 = dependencies->searchPreConditionRegister(TR::RealRegister::x9);16421643TR::SymbolReference *methodSymRef = callNode->getSymbolReference();1644TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();1645TR::LabelSymbol *doneLabel = generateLabelSymbol(cg());1646uint32_t regMapForGC = getProperties().getPreservedRegisterMapForGC();1647void *thunk = NULL;16481649TR::Instruction *gcPoint;16501651TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());16521653// Computed calls1654//1655if (methodSymbol->isComputed())1656{1657TR::Register *vftReg = evaluateUpToVftChild(callNode, cg());1658TR::addDependency(dependencies, vftReg, getProperties().getComputedCallTargetRegister(), TR_GPR, cg());16591660switch (methodSymbol->getMandatoryRecognizedMethod())1661{1662case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:1663case TR::com_ibm_jit_JITHelpers_dispatchVirtual:1664{1665// Need a j2i thunk for the method that will ultimately be dispatched by this handle call1666char *j2iSignature = fej9->getJ2IThunkSignatureForDispatchVirtual(methodSymbol->getMethod()->signatureChars(), methodSymbol->getMethod()->signatureLength(), comp());1667int32_t signatureLen = strlen(j2iSignature);1668thunk = fej9->getJ2IThunk(j2iSignature, signatureLen, comp());1669if (!thunk)1670{1671thunk = fej9->setJ2IThunk(j2iSignature, signatureLen,1672TR::ARM64CallSnippet::generateVIThunk(fej9->getEquivalentVirtualCallNodeForDispatchVirtual(callNode, comp()), argSize, cg()), comp());1673}1674}1675default:1676if (fej9->needsInvokeExactJ2IThunk(callNode, comp()))1677{1678comp()->getPersistentInfo()->getInvokeExactJ2IThunkTable()->addThunk(1679TR::ARM64CallSnippet::generateInvokeExactJ2IThunk(callNode, argSize, cg(), methodSymbol->getMethod()->signatureChars()), fej9);1680}1681break;1682}16831684TR::Instruction *gcPoint = generateRegBranchInstruction(cg(), TR::InstOpCode::blr, callNode, vftReg, dependencies);1685gcPoint->ARM64NeedsGCMap(cg(), regMapForGC);16861687return;1688}16891690// Virtual and interface calls1691//1692TR_ASSERT_FATAL(methodSymbol->isVirtual() || methodSymbol->isInterface(), "Unexpected method type");16931694thunk = fej9->getJ2IThunk(methodSymbol->getMethod(), comp());1695if (!thunk)1696thunk = fej9->setJ2IThunk(methodSymbol->getMethod(), TR::ARM64CallSnippet::generateVIThunk(callNode, argSize, cg()), comp());16971698bool callIsSafe = methodSymRef != comp()->getSymRefTab()->findObjectNewInstanceImplSymbol();16991700// evaluate vftReg because it is required for implicit NULLCHK1701TR::Register *vftReg = evaluateUpToVftChild(callNode, cg());1702TR::addDependency(dependencies, vftReg, TR::RealRegister::NoReg, TR_GPR, cg());17031704if (methodSymbol->isVirtual())1705{1706TR::MemoryReference *tempMR;1707if (methodSymRef->isUnresolved() || comp()->compileRelocatableCode())1708{1709TR::LabelSymbol *vcSnippetLabel = generateLabelSymbol(cg());1710TR::ARM64VirtualUnresolvedSnippet *vcSnippet =1711new (trHeapMemory())1712TR::ARM64VirtualUnresolvedSnippet(cg(), callNode, vcSnippetLabel, argSize, doneLabel, (uint8_t *)thunk);1713cg()->addSnippet(vcSnippet);171417151716// The following instructions are modified by _virtualUnresolvedHelper1717// in aarch64/runtime/PicBuilder.spp to load the vTable index in x917181719// This `b` instruction is modified to movzx x9, lower 16bit of offset1720generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, vcSnippetLabel);1721generateTrg1ImmInstruction(cg(), TR::InstOpCode::movkx, callNode, x9, TR::MOV_LSL16);1722generateTrg1Src1ImmInstruction(cg(), TR::InstOpCode::sbfmx, callNode, x9, x9, 0x1F); // sxtw x9, w91723tempMR = TR::MemoryReference::createWithIndexReg(cg(), vftReg, x9);1724generateTrg1MemInstruction(cg(), TR::InstOpCode::ldroffx, callNode, x9, tempMR);1725gcPoint = generateRegBranchInstruction(cg(), TR::InstOpCode::blr, callNode, x9);1726gcPoint->ARM64NeedsGCMap(cg(), regMapForGC);1727generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, doneLabel, dependencies);17281729return;1730}17311732// Handle guarded devirtualization next1733//1734if (callIsSafe)1735{1736TR::ResolvedMethodSymbol *resolvedMethodSymbol = methodSymRef->getSymbol()->getResolvedMethodSymbol();1737TR_ResolvedMethod *resolvedMethod = resolvedMethodSymbol->getResolvedMethod();17381739if (comp()->performVirtualGuardNOPing() &&1740comp()->isVirtualGuardNOPingRequired() &&1741!resolvedMethod->isInterpreted() &&1742!callNode->isTheVirtualCallNodeForAGuardedInlinedCall())1743{1744TR_VirtualGuard *virtualGuard = NULL;17451746if (!resolvedMethod->virtualMethodIsOverridden() &&1747!resolvedMethod->isAbstract())1748{1749if (comp()->getOption(TR_TraceCG))1750{1751traceMsg(comp(), "Creating TR_NonoverriddenGuard for node %p\n", callNode);1752}1753virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_NonoverriddenGuard, comp(), callNode);1754}1755else1756{1757TR_DevirtualizedCallInfo *devirtualizedCallInfo = comp()->findDevirtualizedCall(callNode);1758TR_OpaqueClassBlock *refinedThisClass = devirtualizedCallInfo ? devirtualizedCallInfo->_thisType : NULL;1759TR_OpaqueClassBlock *thisClass = refinedThisClass ? refinedThisClass : resolvedMethod->containingClass();17601761TR_PersistentCHTable *chTable = comp()->getPersistentInfo()->getPersistentCHTable();1762/* Devirtualization is not currently supported for AOT compilations */1763if (thisClass && TR::Compiler->cls.isAbstractClass(comp(), thisClass) && !comp()->compileRelocatableCode())1764{1765TR_ResolvedMethod *calleeMethod = chTable->findSingleAbstractImplementer(thisClass, methodSymRef->getOffset(), methodSymRef->getOwningMethod(comp()), comp());1766if (calleeMethod &&1767(comp()->isRecursiveMethodTarget(calleeMethod) ||1768!calleeMethod->isInterpreted() ||1769calleeMethod->isJITInternalNative()))1770{1771if (comp()->getOption(TR_TraceCG))1772{1773traceMsg(comp(), "Creating TR_AbstractGuard for node %p\n", callNode);1774}1775resolvedMethod = calleeMethod;1776virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_AbstractGuard, comp(), callNode);1777}1778}1779else if (refinedThisClass &&1780resolvedMethod->virtualMethodIsOverridden() &&1781!chTable->isOverriddenInThisHierarchy(resolvedMethod, refinedThisClass, methodSymRef->getOffset(), comp()))1782{1783TR_ResolvedMethod *calleeMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(), refinedThisClass, methodSymRef->getOffset());1784if (calleeMethod &&1785(comp()->isRecursiveMethodTarget(calleeMethod) ||1786!calleeMethod->isInterpreted() ||1787calleeMethod->isJITInternalNative()))1788{1789if (comp()->getOption(TR_TraceCG))1790{1791traceMsg(comp(), "Creating TR_HierarchyGuard for node %p\n", callNode);1792}1793resolvedMethod = calleeMethod;1794virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HierarchyGuard, comp(), callNode);1795}1796}1797}17981799// If we have a virtual call guard generate a direct call1800// in the inline path and the virtual call out of line.1801// If the guard is later patched we'll go out of line path.1802//1803if (virtualGuard)1804{1805TR::LabelSymbol *virtualCallLabel = generateLabelSymbol(cg());1806generateVirtualGuardNOPInstruction(cg(), callNode, virtualGuard->addNOPSite(), NULL, virtualCallLabel);18071808if (comp()->getOption(TR_EnableHCR))1809{1810if (cg()->supportsMergingGuards())1811{1812virtualGuard->setMergedWithHCRGuard();1813}1814else1815{1816TR_VirtualGuard *HCRGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HCRGuard, comp(), callNode);1817generateVirtualGuardNOPInstruction(cg(), callNode, HCRGuard->addNOPSite(), NULL, virtualCallLabel);1818}1819}1820if (resolvedMethod != resolvedMethodSymbol->getResolvedMethod())1821{1822methodSymRef = comp()->getSymRefTab()->findOrCreateMethodSymbol(methodSymRef->getOwningMethodIndex(),1823-1,1824resolvedMethod,1825TR::MethodSymbol::Virtual);1826methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();1827resolvedMethodSymbol = methodSymbol->getResolvedMethodSymbol();1828resolvedMethod = resolvedMethodSymbol->getResolvedMethod();1829}1830uintptr_t methodAddress = comp()->isRecursiveMethodTarget(resolvedMethod) ? 0 : (uintptr_t)resolvedMethod->startAddressForJittedMethod();1831TR::Instruction *gcPoint = generateImmSymInstruction(cg(), TR::InstOpCode::bl, callNode, methodAddress, NULL, methodSymRef, NULL);1832generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, doneLabel, dependencies);1833gcPoint->ARM64NeedsGCMap(cg(), regMapForGC);18341835fej9->reserveTrampolineIfNecessary(comp(), methodSymRef, false);18361837// Out of line virtual call1838//1839TR_ARM64OutOfLineCodeSection *virtualCallOOL = new (trHeapMemory()) TR_ARM64OutOfLineCodeSection(virtualCallLabel, doneLabel, cg());18401841virtualCallOOL->swapInstructionListsWithCompilation();1842TR::Instruction *OOLLabelInstr = generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, virtualCallLabel);18431844// XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.1845TR_ASSERT(!OOLLabelInstr->getLiveLocals() && !OOLLabelInstr->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");1846OOLLabelInstr->setLiveLocals(gcPoint->getLiveLocals());1847OOLLabelInstr->setLiveMonitors(gcPoint->getLiveMonitors());18481849buildVirtualCall(cg(), callNode, vftReg, x9, regMapForGC);18501851generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, doneLabel);1852virtualCallOOL->swapInstructionListsWithCompilation();1853cg()->getARM64OutOfLineCodeSectionList().push_front(virtualCallOOL);18541855return;1856}1857}1858}1859}18601861// Profile-driven virtual and interface calls1862//1863// If the top value dominates everything else, generate a single static1864// PIC call inline and a virtual call or dynamic PIC call out of line.1865//1866// Otherwise generate a reasonable amount of static PIC calls and a1867// virtual call or dynamic PIC call all inline.1868//1869if (callIsSafe && !callNode->isTheVirtualCallNodeForAGuardedInlinedCall() && !comp()->getOption(TR_DisableInterpreterProfiling))1870{1871static uint32_t maxVirtualStaticPICs = comp()->getOptions()->getMaxStaticPICSlots(comp()->getMethodHotness());1872static uint32_t maxInterfaceStaticPICs = comp()->getOptions()->getNumInterfaceCallCacheSlots();18731874TR_ScratchList<J9::ARM64PICItem> values(cg()->trMemory());1875const uint32_t maxStaticPICs = methodSymbol->isInterface() ? maxInterfaceStaticPICs : maxVirtualStaticPICs;18761877if (getProfiledCallSiteInfo(cg(), callNode, maxStaticPICs, values))1878{1879ListIterator<J9::ARM64PICItem> i(&values);1880J9::ARM64PICItem *pic = i.getFirst();18811882// If this value is dominant, optimize exclusively for it1883if (pic->_frequency > MAX_PROFILED_CALL_FREQUENCY)1884{1885if (comp()->getOption(TR_TraceCG))1886{1887traceMsg(comp(), "Found dominant profiled target, frequency = %f\n", pic->_frequency);1888}1889TR::LabelSymbol *slowCallLabel = generateLabelSymbol(cg());18901891TR::Instruction *gcPoint = buildStaticPICCall(cg(), callNode, pic->_clazz, pic->_method,1892vftReg, x9, slowCallLabel, regMapForGC);1893generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, doneLabel, dependencies);18941895// Out of line virtual/interface call1896//1897TR_ARM64OutOfLineCodeSection *slowCallOOL = new (trHeapMemory()) TR_ARM64OutOfLineCodeSection(slowCallLabel, doneLabel, cg());18981899slowCallOOL->swapInstructionListsWithCompilation();1900TR::Instruction *OOLLabelInstr = generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, slowCallLabel);19011902// XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.1903TR_ASSERT_FATAL(!OOLLabelInstr->getLiveLocals() && !OOLLabelInstr->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");1904OOLLabelInstr->setLiveLocals(gcPoint->getLiveLocals());1905OOLLabelInstr->setLiveMonitors(gcPoint->getLiveMonitors());19061907TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg());19081909if (methodSymbol->isInterface())1910{1911TR::LabelSymbol *ifcSnippetLabel = generateLabelSymbol(cg());1912TR::LabelSymbol *firstClassCacheSlotLabel = generateLabelSymbol(cg());1913TR::LabelSymbol *firstBranchAddressCacheSlotLabel = generateLabelSymbol(cg());1914TR::LabelSymbol *secondClassCacheSlotLabel = generateLabelSymbol(cg());1915TR::LabelSymbol *secondBranchAddressCacheSlotLabel = generateLabelSymbol(cg());1916TR::ARM64InterfaceCallSnippet *ifcSnippet = new (trHeapMemory()) TR::ARM64InterfaceCallSnippet(cg(), callNode, ifcSnippetLabel,1917argSize, doneOOLLabel, firstClassCacheSlotLabel, secondClassCacheSlotLabel,1918firstBranchAddressCacheSlotLabel, secondBranchAddressCacheSlotLabel, static_cast<uint8_t *>(thunk));1919cg()->addSnippet(ifcSnippet);1920buildInterfaceCall(cg(), callNode, vftReg, x9, ifcSnippet, regMapForGC);1921}1922else1923{1924buildVirtualCall(cg(), callNode, vftReg, x9, regMapForGC);1925}19261927generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, doneOOLLabel);1928generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, doneLabel);1929slowCallOOL->swapInstructionListsWithCompilation();1930cg()->getARM64OutOfLineCodeSectionList().push_front(slowCallOOL);19311932return;1933}1934else1935{1936if (comp()->getOption(TR_TraceCG))1937{1938traceMsg(comp(), "Generating %d static PIC calls\n", values.getSize());1939}1940// Build multiple static PIC calls1941while (pic)1942{1943TR::LabelSymbol *nextLabel = generateLabelSymbol(cg());19441945buildStaticPICCall(cg(), callNode, pic->_clazz, pic->_method,1946vftReg, x9, nextLabel, regMapForGC);1947generateLabelInstruction(cg(), TR::InstOpCode::b, callNode, doneLabel);1948generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, nextLabel);1949pic = i.getNext();1950}1951// Regular virtual/interface call will be built below1952}1953}1954}19551956// Finally, regular virtual and interface calls1957//1958if (methodSymbol->isInterface())1959{1960// interface calls1961// ToDo: Inline interface dispatch19621963TR::LabelSymbol *ifcSnippetLabel = generateLabelSymbol(cg());1964TR::LabelSymbol *firstClassCacheSlotLabel = generateLabelSymbol(cg());1965TR::LabelSymbol *firstBranchAddressCacheSlotLabel = generateLabelSymbol(cg());1966TR::LabelSymbol *secondClassCacheSlotLabel = generateLabelSymbol(cg());1967TR::LabelSymbol *secondBranchAddressCacheSlotLabel = generateLabelSymbol(cg());1968TR::ARM64InterfaceCallSnippet *ifcSnippet =1969new (trHeapMemory())1970TR::ARM64InterfaceCallSnippet(cg(), callNode, ifcSnippetLabel, argSize, doneLabel, firstClassCacheSlotLabel, firstBranchAddressCacheSlotLabel, secondClassCacheSlotLabel, secondBranchAddressCacheSlotLabel, static_cast<uint8_t *>(thunk));1971cg()->addSnippet(ifcSnippet);19721973buildInterfaceCall(cg(), callNode, vftReg, x9, ifcSnippet, regMapForGC);1974}1975else1976{1977buildVirtualCall(cg(), callNode, vftReg, x9, regMapForGC);1978}1979generateLabelInstruction(cg(), TR::InstOpCode::label, callNode, doneLabel, dependencies);1980}19811982TR::Register *J9::ARM64::PrivateLinkage::buildIndirectDispatch(TR::Node *callNode)1983{1984const TR::ARM64LinkageProperties &pp = getProperties();1985TR::RealRegister *sp = cg()->machine()->getRealRegister(pp.getStackPointerRegister());19861987// Extra post dependency for killing vector registers (see KillVectorRegs)1988const int extraPostReg = killsVectorRegisters() ? 1 : 0;1989TR::RegisterDependencyConditions *dependencies =1990new (trHeapMemory()) TR::RegisterDependencyConditions(1991pp.getNumberOfDependencyGPRegisters(),1992pp.getNumberOfDependencyGPRegisters() + extraPostReg, trMemory());19931994int32_t argSize = buildArgs(callNode, dependencies);19951996buildVirtualDispatch(callNode, dependencies, argSize);1997cg()->machine()->setLinkRegisterKilled(true);19981999TR::Register *retReg;2000switch(callNode->getOpCodeValue())2001{2002case TR::icalli:2003retReg = dependencies->searchPostConditionRegister(2004pp.getIntegerReturnRegister());2005break;2006case TR::lcalli:2007case TR::acalli:2008retReg = dependencies->searchPostConditionRegister(2009pp.getLongReturnRegister());2010break;2011case TR::fcalli:2012case TR::dcalli:2013retReg = dependencies->searchPostConditionRegister(2014pp.getFloatReturnRegister());2015break;2016case TR::calli:2017retReg = NULL;2018break;2019default:2020retReg = NULL;2021TR_ASSERT_FATAL(false, "Unsupported indirect call Opcode.");2022}20232024callNode->setRegister(retReg);20252026dependencies->stopUsingDepRegs(cg(), retReg);2027return retReg;2028}20292030TR::Instruction *2031J9::ARM64::PrivateLinkage::loadStackParametersToLinkageRegisters(TR::Instruction *cursor)2032{2033TR::Machine *machine = cg()->machine();2034TR::ARM64LinkageProperties& properties = getProperties();2035TR::RealRegister *javaSP = machine->getRealRegister(properties.getStackPointerRegister()); // x2020362037TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();2038ListIterator<TR::ParameterSymbol> parmIterator(&(bodySymbol->getParameterList()));2039TR::ParameterSymbol *parmCursor;20402041// Copy from stack all parameters that belong in linkage regs2042//2043for (parmCursor = parmIterator.getFirst();2044parmCursor != NULL;2045parmCursor = parmIterator.getNext())2046{2047if (parmCursor->isParmPassedInRegister())2048{2049int8_t lri = parmCursor->getLinkageRegisterIndex();2050TR::RealRegister *linkageReg;2051TR::InstOpCode::Mnemonic op;2052TR::DataType dataType = parmCursor->getDataType();20532054if (dataType == TR::Double || dataType == TR::Float)2055{2056linkageReg = machine->getRealRegister(properties.getFloatArgumentRegister(lri));2057op = (dataType == TR::Double) ? TR::InstOpCode::vldrimmd : TR::InstOpCode::vldrimms;2058}2059else2060{2061linkageReg = machine->getRealRegister(properties.getIntegerArgumentRegister(lri));2062op = (dataType == TR::Int64 || dataType == TR::Address) ? TR::InstOpCode::ldrimmx : TR::InstOpCode::ldrimmw;2063}20642065TR::MemoryReference *stackMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, parmCursor->getParameterOffset());2066cursor = generateTrg1MemInstruction(cg(), op, NULL, linkageReg, stackMR, cursor);2067}2068}20692070return cursor;2071}20722073TR::Instruction *2074J9::ARM64::PrivateLinkage::saveParametersToStack(TR::Instruction *cursor)2075{2076TR::Machine *machine = cg()->machine();2077TR::ARM64LinkageProperties& properties = getProperties();2078TR::RealRegister *javaSP = machine->getRealRegister(properties.getStackPointerRegister()); // x2020792080TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();2081ListIterator<TR::ParameterSymbol> parmIterator(&(bodySymbol->getParameterList()));2082TR::ParameterSymbol *parmCursor;20832084// Store to stack all parameters passed in linkage registers2085//2086for (parmCursor = parmIterator.getFirst();2087parmCursor != NULL;2088parmCursor = parmIterator.getNext())2089{2090if (parmCursor->isParmPassedInRegister())2091{2092int8_t lri = parmCursor->getLinkageRegisterIndex();2093TR::RealRegister *linkageReg;2094TR::InstOpCode::Mnemonic op;20952096if (parmCursor->getDataType() == TR::Double || parmCursor->getDataType() == TR::Float)2097{2098linkageReg = machine->getRealRegister(properties.getFloatArgumentRegister(lri));2099op = (parmCursor->getDataType() == TR::Double) ? TR::InstOpCode::vstrimmd : TR::InstOpCode::vstrimms;2100}2101else2102{2103linkageReg = machine->getRealRegister(properties.getIntegerArgumentRegister(lri));2104op = TR::InstOpCode::strimmx;2105}21062107TR::MemoryReference *stackMR = TR::MemoryReference::createWithDisplacement(cg(), javaSP, parmCursor->getParameterOffset());2108cursor = generateMemSrc1Instruction(cg(), op, NULL, stackMR, linkageReg, cursor);2109}2110}21112112return cursor;2113}21142115void J9::ARM64::PrivateLinkage::performPostBinaryEncoding()2116{2117// --------------------------------------------------------------------------2118// Encode the size of the interpreter entry area into the linkage info word2119//2120TR_ASSERT_FATAL(cg()->getReturnTypeInfoInstruction(),2121"Expecting the return type info instruction to be created");21222123TR::ARM64ImmInstruction *linkageInfoWordInstruction = cg()->getReturnTypeInfoInstruction();2124uint32_t linkageInfoWord = linkageInfoWordInstruction->getSourceImmediate();21252126intptr_t jittedMethodEntryAddress = reinterpret_cast<intptr_t>(getJittedMethodEntryPoint()->getBinaryEncoding());2127intptr_t interpretedMethodEntryAddress = reinterpret_cast<intptr_t>(getInterpretedMethodEntryPoint()->getBinaryEncoding());21282129linkageInfoWord = (static_cast<uint32_t>(jittedMethodEntryAddress - interpretedMethodEntryAddress) << 16) | linkageInfoWord;2130linkageInfoWordInstruction->setSourceImmediate(linkageInfoWord);21312132*(uint32_t *)(linkageInfoWordInstruction->getBinaryEncoding()) = linkageInfoWord;21332134// Set recompilation info2135//2136TR::Recompilation *recomp = comp()->getRecompilationInfo();2137if (recomp != NULL && recomp->couldBeCompiledAgain())2138{2139J9::PrivateLinkage::LinkageInfo *lkInfo = J9::PrivateLinkage::LinkageInfo::get(cg()->getCodeStart());2140if (recomp->useSampling())2141lkInfo->setSamplingMethodBody();2142else2143lkInfo->setCountingMethodBody();2144}2145}21462147int32_t J9::ARM64::HelperLinkage::buildArgs(TR::Node *callNode,2148TR::RegisterDependencyConditions *dependencies)2149{2150return buildPrivateLinkageArgs(callNode, dependencies, _helperLinkage);2151}215221532154