Path: blob/master/runtime/compiler/x/codegen/X86PrivateLinkage.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "codegen/X86PrivateLinkage.hpp"2324#include "codegen/Linkage_inlines.hpp"25#include "codegen/LiveRegister.hpp"26#include "codegen/Machine.hpp"27#include "codegen/MemoryReference.hpp"28#include "codegen/Register.hpp"29#include "codegen/RegisterDependency.hpp"30#include "codegen/RegisterPair.hpp"31#include "codegen/Snippet.hpp"32#include "codegen/UnresolvedDataSnippet.hpp"33#include "compile/Method.hpp"34#include "compile/ResolvedMethod.hpp"35#include "compile/VirtualGuard.hpp"36#if defined(J9VM_OPT_JITSERVER)37#include "control/CompilationThread.hpp"38#endif /* defined(J9VM_OPT_JITSERVER) */39#include "env/CHTable.hpp"40#include "env/CompilerEnv.hpp"41#include "env/J2IThunk.hpp"42#include "env/PersistentCHTable.hpp"43#include "env/StackMemoryRegion.hpp"44#include "env/jittypes.h"45#include "env/VMJ9.h"46#include "il/DataTypes.hpp"47#include "il/Node.hpp"48#include "il/Node_inlines.hpp"49#include "il/ParameterSymbol.hpp"50#include "il/TreeTop.hpp"51#include "il/TreeTop_inlines.hpp"52#include "infra/SimpleRegex.hpp"53#include "env/VMJ9.h"54#include "x/codegen/X86Instruction.hpp"55#include "x/codegen/CallSnippet.hpp"56#include "x/codegen/FPTreeEvaluator.hpp"57#include "runtime/J9Profiler.hpp"58#include "runtime/J9ValueProfiler.hpp"59#include "OMR/Bytes.hpp"6061#ifdef TR_TARGET_64BIT62#include "x/amd64/codegen/AMD64GuardedDevirtualSnippet.hpp"63#else64#include "x/codegen/GuardedDevirtualSnippet.hpp"65#endif6667inline uint32_t gcd(uint32_t a, uint32_t b)68{69while (b != 0)70{71uint32_t t = b;72b = a % b;73a = t;74}75return a;76}7778inline uint32_t lcm(uint32_t a, uint32_t b)79{80return a * b / gcd(a, b);81}8283J9::X86::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg) : J9::PrivateLinkage(cg)84{85// Stack alignment basic requirement:86// X86-32: 4 bytes, per hardware requirement87// X86-64: 16 bytes, required by both Linux and Windows88// Stack alignment additional requirement:89// Stack alignment has to match the alignment requirement for local object address90_properties.setOutgoingArgAlignment(lcm(cg->comp()->target().is32Bit() ? 4 : 16,91cg->fej9()->getLocalObjectAlignmentInBytes()));92}9394const TR::X86LinkageProperties& J9::X86::PrivateLinkage::getProperties()95{96return _properties;97}9899////////////////////////////////////////////////100//101// Argument manipulation102//103104static const TR::RealRegister::RegNum NOT_ASSIGNED = (TR::RealRegister::RegNum)-1;105106107void J9::X86::PrivateLinkage::copyLinkageInfoToParameterSymbols()108{109TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();110ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));111TR::ParameterSymbol *paramCursor;112const TR::X86LinkageProperties &properties = getProperties();113int32_t maxIntArgs, maxFloatArgs;114int32_t numIntArgs = 0, numFloatArgs = 0;115116maxIntArgs = properties.getNumIntegerArgumentRegisters();117maxFloatArgs = properties.getNumFloatArgumentRegisters();118for (paramCursor = paramIterator.getFirst(); paramCursor != NULL; paramCursor = paramIterator.getNext())119{120// If we're out of registers, just stop now instead of looping doing nothing121//122if (numIntArgs >= maxIntArgs && numFloatArgs >= maxFloatArgs)123break;124125// Assign linkage registers of each type until we run out126//127switch(paramCursor->getDataType())128{129case TR::Float:130case TR::Double:131if (numFloatArgs < maxFloatArgs)132paramCursor->setLinkageRegisterIndex(numFloatArgs++);133break;134default:135if (numIntArgs < maxIntArgs)136paramCursor->setLinkageRegisterIndex(numIntArgs++);137break;138}139}140}141142void J9::X86::PrivateLinkage::copyGlRegDepsToParameterSymbols(TR::Node *bbStart, TR::CodeGenerator *cg)143{144TR_ASSERT(bbStart->getOpCodeValue() == TR::BBStart, "assertion failure");145if (bbStart->getNumChildren() > 0)146{147TR::Node *glRegDeps = bbStart->getFirstChild();148if (!glRegDeps) // No global register info, so nothing to do149return;150151TR_ASSERT(glRegDeps->getOpCodeValue() == TR::GlRegDeps, "First child of first Node must be a GlRegDeps");152153uint16_t childNum;154for (childNum=0; childNum < glRegDeps->getNumChildren(); childNum++)155{156TR::Node *child = glRegDeps->getChild(childNum);157TR::ParameterSymbol *sym = child->getSymbol()->getParmSymbol();158sym->setAssignedGlobalRegisterIndex(cg->getGlobalRegister(child->getGlobalRegisterNumber()));159}160}161}162163TR::Instruction *J9::X86::PrivateLinkage::copyStackParametersToLinkageRegisters(TR::Instruction *procEntryInstruction)164{165TR_ASSERT(procEntryInstruction && procEntryInstruction->getOpCodeValue() == TR::InstOpCode::proc, "assertion failure");166TR::Instruction *intrpPrev = procEntryInstruction->getPrev(); // The instruction before the interpreter entry point167movLinkageRegisters(intrpPrev, false);168return intrpPrev->getNext();169}170171TR::Instruction *J9::X86::PrivateLinkage::movLinkageRegisters(TR::Instruction *cursor, bool isStore)172{173TR_ASSERT(cursor, "assertion failure");174175TR::Machine *machine = cg()->machine();176TR::RealRegister *rspReal = machine->getRealRegister(TR::RealRegister::esp);177178TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();179ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));180TR::ParameterSymbol *paramCursor;181182// Copy from stack all parameters that belong in linkage regs183//184for (paramCursor = paramIterator.getFirst();185paramCursor != NULL;186paramCursor = paramIterator.getNext())187{188int8_t lri = paramCursor->getLinkageRegisterIndex();189190if (lri != NOT_LINKAGE) // This param should be in a linkage reg191{192TR_MovDataTypes movDataType = paramMovType(paramCursor);193TR::RealRegister *reg = machine->getRealRegister(getProperties().getArgumentRegister(lri, isFloat(movDataType)));194TR::MemoryReference *memRef = generateX86MemoryReference(rspReal, paramCursor->getParameterOffset(), cg());195196if (isStore)197{198// stack := lri199cursor = generateMemRegInstruction(cursor, TR::Linkage::movOpcodes(MemReg, movDataType), memRef, reg, cg());200}201else202{203// lri := stack204cursor = generateRegMemInstruction(cursor, TR::Linkage::movOpcodes(RegMem, movDataType), reg, memRef, cg());205}206}207}208209return cursor;210}211212213// Copies parameters from where they enter the method (either on stack or in a214// linkage register) to their "home location" where the method body will expect215// to find them (either on stack or in a global register).216//217TR::Instruction *J9::X86::PrivateLinkage::copyParametersToHomeLocation(TR::Instruction *cursor, bool parmsHaveBeenStored)218{219TR::Machine *machine = cg()->machine();220TR::RealRegister *framePointer = machine->getRealRegister(TR::RealRegister::vfp);221222TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();223ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));224TR::ParameterSymbol *paramCursor;225226const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;227TR_ASSERT(noReg == 0, "noReg must be zero so zero-initializing movStatus will work");228229TR::MovStatus movStatus[TR::RealRegister::NumRegisters] = {{(TR::RealRegister::RegNum)0,(TR::RealRegister::RegNum)0,(TR_MovDataTypes)0}};230231// We must always do the stores first, then the reg-reg copies, then the232// loads, so that we never clobber a register we will need later. However,233// the logic is simpler if we do the loads and stores in the same loop.234// Therefore, we maintain a separate instruction cursor for the loads.235//236// We defer the initialization of loadCursor until we generate the first237// load. Otherwise, if we happen to generate some stores first, then the238// store cursor would get ahead of the loadCursor, and the instructions239// would end up in the wrong order despite our efforts.240//241TR::Instruction *loadCursor = NULL;242243// Phase 1: generate RegMem and MemReg movs, and collect information about244// the required RegReg movs.245//246for (paramCursor = paramIterator.getFirst();247paramCursor != NULL;248paramCursor = paramIterator.getNext())249{250int8_t lri = paramCursor->getLinkageRegisterIndex(); // How the parameter enters the method251TR::RealRegister::RegNum ai // Where method body expects to find it252= (TR::RealRegister::RegNum)paramCursor->getAssignedGlobalRegisterIndex();253int32_t offset = paramCursor->getParameterOffset(); // Location of the parameter's stack slot254TR_MovDataTypes movDataType = paramMovType(paramCursor); // What sort of MOV instruction does it need?255256// Copy the parameter to wherever it should be257//258if (lri == NOT_LINKAGE) // It's on the stack259{260if (ai == NOT_ASSIGNED) // It only needs to be on the stack261{262// Nothing to do263}264else // Method body expects it to be in the ai register265{266if (loadCursor == NULL)267loadCursor = cursor;268269if (debug("traceCopyParametersToHomeLocation"))270diagnostic("copyParametersToHomeLocation: Loading %d\n", ai);271// ai := stack272loadCursor = generateRegMemInstruction(273loadCursor,274TR::Linkage::movOpcodes(RegMem, movDataType),275machine->getRealRegister(ai),276generateX86MemoryReference(framePointer, offset, cg()),277cg()278);279}280}281else // It's in a linkage register282{283TR::RealRegister::RegNum sourceIndex = getProperties().getArgumentRegister(lri, isFloat(movDataType));284285// Copy to the stack if necessary286//287if (ai == NOT_ASSIGNED || hasToBeOnStack(paramCursor))288{289if (parmsHaveBeenStored)290{291if (debug("traceCopyParametersToHomeLocation"))292diagnostic("copyParametersToHomeLocation: Skipping store of %d because parmsHaveBeenStored already\n", sourceIndex);293}294else295{296if (debug("traceCopyParametersToHomeLocation"))297diagnostic("copyParametersToHomeLocation: Storing %d\n", sourceIndex);298// stack := lri299cursor = generateMemRegInstruction(300cursor,301TR::Linkage::movOpcodes(MemReg, movDataType),302generateX86MemoryReference(framePointer, offset, cg()),303machine->getRealRegister(sourceIndex),304cg()305);306}307}308309// Copy to the ai register if necessary310//311if (ai != NOT_ASSIGNED && ai != sourceIndex)312{313// This parameter needs a RegReg move. We don't know yet whether314// we need the value in the target register, so for now we just315// remember that we need to do this and keep going.316//317TR_ASSERT(movStatus[ai ].sourceReg == noReg, "Each target reg must have only one source");318TR_ASSERT(movStatus[sourceIndex].targetReg == noReg, "Each source reg must have only one target");319if (debug("traceCopyParametersToHomeLocation"))320diagnostic("copyParametersToHomeLocation: Planning to move %d to %d\n", sourceIndex, ai);321movStatus[ai].sourceReg = sourceIndex;322movStatus[sourceIndex].targetReg = ai;323movStatus[sourceIndex].outgoingDataType = movDataType;324}325326if (debug("traceCopyParametersToHomeLocation") && ai == sourceIndex)327{328diagnostic("copyParametersToHomeLocation: Parameter #%d already in register %d\n", lri, ai);329}330}331}332333// Phase 2: Iterate through the parameters again to insert the RegReg moves.334//335for (paramCursor = paramIterator.getFirst();336paramCursor != NULL;337paramCursor = paramIterator.getNext())338{339if (paramCursor->getLinkageRegisterIndex() == NOT_LINKAGE)340continue;341342const TR::RealRegister::RegNum paramReg =343getProperties().getArgumentRegister(paramCursor->getLinkageRegisterIndex(), isFloat(paramMovType(paramCursor)));344345if (movStatus[paramReg].targetReg == 0)346{347// This parameter does not need to be copied anywhere348if (debug("traceCopyParametersToHomeLocation"))349diagnostic("copyParametersToHomeLocation: Not moving %d\n", paramReg);350}351else352{353if (debug("traceCopyParametersToHomeLocation"))354diagnostic("copyParametersToHomeLocation: Preparing to move %d\n", paramReg);355356// If a mov's target register is the source for another mov, we need357// to do that other mov first. The idea is to find the end point of358// the chain of movs starting with paramReg and ending with a359// register whose current value is not needed; then do that chain of360// movs in reverse order.361//362TR_ASSERT(noReg == 0, "noReg must be zero (not %d) for zero-filled initialization to work", noReg);363364TR::RealRegister::RegNum regCursor;365366// Find the last target in the chain367//368regCursor = movStatus[paramReg].targetReg;369while(movStatus[regCursor].targetReg != noReg)370{371// Haven't found the end yet372regCursor = movStatus[regCursor].targetReg;373TR_ASSERT(regCursor != paramReg, "Can't yet handle cyclic dependencies");374375// TODO:AMD64 Use scratch register to break cycles376//377// A properly-written pickRegister should never378// cause cycles to occur in the first place. However, we may want379// to consider adding cycle-breaking logic so that (1) pickRegister380// has more flexibility, and (2) we're more robust against381// otherwise harmless bugs in pickRegister.382}383384// Work our way backward along the chain, generating all the necessary movs385//386while(movStatus[regCursor].sourceReg != noReg)387{388TR::RealRegister::RegNum source = movStatus[regCursor].sourceReg;389if (debug("traceCopyParametersToHomeLocation"))390diagnostic("copyParametersToHomeLocation: Moving %d to %d\n", source, regCursor);391// regCursor := regCursor.sourceReg392cursor = generateRegRegInstruction(393cursor,394TR::Linkage::movOpcodes(RegReg, movStatus[source].outgoingDataType),395machine->getRealRegister(regCursor),396machine->getRealRegister(source),397cg()398);399// Update movStatus as we go so we don't generate redundant movs400movStatus[regCursor].sourceReg = noReg;401movStatus[source ].targetReg = noReg;402// Continue with the next register in the chain403regCursor = source;404}405}406}407408// Return the last instruction we inserted, whether or not it was a load.409//410return loadCursor? loadCursor : cursor;411}412413static TR::Instruction *initializeLocals(TR::Instruction *cursor,414int32_t lowOffset,415uint32_t count,416int32_t pointerSize,417TR::RealRegister *framePointer,418TR::RealRegister *sourceReg,419TR::RealRegister *loopReg,420TR::CodeGenerator *cg)421{422TR::Compilation *comp = cg->comp();423int32_t offset = lowOffset;424425if (count <= 4)426{427// For a small number, just generate a sequence of stores.428//429for (int32_t i=0; i < count; i++, offset += pointerSize)430{431cursor = new (cg->trHeapMemory()) TR::X86MemRegInstruction(432cursor,433TR::InstOpCode::SMemReg(),434generateX86MemoryReference(framePointer, offset, cg),435sourceReg,436cg);437}438}439else440{441// For a large number, generate a loop.442//443// for (loopReg = count-1; loopReg >= 0; loopReg--)444// framePointer[offset + loopReg * pointerSize] = sourceReg;445//446TR_ASSERT(count > 0, "positive count required for dword RegImm instruction");447448cursor = new (cg->trHeapMemory()) TR::X86RegMemInstruction(449cursor,450TR::InstOpCode::LEARegMem(),451loopReg,452generateX86MemoryReference(sourceReg, count-1, cg),453cg);454455TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);456cursor = new (cg->trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::label, loopLabel, cg);457458cursor = new (cg->trHeapMemory()) TR::X86MemRegInstruction(459cursor,460TR::InstOpCode::SMemReg(),461generateX86MemoryReference(462framePointer,463loopReg,464TR::MemoryReference::convertMultiplierToStride(pointerSize),465offset,466cg),467sourceReg,468cg);469470cursor = new (cg->trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::SUB4RegImms, loopReg, 1, cg);471cursor = new (cg->trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::JAE4, loopLabel, cg);472}473474return cursor;475}476477478#define STACKCHECKBUFFER 512479480void J9::X86::PrivateLinkage::createPrologue(TR::Instruction *cursor)481{482#if defined(DEBUG)483// TODO:AMD64: Get this into the debug DLL484485class TR_DebugFrameSegmentInfo486{487private:488489TR_DebugFrameSegmentInfo *_next;490const char *_description;491TR::RealRegister *_register;492int32_t _lowOffset;493uint8_t _size;494TR::Compilation * _comp;495496public:497498TR_ALLOC(TR_Memory::CodeGenerator)499500TR_DebugFrameSegmentInfo(501TR::Compilation * c,502int32_t lowOffset,503uint8_t size,504const char *description,505TR_DebugFrameSegmentInfo *next,506TR::RealRegister *reg=NULL507):508_comp(c),509_next(next),510_description(description),511_register(reg),512_lowOffset(lowOffset),513_size(size)514{}515516TR::Compilation * comp() { return _comp; }517518TR_DebugFrameSegmentInfo *getNext(){ return _next; }519520TR_DebugFrameSegmentInfo *sort()521{522TR_DebugFrameSegmentInfo *result;523TR_DebugFrameSegmentInfo *tail = _next? _next->sort() : NULL;524TR_DebugFrameSegmentInfo *before=NULL, *after;525for (after = tail; after; before=after, after=after->_next)526{527if (after->_lowOffset > _lowOffset)528break;529}530_next = after;531if (before)532{533before->_next = this;534result = tail;535}536else537{538result = this;539}540return result;541}542543void print(TR_Debug *debug)544{545if (_next)546_next->print(debug);547if (_size > 0)548{549diagnostic(" % 4d: % 4d -> % 4d (% 4d) %5.5s %s\n",550_lowOffset, _lowOffset, _lowOffset + _size - 1, _size,551_register? debug->getName(_register, TR_DoubleWordReg) : "",552_description553);554}555else556{557diagnostic(" % 4d: % 4d -> ---- (% 4d) %5.5s %s\n",558_lowOffset, _lowOffset, _size,559_register? debug->getName(_register, TR_DoubleWordReg) : "",560_description561);562}563}564565};566567TR_DebugFrameSegmentInfo *debugFrameSlotInfo=NULL;568#endif569bool trace = comp()->getOption(TR_TraceCG);570571TR::RealRegister *espReal = machine()->getRealRegister(TR::RealRegister::esp);572TR::RealRegister *scratchReg = machine()->getRealRegister(getProperties().getIntegerScratchRegister(0));573TR::RealRegister *metaDataReg = machine()->getRealRegister(getProperties().getMethodMetaDataRegister());574575TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();576ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));577TR::ParameterSymbol *paramCursor;578579const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;580const TR::X86LinkageProperties &properties = getProperties();581582const uint32_t outgoingArgSize = cg()->getLargestOutgoingArgSize();583584// We will set this to zero after generating the first instruction (and thus585// satisfying the size constraint).586uint8_t minInstructionSize = getMinimumFirstInstructionSize();587588// Entry breakpoint589//590if (comp()->getOption(TR_EntryBreakPoints))591{592if (minInstructionSize > 0)593{594// We don't want the breakpoint to get patched, so generate a sacrificial no-op595//596cursor = new (trHeapMemory()) TR::X86PaddingInstruction(cursor, minInstructionSize, TR_AtomicNoOpPadding, cg());597}598cursor = new (trHeapMemory()) TR::Instruction(TR::InstOpCode::INT3, cursor, cg());599}600601// Compute the nature of the preserved regs602//603uint32_t preservedRegsSize = 0;604uint32_t registerSaveDescription = 0; // bit N corresponds to real reg N, with 1=preserved605606// Preserved register index607for (int32_t pindex = 0; pindex < properties.getMaxRegistersPreservedInPrologue(); pindex++)608{609TR::RealRegister *reg = machine()->getRealRegister(properties.getPreservedRegister((uint32_t)pindex));610if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)611{612preservedRegsSize += properties.getPointerSize();613registerSaveDescription |= reg->getRealRegisterMask();614}615}616617cg()->setRegisterSaveDescription(registerSaveDescription);618619// Compute frame size620//621// allocSize: bytes to be subtracted from the stack pointer when allocating the frame622// peakSize: maximum bytes of stack this method might consume before encountering another stack check623//624const int32_t localSize = _properties.getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();625TR_ASSERT(localSize >= 0, "assertion failure");626627// Note that the return address doesn't appear here because it is allocated by the call instruction628//629{630int32_t frameSize = localSize + preservedRegsSize + ( _properties.getReservesOutgoingArgsInPrologue()? outgoingArgSize : 0 );631uint32_t stackSize = frameSize + _properties.getRetAddressWidth();632uint32_t adjust = OMR::align(stackSize, _properties.getOutgoingArgAlignment()) - stackSize;633cg()->setStackFramePaddingSizeInBytes(adjust);634cg()->setFrameSizeInBytes(frameSize + adjust);635if (trace)636traceMsg(comp(),637"Stack size was %d, and is adjusted by +%d (alignment %d, return address width %d)\n",638stackSize,639cg()->getStackFramePaddingSizeInBytes(),640_properties.getOutgoingArgAlignment(),641_properties.getRetAddressWidth());642}643auto allocSize = cg()->getFrameSizeInBytes();644645// Here we conservatively assume there is a call in this method that will require space for its return address646const int32_t peakSize = allocSize + _properties.getPointerSize();647648bool doOverflowCheck = !comp()->isDLT();649650// Small: entire stack usage fits in STACKCHECKBUFFER, so if sp is within651// the soft limit before buying the frame, then the whole frame will fit652// within the hard limit.653//654// Medium: the additional stack required after bumping the sp fits in655// STACKCHECKBUFFER, so if sp after the bump is within the soft limit, the656// whole frame will fit within the hard limit.657//658// Large: No shortcuts. Calculate the maximum extent of stack needed and659// compare that against the soft limit. (We have to use the soft limit here660// if for no other reason than that's the one used for asyncchecks.)661//662const bool frameIsSmall = peakSize < STACKCHECKBUFFER;663const bool frameIsMedium = !frameIsSmall;664665if (trace)666{667traceMsg(comp(), "\nFrame size: %c%c locals=%d frame=%d peak=%d\n",668frameIsSmall? 'S':'-', frameIsMedium? 'M':'-',669localSize, cg()->getFrameSizeInBytes(), peakSize);670}671672#if defined(DEBUG)673for (674paramCursor = paramIterator.getFirst();675paramCursor != NULL;676paramCursor = paramIterator.getNext()677){678TR::RealRegister::RegNum ai = (TR::RealRegister::RegNum)paramCursor->getAssignedGlobalRegisterIndex();679debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),680paramCursor->getOffset(), paramCursor->getSize(), "Parameter",681debugFrameSlotInfo,682(ai==NOT_ASSIGNED)? NULL : machine()->getRealRegister(ai)683);684}685686ListIterator<TR::AutomaticSymbol> autoIterator(&bodySymbol->getAutomaticList());687TR::AutomaticSymbol *autoCursor;688for (689autoCursor = autoIterator.getFirst();690autoCursor != NULL;691autoCursor = autoIterator.getNext()692){693debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),694autoCursor->getOffset(), autoCursor->getSize(), "Local",695debugFrameSlotInfo696);697}698699debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),7000, getProperties().getPointerSize(), "Return address",701debugFrameSlotInfo702);703#endif704705// Set the VFP state for the TR::InstOpCode::proc instruction706//707if (_properties.getAlwaysDedicateFramePointerRegister())708{709cg()->initializeVFPState(getProperties().getFramePointerRegister(), 0);710}711else712{713cg()->initializeVFPState(TR::RealRegister::esp, 0);714}715716// In FSD, we must save linkage regs to the incoming argument area because717// the stack overflow check doesn't preserve them.718//719bool parmsHaveBeenBackSpilled = false;720if (comp()->getOption(TR_FullSpeedDebug))721{722cursor = movLinkageRegisters(cursor, true);723parmsHaveBeenBackSpilled = true;724}725726// Allocating the frame "speculatively" means bumping the stack pointer before checking for overflow727//728TR::GCStackAtlas *atlas = cg()->getStackAtlas();729bool doAllocateFrameSpeculatively = false;730if (metaDataReg)731{732// Generate stack overflow check733doAllocateFrameSpeculatively = frameIsMedium;734735if (doAllocateFrameSpeculatively)736{737// Subtract allocSize from esp before stack overflow check738739TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize);740TR_ASSERT(allocSize >= 1, "When allocSize >= 1, the frame should be small or large, but never medium");741742const TR::InstOpCode::Mnemonic subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? TR::InstOpCode::SUBRegImms() : TR::InstOpCode::SUBRegImm4();743cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg());744745minInstructionSize = 0; // The SUB satisfies the constraint746}747748TR::Instruction* jitOverflowCheck = NULL;749if (doOverflowCheck)750{751TR::X86VFPSaveInstruction* vfp = generateVFPSaveInstruction(cursor, cg());752cursor = generateStackOverflowCheckInstruction(vfp, TR::InstOpCode::CMPRegMem(), espReal, generateX86MemoryReference(metaDataReg, cg()->getStackLimitOffset(), cg()), cg());753754TR::LabelSymbol* begLabel = generateLabelSymbol(cg());755TR::LabelSymbol* endLabel = generateLabelSymbol(cg());756TR::LabelSymbol* checkLabel = generateLabelSymbol(cg());757begLabel->setStartInternalControlFlow();758endLabel->setEndInternalControlFlow();759checkLabel->setStartOfColdInstructionStream();760761cursor = generateLabelInstruction(cursor, TR::InstOpCode::label, begLabel, cg());762cursor = generateLabelInstruction(cursor, TR::InstOpCode::JBE4, checkLabel, cg());763cursor = generateLabelInstruction(cursor, TR::InstOpCode::label, endLabel, cg());764765// At this point, cg()->getAppendInstruction() is already in the cold code section.766generateVFPRestoreInstruction(vfp, cursor->getNode(), cg());767generateLabelInstruction(TR::InstOpCode::label, cursor->getNode(), checkLabel, cg());768generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, cursor->getNode(), machine()->getRealRegister(TR::RealRegister::edi), allocSize, cg());769if (doAllocateFrameSpeculatively)770{771generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), cursor->getNode(), espReal, allocSize, cg());772}773TR::SymbolReference* helper = comp()->getSymRefTab()->findOrCreateStackOverflowSymbolRef(NULL);774jitOverflowCheck = generateImmSymInstruction(TR::InstOpCode::CALLImm4, cursor->getNode(), (uintptr_t)helper->getMethodAddress(), helper, cg());775jitOverflowCheck->setNeedsGCMap(0xFF00FFFF);776if (doAllocateFrameSpeculatively)777{778generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), cursor->getNode(), espReal, allocSize, cg());779}780generateLabelInstruction(TR::InstOpCode::JMP4, cursor->getNode(), endLabel, cg());781}782783if (cg()->canEmitBreakOnDFSet())784cursor = generateBreakOnDFSet(cg(), cursor);785786if (atlas)787{788uint32_t numberOfParmSlots = atlas->getNumberOfParmSlotsMapped();789TR_GCStackMap *map;790if (_properties.getNumIntegerArgumentRegisters() == 0)791{792map = atlas->getParameterMap();793}794else795{796map = new (trHeapMemory(), numberOfParmSlots) TR_GCStackMap(numberOfParmSlots);797map->copy(atlas->getParameterMap());798799// Before this point, the parameter stack considers all parms to be on the stack.800// Fix it to have register parameters in registers.801//802TR::ParameterSymbol *paramCursor = paramIterator.getFirst();803804for (805paramCursor = paramIterator.getFirst();806paramCursor != NULL;807paramCursor = paramIterator.getNext()808){809int32_t intRegArgIndex = paramCursor->getLinkageRegisterIndex();810if (intRegArgIndex >= 0 &&811paramCursor->isReferencedParameter() &&812paramCursor->isCollectedReference())813{814// In FSD, the register parameters have already been backspilled.815// They exist in both registers and on the stack.816//817if (!parmsHaveBeenBackSpilled)818map->resetBit(paramCursor->getGCMapIndex());819820map->setRegisterBits(TR::RealRegister::gprMask((getProperties().getIntegerArgumentRegister(intRegArgIndex))));821}822}823}824825if (jitOverflowCheck)826jitOverflowCheck->setGCMap(map);827828atlas->setParameterMap(map);829}830}831832bodySymbol->setProloguePushSlots(preservedRegsSize / properties.getPointerSize());833834// Allocate the stack frame835//836if (allocSize == 0)837{838// No need to do anything839}840else if (!doAllocateFrameSpeculatively)841{842TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize);843const TR::InstOpCode::Mnemonic subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? TR::InstOpCode::SUBRegImms() : TR::InstOpCode::SUBRegImm4();844cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg());845}846847//Support to paint allocated frame slots.848//849if (( comp()->getOption(TR_PaintAllocatedFrameSlotsDead) || comp()->getOption(TR_PaintAllocatedFrameSlotsFauxObject) ) && allocSize!=0)850{851uint32_t paintValue32 = 0;852uint64_t paintValue64 = 0;853854TR::RealRegister *paintReg = NULL;855TR::RealRegister *frameSlotIndexReg = machine()->getRealRegister(TR::RealRegister::edi);856uint32_t paintBound = 0;857uint32_t paintSlotsOffset = 0;858uint32_t paintSize = allocSize-sizeof(uintptr_t);859860//Paint the slots with deadf00d861//862if (comp()->getOption(TR_PaintAllocatedFrameSlotsDead))863{864if (comp()->target().is64Bit())865paintValue64 = (uint64_t)CONSTANT64(0xdeadf00ddeadf00d);866else867paintValue32 = 0xdeadf00d;868}869//Paint stack slots with a arbitrary object aligned address.870//871else872{873if (comp()->target().is64Bit())874{875paintValue64 = ((uintptr_t) ((uintptr_t)comp()->getOptions()->getHeapBase() + (uintptr_t) 4096));876}877else878{879paintValue32 = ((uintptr_t) ((uintptr_t)comp()->getOptions()->getHeapBase() + (uintptr_t) 4096));880}881}882883TR::LabelSymbol *startLabel = generateLabelSymbol(cg());884885//Load the 64 bit paint value into a paint reg.886#ifdef TR_TARGET_64BIT887paintReg = machine()->getRealRegister(TR::RealRegister::r8);888cursor = new (trHeapMemory()) TR::AMD64RegImm64Instruction(cursor, TR::InstOpCode::MOV8RegImm64, paintReg, paintValue64, cg());889#endif890891//Perform the paint.892//893cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::MOVRegImm4(), frameSlotIndexReg, paintSize, cg());894cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::label, startLabel, cg());895if (comp()->target().is64Bit())896cursor = new (trHeapMemory()) TR::X86MemRegInstruction(cursor, TR::InstOpCode::S8MemReg, generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintReg, cg());897else898cursor = new (trHeapMemory()) TR::X86MemImmInstruction(cursor, TR::InstOpCode::SMemImm4(), generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintValue32, cg());899cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::SUBRegImms(), frameSlotIndexReg, sizeof(intptr_t),cg());900cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::CMPRegImm4(), frameSlotIndexReg, paintBound, cg());901cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::JGE4, startLabel,cg());902}903904// Save preserved regs905//906cursor = savePreservedRegisters(cursor);907908// Insert some counters909//910cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#preserved", preservedRegsSize >> getProperties().getParmSlotShift(), TR::DebugCounter::Expensive);911cursor = cg()->generateDebugCounter(cursor, "cg.prologues:inline", 1, TR::DebugCounter::Expensive);912913// Initialize any local pointers that could otherwise confuse the GC.914//915TR::RealRegister *framePointer = machine()->getRealRegister(TR::RealRegister::vfp);916if (atlas)917{918TR_ASSERT(_properties.getNumScratchRegisters() >= 2, "Need two scratch registers to initialize reference locals");919TR::RealRegister *loopReg = machine()->getRealRegister(properties.getIntegerScratchRegister(1));920921int32_t numReferenceLocalSlotsToInitialize = atlas->getNumberOfSlotsToBeInitialized();922int32_t numInternalPointerSlotsToInitialize = 0;923924if (atlas->getInternalPointerMap())925{926numInternalPointerSlotsToInitialize = atlas->getNumberOfDistinctPinningArrays() +927atlas->getInternalPointerMap()->getNumInternalPointers();928}929930if (numReferenceLocalSlotsToInitialize > 0 || numInternalPointerSlotsToInitialize > 0)931{932cursor = new (trHeapMemory()) TR::X86RegRegInstruction(cursor, TR::InstOpCode::XORRegReg(), scratchReg, scratchReg, cg());933934// Initialize locals that are live on entry935//936if (numReferenceLocalSlotsToInitialize > 0)937{938cursor = initializeLocals(939cursor,940atlas->getLocalBaseOffset(),941numReferenceLocalSlotsToInitialize,942properties.getPointerSize(),943framePointer, scratchReg, loopReg,944cg());945946#if defined(DEBUG)947debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),948atlas->getLocalBaseOffset(),949numReferenceLocalSlotsToInitialize * properties.getPointerSize(), "Initialized live vars",950debugFrameSlotInfo);951#endif952}953954// Initialize internal pointers and their pinning arrays955//956if (numInternalPointerSlotsToInitialize > 0)957{958cursor = initializeLocals(959cursor,960atlas->getOffsetOfFirstInternalPointer(),961numInternalPointerSlotsToInitialize,962properties.getPointerSize(),963framePointer, scratchReg, loopReg,964cg());965966#if defined(DEBUG)967debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),968atlas->getOffsetOfFirstInternalPointer(),969numInternalPointerSlotsToInitialize * properties.getPointerSize(),970"Initialized internal pointers",971debugFrameSlotInfo);972#endif973}974}975}976977#if defined(DEBUG)978debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),979-localSize - preservedRegsSize - outgoingArgSize,980outgoingArgSize, "Outgoing args",981debugFrameSlotInfo982);983#endif984985// Move parameters to where the method body will expect to find them986// TODO: If we separate the stores from the reg moves, we could do the stores987// before buying the stack frame, thereby using tiny offsets and thus smaller988// instructions.989//990cursor = copyParametersToHomeLocation(cursor, parmsHaveBeenBackSpilled);991992cursor = cg()->generateDebugCounter(cursor, "cg.prologues", 1, TR::DebugCounter::Expensive);993cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#allocBytes", allocSize, TR::DebugCounter::Expensive);994cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#localBytes", localSize, TR::DebugCounter::Expensive);995cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#frameBytes", cg()->getFrameSizeInBytes(), TR::DebugCounter::Expensive);996cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#peakBytes", peakSize, TR::DebugCounter::Expensive);997998#if defined(DEBUG)999if (comp()->getOption(TR_TraceCG))1000{1001diagnostic("\nFrame layout:\n");1002diagnostic(" +rsp +vfp end size what\n");1003debugFrameSlotInfo->sort()->print(cg()->getDebug());1004diagnostic("\n");1005}1006#endif1007}10081009bool J9::X86::PrivateLinkage::needsFrameDeallocation()1010{1011// frame needs a deallocation if FrameSize == 01012//1013return !_properties.getAlwaysDedicateFramePointerRegister() && cg()->getFrameSizeInBytes() == 0;1014}10151016TR::Instruction *J9::X86::PrivateLinkage::deallocateFrameIfNeeded(TR::Instruction *cursor, int32_t size)1017{1018return cursor;1019}102010211022void J9::X86::PrivateLinkage::createEpilogue(TR::Instruction *cursor)1023{1024if (cg()->canEmitBreakOnDFSet())1025cursor = generateBreakOnDFSet(cg(), cursor);10261027TR::RealRegister* espReal = machine()->getRealRegister(TR::RealRegister::esp);10281029cursor = cg()->generateDebugCounter(cursor, "cg.epilogues", 1, TR::DebugCounter::Expensive);10301031// Restore preserved regs1032//1033cursor = restorePreservedRegisters(cursor);10341035// Deallocate the stack frame1036//1037if (_properties.getAlwaysDedicateFramePointerRegister())1038{1039// Restore stack pointer from frame pointer1040//1041cursor = generateRegRegInstruction(cursor, TR::InstOpCode::MOVRegReg(), espReal, machine()->getRealRegister(_properties.getFramePointerRegister()), cg());1042cursor = generateRegInstruction(cursor, TR::InstOpCode::POPReg, machine()->getRealRegister(_properties.getFramePointerRegister()), cg());1043}1044else1045{1046auto frameSize = cg()->getFrameSizeInBytes();1047if (frameSize != 0)1048{1049cursor = generateRegImmInstruction(cursor, (frameSize <= 127) ? TR::InstOpCode::ADDRegImms() : TR::InstOpCode::ADDRegImm4(), espReal, frameSize, cg());1050}1051}10521053if (cursor->getNext()->getOpCodeValue() == TR::InstOpCode::RETImm2)1054{1055toIA32ImmInstruction(cursor->getNext())->setSourceImmediate(comp()->getJittedMethodSymbol()->getNumParameterSlots() << getProperties().getParmSlotShift());1056}1057}10581059TR::Register *1060J9::X86::PrivateLinkage::buildDirectDispatch(1061TR::Node *callNode,1062bool spillFPRegs)1063{1064TR::StackMemoryRegion stackMemoryRegion(*comp()->trMemory());10651066TR::SymbolReference *methodSymRef = callNode->getSymbolReference();1067TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();10681069TR::X86CallSite site(callNode, this);10701071// Add the int3 instruction if breakOnThrow is set on this user defined exception1072//1073TR::SimpleRegex *r = comp()->getOptions()->getBreakOnThrow();1074if (r && callNode && callNode->getOpCode().hasSymbolReference() &&1075comp()->getSymRefTab()->findOrCreateAThrowSymbolRef(comp()->getMethodSymbol())==callNode->getSymbolReference() &&1076callNode->getNumChildren()>=1 && callNode->getFirstChild()->getNumChildren()>=1 &&1077callNode->getFirstChild()->getFirstChild()->getOpCode().hasSymbolReference() &&1078callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->isStatic() &&1079callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getCPIndex() >= 0 &&1080callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->castToStaticSymbol()->isClassObject() &&1081!callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->castToStaticSymbol()->addressIsCPIndexOfStatic())1082{1083uint32_t len;1084TR_ResolvedMethod * method =1085callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getOwningMethod(comp());1086int32_t cpIndex = callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getCPIndex();1087char * name = method->getClassNameFromConstantPool(cpIndex, len);1088if (name)1089{1090if (TR::SimpleRegex::matchIgnoringLocale(r, name))1091{1092generateInstruction(TR::InstOpCode::INT3, callNode, cg());1093}1094}1095}10961097// Build arguments and initially populate regdeps1098//1099buildCallArguments(site);11001101// Remember where internal control flow region should start,1102// and create labels1103//1104TR::Instruction *startBookmark = cg()->getAppendInstruction();1105TR::LabelSymbol *startLabel = generateLabelSymbol(cg());1106TR::LabelSymbol *doneLabel = generateLabelSymbol(cg());1107startLabel->setStartInternalControlFlow();1108doneLabel->setEndInternalControlFlow();11091110buildDirectCall(callNode->getSymbolReference(), site);11111112// Construct postconditions1113//1114TR::Register *returnRegister = buildCallPostconditions(site);1115site.stopAddingConditions();11161117// Create the internal control flow region and VFP adjustment1118//1119generateLabelInstruction(startBookmark, TR::InstOpCode::label, startLabel, site.getPreConditionsUnderConstruction(), cg());1120if (getProperties().getCallerCleanup())1121{1122// TODO: Caller must clean up1123}1124else if (callNode->getSymbol()->castToMethodSymbol()->isHelper() && getProperties().getUsesRegsForHelperArgs())1125{1126// No cleanup needed for helpers if args are passed in registers1127}1128else1129{1130generateVFPCallCleanupInstruction(-site.getArgSize(), callNode, cg());1131}1132generateLabelInstruction(TR::InstOpCode::label, callNode, doneLabel, site.getPostConditionsUnderConstruction(), cg());11331134// Stop using the killed registers that are not going to persist1135//1136stopUsingKilledRegisters(site.getPostConditionsUnderConstruction(), returnRegister);11371138if (callNode->getType().isFloatingPoint())1139{1140static char *forceX87LinkageForSSE = feGetEnv("TR_ForceX87LinkageForSSE");1141if (callNode->getReferenceCount() == 1 && returnRegister->getKind() == TR_X87)1142{1143// If the method returns a floating-point value that is not used, insert a1144// dummy store to eventually pop the value from the floating-point stack.1145//1146generateFPSTiST0RegRegInstruction(TR::InstOpCode::FSTRegReg, callNode, returnRegister, returnRegister, cg());1147}1148else if (forceX87LinkageForSSE && returnRegister->getKind() == TR_FPR)1149{1150// If the caller expects the return value in an XMMR, insert a1151// transfer from the floating-point stack to the XMMR via memory.1152//1153coerceFPReturnValueToXMMR(callNode, site.getPostConditionsUnderConstruction(), site.getMethodSymbol(), returnRegister);1154}1155}11561157if (cg()->enableRegisterAssociations() && !callNode->getSymbol()->castToMethodSymbol()->preservesAllRegisters())1158associatePreservedRegisters(site.getPostConditionsUnderConstruction(), returnRegister);11591160return returnRegister;1161}116211631164TR::X86CallSite::X86CallSite(TR::Node *callNode, TR::Linkage *calleeLinkage)1165:_callNode(callNode)1166,_linkage(calleeLinkage)1167,_vftImplicitExceptionPoint(NULL)1168,_firstPICSlotInstruction(NULL)1169,_profiledTargets(NULL)1170,_interfaceClassOfMethod(NULL)1171,_argSize(-1)1172,_preservedRegisterMask(0)1173,_thunkAddress(NULL)1174,_useLastITableCache(false)1175{1176TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());1177if (getMethodSymbol()->isInterface())1178{1179// Find the class pointer to the interface class if it is already loaded.1180// This is needed by both static PICs1181//1182TR::Method *interfaceMethod = getMethodSymbol()->getMethod();1183int32_t len = interfaceMethod->classNameLength();1184char * s = TR::Compiler->cls.classNameToSignature(interfaceMethod->classNameChars(), len, comp());1185_interfaceClassOfMethod = fej9->getClassFromSignature(s, len, getSymbolReference()->getOwningMethod(comp()));1186}11871188setupVirtualGuardInfo();1189computeProfiledTargets();11901191// Initialize the register dependencies with conservative estimates of the1192// number of conditions1193//1194uint32_t numPreconditions =1195calleeLinkage->getProperties().getNumIntegerArgumentRegisters()1196+ calleeLinkage->getProperties().getNumFloatArgumentRegisters()1197+ 3; // VM Thread + eax + possible vtableIndex/J9Method arg on IA3211981199uint32_t numPostconditions =1200calleeLinkage->getProperties().getNumberOfVolatileGPRegisters()1201+ calleeLinkage->getProperties().getNumberOfVolatileXMMRegisters()1202+ 3; // return reg + VM Thread + scratch12031204_preConditionsUnderConstruction = generateRegisterDependencyConditions(numPreconditions, 0, cg());1205_postConditionsUnderConstruction = generateRegisterDependencyConditions((COPY_PRECONDITIONS_TO_POSTCONDITIONS? numPreconditions : 0), numPostconditions + (COPY_PRECONDITIONS_TO_POSTCONDITIONS? numPreconditions : 0), cg());120612071208_preservedRegisterMask = getLinkage()->getProperties().getPreservedRegisterMapForGC();1209if (getMethodSymbol()->preservesAllRegisters())1210{1211_preservedRegisterMask |= TR::RealRegister::getAvailableRegistersMask(TR_GPR);1212if (callNode->getDataType() != TR::NoType)1213{1214// Cross our fingers and hope things that preserve all regs only return ints1215_preservedRegisterMask &= ~TR::RealRegister::gprMask(getLinkage()->getProperties().getIntegerReturnRegister());1216}1217}12181219}12201221void TR::X86CallSite::setupVirtualGuardInfo()1222{1223TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());1224_virtualGuardKind = TR_NoGuard;1225_devirtualizedMethod = NULL;1226_devirtualizedMethodSymRef = NULL;12271228if (getMethodSymbol()->isVirtual() && _callNode->getOpCode().isIndirect())1229{1230TR_ResolvedMethod *resolvedMethod = getResolvedMethod();1231if (resolvedMethod &&1232(!getMethodSymbol()->isVMInternalNative() || !comp()->getOption(TR_FullSpeedDebug)) &&1233!_callNode->isTheVirtualCallNodeForAGuardedInlinedCall())1234{1235if (!resolvedMethod->virtualMethodIsOverridden() &&1236!resolvedMethod->isAbstract())1237{1238_virtualGuardKind = TR_NonoverriddenGuard;1239_devirtualizedMethod = resolvedMethod;1240_devirtualizedMethodSymRef = getSymbolReference();1241}1242else1243{1244TR_OpaqueClassBlock *thisClass = resolvedMethod->containingClass();1245TR_DevirtualizedCallInfo *devirtualizedCallInfo = comp()->findDevirtualizedCall(_callNode);1246TR_OpaqueClassBlock *refinedThisClass = NULL;12471248if (devirtualizedCallInfo)1249{1250refinedThisClass = devirtualizedCallInfo->_thisType;12511252if (refinedThisClass)1253thisClass = refinedThisClass;1254}12551256TR::SymbolReference *methodSymRef = getSymbolReference();1257TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();1258/* Devirtualization is not currently supported for AOT compilations */1259if (thisClass && TR::Compiler->cls.isAbstractClass(comp(), thisClass) && !comp()->compileRelocatableCode())1260{1261TR_ResolvedMethod * method = chTable->findSingleAbstractImplementer(thisClass, methodSymRef->getOffset(), methodSymRef->getOwningMethod(comp()), comp());1262if (method &&1263(comp()->isRecursiveMethodTarget(method) ||1264!method->isInterpreted() ||1265method->isJITInternalNative()))1266{1267_virtualGuardKind = TR_AbstractGuard;1268_devirtualizedMethod = method;1269}1270}1271else if (refinedThisClass &&1272!chTable->isOverriddenInThisHierarchy(resolvedMethod, refinedThisClass, methodSymRef->getOffset(), comp()))1273{1274if (resolvedMethod->virtualMethodIsOverridden())1275{1276TR_ResolvedMethod *calleeMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(), refinedThisClass, methodSymRef->getOffset());1277if (calleeMethod &&1278(comp()->isRecursiveMethodTarget(calleeMethod) ||1279!calleeMethod->isInterpreted() ||1280calleeMethod->isJITInternalNative()))1281{1282_virtualGuardKind = TR_HierarchyGuard;1283_devirtualizedMethod = calleeMethod;1284}1285}1286}1287}12881289if (_devirtualizedMethod != NULL && _devirtualizedMethodSymRef == NULL)1290_devirtualizedMethodSymRef = comp()->getSymRefTab()->findOrCreateMethodSymbol(1291getSymbolReference()->getOwningMethodIndex(), -1, _devirtualizedMethod, TR::MethodSymbol::Virtual);1292}1293}12941295// Some self-consistency conditions1296TR_ASSERT((_virtualGuardKind == TR_NoGuard) == (_devirtualizedMethod == NULL), "Virtual guard requires _devirtualizedMethod");1297TR_ASSERT((_devirtualizedMethod == NULL) == (_devirtualizedMethodSymRef == NULL), "_devirtualizedMethod requires _devirtualizedMethodSymRef");1298}12991300void TR::X86CallSite::computeProfiledTargets()1301{1302TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());13031304if (cg()->profiledPointersRequireRelocation())1305// bail until create appropriate relocations to validate profiled targets1306return;13071308// Use static PICs for guarded calls as well1309//13101311_profiledTargets = new(comp()->trStackMemory()) TR_ScratchList<TR::X86PICSlot>(comp()->trMemory());13121313TR::SymbolReference *methodSymRef = getSymbolReference();1314TR::Node *callNode = getCallNode();13151316// TODO: Note the different logic for virtual and interface calls. Is this necessary?1317//13181319if (getMethodSymbol()->isVirtual() && !callNode->getSymbolReference()->isUnresolved() &&1320(callNode->getSymbolReference() != comp()->getSymRefTab()->findObjectNewInstanceImplSymbol()) &&1321callNode->getOpCode().isIndirect())1322{1323if (!comp()->getOption(TR_DisableInterpreterProfiling) &&1324TR_ValueProfileInfoManager::get(comp()))1325{1326TR::Node *callNode = getCallNode();1327TR_AddressInfo *valueInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));13281329// PMR 05447,379,000 getTopValue may return array length profile data instead of a class pointer1330// (when the virtual call feeds an arraycopy method length parameter). We need to defend this case to1331// avoid attempting to use the length as a pointer, so use asAddressInfo() to gate assignment of topValue.1332uintptr_t topValue = (valueInfo) ? valueInfo->getTopValue() : 0;13331334// if the call to hashcode is a virtual call node, the top value was already inlined.1335if (callNode->isTheVirtualCallNodeForAGuardedInlinedCall())1336topValue = 0;13371338// Is the topValue valid?1339if (topValue)1340{1341if (valueInfo->getTopProbability() < getMinProfiledCallFrequency() ||1342comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9))1343{1344topValue = 0;1345}1346else1347{1348//printf("Checking is instanceof for top %p for %s\n", topValue, methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->signature(comp()->trMemory())); fflush(stdout);1349TR_OpaqueClassBlock *callSiteMethodClass = methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->classOfMethod();1350if (!cg()->isProfiledClassAndCallSiteCompatible((TR_OpaqueClassBlock *)topValue, callSiteMethodClass))1351{1352topValue = 0;1353}1354}1355}13561357if (!topValue && !callNode->getSymbolReference()->isUnresolved() &&1358(callNode->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_lang_Object_clone))1359topValue = (uintptr_t) comp()->getObjectClassPointer();13601361if (topValue)1362{13631364TR_ResolvedMethod *profiledVirtualMethod = callNode->getSymbolReference()->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),1365(TR_OpaqueClassBlock *)topValue, methodSymRef->getOffset());1366if (profiledVirtualMethod &&1367(!profiledVirtualMethod->isInterpreted() ||1368profiledVirtualMethod->isJITInternalNative()))1369{1370//if (!getMethodSymbol()->isInterface() && profiledVirtualMethod->isJITInternalNative())1371//printf("New opportunity in %s to callee %s\n", comp()->signature(), profiledVirtualMethod->signature(comp()->trMemory(), stackAlloc));1372//TR_ASSERT(profiledVirtualMethod->classOfMethod() == (TR_OpaqueClassBlock *)topValue, "assertion failure");13731374TR_OpaqueMethodBlock *methodToBeCompared = NULL;1375int32_t slot = -1;1376if (profiledVirtualMethod->isJITInternalNative())1377{1378int32_t offset = callNode->getSymbolReference()->getOffset();1379slot = fej9->virtualCallOffsetToVTableSlot(offset);1380methodToBeCompared = profiledVirtualMethod->getPersistentIdentifier();1381}13821383_profiledTargets->add(new(comp()->trStackMemory()) TR::X86PICSlot((uintptr_t)topValue, profiledVirtualMethod, true, methodToBeCompared, slot));1384}1385}1386}1387}1388else if (getMethodSymbol()->isInterface())1389{1390bool staticPICsExist = false;1391int32_t numStaticPICSlots = 0;139213931394TR_AddressInfo *addressInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));1395#if defined(OSX)1396uint64_t topValue;1397#else1398uintptr_t topValue;1399#endif /* OSX */1400float missRatio = 0.0;1401if (addressInfo && addressInfo->getTopValue(topValue) > 0 && topValue && !comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9) &&1402addressInfo->getTopProbability() >= getMinProfiledCallFrequency())1403{1404uint32_t totalFrequency = addressInfo->getTotalFrequency();1405TR_ScratchList<TR_ExtraAddressInfo> valuesSortedByFrequency(comp()->trMemory());1406addressInfo->getSortedList(comp(), &valuesSortedByFrequency);14071408static const char *p = feGetEnv("TR_TracePIC");1409if (p)1410{1411traceMsg(comp(), "Value profile info for callNode %p in %s\n", callNode, comp()->signature());1412addressInfo->getProfiler()->dumpInfo(comp()->getOutFile());1413traceMsg(comp(), "\n");1414}14151416uintptr_t totalPICHitFrequency = 0;1417uintptr_t totalPICMissFrequency = 0;1418ListIterator<TR_ExtraAddressInfo> sortedValuesIt(&valuesSortedByFrequency);1419for (TR_ExtraAddressInfo *profiledInfo = sortedValuesIt.getFirst(); profiledInfo != NULL; profiledInfo = sortedValuesIt.getNext())1420{1421float frequency = ((float)profiledInfo->_frequency) / totalFrequency;1422if (comp()->getOption(TR_TraceCG))1423traceMsg(comp(), " Profiled target frequency %f", frequency);14241425TR_OpaqueClassBlock *thisType = (TR_OpaqueClassBlock *) profiledInfo->_value;1426TR_ResolvedMethod *profiledInterfaceMethod = NULL;1427TR::SymbolReference *methodSymRef = getSymbolReference();1428if (!comp()->getPersistentInfo()->isObsoleteClass((void *)thisType, fej9))1429{1430profiledInterfaceMethod = methodSymRef->getOwningMethod(comp())->getResolvedInterfaceMethod(comp(),1431thisType, methodSymRef->getCPIndex());1432}1433if (profiledInterfaceMethod &&1434(!profiledInterfaceMethod->isInterpreted() ||1435profiledInterfaceMethod->isJITInternalNative()))1436{1437if (frequency < getMinProfiledCallFrequency())1438{1439if (comp()->getOption(TR_TraceCG))1440traceMsg(comp(), " - Too infrequent");1441totalPICMissFrequency += profiledInfo->_frequency;1442}1443else if (numStaticPICSlots >= comp()->getOptions()->getMaxStaticPICSlots(comp()->getMethodHotness()))1444{1445if (comp()->getOption(TR_TraceCG))1446traceMsg(comp(), " - Already reached limit of %d static PIC slots", numStaticPICSlots);1447totalPICMissFrequency += profiledInfo->_frequency;1448}1449else1450{1451_profiledTargets->add(new(comp()->trStackMemory()) TR::X86PICSlot((uintptr_t)thisType, profiledInterfaceMethod));1452if (comp()->getOption(TR_TraceCG))1453traceMsg(comp(), " + Added static PIC slot");1454numStaticPICSlots++;1455totalPICHitFrequency += profiledInfo->_frequency;1456}1457if (comp()->getOption(TR_TraceCG))1458traceMsg(comp(), " for %s\n", profiledInterfaceMethod->signature(comp()->trMemory(), stackAlloc));1459}1460else1461{1462if (comp()->getOption(TR_TraceCG))1463traceMsg(comp(), " * Can't find suitable method from profile info\n");1464}14651466}1467missRatio = 1.0 * totalPICMissFrequency / totalFrequency;1468}14691470_useLastITableCache = !comp()->getOption(TR_DisableLastITableCache) ? true : false;1471// Disable lastITable logic if all the implementers can fit into the pic slots during non-startup state1472if (_useLastITableCache && comp()->target().is64Bit() && _interfaceClassOfMethod && comp()->getPersistentInfo()->getJitState() != STARTUP_STATE)1473{1474J9::X86::PrivateLinkage *privateLinkage = static_cast<J9::X86::PrivateLinkage *>(getLinkage());1475int32_t numPICSlots = numStaticPICSlots + privateLinkage->IPicParameters.defaultNumberOfSlots;1476TR_ResolvedMethod **implArray = new (comp()->trStackMemory()) TR_ResolvedMethod*[numPICSlots+1];1477TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();1478int32_t cpIndex = getSymbolReference()->getCPIndex();1479int32_t numImplementers = chTable->findnInterfaceImplementers(_interfaceClassOfMethod, numPICSlots+1, implArray, cpIndex, getSymbolReference()->getOwningMethod(comp()), comp());1480if (numImplementers <= numPICSlots)1481{1482_useLastITableCache = false;1483if (comp()->getOption(TR_TraceCG))1484traceMsg(comp(),"Found %d implementers for call to %s, can be fit into %d pic slots, disabling lastITable cache\n", numImplementers, getMethodSymbol()->getMethod()->signature(comp()->trMemory()), numPICSlots);1485}1486}1487else if (_useLastITableCache && comp()->target().is32Bit()) // Use the original heuristic for ia32 due to defect 1116511488{1489_useLastITableCache = false; // Default on ia32 is not to use the last itable cache1490static char *lastITableCacheThresholdStr = feGetEnv("TR_lastITableCacheThreshold");14911492// With 4 static and 2 dynamic PIC slots, the cache starts to be used1493// for 7 equally-likely targets. We want to catch that case, so the1494// threshold must be comfortably below 3/7 = 28%.1495//1496float lastITableCacheThreshold = lastITableCacheThresholdStr? atof(lastITableCacheThresholdStr) : 0.2;1497if ( missRatio >= lastITableCacheThreshold1498&& performTransformation(comp(), "O^O PIC miss ratio is %f >= %f -- adding lastITable cache\n", missRatio, lastITableCacheThreshold))1499{1500_useLastITableCache = true;1501}1502}1503}15041505if (_profiledTargets->isEmpty())1506_profiledTargets = NULL;1507}15081509bool TR::X86CallSite::shouldUseInterpreterLinkage()1510{1511if (getMethodSymbol()->isVirtual() &&1512!getSymbolReference()->isUnresolved() &&1513getMethodSymbol()->isVMInternalNative() &&1514!getResolvedMethod()->virtualMethodIsOverridden() &&1515!getResolvedMethod()->isAbstract())1516return true;1517else1518return false;1519}152015211522TR::Register *TR::X86CallSite::evaluateVFT()1523{1524TR::Node *vftNode = getCallNode()->getFirstChild();1525if (vftNode->getRegister())1526return vftNode->getRegister();1527else1528{1529TR::Register *result = cg()->evaluate(vftNode);1530_vftImplicitExceptionPoint = cg()->getImplicitExceptionPoint();1531return result;1532}1533}15341535bool TR::X86CallSite::resolvedVirtualShouldUseVFTCall()1536{1537TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());1538TR_ASSERT(getMethodSymbol()->isVirtual() && !getSymbolReference()->isUnresolved(), "assertion failure");15391540// WARNING: VPIC doesn't work for resolved calls at the moment, so setting1541// TR_EnableVPICForResolvedVirtualCalls won't work. The most straightforward1542// way to get VPIC to support (most) resolved calls is to simply treat them1543// the same way as unresolved ones, but that isn't allowed when we are1544// promising isResolvedVirtualDispatchGuaranteed().1545return1546fej9->isResolvedVirtualDispatchGuaranteed(comp()) &&1547(!comp()->getOption(TR_EnableVPICForResolvedVirtualCalls) ||1548getProfiledTargets() ||1549getCallNode()->isTheVirtualCallNodeForAGuardedInlinedCall() ||1550( comp()->getSymRefTab()->findObjectNewInstanceImplSymbol() &&1551comp()->getSymRefTab()->findObjectNewInstanceImplSymbol()->getSymbol() == getResolvedMethodSymbol()));1552}15531554void TR::X86CallSite::stopAddingConditions()1555{1556TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());1557if (COPY_PRECONDITIONS_TO_POSTCONDITIONS)1558{1559TR::RegisterDependencyGroup *preconditions = getPreConditionsUnderConstruction()->getPreConditions();1560TR::RegisterDependencyGroup *postconditions = getPostConditionsUnderConstruction()->getPostConditions();1561for (uint8_t i = 0; i < getPreConditionsUnderConstruction()->getAddCursorForPre(); i++)1562{1563TR::RegisterDependency *pre = preconditions->getRegisterDependency(i);1564getPostConditionsUnderConstruction()->unionPreCondition(pre->getRegister(), pre->getRealRegister(), cg(), pre->getFlags());1565TR::RegisterDependency *post = postconditions->findDependency(pre->getRealRegister(), getPostConditionsUnderConstruction()->getAddCursorForPost());1566if (!post)1567getPostConditionsUnderConstruction()->addPostCondition(pre->getRegister(), pre->getRealRegister(), cg(), pre->getFlags());1568}1569}15701571_preConditionsUnderConstruction->stopAddingPreConditions();1572_preConditionsUnderConstruction->stopAddingPostConditions();1573_postConditionsUnderConstruction->stopAddingPreConditions();1574_postConditionsUnderConstruction->stopAddingPostConditions();1575}15761577static void evaluateCommonedNodes(TR::Node *node, TR::CodeGenerator *cg)1578{1579// There is a rule that if a node with a symref is evaluated, it must be1580// evaluated in the first treetop under which it appears. (The so-called1581// "prompt evaluation" rule). Since we don't know what future trees will1582// do, this effectively means that any symref-bearing node that is commoned1583// with another treetop must be evaluated now.1584// We approximate this by saying that any node with a refcount >= 2 must be1585// evaluated now. The "refcount >= 2" is a conservative approximation of1586// "commoned with another treetop" because the latter is not cheap to figure out.1587// "Any node" is an approximation of "any node with a symref"; we do that1588// because it allows us to use a simple linear-time tree walk without1589// resorting to visit counts.1590//1591TR::Compilation * comp= cg->comp();1592if (node->getRegister() == NULL)1593{1594if (node->getReferenceCount() >= 2)1595{1596if (comp->getOption(TR_TraceCG))1597traceMsg(comp, "Promptly evaluating commoned node %s\n", cg->getDebug()->getName(node));1598cg->evaluate(node);1599}1600else1601{1602for (int32_t i = 0; i < node->getNumChildren(); i++)1603evaluateCommonedNodes(node->getChild(i), cg);1604}1605}1606}160716081609static bool indirectDispatchWillBuildVirtualGuard(TR::Compilation *comp, TR::X86CallSite *site)1610{1611TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());16121613// This method is used in vft mask instruction removal in buildIndirectDispatch1614// if method will generate virtual call guard and build direct call, then skip vft mask instruction.1615if (site->getVirtualGuardKind() != TR_NoGuard && fej9->canDevirtualizeDispatch() )1616{1617if (comp->performVirtualGuardNOPing())1618{1619return true;1620}1621else if (site->getVirtualGuardKind() == TR_NonoverriddenGuard1622&& !comp->getOption(TR_EnableHCR)1623&& !comp->getOption(TR_MimicInterpreterFrameShape))1624{1625return true;1626}1627}1628return false;1629}16301631TR::Register *J9::X86::PrivateLinkage::buildIndirectDispatch(TR::Node *callNode)1632{1633TR::StackMemoryRegion stackMemoryRegion(*comp()->trMemory());16341635TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());16361637TR::X86CallSite site(callNode, this);16381639// Build arguments and initially populate regdeps1640//1641buildCallArguments(site);16421643// If receiver could be NULL, must evaluate it before the call1644// so any exception occurs before the call.1645// Might as well do it outside the internal control flow.1646//1647// Also evaluate the VFT if it survives the call.1648// The optimizer expects things to be evaluated in1649// the first tree in which they appear.1650//1651bool skipVFTmaskInstruction = false;1652if (callNode->getSymbol()->castToMethodSymbol()->firstArgumentIsReceiver())1653{1654TR::Node *rcvrChild = callNode->getChild(callNode->getFirstArgumentIndex());1655TR::Node *vftChild = callNode->getFirstChild();1656bool loadVFTForNullCheck = false;16571658if (cg()->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() == TR::NULLCHK1659&& vftChild->getOpCode().isLoadIndirect()1660&& vftChild->getFirstChild() == cg()->getCurrentEvaluationTreeTop()->getNode()->getNullCheckReference()1661&& vftChild->getFirstChild()->isNonNull() == false)1662loadVFTForNullCheck = true;16631664bool willGenerateDirectCall = indirectDispatchWillBuildVirtualGuard(comp(), &site);1665static char *enableX86VFTLoadOpt = feGetEnv("TR_EnableX86VFTLoadOpt");16661667if (enableX86VFTLoadOpt &&1668loadVFTForNullCheck &&1669willGenerateDirectCall &&1670vftChild->getReferenceCount() == 1 &&1671vftChild->getRegister() == NULL)1672{1673/*cg()->generateDebugCounter(1674TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "skipmask",1675comp()->signature(),1676callNode->getByteCodeInfo().getCallerIndex(),1677callNode->getByteCodeInfo().getByteCodeIndex()));1678*/1679TR::MemoryReference *sourceMR = generateX86MemoryReference(vftChild, cg());1680TR::Register *reg = cg()->allocateRegister();1681// as vftChild->getOpCode().isLoadIndirect is true here, need set exception point1682TR::Instruction * instr = TR::TreeEvaluator::insertLoadMemory(vftChild, reg, sourceMR, TR_RematerializableAddress, cg());1683reg->setMemRef(sourceMR);1684cg()->setImplicitExceptionPoint(instr);1685site.setImplicitExceptionPoint(instr);1686cg()->stopUsingRegister(reg);1687skipVFTmaskInstruction = true;1688}1689else if (enableX86VFTLoadOpt &&1690loadVFTForNullCheck == false &&1691willGenerateDirectCall &&1692//vftChild->getReferenceCount() == 1 &&1693vftChild->getRegister() == NULL)1694{1695// skip evaluate vft mask load instruction1696// as it is not used in direct call1697//fprintf(stderr, "Skip load in %s\n", comp()->getMethodSymbol()->signature(comp()->trMemory()));1698skipVFTmaskInstruction = true;1699/*1700cg()->generateDebugCounter(1701TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "skipvft",1702comp()->signature(),1703callNode->getByteCodeInfo().getCallerIndex(),1704callNode->getByteCodeInfo().getByteCodeIndex()));1705*/1706}1707else if (rcvrChild->isNonNull() == false || callNode->getFirstChild()->getReferenceCount() > 1)1708{1709/*1710if (vftChild->getRegister() == NULL)1711{1712cg()->generateDebugCounter(1713TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "loadvft",1714comp()->signature(),1715callNode->getByteCodeInfo().getCallerIndex(),1716callNode->getByteCodeInfo().getByteCodeIndex()));1717}*/1718site.evaluateVFT();1719}1720}17211722// Children of the VFT expression may also survive the call.1723// (Note that the following is not sufficient for the VFT node1724// itself, which should use site.evaluateVFT instead.)1725//1726if (skipVFTmaskInstruction == false)1727evaluateCommonedNodes(callNode->getFirstChild(), cg());17281729// Remember where internal control flow region should start,1730// and create labels1731//1732TR::Instruction *startBookmark = cg()->getAppendInstruction();1733TR::LabelSymbol *startLabel = generateLabelSymbol(cg());1734TR::LabelSymbol *doneLabel = generateLabelSymbol(cg());1735startLabel->setStartInternalControlFlow();1736doneLabel->setEndInternalControlFlow();17371738// Allocate thunk if necessary1739//1740void *virtualThunk = NULL;1741if (getProperties().getNeedsThunksForIndirectCalls())1742{1743TR::MethodSymbol *methodSymbol = callNode->getSymbol()->castToMethodSymbol();1744TR::Method *method = methodSymbol->getMethod();1745if (methodSymbol->isComputed())1746{1747switch (method->getMandatoryRecognizedMethod())1748{1749case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:1750case TR::com_ibm_jit_JITHelpers_dispatchVirtual:1751{1752// Need a j2i thunk for the method that will ultimately be dispatched by this handle call1753char *j2iSignature = fej9->getJ2IThunkSignatureForDispatchVirtual(methodSymbol->getMethod()->signatureChars(), methodSymbol->getMethod()->signatureLength(), comp());1754int32_t signatureLen = strlen(j2iSignature);1755virtualThunk = fej9->getJ2IThunk(j2iSignature, signatureLen, comp());1756if (!virtualThunk)1757{1758virtualThunk = fej9->setJ2IThunk(j2iSignature, signatureLen,1759generateVirtualIndirectThunk(1760fej9->getEquivalentVirtualCallNodeForDispatchVirtual(callNode, comp())), comp());1761}1762}1763break;1764default:1765if (fej9->needsInvokeExactJ2IThunk(callNode, comp()))1766{1767TR_J2IThunk *thunk = generateInvokeExactJ2IThunk(callNode, methodSymbol->getMethod()->signatureChars());1768fej9->setInvokeExactJ2IThunk(thunk, comp());1769}1770break;1771}1772}1773else1774{1775virtualThunk = fej9->getJ2IThunk(methodSymbol->getMethod(), comp());1776if (!virtualThunk)1777virtualThunk = fej9->setJ2IThunk(methodSymbol->getMethod(), generateVirtualIndirectThunk(callNode), comp());1778}17791780site.setThunkAddress((uint8_t *)virtualThunk);1781}17821783TR::LabelSymbol *revirtualizeLabel = generateLabelSymbol(cg());1784if (site.getVirtualGuardKind() != TR_NoGuard && fej9->canDevirtualizeDispatch() && buildVirtualGuard(site, revirtualizeLabel) )1785{1786buildDirectCall(site.getDevirtualizedMethodSymRef(), site);1787buildRevirtualizedCall(site, revirtualizeLabel, doneLabel);1788}1789else1790{1791// Build static PIC if profiling targets available.1792//1793TR_ASSERT(skipVFTmaskInstruction == false, "VFT mask instruction is skipped in early evaluation");17941795TR::LabelSymbol *picMismatchLabel = NULL;1796TR_ScratchList<TR::X86PICSlot> *profiledTargets = site.getProfiledTargets();1797if (profiledTargets)1798{1799ListIterator<TR::X86PICSlot> i(profiledTargets);1800TR::X86PICSlot *picSlot = i.getFirst();1801while (picSlot)1802{1803picMismatchLabel = generateLabelSymbol(cg());18041805if (comp()->target().is32Bit())1806picSlot->setNeedsPicCallAlignment();18071808TR::Instruction *instr = buildPICSlot(*picSlot, picMismatchLabel, doneLabel, site);18091810if (fej9->isUnloadAssumptionRequired((TR_OpaqueClassBlock *)picSlot->getClassAddress(), comp()->getCurrentMethod()) ||1811cg()->profiledPointersRequireRelocation())1812{1813if (picSlot->getMethodAddress())1814comp()->getStaticMethodPICSites()->push_front(instr);1815else1816comp()->getStaticPICSites()->push_front(instr);1817}18181819picSlot = i.getNext();1820if (picSlot)1821generateLabelInstruction(TR::InstOpCode::label, site.getCallNode(), picMismatchLabel, cg());1822}18231824site.setFirstPICSlotInstruction(NULL);1825}18261827// Build the call1828//1829if (site.getMethodSymbol()->isVirtual() || site.getMethodSymbol()->isComputed())1830buildVirtualOrComputedCall(site, picMismatchLabel, doneLabel, (uint8_t *)virtualThunk);1831else1832buildInterfaceCall(site, picMismatchLabel, doneLabel, (uint8_t *)virtualThunk);1833}18341835// Construct postconditions1836//1837TR::Node *vftChild = callNode->getFirstChild();1838TR::Register *vftRegister = vftChild->getRegister();1839TR::Register *returnRegister;1840if (vftChild->getRegister() && (vftChild->getReferenceCount() > 1))1841{1842// VFT child survives the call, so we must include it in the postconditions.1843returnRegister = buildCallPostconditions(site);1844if (vftChild->getRegister() && vftChild->getRegister()->getRegisterPair())1845{1846site.addPostCondition(vftChild->getRegister()->getRegisterPair()->getHighOrder(), TR::RealRegister::NoReg);1847site.addPostCondition(vftChild->getRegister()->getRegisterPair()->getLowOrder(), TR::RealRegister::NoReg);1848}1849else1850site.addPostCondition(vftChild->getRegister(), TR::RealRegister::NoReg);1851cg()->recursivelyDecReferenceCount(vftChild);1852}1853else1854{1855// VFT child dies here; decrement it early so it doesn't interfere with dummy regs.1856cg()->recursivelyDecReferenceCount(vftChild);1857returnRegister = buildCallPostconditions(site);1858}18591860site.stopAddingConditions();18611862// Create the internal control flow region and VFP adjustment1863//1864generateLabelInstruction(startBookmark, TR::InstOpCode::label, startLabel, site.getPreConditionsUnderConstruction(), cg());1865if (!getProperties().getCallerCleanup())1866generateVFPCallCleanupInstruction(-site.getArgSize(), callNode, cg());1867generateLabelInstruction(TR::InstOpCode::label, callNode, doneLabel, site.getPostConditionsUnderConstruction(), cg());18681869// Stop using the killed registers that are not going to persist1870//1871stopUsingKilledRegisters(site.getPostConditionsUnderConstruction(), returnRegister);18721873if (callNode->getType().isFloatingPoint())1874{1875static char *forceX87LinkageForSSE = feGetEnv("TR_ForceX87LinkageForSSE");1876if (callNode->getReferenceCount() == 1 && returnRegister->getKind() == TR_X87)1877{1878// If the method returns a floating-point value that is not used, insert a1879// dummy store to eventually pop the value from the floating-point stack.1880//1881generateFPSTiST0RegRegInstruction(TR::InstOpCode::FSTRegReg, callNode, returnRegister, returnRegister, cg());1882}1883else if (forceX87LinkageForSSE && returnRegister->getKind() == TR_FPR)1884{1885// If the caller expects the return value in an XMMR, insert a1886// transfer from the floating-point stack to the XMMR via memory.1887//1888coerceFPReturnValueToXMMR(callNode, site.getPostConditionsUnderConstruction(), site.getMethodSymbol(), returnRegister);1889}1890}18911892if (cg()->enableRegisterAssociations())1893associatePreservedRegisters(site.getPostConditionsUnderConstruction(), returnRegister);18941895cg()->setImplicitExceptionPoint(site.getImplicitExceptionPoint());18961897return returnRegister;1898}18991900void J9::X86::PrivateLinkage::buildDirectCall(TR::SymbolReference *methodSymRef, TR::X86CallSite &site)1901{1902TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());1903TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();1904TR::Instruction *callInstr = NULL;1905TR::Node *callNode = site.getCallNode();1906TR_AtomicRegion *callSiteAtomicRegions = TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions;19071908if (comp()->target().is64Bit() && methodSymRef->getReferenceNumber()>=TR_AMD64numRuntimeHelpers)1909fej9->reserveTrampolineIfNecessary(comp(), methodSymRef, false);19101911#if defined(J9VM_OPT_JITSERVER)1912// JITServer Workaround: Further transmute dispatchJ9Method symbols to appear as a runtime helper, this will cause OMR to1913// generate a TR_HelperAddress relocation instead of a TR_RelativeMethodAddress Relocation.1914if (!comp()->getOption(TR_DisableInliningOfNatives) &&1915methodSymbol->getMandatoryRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchJ9Method &&1916comp()->isOutOfProcessCompilation())1917{1918methodSymbol->setHelper();1919}1920#endif /* defined(J9VM_OPT_JITSERVER) */19211922if (cg()->supportVMInternalNatives() && methodSymbol->isVMInternalNative())1923{1924// Find the virtual register for edi1925// TODO: The register used should come from the linkage properties, rather than being hardcoded1926//1927TR::RealRegister::RegNum ramMethodRegisterIndex = TR::RealRegister::edi;1928TR::Register *ramMethodReg = cg()->allocateRegister();1929site.addPostCondition(ramMethodReg, TR::RealRegister::edi);19301931// Load the RAM method into rdi and call the helper1932if (comp()->target().is64Bit())1933{1934generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, callNode, ramMethodReg, (uint64_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());1935}1936else1937{1938generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, callNode, ramMethodReg, (uint32_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());1939}19401941callInstr = generateHelperCallInstruction(callNode, TR_j2iTransition, NULL, cg());1942cg()->stopUsingRegister(ramMethodReg);1943}1944else if (comp()->target().is64Bit() && methodSymbol->isJITInternalNative())1945{1946// JIT callable natives on 64-bit may not be directly reachable. In lieu of trampolines and since this1947// is before binary encoding call through a register instead.1948//1949TR::RealRegister::RegNum nativeRegisterIndex = TR::RealRegister::edi;1950TR::Register *nativeMethodReg = cg()->allocateRegister();1951site.addPostCondition(nativeMethodReg, TR::RealRegister::edi);19521953generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, callNode, nativeMethodReg, (uint64_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());1954callInstr = generateRegInstruction(TR::InstOpCode::CALLReg, callNode, nativeMethodReg, cg());1955cg()->stopUsingRegister(nativeMethodReg);1956}1957else if (methodSymRef->isUnresolved() || methodSymbol->isInterpreted()1958|| (comp()->compileRelocatableCode() && !methodSymbol->isHelper()) )1959{1960TR::LabelSymbol *label = generateLabelSymbol(cg());19611962TR::Snippet *snippet = (TR::Snippet*)new (trHeapMemory()) TR::X86CallSnippet(cg(), callNode, label, false);1963cg()->addSnippet(snippet);1964snippet->gcMap().setGCRegisterMask(site.getPreservedRegisterMask());19651966callInstr = generateImmSymInstruction(TR::InstOpCode::CALLImm4, callNode, 0, new (trHeapMemory()) TR::SymbolReference(comp()->getSymRefTab(), label), cg());1967generateBoundaryAvoidanceInstruction(TR::X86BoundaryAvoidanceInstruction::unresolvedAtomicRegions, 8, 8, callInstr, cg());19681969// Nop is necessary due to confusion when resolving shared slots at a transition1970if (methodSymRef->isOSRInductionHelper())1971generatePaddingInstruction(1, callNode, cg());1972}1973else1974{1975callInstr = generateImmSymInstruction(TR::InstOpCode::CALLImm4, callNode, (uintptr_t)methodSymbol->getMethodAddress(), methodSymRef, cg());19761977if (comp()->target().isSMP() && !methodSymbol->isHelper())1978{1979// Make sure it's patchable in case it gets (re)compiled1980generatePatchableCodeAlignmentInstruction(callSiteAtomicRegions, callInstr, cg());1981}1982}19831984callInstr->setNeedsGCMap(site.getPreservedRegisterMask());19851986}19871988void1989J9::X86::PrivateLinkage::buildInterfaceCall(1990TR::X86CallSite &site,1991TR::LabelSymbol *entryLabel,1992TR::LabelSymbol *doneLabel,1993uint8_t *thunk)1994{1995TR::Register *vftRegister = site.evaluateVFT();19961997// Dynamic PICs populated by the PIC builder.1998// Might be able to simplify this in the presence of value profiling information.1999//2000buildIPIC(site, entryLabel, doneLabel, thunk);2001}20022003void J9::X86::PrivateLinkage::buildRevirtualizedCall(TR::X86CallSite &site, TR::LabelSymbol *revirtualizeLabel, TR::LabelSymbol *doneLabel)2004{2005TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());2006TR::Register *vftRegister = site.getCallNode()->getFirstChild()->getRegister(); // may be NULL; we don't need to evaluate it here2007int32_t vftOffset = site.getSymbolReference()->getOffset();20082009TR::Snippet *snippet;2010if (comp()->target().is64Bit())2011{2012#ifdef TR_TARGET_64BIT2013snippet = new (trHeapMemory()) TR::AMD64GuardedDevirtualSnippet(2014cg(),2015site.getCallNode(),2016site.getDevirtualizedMethodSymRef(),2017doneLabel,2018revirtualizeLabel,2019vftOffset,2020cg()->getCurrentEvaluationBlock(),2021vftRegister,2022site.getArgSize()2023);2024#endif2025}2026else2027{2028snippet = new (trHeapMemory()) TR::X86GuardedDevirtualSnippet(2029cg(),2030site.getCallNode(),2031doneLabel,2032revirtualizeLabel,2033vftOffset,2034cg()->getCurrentEvaluationBlock(),2035vftRegister2036);2037}2038snippet->gcMap().setGCRegisterMask(site.getLinkage()->getProperties().getPreservedRegisterMapForGC());2039cg()->addSnippet(snippet);2040}20412042void J9::X86::PrivateLinkage::buildCallArguments(TR::X86CallSite &site)2043{2044site.setArgSize(buildArgs(site.getCallNode(), site.getPreConditionsUnderConstruction()));2045}20462047bool J9::X86::PrivateLinkage::buildVirtualGuard(TR::X86CallSite &site, TR::LabelSymbol *revirtualizeLabel)2048{2049TR_ASSERT(site.getVirtualGuardKind() != TR_NoGuard, "site must require a virtual guard");20502051TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());20522053static TR_AtomicRegion vgnopAtomicRegions[] =2054{2055// Don't yet know whether we're patching using a self-loop or a 2-byte2056// jmp, but it doesn't matter because they are both 2 bytes.2057//2058{ 0x0, 5 },2059{ 0,0 }2060};20612062TR::Node *callNode = site.getCallNode();20632064// Modify following logic, also need update indirectDispatchWillBuildVirtualGuard2065// it is a none side effect version of this method that detect if virtual guard will be created.20662067if (comp()->performVirtualGuardNOPing())2068{2069TR_VirtualGuard *virtualGuard =2070TR_VirtualGuard::createGuardedDevirtualizationGuard(site.getVirtualGuardKind(), comp(), callNode);20712072TR::Instruction *patchable =2073generateVirtualGuardNOPInstruction(callNode, virtualGuard->addNOPSite(), NULL, revirtualizeLabel, cg());20742075if (comp()->target().isSMP())2076generatePatchableCodeAlignmentInstruction(vgnopAtomicRegions, patchable, cg());2077// HCR in J9::X86::PrivateLinkage::buildRevirtualizedCall2078if (comp()->getOption(TR_EnableHCR))2079{2080TR_VirtualGuard* HCRGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HCRGuard, comp(), callNode);2081TR::Instruction *HCRpatchable = generateVirtualGuardNOPInstruction(callNode, HCRGuard->addNOPSite(), NULL, revirtualizeLabel, cg());2082if (comp()->target().isSMP())2083generatePatchableCodeAlignmentInstruction(vgnopAtomicRegions, HCRpatchable, cg());2084}2085return true;2086}2087else if (site.getVirtualGuardKind() == TR_NonoverriddenGuard2088&& !comp()->getOption(TR_EnableHCR) // If patching is off, devirtualization is not safe in HCR mode2089&& !comp()->getOption(TR_MimicInterpreterFrameShape)) // Explicitly-guarded devirtualization is pretty pointless without inlining2090{2091// We can do an explicit guard2092//2093uint32_t overRiddenBit = fej9->offsetOfIsOverriddenBit();2094TR::InstOpCode::Mnemonic opCode;20952096if (overRiddenBit <= 0xff)2097opCode = TR::InstOpCode::TEST1MemImm1;2098else2099opCode = TR::InstOpCode::TEST4MemImm4;21002101generateMemImmInstruction(2102opCode,2103callNode,2104generateX86MemoryReference((intptr_t)site.getResolvedMethod()->addressContainingIsOverriddenBit(), cg()),2105overRiddenBit,2106cg()2107);21082109generateLabelInstruction(TR::InstOpCode::JNE4, callNode, revirtualizeLabel, cg());21102111return true;2112}2113else2114{2115// Can't do guarded devirtualization2116//2117return false;2118}2119}21202121TR::Instruction *J9::X86::PrivateLinkage::buildVFTCall(TR::X86CallSite &site, TR::InstOpCode dispatchOp, TR::Register *targetAddressReg, TR::MemoryReference *targetAddressMemref)2122{2123TR::Node *callNode = site.getCallNode();2124if (cg()->enableSinglePrecisionMethods() &&2125comp()->getJittedMethodSymbol()->usesSinglePrecisionMode())2126{2127auto cds = cg()->findOrCreate2ByteConstant(callNode, DOUBLE_PRECISION_ROUND_TO_NEAREST);2128generateMemInstruction(TR::InstOpCode::LDCWMem, callNode, generateX86MemoryReference(cds, cg()), cg());2129}21302131TR::Instruction *callInstr;2132if (dispatchOp.sourceIsMemRef())2133{2134TR_ASSERT(targetAddressMemref, "Call via memory requires memref");2135// Fix the displacement at 4 bytes so j2iVirtual can decode it if necessary2136if (targetAddressMemref)2137targetAddressMemref->setForceWideDisplacement();2138callInstr = generateCallMemInstruction(dispatchOp.getOpCodeValue(), callNode, targetAddressMemref, cg());2139}2140else2141{2142TR_ASSERT(targetAddressReg, "Call via register requires register");2143TR::Node *callNode = site.getCallNode();2144TR::ResolvedMethodSymbol *resolvedMethodSymbol = callNode->getSymbol()->getResolvedMethodSymbol();2145bool mayReachJ2IThunk = true;2146if (resolvedMethodSymbol &&2147(resolvedMethodSymbol->getRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchDirect ||2148resolvedMethodSymbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_dispatchComputedStaticCall))2149mayReachJ2IThunk = false;2150if (mayReachJ2IThunk && dispatchOp.isCallOp())2151{2152// Bad news.2153//2154// icallVMprJavaSendPatchupVirtual requires that a virtual call site2155// either (1) uses a TR::InstOpCode::CALLMem with a fixed VFT offset, or (2) puts the2156// VFT index into r8 and uses a TR::InstOpCode::CALLImm4 with a fixed call target.2157// We have neither a fixed VFT offset nor a fixed call target!2158// Adding support for TR::InstOpCode::CALLReg is difficult because the instruction is2159// a different length, making it hard to back up and disassemble it.2160//2161// Therefore, we cannot have the return address pointing after a2162// TR::InstOpCode::CALLReg instruction. Instead, we use a TR::InstOpCode::CALLImm4 with a fixed2163// displacement to get to out-of-line instructions that do a TR::InstOpCode::JMPReg.21642165// Mainline call2166//2167TR::LabelSymbol *jmpLabel = TR::LabelSymbol::create(cg()->trHeapMemory(),cg());2168callInstr = generateLabelInstruction(TR::InstOpCode::CALLImm4, callNode, jmpLabel, cg());21692170// Jump outlined2171//2172{2173TR_OutlinedInstructionsGenerator og(jmpLabel, callNode, cg());2174generateRegInstruction(TR::InstOpCode::JMPReg, callNode, targetAddressReg, cg());2175og.endOutlinedInstructionSequence();2176}21772178// The targetAddressReg doesn't appear to be used in mainline code, so2179// register assignment may do weird things like spill it. We'd prefer it2180// to stay in a register, though we don't care which.2181//2182TR::RegisterDependencyConditions *dependencies = site.getPostConditionsUnderConstruction();2183if (targetAddressReg && targetAddressReg->getRegisterPair())2184{2185dependencies->unionPreCondition(targetAddressReg->getRegisterPair()->getHighOrder(), TR::RealRegister::NoReg, cg());2186dependencies->unionPreCondition(targetAddressReg->getRegisterPair()->getLowOrder(), TR::RealRegister::NoReg, cg());2187}2188else2189dependencies->unionPreCondition(targetAddressReg, TR::RealRegister::NoReg, cg());2190}2191else2192{2193callInstr = generateRegInstruction(dispatchOp.getOpCodeValue(), callNode, targetAddressReg, cg());2194}2195}21962197callInstr->setNeedsGCMap(site.getPreservedRegisterMask());21982199TR_ASSERT_FATAL(2200!site.getSymbolReference()->isUnresolved() || site.getMethodSymbol()->isInterface(),2201"buildVFTCall: unresolved virtual site");22022203if (cg()->enableSinglePrecisionMethods() &&2204comp()->getJittedMethodSymbol()->usesSinglePrecisionMode())2205{2206auto cds = cg()->findOrCreate2ByteConstant(callNode, SINGLE_PRECISION_ROUND_TO_NEAREST);2207generateMemInstruction(TR::InstOpCode::LDCWMem, callNode, generateX86MemoryReference(cds, cg()), cg());2208}22092210return callInstr;2211}22122213TR::Register *J9::X86::PrivateLinkage::buildCallPostconditions(TR::X86CallSite &site)2214{2215TR::RegisterDependencyConditions *dependencies = site.getPostConditionsUnderConstruction();2216TR_ASSERT(dependencies != NULL, "assertion failure");22172218const TR::X86LinkageProperties &properties = getProperties();2219const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;2220TR::Node *callNode = site.getCallNode();2221TR::MethodSymbol *methodSymbol = callNode->getSymbolReference()->getSymbol()->castToMethodSymbol();2222bool calleePreservesRegisters = methodSymbol->preservesAllRegisters();22232224#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION2225// AES helpers actually use Java private linkage and do not preserve all2226// registers. This should really be handled by the linkage.2227//2228if (cg()->enableAESInHardwareTransformations() && methodSymbol && methodSymbol->isHelper())2229{2230TR::SymbolReference *methodSymRef = callNode->getSymbolReference();2231switch (methodSymRef->getReferenceNumber())2232{2233case TR_doAESInHardwareInner:2234case TR_expandAESKeyInHardwareInner:2235calleePreservesRegisters = false;2236break;22372238default:2239break;2240}2241}2242#endif22432244// We have to be careful to allocate the return register after the2245// dependency conditions for the other killed registers have been set up,2246// otherwise it will be marked as interfering with them.22472248// Figure out which is the return register2249//2250TR::RealRegister::RegNum returnRegIndex, highReturnRegIndex=noReg;2251TR_RegisterKinds returnKind;2252switch(callNode->getDataType())2253{2254default:2255TR_ASSERT(0, "Unrecognized call node data type: #%d", (int)callNode->getDataType());2256// fall through2257case TR::NoType:2258returnRegIndex = noReg;2259returnKind = TR_NoRegister;2260break;2261case TR::Int64:2262if (cg()->usesRegisterPairsForLongs())2263{2264returnRegIndex = getProperties().getLongLowReturnRegister();2265highReturnRegIndex = getProperties().getLongHighReturnRegister();2266returnKind = TR_GPR;2267break;2268}2269// else fall through2270case TR::Int8:2271case TR::Int16:2272case TR::Int32:2273case TR::Address:2274returnRegIndex = getProperties().getIntegerReturnRegister();2275returnKind = TR_GPR;2276break;2277case TR::Float:2278case TR::Double:2279returnRegIndex = getProperties().getFloatReturnRegister();2280returnKind = TR_FPR;2281break;2282}22832284// Find the registers that are already in the postconditions so we don't add them again.2285// (The typical example is the ramMethod.)2286//2287int32_t gprsAlreadyPresent = TR::RealRegister::noRegMask;2288TR::RegisterDependencyGroup *group = dependencies->getPostConditions();2289for (int i = 0; i < dependencies->getAddCursorForPost(); i++)2290{2291TR::RegisterDependency *dep = group->getRegisterDependency(i);2292TR_ASSERT(dep->getRealRegister() <= TR::RealRegister::LastAssignableGPR, "Currently, only GPRs can be added to call postcondition before buildCallPostconditions; found %s", cg()->getDebug()->getRealRegisterName(dep->getRealRegister()-1));2293gprsAlreadyPresent |= TR::RealRegister::gprMask((TR::RealRegister::RegNum)dep->getRealRegister());2294}22952296// Add postconditions indicating the state of arg regs (other than the return reg)2297//2298if (calleePreservesRegisters)2299{2300// For all argument-register preconditions, add an identical2301// postcondition, thus indicating that the arguments are preserved.2302// Note: this assumes the postcondition regdeps have preconditions too; see COPY_PRECONDITIONS_TO_POSTCONDITIONS.2303//2304TR::RegisterDependencyGroup *preConditions = dependencies->getPreConditions();2305for (int i = 0; i < dependencies->getAddCursorForPre(); i++)2306{2307TR::RegisterDependency *preCondition = preConditions->getRegisterDependency(i);2308TR::RealRegister::RegNum regIndex = preCondition->getRealRegister();23092310if (regIndex <= TR::RealRegister::LastAssignableGPR && (gprsAlreadyPresent & TR::RealRegister::gprMask(regIndex)))2311continue;23122313if (2314regIndex != returnRegIndex && regIndex != highReturnRegIndex2315&& (properties.isIntegerArgumentRegister(regIndex) || properties.isFloatArgumentRegister(regIndex))2316){2317dependencies->addPostCondition(preCondition->getRegister(), regIndex, cg());2318}2319}2320}2321else2322{2323// Kill all non-preserved int and float regs besides the return register,2324// by assigning them to unused virtual registers2325//2326TR::RealRegister::RegNum regIndex;23272328for (regIndex = TR::RealRegister::FirstGPR; regIndex <= TR::RealRegister::LastAssignableGPR; regIndex = (TR::RealRegister::RegNum)(regIndex + 1))2329{2330// Skip non-assignable registers2331//2332if (machine()->getRealRegister(regIndex)->getState() == TR::RealRegister::Locked)2333continue;23342335// Skip registers already present2336if (gprsAlreadyPresent & TR::RealRegister::gprMask(regIndex))2337continue;23382339if ((regIndex != returnRegIndex) && (regIndex != highReturnRegIndex) && !properties.isPreservedRegister(regIndex))2340{2341TR::Register *dummy = cg()->allocateRegister(TR_GPR);2342dummy->setPlaceholderReg();2343dependencies->addPostCondition(dummy, regIndex, cg());2344cg()->stopUsingRegister(dummy);2345}2346}23472348TR_LiveRegisters *lr = cg()->getLiveRegisters(TR_FPR);2349if(!lr || lr->getNumberOfLiveRegisters() > 0)2350{2351for (regIndex = TR::RealRegister::FirstXMMR; regIndex <= TR::RealRegister::LastXMMR; regIndex = (TR::RealRegister::RegNum)(regIndex + 1))2352{2353TR_ASSERT(regIndex != highReturnRegIndex, "highReturnRegIndex should not be an XMM register.");2354if ((regIndex != returnRegIndex) && !properties.isPreservedRegister(regIndex))2355{2356TR::Register *dummy = cg()->allocateRegister(TR_FPR);2357dummy->setPlaceholderReg();2358dependencies->addPostCondition(dummy, regIndex, cg());2359cg()->stopUsingRegister(dummy);2360}2361}2362}2363}23642365// Preserve the VM thread register2366//2367dependencies->addPostCondition(cg()->getMethodMetaDataRegister(), getProperties().getMethodMetaDataRegister(), cg());23682369// Now that everything is dead, we can allocate the return register without2370// interference2371//2372TR::Register *returnRegister;2373if (highReturnRegIndex)2374{2375TR::Register *lo = cg()->allocateRegister(returnKind);2376TR::Register *hi = cg()->allocateRegister(returnKind);2377returnRegister = cg()->allocateRegisterPair(lo, hi);2378dependencies->addPostCondition(lo, returnRegIndex, cg());2379dependencies->addPostCondition(hi, highReturnRegIndex, cg());2380}2381else if (returnRegIndex)2382{2383TR_ASSERT(returnKind != TR_NoRegister, "assertion failure");2384if (callNode->getDataType() == TR::Address)2385{2386returnRegister = cg()->allocateCollectedReferenceRegister();2387}2388else2389{2390returnRegister = cg()->allocateRegister(returnKind);2391if (callNode->getDataType() == TR::Float)2392returnRegister->setIsSinglePrecision();2393}2394dependencies->addPostCondition(returnRegister, returnRegIndex, cg());2395}2396else2397{2398returnRegister = NULL;2399}24002401return returnRegister;2402}240324042405void J9::X86::PrivateLinkage::buildVPIC(TR::X86CallSite &site, TR::LabelSymbol *entryLabel, TR::LabelSymbol *doneLabel)2406{2407TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());2408TR_ASSERT(doneLabel, "a doneLabel is required for VPIC dispatches");24092410if (entryLabel)2411generateLabelInstruction(TR::InstOpCode::label, site.getCallNode(), entryLabel, cg());24122413int32_t numVPicSlots = VPicParameters.defaultNumberOfSlots;24142415TR::SymbolReference *callHelperSymRef =2416cg()->symRefTab()->findOrCreateRuntimeHelper(TR_X86populateVPicSlotCall, true, true, false);24172418if (numVPicSlots > 1)2419{2420TR::X86PICSlot emptyPicSlot = TR::X86PICSlot(VPicParameters.defaultSlotAddress, NULL);2421emptyPicSlot.setNeedsShortConditionalBranch();2422emptyPicSlot.setJumpOnNotEqual();2423emptyPicSlot.setNeedsPicSlotAlignment();2424emptyPicSlot.setHelperMethodSymbolRef(callHelperSymRef);2425emptyPicSlot.setGenerateNextSlotLabelInstruction();24262427// Generate all slots except the last2428// (short branch to next slot, jump to doneLabel)2429//2430while (--numVPicSlots)2431{2432TR::LabelSymbol *nextSlotLabel = generateLabelSymbol(cg());2433buildPICSlot(emptyPicSlot, nextSlotLabel, doneLabel, site);2434}2435}24362437// Generate the last slot2438// (long branch to lookup snippet, fall through to doneLabel)2439//2440TR::X86PICSlot lastPicSlot = TR::X86PICSlot(VPicParameters.defaultSlotAddress, NULL, false);2441lastPicSlot.setJumpOnNotEqual();2442lastPicSlot.setNeedsPicSlotAlignment();2443lastPicSlot.setNeedsLongConditionalBranch();24442445if (comp()->target().is32Bit())2446{2447lastPicSlot.setNeedsPicCallAlignment();2448}24492450lastPicSlot.setHelperMethodSymbolRef(callHelperSymRef);24512452TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg());24532454TR::Instruction *slotPatchInstruction = buildPICSlot(lastPicSlot, snippetLabel, NULL, site);24552456TR::Instruction *startOfPicInstruction = site.getFirstPICSlotInstruction();24572458TR::X86PicDataSnippet *snippet = new (trHeapMemory()) TR::X86PicDataSnippet(2459VPicParameters.defaultNumberOfSlots,2460startOfPicInstruction,2461snippetLabel,2462doneLabel,2463site.getSymbolReference(),2464slotPatchInstruction,2465site.getThunkAddress(),2466false,2467cg());24682469snippet->gcMap().setGCRegisterMask(site.getPreservedRegisterMask());2470cg()->addSnippet(snippet);24712472cg()->incPicSlotCountBy(VPicParameters.defaultNumberOfSlots);2473cg()->reserveNTrampolines(VPicParameters.defaultNumberOfSlots);2474}24752476void J9::X86::PrivateLinkage::buildInterfaceDispatchUsingLastITable (TR::X86CallSite &site, int32_t numIPicSlots, TR::X86PICSlot &lastPicSlot, TR::Instruction *&slotPatchInstruction, TR::LabelSymbol *doneLabel, TR::LabelSymbol *lookupDispatchSnippetLabel, TR_OpaqueClassBlock *declaringClass, uintptr_t itableIndex )2477{2478static char *breakBeforeInterfaceDispatchUsingLastITable = feGetEnv("TR_breakBeforeInterfaceDispatchUsingLastITable");24792480TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());24812482TR::Node *callNode = site.getCallNode();24832484TR::LabelSymbol *lastITableTestLabel = generateLabelSymbol(cg());2485TR::LabelSymbol *lastITableDispatchLabel = generateLabelSymbol(cg());24862487if (numIPicSlots >= 1)2488{2489// The last PIC slot looks much like the others2490//2491lastPicSlot.setNeedsShortConditionalBranch();2492lastPicSlot.setNeedsJumpToDone();2493slotPatchInstruction = buildPICSlot(lastPicSlot, lastITableTestLabel, doneLabel, site);2494}2495else2496{2497// The sequence below requires control to flow straight to lastITableTestLabel2498// TODO: This is lame. Without IPIC slots, generating this sequence2499// upside-down is sub-optimal.2500//2501generateLabelInstruction(TR::InstOpCode::JMP4, callNode, lastITableTestLabel, cg());2502}25032504TR::Register *vftReg = site.evaluateVFT();2505TR::Register *scratchReg = cg()->allocateRegister();2506TR::Register *vtableIndexReg = cg()->allocateRegister();2507TR::RegisterDependencyConditions* vtableIndexRegDeps = generateRegisterDependencyConditions(1, 0, cg());2508vtableIndexRegDeps->addPreCondition(vtableIndexReg, getProperties().getVTableIndexArgumentRegister(), cg());2509// Now things get weird.2510//2511// We're going to generate the lastITable sequence upside-down.2512// We'll generate the dispatch sequence first, and THEN we'll generate2513// the test that guards that dispatch.2514//2515// Why?2516//2517// 1) You can't call a j2i thunk with your return address pointing at a2518// TR::InstOpCode::CALLMem unless that TR::InstOpCode::CALLMem has a displacement which equals the jit2519// vtable offset. We don't know the vtable offset statically, so we2520// must pass it in r8 and leave the return address pointing at a CALLImm.2521//2522// 2) PICBuilder needs to work with or without this lastITable dispatch.2523// To avoid extreme complexity in PICBuilder, that means the return2524// address should point at a sequence that looks enough like a PIC2525// slot that PICBuilder can act the same for both.2526//2527// 3) Given 1&2 above, the natural thing to do would be to put the2528// dispatch sequence out of line. However, we expect this to be2529// performance-critical, so we want it nearby. It just so happens2530// that the previous PIC slot ends with an unconditional jump, so we2531// can just stuff the dispatch sequence right between the last PIC2532// slot and the lastITable test.2533//2534// The final layout looks like this:2535//2536// jne lastITableTest ; PREVIOUS PIC SLOT2537// call xxx ; PREVIOUS PIC SLOT2538// jmp done ; PREVIOUS PIC SLOT2539// lastITableDispatch:2540// mov r8, sizeof(J9Class)2541// sub r8, [rdi + ITableSlotOffset] ; r8 = jit vtable offset2542// jmp [vft + r8] ; vtable dispatch2543// lastITableTest:2544// mov rdi, [vft + lastITableOffset] ; cached ITable2545// cmp [rdi + interfaceClassOffset], interfaceClass ; check if it's our interface class2546// jne lookupDispatchSnippet ; if not, jump to the slow path2547// call lastITableDispatch ; if so, call the dispatch sequence with return address pointing here2548// done:2549// ...25502551// The dispatch sequence2552//25532554TR::Instruction *lastITableDispatchStart = generateLabelInstruction( TR::InstOpCode::label, callNode, lastITableDispatchLabel, cg());2555generateRegImmInstruction( TR::InstOpCode::MOV4RegImm4, callNode, vtableIndexReg, fej9->getITableEntryJitVTableOffset(), cg());2556generateRegMemInstruction( TR::InstOpCode::SUBRegMem(), callNode, vtableIndexReg, generateX86MemoryReference(scratchReg, fej9->convertITableIndexToOffset(itableIndex), cg()), cg());2557buildVFTCall(site, TR::InstOpCode::JMPMem, NULL, generateX86MemoryReference(vftReg, vtableIndexReg, 0, cg()));25582559// Without PIC slots, lastITableDispatchStart takes the place of various "first instruction" pointers2560//2561if (!site.getFirstPICSlotInstruction())2562site.setFirstPICSlotInstruction(lastITableDispatchStart);2563if (!slotPatchInstruction)2564slotPatchInstruction = lastITableDispatchStart;25652566// The test sequence2567//2568generateLabelInstruction(TR::InstOpCode::label, callNode, lastITableTestLabel, cg());2569if (breakBeforeInterfaceDispatchUsingLastITable)2570generateInstruction(TR::InstOpCode::INT3, callNode, cg());2571generateRegMemInstruction(TR::InstOpCode::LRegMem(), callNode, scratchReg, generateX86MemoryReference(vftReg, (int32_t)fej9->getOffsetOfLastITableFromClassField(), cg()), cg());2572bool use32BitInterfacePointers = comp()->target().is32Bit();2573if (comp()->useCompressedPointers() /* actually compressed object headers */)2574{2575// The field is 8 bytes, but only 4 matter2576use32BitInterfacePointers = true;2577}2578if (use32BitInterfacePointers)2579{2580// The field is 8 bytes, but only 4 matter2581generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4,2582callNode,2583generateX86MemoryReference(scratchReg, fej9->getOffsetOfInterfaceClassFromITableField(), cg()),2584(int32_t)(intptr_t)declaringClass,2585cg());2586}2587else2588{2589TR_ASSERT(comp()->target().is64Bit(), "Only 64-bit path should reach here.");2590TR::Register *interfaceClassReg = vtableIndexReg;2591auto cds = cg()->findOrCreate8ByteConstant(site.getCallNode(), (intptr_t)declaringClass);2592TR::MemoryReference *interfaceClassAddr = generateX86MemoryReference(cds, cg());2593generateRegMemInstruction(TR::InstOpCode::LRegMem(), callNode, interfaceClassReg, interfaceClassAddr, cg());2594generateMemRegInstruction(TR::InstOpCode::CMPMemReg(),2595callNode,2596generateX86MemoryReference(scratchReg, fej9->getOffsetOfInterfaceClassFromITableField(), cg()),2597interfaceClassReg, cg());2598}25992600generateLongLabelInstruction(TR::InstOpCode::JNE4, callNode, lookupDispatchSnippetLabel, cg()); // PICBuilder needs this to have a 4-byte offset2601if (comp()->target().is32Bit())2602generatePaddingInstruction(3, callNode, cg());2603generateLabelInstruction(TR::InstOpCode::CALLImm4, callNode, lastITableDispatchLabel, vtableIndexRegDeps, cg());26042605cg()->stopUsingRegister(vtableIndexReg);2606TR::RealRegister::RegNum otherScratchRegister = getProperties().getJ9MethodArgumentRegister(); // scratch reg other than the vtable index reg2607site.addPostCondition(scratchReg, otherScratchRegister);2608site.addPostCondition(vftReg, TR::RealRegister::NoReg);2609}261026112612