Path: blob/master/runtime/compiler/z/codegen/S390PrivateLinkage.cpp
6004 views
/*******************************************************************************1* Copyright (c) 2000, 2021 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "codegen/S390PrivateLinkage.hpp"2324#include "codegen/CodeGenerator.hpp"25#include "codegen/GCStackAtlas.hpp"26#include "codegen/Linkage_inlines.hpp"27#include "codegen/Snippet.hpp"28#include "compile/ResolvedMethod.hpp"29#include "compile/VirtualGuard.hpp"30#include "env/CHTable.hpp"31#include "env/CompilerEnv.hpp"32#include "env/J2IThunk.hpp"33#include "env/PersistentCHTable.hpp"34#include "env/StackMemoryRegion.hpp"35#include "env/VMJ9.h"36#include "env/jittypes.h"37#include "env/j9method.h"38#include "il/Node.hpp"39#include "il/Node_inlines.hpp"40#include "il/ParameterSymbol.hpp"41#include "il/TreeTop.hpp"42#include "il/TreeTop_inlines.hpp"43#include "infra/InterferenceGraph.hpp"44#include "z/codegen/OpMemToMem.hpp"45#include "z/codegen/S390Evaluator.hpp"46#include 
"z/codegen/S390GenerateInstructions.hpp"47#include "z/codegen/S390HelperCallSnippet.hpp"48#include "z/codegen/S390J9CallSnippet.hpp"49#include "z/codegen/S390StackCheckFailureSnippet.hpp"50#include "z/codegen/SystemLinkage.hpp"51#include "z/codegen/SystemLinkagezOS.hpp"52#include "runtime/J9Profiler.hpp"53#include "runtime/J9ValueProfiler.hpp"5455#define MIN_PROFILED_CALL_FREQUENCY (.075f)5657////////////////////////////////////////////////////////////////////////////////58// J9::Z::PrivateLinkage for J959////////////////////////////////////////////////////////////////////////////////60J9::Z::PrivateLinkage::PrivateLinkage(TR::CodeGenerator * codeGen,TR_LinkageConventions lc)61: J9::PrivateLinkage(codeGen)62{63setLinkageType(lc);6465// linkage properties66setProperty(SplitLongParm);67setProperty(TwoStackSlotsForLongAndDouble);6869//Preserved Registers7071setRegisterFlag(TR::RealRegister::GPR5, Preserved);72setRegisterFlag(TR::RealRegister::GPR6, Preserved);73setRegisterFlag(TR::RealRegister::GPR7, Preserved);74setRegisterFlag(TR::RealRegister::GPR8, Preserved);75setRegisterFlag(TR::RealRegister::GPR9, Preserved);76setRegisterFlag(TR::RealRegister::GPR10, Preserved);77setRegisterFlag(TR::RealRegister::GPR11, Preserved);78setRegisterFlag(TR::RealRegister::GPR12, Preserved);79setRegisterFlag(TR::RealRegister::GPR13, Preserved);8081#if defined(ENABLE_PRESERVED_FPRS)82setRegisterFlag(TR::RealRegister::FPR8, Preserved);83setRegisterFlag(TR::RealRegister::FPR9, Preserved);84setRegisterFlag(TR::RealRegister::FPR10, Preserved);85setRegisterFlag(TR::RealRegister::FPR11, Preserved);86setRegisterFlag(TR::RealRegister::FPR12, Preserved);87setRegisterFlag(TR::RealRegister::FPR13, Preserved);88setRegisterFlag(TR::RealRegister::FPR14, Preserved);89setRegisterFlag(TR::RealRegister::FPR15, Preserved);90#endif9192setIntegerReturnRegister (TR::RealRegister::GPR2 );93setLongLowReturnRegister (TR::RealRegister::GPR3 );94setLongHighReturnRegister(TR::RealRegister::GPR2 
);95setLongReturnRegister (TR::RealRegister::GPR2 );96setFloatReturnRegister (TR::RealRegister::FPR0 );97setDoubleReturnRegister (TR::RealRegister::FPR0 );98setLongDoubleReturnRegister0 (TR::RealRegister::FPR0 );99setLongDoubleReturnRegister2 (TR::RealRegister::FPR2 );100setLongDoubleReturnRegister4 (TR::RealRegister::FPR4 );101setLongDoubleReturnRegister6 (TR::RealRegister::FPR6 );102103if(comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13) && comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY) &&104!comp()->getOption(TR_DisableSIMD))105{106codeGen->setSupportsVectorRegisters();107codeGen->setSupportsAutoSIMD();108}109else110{111comp()->setOption(TR_DisableSIMD);112}113114const bool enableVectorLinkage = codeGen->getSupportsVectorRegisters();115if (enableVectorLinkage) setVectorReturnRegister(TR::RealRegister::VRF24);116117setStackPointerRegister (TR::RealRegister::GPR5 );118setEntryPointRegister (comp()->target().isLinux() ? TR::RealRegister::GPR4 : TR::RealRegister::GPR15);119setReturnAddressRegister (TR::RealRegister::GPR14);120121setVTableIndexArgumentRegister (TR::RealRegister::GPR0);122setJ9MethodArgumentRegister (TR::RealRegister::GPR1);123124setLitPoolRegister (TR::RealRegister::GPR6 );125setMethodMetaDataRegister(TR::RealRegister::GPR13 );126127setIntegerArgumentRegister(0, TR::RealRegister::GPR1);128setIntegerArgumentRegister(1, TR::RealRegister::GPR2);129setIntegerArgumentRegister(2, TR::RealRegister::GPR3);130setNumIntegerArgumentRegisters(3);131132setFloatArgumentRegister(0, TR::RealRegister::FPR0);133setFloatArgumentRegister(1, TR::RealRegister::FPR2);134setFloatArgumentRegister(2, TR::RealRegister::FPR4);135setFloatArgumentRegister(3, TR::RealRegister::FPR6);136setNumFloatArgumentRegisters(4);137138if (enableVectorLinkage)139{140int vecIndex = 0;141setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF25);142setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF26);143setVectorArgumentRegister(vecIndex++, 
TR::RealRegister::VRF27);144setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF28);145setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF29);146setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF30);147setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF31);148setVectorArgumentRegister(vecIndex++, TR::RealRegister::VRF24);149setNumVectorArgumentRegisters(vecIndex);150}151152setOffsetToFirstLocal (comp()->target().is64Bit() ? -8 : -4);153setOffsetToRegSaveArea (0);154setOffsetToLongDispSlot(0);155setOffsetToFirstParm (0);156int32_t numDeps = 30;157158if (codeGen->getSupportsVectorRegisters())159numDeps += 32; //need to kill VRFs160161setNumberOfDependencyGPRegisters(numDeps);162163setPreservedRegisterMapForGC(0x00001fc0);164setLargestOutgoingArgumentAreaSize(0);165}166167////////////////////////////////////////////////////////////////////////////////168// J9::Z::PrivateLinkage::initS390RealRegisterLinkage - initialize the state169// of real register for register allocator170////////////////////////////////////////////////////////////////////////////////171void172J9::Z::PrivateLinkage::initS390RealRegisterLinkage()173{174TR::RealRegister * sspReal = getSystemStackPointerRealRegister();175TR::RealRegister * spReal = getStackPointerRealRegister();176TR::RealRegister * mdReal = getMethodMetaDataRealRegister();177int32_t icount, ret_count = 0;178179// Lock all the dedicated registers180bool freeingSSPDisabled = true;181182TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());183184if (cg()->supportsJITFreeSystemStackPointer())185freeingSSPDisabled = false;186187if (freeingSSPDisabled)188{189sspReal->setState(TR::RealRegister::Locked);190sspReal->setAssignedRegister(sspReal);191sspReal->setHasBeenAssignedInMethod(true);192}193194// Java Stack pointer195spReal->setState(TR::RealRegister::Locked);196spReal->setAssignedRegister(spReal);197spReal->setHasBeenAssignedInMethod(true);198199// meta data 
register200mdReal->setState(TR::RealRegister::Locked);201mdReal->setAssignedRegister(mdReal);202mdReal->setHasBeenAssignedInMethod(true);203204// set register weight205for (icount = TR::RealRegister::FirstGPR; icount <= TR::RealRegister::GPR3; icount++)206{207int32_t weight;208if (getIntegerReturn((TR::RealRegister::RegNum) icount))209{210weight = ++ret_count;211}212else213{214weight = icount;215}216cg()->machine()->getRealRegister((TR::RealRegister::RegNum) icount)->setWeight(weight);217}218219for (icount = TR::RealRegister::GPR4; icount >= TR::RealRegister::LastAssignableGPR; icount++)220{221cg()->machine()->getRealRegister((TR::RealRegister::RegNum) icount)->setWeight(0xf000 + icount);222}223}224225void J9::Z::PrivateLinkage::alignLocalsOffset(uint32_t &stackIndex, uint32_t localObjectAlignment)226{227if (stackIndex % localObjectAlignment != 0)228{229uint32_t stackIndexBeforeAlignment = stackIndex;230231// TODO: Is the negation here necessary?232stackIndex = -((-stackIndex + (localObjectAlignment - 1)) & ~(localObjectAlignment - 1));233234TR::GCStackAtlas *atlas = cg()->getStackAtlas();235236atlas->setNumberOfSlotsMapped(atlas->getNumberOfSlotsMapped() + ((stackIndexBeforeAlignment - stackIndex) / TR::Compiler->om.sizeofReferenceAddress()));237238if (comp()->getOption(TR_TraceRA))239{240traceMsg(comp(),"\nAlign stack offset before alignment = %d and after alignment = %d\n", stackIndexBeforeAlignment, stackIndex);241}242}243}244245246////////////////////////////////////////////////////////////////////////////////247// J9::Z::PrivateLinkage::mapCompactedStack - maps variables onto the stack, sharing248// stack slots for automatic variables with non-interfering live ranges.249////////////////////////////////////////////////////////////////////////////////250void251J9::Z::PrivateLinkage::mapCompactedStack(TR::ResolvedMethodSymbol * method)252{253ListIterator<TR::AutomaticSymbol> automaticIterator(&method->getAutomaticList());254TR::AutomaticSymbol *localCursor = 
automaticIterator.getFirst();255int32_t firstLocalOffset = getOffsetToFirstLocal();256uint32_t stackIndex = getOffsetToFirstLocal();257TR::GCStackAtlas *atlas = cg()->getStackAtlas();258int32_t i;259uint8_t pointerSize = TR::Compiler->om.sizeofReferenceAddress();260261262#ifdef DEBUG263uint32_t origSize = 0; // the size of the stack had we not compacted it264#endif265266{267TR::StackMemoryRegion stackMemoryRegion(*trMemory());268269int32_t *colourToOffsetMap =270(int32_t *) trMemory()->allocateStackMemory(cg()->getLocalsIG()->getNumberOfColoursUsedToColour() * sizeof(int32_t));271272uint32_t *colourToSizeMap =273(uint32_t *) trMemory()->allocateStackMemory(cg()->getLocalsIG()->getNumberOfColoursUsedToColour() * sizeof(uint32_t));274275for (i=0; i<cg()->getLocalsIG()->getNumberOfColoursUsedToColour(); i++)276{277colourToOffsetMap[i] = -1;278colourToSizeMap[i] = 0;279}280281// Find maximum allocation size for each shared local.282//283TR_IGNode *igNode;284uint32_t size;285IGNodeColour colour;286287for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())288{289igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor);290if(igNode != NULL) // if the local doesn't have an interference graph node, we will just map it without attempt to compact, so we can ignore it291{292colour = igNode->getColour();293294TR_ASSERT(colour != UNCOLOURED, "uncoloured local %p (igNode=%p) found in locals IG\n",295localCursor, igNode);296297if (!(localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject()))298{299size = localCursor->getRoundedSize();300if (size > colourToSizeMap[colour])301{302colourToSizeMap[colour] = size;303}304}305}306}307308ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&method->getVariableSizeSymbolList());309TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();310for (localCursor = variableSizeSymIterator.getFirst(); 
localCursor; localCursor = variableSizeSymIterator.getNext())311{312TR_ASSERT(localCursor->isVariableSizeSymbol(), "Should be variable sized");313igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor);314if(igNode != NULL) // if the local doesn't have an interference graph node, we will just map it without attempt to compact, so we can ignore it315{316colour = igNode->getColour();317TR_ASSERT(colour != UNCOLOURED, "uncoloured local %p (igNode=%p) found in locals IG\n",318localCursor, igNode);319if (!(localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject()))320{321size = localCursor->getRoundedSize();322if (size > colourToSizeMap[colour])323{324colourToSizeMap[colour] = size;325}326}327}328}329330// *************************************how we align local objects********************************331// because the offset of a local object is (stackIndex + pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex))332// In createStackAtlas, we align pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex) by modifying local objects' gc indices333// Here we align the stackIndex334// *************************************how we align local objects********************************335//336traceMsg(comp(), "stackIndex after compaction = %d\n", stackIndex);337338// stackIndex in mapCompactedStack is calculated using only local reference sizes and does not include the padding339stackIndex -= pointerSize * atlas->getNumberOfPaddingSlots();340341traceMsg(comp(), "stackIndex after padding slots = %d\n", stackIndex);342343uint32_t localObjectAlignment = 1 << TR::Compiler->om.compressedReferenceShift();344345if (localObjectAlignment >= 16)346{347// we don't want to fail gc when it tries to uncompress the reference of a stack allocated object, so we aligned the local objects based on the shift amount348// this is different to the alignment of heap objects, which is controlled separately and could be larger than 
2<<shiftamount349alignLocalsOffset(stackIndex, localObjectAlignment);350}351352// Map all garbage collected references together so we can concisely represent353// stack maps. They must be mapped so that the GC map index in each local354// symbol is honoured.355//356#ifdef DEBUG357// to report diagnostic information into the trace log that is guarded by if(debug("reportCL"))358// set the environment variable TR_DEBUG=reportCL359// also note that all diagnostic information is only reported in a debug build360if(debug("reportCL"))361diagnostic("\n****Mapping compacted stack for method: %s\n",comp()->signature());362#endif363364// Here we map the garbage collected references onto the stack365// This stage is reversed later on, since in CodeGenGC we actually set all of the GC offsets366// so effectively the local stack compaction of collected references happens there367// but we must perform this stage to determine the size of the stack that contains object temp slots368int32_t lowGCOffset = stackIndex;369int32_t firstLocalGCIndex = atlas->getNumberOfParmSlotsMapped();370automaticIterator.reset();371for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())372{373if (localCursor->getGCMapIndex() >= 0)374{375TR_IGNode *igNode;376if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))377{378IGNodeColour colour = igNode->getColour();379380if (localCursor->isInternalPointer() || localCursor->isPinningArrayPointer() || localCursor->holdsMonitoredObject())381{382// Regardless of colouring on the local, map an internal383// pointer or a pinning array local. 
These kinds of locals384// do not participate in the compaction of locals phase and385// are handled specially (basically the slots are not shared for386// these autos).387//388#ifdef DEBUG389if(debug("reportCL"))390diagnostic("Mapping uncompactable ref local: %p\n",localCursor);391#endif392mapSingleAutomatic(localCursor, stackIndex);393}394else if (colourToOffsetMap[colour] == -1)395{396#ifdef DEBUG397if(debug("reportCL"))398diagnostic("Mapping first ref local: %p (colour=%d)\n",localCursor, colour);399#endif400mapSingleAutomatic(localCursor, stackIndex);401colourToOffsetMap[colour] = localCursor->getOffset();402}403else404{405traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);406localCursor->setOffset(colourToOffsetMap[colour]);407}408}409else410{411#ifdef DEBUG412if(debug("reportCL"))413diagnostic("No ig node exists for ref local %p, mapping regularly\n",localCursor);414#endif415mapSingleAutomatic(localCursor, stackIndex);416}417418#ifdef DEBUG419origSize += localCursor->getRoundedSize();420#endif421}422}423424// Here is where we reverse the previous stage425// We map local references again to set the stack position correct according to426// the GC map index, which is set in CodeGenGC427//428automaticIterator.reset();429for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())430if (localCursor->getGCMapIndex() >= 0)431{432int32_t newOffset = stackIndex + pointerSize*(localCursor->getGCMapIndex()-firstLocalGCIndex);433434if (comp()->getOption(TR_TraceRA))435traceMsg(comp(), "\nmapCompactedStack: changing %s (GC index %d) offset from %d to %d",436comp()->getDebug()->getName(localCursor), localCursor->getGCMapIndex(), localCursor->getOffset(), newOffset);437438localCursor->setOffset(newOffset);439440TR_ASSERT((localCursor->getOffset() <= 0), "Local %p (GC index %d) offset cannot be positive (stackIndex = %d)\n", localCursor, localCursor->getGCMapIndex(), 
stackIndex);441442if (localCursor->getGCMapIndex() == atlas->getIndexOfFirstInternalPointer())443{444atlas->setOffsetOfFirstInternalPointer(localCursor->getOffset() - firstLocalOffset);445}446}447448method->setObjectTempSlots((lowGCOffset-stackIndex) / pointerSize);449lowGCOffset = stackIndex;450451// Now map the rest of the locals (i.e. non-references)452//453// first map 4-byte locals, then 8-byte (and larger) locals454//455stackIndex -= (stackIndex & 0x4) ? 4 : 0;456automaticIterator.reset();457for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())458if (localCursor->getGCMapIndex() < 0)459{460TR_IGNode *igNode;461if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))462{463IGNodeColour colour = igNode->getColour();464465if(colourToSizeMap[colour] < 8)466{467if (colourToOffsetMap[colour] == -1) // map auto to stack slot468{469#ifdef DEBUG470if(debug("reportCL"))471diagnostic("Mapping first local: %p (colour=%d)\n",localCursor, colour);472#endif473mapSingleAutomatic(localCursor, colourToSizeMap[colour], stackIndex);474colourToOffsetMap[colour] = localCursor->getOffset();475}476else // share local with already mapped stack slot477{478traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);479localCursor->setOffset(colourToOffsetMap[colour]);480}481#ifdef DEBUG482origSize += localCursor->getRoundedSize();483#endif484}485}486else if(localCursor->getRoundedSize() < 8)487{488#ifdef DEBUG489if(debug("reportCL"))490diagnostic("No ig node exists for local %p, mapping regularly\n",localCursor);491origSize += localCursor->getRoundedSize();492#endif493mapSingleAutomatic(localCursor, stackIndex);494}495}496497498variableSizeSymIterator.reset();499variableSizeSymCursor = variableSizeSymIterator.getFirst();500while (variableSizeSymCursor != NULL)501{502if (variableSizeSymCursor->isReferenced())503{504if (cg()->traceBCDCodeGen())505traceMsg(comp(),"map variableSize sym %p 
(size %d) because isReferenced=true ",variableSizeSymCursor,variableSizeSymCursor->getSize());506mapSingleAutomatic(variableSizeSymCursor, stackIndex); //Ivan507if (cg()->traceBCDCodeGen())508traceMsg(comp(),"to auto offset %d\n",variableSizeSymCursor->getOffset());509}510else if (cg()->traceBCDCodeGen())511{512traceMsg(comp(),"do not map variableSize sym %p (size %d) because isReferenced=false\n",variableSizeSymCursor,variableSizeSymCursor->getSize());513}514variableSizeSymCursor = variableSizeSymIterator.getNext();515}516517// Ensure the frame is double-word aligned, since we're about to map 8-byte autos518//519#ifdef DEBUG520origSize += (origSize & 0x4) ? 4 : 0;521#endif522stackIndex -= (stackIndex & 0x4) ? 4 : 0;523524TR_ASSERT((stackIndex % pointerSize) == 0,525"size of scalar temp area not a multiple of Java pointer size");526527// now map 8-byte autos528//529automaticIterator.reset();530for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())531if (localCursor->getGCMapIndex() < 0)532{533TR_IGNode *igNode;534if (igNode = cg()->getLocalsIG()->getIGNodeForEntity(localCursor))535{536IGNodeColour colour = igNode->getColour();537538if(colourToSizeMap[colour] >= 8)539{540if (colourToOffsetMap[colour] == -1) // map auto to stack slot541{542#ifdef DEBUG543if(debug("reportCL"))544diagnostic("Mapping first local: %p (colour=%d)\n",localCursor, colour);545#endif546stackIndex -= (stackIndex & 0x4) ? 
4 : 0;547mapSingleAutomatic(localCursor, colourToSizeMap[colour], stackIndex);548colourToOffsetMap[colour] = localCursor->getOffset();549}550else // share local with already mapped stack slot551{552traceMsg(comp(), "O^O COMPACT LOCALS: Sharing slot for local %p (colour = %d)\n",localCursor, colour);553localCursor->setOffset(colourToOffsetMap[colour]);554}555#ifdef DEBUG556origSize += localCursor->getRoundedSize();557#endif558}559}560else if(localCursor->getRoundedSize() >= 8)561{562#ifdef DEBUG563if(debug("reportCL"))564diagnostic("No ig node exists for local %p, mapping regularly\n",localCursor);565origSize += localCursor->getRoundedSize();566#endif567stackIndex -= (stackIndex & 0x4) ? 4 : 0;568mapSingleAutomatic(localCursor, stackIndex);569}570}571572// Map slot for Long Displacement573// Pick an arbitrary large number that is less than574// long disp (4K) to identify that we are no-where near575// a large stack or a large lit-pool576577//stackIndex -= pointerSize;578stackIndex -= 16; // see defect 162458, 164661579#ifdef DEBUG580// origSize += pointerSize;581origSize += 16;582#endif583setOffsetToLongDispSlot((uint32_t) (-((int32_t)stackIndex)));584585586// msf - aligning the start of the parm list may not always587// be best, but if a long is passed into a virtual fn, it will588// then be aligned (and therefore can efficiently be accessed)589// a better approach would be to look at the signature and determine590// the best overall way to align the stack given that the parm list591// is contiguous in storage to make it easy on the interpreter592// and therefore there may not be a 'best' way to align the storage.593// This change was made upon noticing that sometimes getObject() is594// very hot and references its data from backing storage often.595// it is possible that the stack might not be double-word aligned, due to mapping for long displacement if the pointer size is 4596#ifdef DEBUG597origSize += (origSize & 0x4) ? 
4 : 0;598#endif599stackIndex -= (stackIndex & 0x4) ? 4 : 0;600601method->setScalarTempSlots((lowGCOffset-stackIndex) / pointerSize);602method->setLocalMappingCursor(stackIndex);603604mapIncomingParms(method);605606atlas->setLocalBaseOffset(lowGCOffset - firstLocalOffset);607atlas->setParmBaseOffset(atlas->getParmBaseOffset() + getOffsetToFirstParm() - firstLocalOffset);608609} // scope of the stack memory region610611#ifdef DEBUG612automaticIterator.reset();613614// report stack mapping even if TR_DEBUG=reportCL isn't set615diagnostic("\n****SYMBOL OFFSETS\n");616for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())617{618diagnostic("Local %p, offset=%d\n", localCursor, localCursor->getOffset());619}620621if (debug("reportCL"))622{623624int mappedSize = firstLocalOffset - stackIndex;625diagnostic("\n**** Mapped locals size: %d (orig map size=%d, shared size=%d) %s\n",626(mappedSize),627origSize,628origSize - mappedSize,629comp()->signature());630}631#endif632633}634635void636J9::Z::PrivateLinkage::mapStack(TR::ResolvedMethodSymbol * method)637{638639if (cg()->getLocalsIG() && cg()->getSupportsCompactedLocals())640{641mapCompactedStack(method);642return;643}644645646ListIterator<TR::AutomaticSymbol> automaticIterator(&method->getAutomaticList());647TR::AutomaticSymbol * localCursor = automaticIterator.getFirst();648TR::RealRegister::RegNum regIndex;649int32_t firstLocalOffset = getOffsetToFirstLocal();650uint32_t stackIndex = firstLocalOffset;651int32_t lowGCOffset;652TR::GCStackAtlas * atlas = cg()->getStackAtlas();653654// map all garbage collected references together so can concisely represent655// stack maps. 
They must be mapped so that the GC map index in each local656// symbol is honoured.657lowGCOffset = stackIndex;658int32_t firstLocalGCIndex = atlas->getNumberOfParmSlotsMapped();659660stackIndex -= (atlas->getNumberOfSlotsMapped() - firstLocalGCIndex) * TR::Compiler->om.sizeofReferenceAddress();661662uint32_t localObjectAlignment = 1 << TR::Compiler->om.compressedReferenceShift();663664if (localObjectAlignment >= 16)665{666// we don't want to fail gc when it tries to uncompress the reference of a stack allocated object, so we aligned the local objects based on the shift amount667// this is different to the alignment of heap objects, which is controlled separately and could be larger than 2<<shiftamount668alignLocalsOffset(stackIndex, localObjectAlignment);669}670671// Map local references again to set the stack position correct according to672// the GC map index.673//674for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())675{676if (localCursor->getGCMapIndex() >= 0)677{678localCursor->setOffset(stackIndex + TR::Compiler->om.sizeofReferenceAddress() * (localCursor->getGCMapIndex() - firstLocalGCIndex));679}680if (localCursor->getGCMapIndex() == atlas->getIndexOfFirstInternalPointer())681{682atlas->setOffsetOfFirstInternalPointer(localCursor->getOffset() - firstLocalOffset);683}684}685686method->setObjectTempSlots((lowGCOffset - stackIndex) / TR::Compiler->om.sizeofReferenceAddress());687lowGCOffset = stackIndex;688689stackIndex -= (stackIndex & 0x4) ? 
4 : 0;690691// Now map the rest of the locals692//693ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&method->getVariableSizeSymbolList());694TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();695while (variableSizeSymCursor != NULL)696{697TR_ASSERT(variableSizeSymCursor->isVariableSizeSymbol(), "should be variable sized");698if (variableSizeSymCursor->isReferenced())699{700if (cg()->traceBCDCodeGen())701traceMsg(comp(),"map variableSize sym %p (size %d) because isReferenced=true ",variableSizeSymCursor,variableSizeSymCursor->getSize());702mapSingleAutomatic(variableSizeSymCursor, stackIndex); //Ivan703if (cg()->traceBCDCodeGen())704traceMsg(comp(),"to auto offset %d\n",variableSizeSymCursor->getOffset());705}706else if (cg()->traceBCDCodeGen())707{708traceMsg(comp(),"do not map variableSize sym %p (size %d) because isReferenced=false\n",variableSizeSymCursor,variableSizeSymCursor->getSize());709}710variableSizeSymCursor = variableSizeSymIterator.getNext();711}712713automaticIterator.reset();714localCursor = automaticIterator.getFirst();715716while (localCursor != NULL)717{718if (localCursor->getGCMapIndex() < 0 && !TR::Linkage::needsAlignment(localCursor->getDataType(), cg()))719{720mapSingleAutomatic(localCursor, stackIndex);721}722localCursor = automaticIterator.getNext();723}724725automaticIterator.reset();726localCursor = automaticIterator.getFirst();727728// align double - but there is more to do to align the stack in general as double.729while (localCursor != NULL)730{731if (localCursor->getGCMapIndex() < 0 && TR::Linkage::needsAlignment(localCursor->getDataType(), cg()))732{733stackIndex -= (stackIndex & 0x4) ? 
4 : 0;734mapSingleAutomatic(localCursor, stackIndex);735}736localCursor = automaticIterator.getNext();737}738739// Force the stack size to be increased by...740if (comp()->getOption(TR_Randomize) && comp()->getOptions()->get390StackBufferSize() == 0)741{742if (cg()->randomizer.randomBoolean(300) && performTransformation(comp(),"O^O Random Codegen - Added 5000 dummy slots to Java Stack frame to test large displacement.\n"))743{744stackIndex -= 5000;745}746else747{748stackIndex -= 0;749}750}751else752{753stackIndex -= (comp()->getOptions()->get390StackBufferSize()/4)*4;754}755756757stackIndex -= (stackIndex & 0x4) ? 4 : 0;758759// Pick an arbitrary large number that is less than760// long disp (4K) to identify that we are no-where near761// a large stack or a large lit-pool762//763764stackIndex -= 16; // see defect 162458, 164661765setOffsetToLongDispSlot((uint32_t) (-((int32_t)stackIndex)));766767method->setScalarTempSlots((lowGCOffset - stackIndex) / TR::Compiler->om.sizeofReferenceAddress());768method->setLocalMappingCursor(stackIndex);769770// msf - aligning the start of the parm list may not always771// be best, but if a long is passed into a virtual fn, it will772// then be aligned (and therefore can efficiently be accessed)773// a better approach would be to look at the signature and determine774// the best overall way to align the stack given that the parm list775// is contiguous in storage to make it easy on the interpreter776// and therefore there may not be a 'best' way to align the storage.777// This change was made upon noticing that sometimes getObject() is778// very hot and references it's data from backing storage often.779stackIndex -= (stackIndex & 0x4) ? 
4 : 0;

   mapIncomingParms(method);

   atlas->setLocalBaseOffset(lowGCOffset - firstLocalOffset);
   atlas->setParmBaseOffset(atlas->getParmBaseOffset() + getOffsetToFirstParm() - firstLocalOffset);

#ifdef DEBUG
   automaticIterator.reset();
   diagnostic("\n****SYMBOL OFFSETS\n");
   for (localCursor = automaticIterator.getFirst(); localCursor; localCursor = automaticIterator.getNext())
      {
      diagnostic("Local %p, offset=%d\n", localCursor, localCursor->getOffset());
      }
#endif

   }

////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::mapSingleAutomatic - maps an automatic onto the stack
// with size p->getRoundedSize()
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol * p, uint32_t & stackIndex)
   {
   // Convenience overload: use the symbol's own rounded size.
   mapSingleAutomatic(p, p->getRoundedSize(), stackIndex);
   }

////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::mapSingleAutomatic - maps an automatic onto the stack
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::mapSingleAutomatic(TR::AutomaticSymbol * p, uint32_t size, uint32_t & stackIndex)
   {
   // Allocation moves towards lower offsets: decrement the running stack
   // cursor by the symbol's size and record the new offset on the symbol.
   p->setOffset(stackIndex -= size);
   }

// Decide whether a global-register-allocated parameter must additionally be
// kept in its stack slot. Returns true when a stack copy is required; as a
// side effect the decision is cached on the symbol via setParmHasToBeOnStack()
// (see Problem Report 96788 note below for why caching is necessary).
bool
J9::Z::PrivateLinkage::hasToBeOnStack(TR::ParameterSymbol * parm)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   TR_OpaqueClassBlock * throwableClass;

   // Need to save parameter on the stack if:
   //  A global register is allocated for the parameter AND either:
   //   1. the parameter is the *this pointer of a virtual sync'd/jvmpi method
   //   2. the address of the parameter is taken (JNI calls)
   //      (You can't get an address of the parameter if it is stored in a register -
   //       hence, parameter needs to be saved it onto the stack).
   bool result = ( parm->getAssignedGlobalRegisterIndex() >= 0 &&     // is using global RA
           ( ( parm->getLinkageRegisterIndex() == 0 &&                // is first parameter (this pointer)
               parm->isCollectedReference() &&                        // is object reference
               !bodySymbol->isStatic() &&                             // is virtual
               // TODO:
               // We potentially only need to save param onto stack for sync'd methods
               // which have calls/exception traps. Currently, we conservatively save
               // param onto stack for sync'd methods, regardless of whether there are calls
               // or not.
               // see PPCLinkage for actual details.
               //    ( ( bodySymbol->isSynchronised() &&                  // is sync method
               //        ( cg()->canExceptByTrap() || cg()->hasCall() )   // can trigger stack walker
               //      ) ||
               ( ( bodySymbol->isSynchronised()
                 ) ||
                 (
                   !strncmp(bodySymbol->getResolvedMethod()->nameChars(), "<init>", 6) &&
                   ( (throwableClass = fej9->getClassFromSignature("Ljava/lang/Throwable;", 21, bodySymbol->getResolvedMethod())) == 0 ||
                     fej9->isInstanceOf(bodySymbol->getResolvedMethod()->containingClass(), throwableClass, true) != TR_no
                   )
                 )
               )
             ) ||
             parm->isParmHasToBeOnStack()   // JNI direct where the address of a parm can be taken. e.g. &this.
           )
         );

   // Problem Report 96788:
   //
   // There is a potential race condition here. Because of the query to the frontend this function could
   // possibly return different results at different points in the compilation dependent on whether the
   // java/lang/Throwable class is resolved or not. This is a problem because this query is used to
   // determine whether we need to generate a GC map for this parameter and whether we need to generate
   // a store out to the stack for this parameter. Because these two queries happen at two different points
   // in the compilation we could encounter a situation where we generate a GC map for this parameter but
   // not generate a store out to the stack. This causes assertions in the VM if we hit a GC point in this
   // compilation unit. To avoid this issue we cache the result of this function and directly modify the
   // parameter symbol.

   // TODO : Where does the java/lang/Throwable code below originate and why is it here? This seems like
   // a very hacky fix to a very specific problem. Also why is this code not commoned up with P and why
   // is it missing for X?

   if (result)
      parm->setParmHasToBeOnStack();

   return result;
   }

// Assign linkage register indices to all parameters of the jitted method.
void
J9::Z::PrivateLinkage::setParameterLinkageRegisterIndex(TR::ResolvedMethodSymbol * method)
   {
   self()->setParameterLinkageRegisterIndex(method, method->getParameterList());
   }

// Walk the parameter list in order and assign each parameter the index of the
// argument register that would carry it (-1 when it is passed on the stack).
// Integer/address, float/double and vector parameters each consume their own
// register sequence; Int64 consumes two integer registers on 31-bit.
void
J9::Z::PrivateLinkage::setParameterLinkageRegisterIndex(TR::ResolvedMethodSymbol * method, List<TR::ParameterSymbol> &parmList)
   {
   ListIterator<TR::ParameterSymbol> paramIterator(&parmList);
   TR::ParameterSymbol * paramCursor=paramIterator.getFirst();
   int32_t numIntArgs = 0, numFloatArgs = 0, numVectorArgs = 0;

   int32_t paramNum = -1;
   // Stop early once every register class is exhausted; remaining parameters
   // keep the default (stack) linkage.
   while ((paramCursor != NULL) &&
          (numIntArgs < self()->getNumIntegerArgumentRegisters() ||
           numFloatArgs < self()->getNumFloatArgumentRegisters() ||
           numVectorArgs < self()->getNumVectorArgumentRegisters()))
      {
      int32_t index = -1;
      paramNum++;

      TR::DataType dt = paramCursor->getDataType();

      switch (dt)
         {
         case TR::Int8:
         case TR::Int16:
         case TR::Int32:
         case TR::Address:
            if (numIntArgs < self()->getNumIntegerArgumentRegisters())
               {
               index = numIntArgs;
               }
            numIntArgs++;
            break;
         case TR::Int64:
            if(numIntArgs < self()->getNumIntegerArgumentRegisters())
               {
               index = numIntArgs;
               }
            // A long occupies one GPR on 64-bit, a register pair on 31-bit.
            numIntArgs += (comp()->target().is64Bit() ? 1 : 2);
            break;
         case TR::Float:
         case TR::Double:
            if (numFloatArgs < self()->getNumFloatArgumentRegisters())
               {
               index = numFloatArgs;
               }
            numFloatArgs++;
            break;
         case TR::PackedDecimal:
         case TR::ZonedDecimal:
         case TR::ZonedDecimalSignLeadingEmbedded:
         case TR::ZonedDecimalSignLeadingSeparate:
         case TR::ZonedDecimalSignTrailingSeparate:
         case TR::UnicodeDecimal:
         case TR::UnicodeDecimalSignLeading:
         case TR::UnicodeDecimalSignTrailing:
         case TR::Aggregate:
            // Always passed on the stack: no register index assigned.
            break;
         case TR::VectorInt8:
         case TR::VectorInt16:
         case TR::VectorInt32:
         case TR::VectorInt64:
         case TR::VectorDouble:
            if (numVectorArgs < self()->getNumVectorArgumentRegisters())
               {
               index = numVectorArgs;
               }
            numVectorArgs++;
            break;
         }
      paramCursor->setLinkageRegisterIndex(index);
      paramCursor = paramIterator.getNext();

      if (self()->isFastLinkLinkageType())
         {
         if ((numFloatArgs == 1) || (numIntArgs >= self()->getNumIntegerArgumentRegisters()))
            {
            // force fastlink ABI condition of only one float parameter for fastlink parameter and it must be within first slots
            numFloatArgs = self()->getNumFloatArgumentRegisters();   // no more float args possible now
            }
         }
      }
   }

//Clears numBytes bytes of storage from baseOffset(srcReg)
static TR::Instruction *
initStg(TR::CodeGenerator * codeGen, TR::Node * node, TR::RealRegister * tmpReg, TR::RealRegister * srcReg,TR::RealRegister * itersReg, int32_t baseOffset, int32_t numBytes,
   TR::Instruction * cursor)
   {
   int32_t numIters = (numBytes / 256);
   TR::RealRegister * baseReg = NULL;
   TR::RealRegister * indexReg = tmpReg;

   TR_ASSERT( numBytes >= 0, "number of bytes to clear must be positive");
   TR_ASSERT( baseOffset >= 0, "starting offset must be positive");

   // If every displacement we will generate fits in a 12-bit displacement,
   // we can address directly off srcReg; otherwise materialize the base.
   if ((numBytes < 4096) && (numIters * 256 + baseOffset < 4096))
      {
      baseReg = srcReg;
      }
   else
      {
      baseReg = tmpReg;

      // If we don't set the proper flag when we use GPR14 as a temp register
      // here during prologue creation, we
      // won't restore the return address
      // into GPR14 in epilogue
      tmpReg->setHasBeenAssignedInMethod(true);

      if (baseOffset>=MIN_IMMEDIATE_VAL && baseOffset<=MAX_IMMEDIATE_VAL)
         {
         // baseReg = srcReg + baseOffset via LR + AHI.
         cursor = generateRRInstruction(codeGen, TR::InstOpCode::getLoadRegOpCode(), node, baseReg, srcReg, cursor);
         cursor = generateRIInstruction(codeGen, TR::InstOpCode::getAddHalfWordImmOpCode(), node, baseReg, baseOffset, cursor);
         }
      else // Large frame situation
         {
         // Offset does not fit a halfword immediate: load it into baseReg
         // first, then add the source register.
         cursor = generateS390ImmToRegister(codeGen, node, baseReg, (intptr_t)(baseOffset), cursor);
         cursor = generateRRInstruction(codeGen, TR::InstOpCode::getAddRegOpCode(), node, baseReg, srcReg, cursor);
         }
      baseOffset = 0;
      }

   // Emit the actual clearing sequence for numBytes bytes at baseOffset(baseReg).
   MemClearConstLenMacroOp op(node, node, codeGen, numBytes);
   return op.generate(baseReg, baseReg, indexReg, itersReg, baseOffset, cursor);
   }

// Compute the size in bytes of the register save area and fill in the
// low 16 bits of registerSaveDescription (one bit per saved GPR).
// numIntSaved/numFloatSaved are incremented for each preserved register.
int32_t
J9::Z::PrivateLinkage::calculateRegisterSaveSize(TR::RealRegister::RegNum firstUsedReg,
                                                TR::RealRegister::RegNum lastUsedReg,
                                                int32_t &registerSaveDescription,
                                                int32_t &numIntSaved, int32_t &numFloatSaved)
   {
   int32_t regSaveSize = 0;
   // set up registerSaveDescription which looks the following
   //
   // 00000000  offsetfrombp    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   //                           gpr15                     gpr0
   //
   // The bit is set to 1 if the register is saved.
   int32_t i;
   if (lastUsedReg != TR::RealRegister::NoReg)
      {
      for (i = firstUsedReg ; i <= lastUsedReg ; ++i)
         {
         registerSaveDescription |= 1 << (i - 1);
         numIntSaved++;
         }
      }

#if defined(ENABLE_PRESERVED_FPRS)
   for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
      {
      if ((getRealRegister(i))->getHasBeenAssignedInMethod())
         {
         numFloatSaved++;
         }
      }
#endif

   // calculate stackFramesize
   regSaveSize += numIntSaved * cg()->machine()->getGPRSize() +
                  numFloatSaved * cg()->machine()->getFPRSize();

   // NOTE(review): the two locals below are computed but never used in this
   // function — confirm whether they are leftovers from an older frame-size
   // calculation and can be removed.
   int32_t firstLocalOffset = getOffsetToFirstLocal();
   int32_t localSize = -1 * (int32_t) (comp()->getJittedMethodSymbol()->getLocalMappingCursor());  // Auto+Spill size

   return regSaveSize;
   }

// Returns the literal pool register's number when a literal pool register must
// be set up for this method, or -1 when no setup is required.
int32_t
J9::Z::PrivateLinkage::setupLiteralPoolRegister(TR::Snippet *firstSnippet)
   {
   // setup literal pool register if needed
   // on freeway:
   //   LARL r6, i2   <- where i2 = (addr of lit. pool-current addr)/2
   //
   // on non freeway:
   //   BRAS r6, 4
   //   <lit. pool addr>
   //   L    r6, 0(r6)

   if (!cg()->isLiteralPoolOnDemandOn() && firstSnippet != NULL)
      {
      // The immediate operand will be patched when the actual address of the literal pool is known
      if (cg()->anyLitPoolSnippets())
         {
         return getLitPoolRealRegister()->getRegisterNumber();
         }
      }

   return -1;
   }

////////////////////////////////////////////////////////////////////////////////
// TS_390PrivateLinkage::createPrologue() - create prolog for private linkage
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::createPrologue(TR::Instruction * cursor)
   {
   TR::RealRegister * spReg = getStackPointerRealRegister();
   TR::RealRegister * lpReg = getLitPoolRealRegister();
   TR::RealRegister * epReg = getEntryPointRealRegister();
   TR::Snippet * firstSnippet = NULL;
   TR::Node * firstNode = comp()->getStartTree()->getNode();
   int32_t size = 0, argSize = 0, regSaveSize = 0, numIntSaved = 0, numFloatSaved = 0;
   int32_t registerSaveDescription = 0;
   int32_t firstLocalOffset = getOffsetToFirstLocal();
   int32_t i;
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   int32_t localSize = -1 * (int32_t) (bodySymbol->getLocalMappingCursor());  // Auto+Spill size

   // look for registers that need to be saved
   // Look between R6-R11
   //
   TR::RealRegister::RegNum firstUsedReg = getFirstSavedRegister(TR::RealRegister::GPR6,
                                                                 TR::RealRegister::GPR12);
   TR::RealRegister::RegNum lastUsedReg = getLastSavedRegister(TR::RealRegister::GPR6,
                                                               TR::RealRegister::GPR12);

   // compute the register save area
   regSaveSize = calculateRegisterSaveSize(firstUsedReg, lastUsedReg,
                                           registerSaveDescription,
                                           numIntSaved, numFloatSaved);

   // NOTE(review): the first branch is deliberately disabled ("if (0 && ...)")
   // so the 64-bit doubled outgoing-arg sizing is never taken — confirm
   // whether this dead code can be deleted.
   if (0 && comp()->target().is64Bit())
      {
      argSize = cg()->getLargestOutgoingArgSize() * 2 + getOffsetToFirstParm();
      }
   else
      {
      argSize = cg()->getLargestOutgoingArgSize() + getOffsetToFirstParm();
      }
   size = regSaveSize + localSize + argSize;

   // TODO: Rename this option to "disableStackAlignment" as we can align to more than doubleword now
   if (!comp()->getOption(TR_DisableDoubleWordStackAlignment))
      {
      traceMsg(comp(), "Before stack alignment Framesize = %d, localSize = %d\n", size, localSize);

      uint32_t stackFrameAlignment = std::max(1 << TR::Compiler->om.compressedReferenceShift(), 8);

      // Represents the smallest non-negative x such that (size + x) % stackFrameAlignment == 0
      int32_t distanceToAlignment = (stackFrameAlignment - (size % stackFrameAlignment)) % stackFrameAlignment;

      localSize += distanceToAlignment;

      // Recompute the size with the new (potentially) updated localSize
      size = regSaveSize + localSize + argSize;

      traceMsg(comp(), "After stack alignment Framesize = %d, localSize = %d\n", size, localSize);
      }

   // Check for large stack
   bool largeStack = (size<MIN_IMMEDIATE_VAL || size>MAX_IMMEDIATE_VAL);

   if (comp()->getOption(TR_TraceCG))
      {
      traceMsg(comp(), "\n regSaveSize = %d localSize = %d argSize = %d firstLocalOffset = %d \n",regSaveSize,localSize,argSize,firstLocalOffset);
      traceMsg(comp(), " Framesize = %d \n",size);
      }

   TR_ASSERT( ((int32_t) size % 4 == 0), "misaligned stack detected");

   setOffsetToRegSaveArea(argSize);

   // Upper half of the RSD encodes the offset from the frame base to the
   // register save area; lower half (filled above) flags the saved GPRs.
   registerSaveDescription |= (localSize + firstLocalOffset + regSaveSize) << 16;

   cg()->setRegisterSaveDescription(registerSaveDescription);

   cg()->setFrameSizeInBytes(size + firstLocalOffset);

   int32_t offsetToLongDisp = size - getOffsetToLongDispSlot();
   setOffsetToLongDispSlot(offsetToLongDisp);
   if (comp()->getOption(TR_TraceCG))
      {
      traceMsg(comp(), "\n\nOffsetToLongDispSlot = %d\n", offsetToLongDisp);
      }

   // Is GPR14 ever used?  If not, we can avoid
   //
   //  setRaContextSaveNeeded((getRealRegister(TR::RealRegister::GPR14))->getHasBeenAssignedInMethod());

   // We assume frame size is less than 32k
   //TR_ASSERT(size<=MAX_IMMEDIATE_VAL,
   //   "J9::Z::PrivateLinkage::createPrologue -- Frame size (0x%x) greater than 0x7FFF\n",size);

   TR::MemoryReference * retAddrMemRef = NULL;

   // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
   //
   // I M P O R T A N T!
   //
   // when recovering from a failed recompile, for sampling, any patching
   // must be
   // reversed.  The reversal code assumes that STY R14,-[4,8](r5) is
   // generated for trex, and a nop.  If this ever changes,
   // TR::Recompilation::methodCannotBeRecompiled must be updated.
   //
   // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *

   TR::RealRegister * tempReg = getRealRegister(TR::RealRegister::GPR0);

   setFirstPrologueInstruction(cursor);
   static bool prologTuning = (feGetEnv("TR_PrologTuning")!=NULL);

   if (prologTuning)
      {
      // Return address is stored later (below the register saves); just
      // remember where it will go, relative to the post-adjustment SP.
      retAddrMemRef = generateS390MemoryReference(spReg, size - cg()->machine()->getGPRSize(), cg());
      }
   else
      {
      // Store the return address into the caller's frame (one GPR slot below
      // the current SP) before the SP is adjusted.
      int32_t offset = cg()->machine()->getGPRSize() * -1;
      retAddrMemRef = generateS390MemoryReference(spReg, offset, cg());
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedStoreOpCode(), firstNode, getRealRegister(getReturnAddressRegister()),
                                     retAddrMemRef, cursor);
      }

   // adjust java stack frame pointer
   if (largeStack)
      {
      cursor = generateS390ImmToRegister(cg(), firstNode, tempReg, (intptr_t)(size * -1), cursor);
      cursor = generateRRInstruction(cg(), TR::InstOpCode::getAddRegOpCode(), firstNode, spReg, tempReg, cursor);
      }
   else
      {
      // Adjust stack pointer with LA (reduce AGI delay)
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LAY, firstNode, spReg, generateS390MemoryReference(spReg,(size) * -1, cg()),cursor);
      }

   if (!comp()->isDLT())
      {
      // Check stackoverflow /////////////////////////////////////
      //Load the stack limit in a temporary reg ( use R14, as it is killed later anyways )
      TR::RealRegister * stackLimitReg = getRealRegister(TR::RealRegister::GPR14);
      TR::RealRegister * mdReg = getMethodMetaDataRealRegister();
      TR::MemoryReference * stackLimitMR = generateS390MemoryReference(mdReg, cg()->getStackLimitOffset(), cg());

      // Compare stackLimit and currentStackPointer
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getCmpLogicalOpCode(), firstNode, spReg, stackLimitMR, cursor);

      // Call stackOverflow helper, if stack limit is less than current Stack pointer. (Stack grows downwards)
      TR::LabelSymbol * stackOverflowSnippetLabel = generateLabelSymbol(cg());
      TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg());

      //Call Stack overflow helper
      cursor = generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, firstNode, stackOverflowSnippetLabel, cursor);

      TR::SymbolReference * stackOverflowRef = comp()->getSymRefTab()->findOrCreateStackOverflowSymbolRef(comp()->getJittedMethodSymbol());

      TR::Snippet * snippet =
         new (trHeapMemory()) TR::S390StackCheckFailureSnippet(cg(), firstNode, reStartLabel, stackOverflowSnippetLabel, stackOverflowRef, size - cg()->machine()->getGPRSize());

      cg()->addSnippet(snippet);

      // The stack overflow helper returns back here
      cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, firstNode, reStartLabel, cursor);
      }

   // End of stack overflow checking code ////////////////////////
   static bool bppoutline = (feGetEnv("TR_BPRP_Outline")!=NULL);

   if (bppoutline && cg()->_outlineCall._frequency != -1)
      {
      cursor = new (cg()->trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, epReg, (cg()->_outlineCall._callSymRef)->getSymbol(),(cg()->_outlineCall._callSymRef), cursor, cg());

      TR::MemoryReference * tempMR = generateS390MemoryReference(epReg, 0, cg());
      cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, firstNode, cg()->_outlineCall._callLabel, (int8_t) 0xD, tempMR, cursor);
      }
   if (bppoutline && cg()->_outlineArrayCall._frequency != -1)
      {
      cursor = new (cg()->trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, epReg, (cg()->_outlineArrayCall._callSymRef)->getSymbol(),(cg()->_outlineArrayCall._callSymRef), cursor, cg());

      TR::MemoryReference * tempMR = generateS390MemoryReference(epReg, 0, cg());
      cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, firstNode, cg()->_outlineArrayCall._callLabel, (int8_t) 0xD, tempMR, cursor);
      }

   if (cg()->getSupportsRuntimeInstrumentation())
      cursor = TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, firstNode, cursor, true);

   // save registers that are used by this method
   int32_t disp = argSize;
   TR::MemoryReference * rsa ;

   // save GPRs
   if (lastUsedReg != TR::RealRegister::NoReg)
      {
      rsa = generateS390MemoryReference(spReg, disp, cg());

      if (firstUsedReg != lastUsedReg)
         {
         // Contiguous range of preserved GPRs: one store-multiple.
         cursor = generateRSInstruction(cg(), TR::InstOpCode::getStoreMultipleOpCode(), firstNode, getRealRegister(firstUsedReg), getRealRegister(lastUsedReg), rsa, cursor);
         }
      else
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::getStoreOpCode(), firstNode, getRealRegister(firstUsedReg), rsa, cursor);
         }
      }
   disp += numIntSaved * cg()->machine()->getGPRSize();

#if defined(ENABLE_PRESERVED_FPRS)
   //save FPRs
   for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
      {
      if ((getRealRegister(i))->getHasBeenAssignedInMethod())
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::STD, firstNode, getRealRegister(i), generateS390MemoryReference(spReg, disp, cg()),
                     cursor);
         disp += cg()->machine()->getFPRSize();
         }
      }
#endif

   if (prologTuning)
      {
      if ( size>=MAXLONGDISP )
         {
         // Displacement too large even for long-displacement forms: put it in
         // epReg and use it as an index register instead.
         cursor = generateS390ImmToRegister(cg(), firstNode, epReg, (intptr_t)(retAddrMemRef->getOffset()), cursor);
         retAddrMemRef->setOffset(0);
         retAddrMemRef->setDispAdjusted();
         retAddrMemRef->setIndexRegister(epReg);
         }
      // Save return address(R14) on stack
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getStoreOpCode(), firstNode, getRealRegister(getReturnAddressRegister()), retAddrMemRef, cursor);
      }

   // initialize local objects
   TR::GCStackAtlas * atlas = cg()->getStackAtlas();
   if (atlas)
      {
      // for large copies, we can use the literal pool reg as a temp
      // (for >4096 clearing) when it is implemented

      // The GC stack maps are conservative in that they all say that
      // collectable locals are live. This means that these locals must be
      // cleared out in case a GC happens before they are allocated a valid
      // value.
      // The atlas contains the number of locals that need to be cleared. They
      // are all mapped together starting at GC index 0.
      //
      uint32_t numLocalsToBeInitialized = atlas->getNumberOfSlotsToBeInitialized();
      if (numLocalsToBeInitialized > 0 || atlas->getInternalPointerMap())
         {
         int32_t offsetLcls = atlas->getLocalBaseOffset() + firstLocalOffset;
         TR::RealRegister * tmpReg = getReturnAddressRealRegister();
         TR::RealRegister * itersReg = getRealRegister(TR::RealRegister::GPR0);

         int32_t initbytes = cg()->machine()->getGPRSize() * numLocalsToBeInitialized;

         //printf("\ncollected reference: init %d bytes at offset %d\n", initbytes, size+offsetLcls);

         cursor = initStg(cg(), firstNode, tmpReg, spReg, itersReg, size + offsetLcls, initbytes, cursor);
         if (atlas->getInternalPointerMap())
            {
            int32_t offsetIntPtr = atlas->getOffsetOfFirstInternalPointer() + firstLocalOffset;

            // Total number of slots to be initialized is number of pinning arrays +
            // number of derived internal pointer stack slots
            //
            int32_t initbytes = (atlas->getNumberOfDistinctPinningArrays() +
                                 atlas->getInternalPointerMap()->getNumInternalPointers()) * cg()->machine()->getGPRSize();

            //printf("\ninternal pointer: init %d bytes at offset %d\n", initbytes, size+offsetIntPtr);

            cursor = initStg(cg(), firstNode, tmpReg, spReg, itersReg, size + offsetIntPtr, initbytes, cursor);
            }
         }
      }

   firstSnippet = cg()->getFirstSnippet();
   if (setupLiteralPoolRegister(firstSnippet) > 0)
      {
      cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, firstNode, lpReg, firstSnippet, cursor, cg());
      }

   // All symbol offsets were mapped relative to the (pre-adjustment) frame
   // base; rebase them against the new stack pointer by adding the frame size.
   ListIterator<TR::AutomaticSymbol> variableSizeSymIterator(&bodySymbol->getVariableSizeSymbolList());
   TR::AutomaticSymbol * variableSizeSymCursor = variableSizeSymIterator.getFirst();

   while (variableSizeSymCursor != NULL)
      {
      TR_ASSERT(variableSizeSymCursor->isVariableSizeSymbol(), "Should be variable sized");
      variableSizeSymCursor->setOffset(variableSizeSymCursor->getOffset() + size);
      variableSizeSymCursor = variableSizeSymIterator.getNext();
      }
   ListIterator<TR::AutomaticSymbol> automaticIterator(&bodySymbol->getAutomaticList());
   TR::AutomaticSymbol * localCursor = automaticIterator.getFirst();

   while (localCursor != NULL)
      {
      localCursor->setOffset(localCursor->getOffset() + size);
      localCursor = automaticIterator.getNext();
      }

   ListIterator<TR::ParameterSymbol> parameterIterator(&bodySymbol->getParameterList());
   TR::ParameterSymbol * parmCursor = parameterIterator.getFirst();
   while (parmCursor != NULL)
      {
      parmCursor->setParameterOffset(parmCursor->getParameterOffset() + size);
      parmCursor = parameterIterator.getNext();
      }

   // Save or move arguments according to the result of register assignment.
   cursor = (TR::Instruction *) saveArguments(cursor, false);

   static const bool prefetchStack = feGetEnv("TR_PrefetchStack") != NULL;
   if (cg()->isPrefetchNextStackCacheLine() && prefetchStack)
      {
      cursor = generateRXInstruction(cg(), TR::InstOpCode::PFD, firstNode, 2, generateS390MemoryReference(spReg, -256, cg()), cursor);
      }

   // Cold Eyecatcher is used for padding of endPC so that Return Address for exception snippets will never equal the endPC.
   // -> stackwalker assumes valid RA must be < endPC (not <= endPC).
   cg()->CreateEyeCatcher(firstNode);
   setLastPrologueInstruction(cursor);
   }

////////////////////////////////////////////////////////////////////////////////
// TS_390PrivateLinkage::createEpilog() - create epilog for private
// linkage
//
// Here is the sample epilog that we are currently generated
//
//  10 c0 d0 00    LM      GPR6, GPR15, 40(,GPR11)
//  47 00 b0 00    BC      GPR14
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::createEpilogue(TR::Instruction * cursor)
   {
   TR::RealRegister * spReg = getRealRegister(getStackPointerRegister());
   TR::Node * currentNode = cursor->getNode();
   TR::Node * nextNode = cursor->getNext()->getNode();
   TR::ResolvedMethodSymbol * bodySymbol = comp()->getJittedMethodSymbol();
   uint32_t size = bodySymbol->getLocalMappingCursor();
   int32_t frameSize = cg()->getFrameSizeInBytes();
   int32_t i, offset = 0;
   TR::MemoryReference * rsa;
   TR::RealRegister::RegNum lastUsedReg, firstUsedReg;
   TR::RegisterDependencyConditions * dep;
   TR::RealRegister * tempReg = getRealRegister(TR::RealRegister::GPR0);
   TR::RealRegister * epReg = getRealRegister(getEntryPointRegister());
   int32_t blockNumber = -1;

   bool enableBranchPreload = cg()->supportsBranchPreload();

   dep = cursor->getNext()->getDependencyConditions();
   offset = getOffsetToRegSaveArea();

   // Do Return Address restore
   uint32_t adjustSize = frameSize - getOffsetToFirstLocal();

   static const char *disableRARestoreOpt = feGetEnv("TR_DisableRAOpt");

   // Any one of these conditions will force us to restore RA
   bool restoreRA = disableRARestoreOpt ||
                    !(performTransformation(comp(), "O^O No need to restore RAREG in epilog\n")) ||
                    getRealRegister(getReturnAddressRegister())->getHasBeenAssignedInMethod() ||
                    cg()->canExceptByTrap() ||
                    cg()->getExitPointsInMethod() ||
                    bodySymbol->isEHAware() ||
                    comp()->getOption(TR_FullSpeedDebug);  // CMVC 195232 - FSD can modify RA slot at a GC point.
   setRaContextRestoreNeeded(restoreRA);

   if (getRaContextRestoreNeeded())
      {
      // Reload the return address from its slot in the caller's frame.
      cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedLoadOpCode(), nextNode,
                                     getRealRegister(getReturnAddressRegister()),
                                     generateS390MemoryReference(spReg, frameSize, cg()), cursor);
      }
   else
      {
      if (comp()->getOption(TR_TraceCG))
         traceMsg(comp(), "No RAREG context restore needed in Epilog\n");
      }

   if (enableBranchPreload && (cursor->getNext() == cg()->_hottestReturn._returnInstr))
      {
      if (cg()->_hottestReturn._frequency > 6 && cg()->_hottestReturn._insertBPPInEpilogue)
         {
         // Preload the branch target of the hottest return to prime the
         // branch predictor before the BCR below.
         cg()->_hottestReturn._returnLabel = generateLabelSymbol(cg());
         TR::MemoryReference * tempMR = generateS390MemoryReference(getRealRegister(getReturnAddressRegister()), 0, cg());
         cursor = generateS390BranchPredictionPreloadInstruction(cg(), TR::InstOpCode::BPP, nextNode, cg()->_hottestReturn._returnLabel, (int8_t) 0x6, tempMR, cursor);
         cg()->_hottestReturn._insertBPPInEpilogue = false;
         }
      }

   // Restore GPRs
   firstUsedReg = getFirstRestoredRegister(TR::RealRegister::GPR6, TR::RealRegister::GPR12);
   lastUsedReg = getLastRestoredRegister(TR::RealRegister::GPR6, TR::RealRegister::GPR12);
   rsa = generateS390MemoryReference(spReg, offset, cg());

   if (lastUsedReg != TR::RealRegister::NoReg)
      {
      if (firstUsedReg != lastUsedReg)
         {
         cursor = restorePreservedRegs(firstUsedReg, lastUsedReg, blockNumber, cursor, nextNode, spReg, rsa, getStackPointerRegister());
         }
      else
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::getLoadOpCode(), nextNode, getRealRegister(firstUsedReg), rsa, cursor);
         }
      offset += cg()->machine()->getGPRSize() * (lastUsedReg - firstUsedReg + 1);
      }

#if defined(ENABLE_PRESERVED_FPRS)
   //Load FPRs
   for (i = TR::RealRegister::FPR8 ; i <= TR::RealRegister::FPR15 ; ++i)
      {
      if ((getRealRegister(i))->getHasBeenAssignedInMethod())
         {
         cursor = generateRXInstruction(cg(), TR::InstOpCode::LD, currentNode, getRealRegister(i),
                     generateS390MemoryReference(spReg, offset, cg()), cursor);
         offset += cg()->machine()->getFPRSize();
         }
      }
#endif

   // Pop frame
   // use LA/LAY to add immediate through displacement
   if (adjustSize < MAXDISP)
      {
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LA, nextNode, spReg, generateS390MemoryReference(spReg,adjustSize,cg()),cursor);
      }
   else if (adjustSize<MAXLONGDISP)
      {
      cursor = generateRXInstruction(cg(), TR::InstOpCode::LAY, nextNode, spReg, generateS390MemoryReference(spReg,adjustSize,cg()),cursor);
      }
   else
      {
      // Frame too large for any displacement form: materialize the adjustment
      // in GPR0 and add it.
      cursor = generateS390ImmToRegister(cg(), nextNode, tempReg, (intptr_t)(adjustSize), cursor);
      cursor = generateRRInstruction(cg(), TR::InstOpCode::getAddRegOpCode(), nextNode, spReg, tempReg, cursor);
      }

   // Add RIOFF on Epilogue before we leave the JIT
   if (cg()->getSupportsRuntimeInstrumentation())
      cursor = TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, currentNode, cursor, true);

   if (enableBranchPreload)
      {
      if (cursor->getNext() == cg()->_hottestReturn._returnInstr)
         {
         if (cg()->_hottestReturn._frequency > 6)
            {
            cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, currentNode, cg()->_hottestReturn._returnLabel, cursor);
            }
         }
      }

   // Return to the caller via BCR (unconditional) through the RA register.
   cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, currentNode, getRealRegister(getReturnAddressRegister()), cursor);
   ((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);

   }

////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::buildVirtualDispatch - build virtual function call
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::buildVirtualDispatch(TR::Node * callNode, TR::RegisterDependencyConditions * dependencies,
   TR::Register * vftReg, uint32_t sizeOfArguments)
   {
   TR::RegisterDependencyGroup * Dgroup =
dependencies->getPreConditions();1547TR::SymbolReference * methodSymRef = callNode->getSymbolReference();1548TR::MethodSymbol * methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();1549TR::LabelSymbol * vcallLabel = generateLabelSymbol(cg());1550TR::Instruction * gcPoint = NULL;1551TR::Snippet *unresolvedSnippet = NULL;1552TR_Debug * debugObj = cg()->getDebug();15531554TR_ResolvedMethod * profiledMethod = NULL;1555TR_OpaqueClassBlock *profiledClass = NULL;1556bool useProfiledValues = false;15571558if (comp()->getOption(TR_TraceCG))1559traceMsg(comp(), "Build Virtual Dispatch\n");15601561if ((methodSymbol && !methodSymbol->isComputed()) &&1562(comp()->getPersistentInfo()->isRuntimeInstrumentationEnabled()) &&1563(comp()->getOption(TR_EnableRIEMIT)))1564{1565TR::Instruction *emitInstruction = generateRIInstruction(cg(), TR::InstOpCode::RIEMIT, callNode, vftReg, 0);1566comp()->addHWPValueProfileInstruction(emitInstruction);1567}15681569TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());15701571// Generate and register a thunk for a resolved virtual function1572void *virtualThunk;1573if (methodSymbol && methodSymbol->isComputed())1574{1575switch (methodSymbol->getMandatoryRecognizedMethod())1576{1577case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:1578case TR::com_ibm_jit_JITHelpers_dispatchVirtual:1579{1580char *j2iSignature = fej9->getJ2IThunkSignatureForDispatchVirtual(methodSymbol->getMethod()->signatureChars(), methodSymbol->getMethod()->signatureLength(), comp());1581int32_t signatureLen = strlen(j2iSignature);1582virtualThunk = fej9->getJ2IThunk(j2iSignature, signatureLen, comp());1583if (!virtualThunk)1584{1585virtualThunk = fej9->setJ2IThunk(j2iSignature, signatureLen,1586TR::S390J9CallSnippet::generateVIThunk(1587fej9->getEquivalentVirtualCallNodeForDispatchVirtual(callNode, comp()), sizeOfArguments, cg()), comp()); // TODO:JSR292: Is this the right sizeOfArguments?1588}1589}1590break;1591default:1592if 
(fej9->needsInvokeExactJ2IThunk(callNode, comp()))1593{1594TR_J2IThunk *thunk = TR::S390J9CallSnippet::generateInvokeExactJ2IThunk(callNode, sizeOfArguments, methodSymbol->getMethod()->signatureChars(), cg());1595fej9->setInvokeExactJ2IThunk(thunk, comp());1596}1597break;1598}1599}1600else1601{1602virtualThunk = fej9->getJ2IThunk(methodSymbol->getMethod(), comp());1603if (!virtualThunk)1604virtualThunk = fej9->setJ2IThunk(methodSymbol->getMethod(), TR::S390J9CallSnippet::generateVIThunk(callNode, sizeOfArguments, cg()), comp());1605}16061607if (methodSymbol->isVirtual() && (!methodSymRef->isUnresolved() && !comp()->compileRelocatableCode()))1608{1609TR_ResolvedMethod * rsm = methodSymbol->castToResolvedMethodSymbol()->getResolvedMethod();16101611// Simple heuristic to determine when to prefetch the next cache line in method prologue.1612// We check the J9ROMMethod of the non-cold callsite to estimate how big of a stack1613// frame will be required for the call.1614if (!(cg()->getCurrentEvaluationTreeTop()->getEnclosingBlock()->isCold()) &&1615(rsm->numberOfParameterSlots() + rsm->numberOfTemps()) > 5)1616{1617cg()->setPrefetchNextStackCacheLine(true);1618}1619}16201621if (cg()->getSupportsRuntimeInstrumentation())1622TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, callNode);16231624if (methodSymbol->isVirtual())1625{1626TR::Instruction * cursor = NULL;1627bool performGuardedDevirtualization = false;1628TR::LabelSymbol * virtualLabel = NULL;1629TR::LabelSymbol * doneVirtualLabel = generateLabelSymbol(cg());1630int32_t offset = comp()->compileRelocatableCode() ? 
0: methodSymRef->getOffset();16311632if (comp()->getOption(TR_TraceCG))1633traceMsg(comp(), "Virtual call with offset %d\n", offset);16341635// We split dependencies to make sure the RA doesn't insert any register motion code in the fixed1636// block sequence.1637//1638TR::RegisterDependencyConditions * preDeps = new (trHeapMemory())1639TR::RegisterDependencyConditions(dependencies->getPreConditions(), NULL,1640dependencies->getAddCursorForPre(), 0, cg());16411642// Add the ThisReg to the postDeps to avoid seeing a SPILL inserted between the resolution code1643// and the VTABLE. This sequence is assumed to be fixed length.1644// Added one more slot for the post dep that might be added in buildDirectCall1645//1646TR::RegisterDependencyConditions * postDepsTemp = new (trHeapMemory())1647TR::RegisterDependencyConditions(NULL, dependencies->getPostConditions(), 0,1648dependencies->getAddCursorForPost(), cg());1649TR::RegisterDependencyConditions * postDeps = new (trHeapMemory())1650TR::RegisterDependencyConditions(postDepsTemp,0,4, cg());16511652// Search ARG Deps for vregs used for RA/EP and this1653//1654TR::Register * RegZero = dependencies->searchPostConditionRegister(TR::RealRegister::GPR0);1655TR::Register * RegThis = dependencies->searchPreConditionRegister(TR::RealRegister::GPR1);1656TR::Register * RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());16571658// Check the thisChild to see if anyone uses this object after the call (if not, we won't add it to post Deps)1659if (callNode->getChild(callNode->getFirstArgumentIndex())->getReferenceCount() > 0)1660{1661postDeps->addPostCondition(RegThis, TR::RealRegister::AssignAny);1662}16631664if (methodSymRef->isUnresolved() || comp()->compileRelocatableCode())1665{1666if (comp()->getOption(TR_TraceCG))1667traceMsg(comp(), "... virtual call is unresolved\n");16681669// TODO: Task 124512. 
Fix picbuilder register preservation before1670// moving this vft register dependency to BASR pre-deps.1671postDeps->addPostConditionIfNotAlreadyInserted(vftReg, TR::RealRegister::AssignAny);16721673// Emit the resolve snippet and BRASL to call it1674//1675TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());1676unresolvedSnippet = new (trHeapMemory()) TR::S390VirtualUnresolvedSnippet(cg(), callNode, snippetLabel, sizeOfArguments, virtualThunk);1677cg()->addSnippet(unresolvedSnippet);1678//generateSnippetCall extracts preDeps from dependencies and puts them on BRASL1679TR::Instruction * gcPoint =1680generateSnippetCall(cg(), callNode, unresolvedSnippet, dependencies, methodSymRef);1681gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());1682}1683else1684{1685if (comp()->getOption(TR_TraceCG))1686traceMsg(comp(), "...call resolved\n");16871688TR::ResolvedMethodSymbol * resolvedSymbol = methodSymRef->getSymbol()->getResolvedMethodSymbol();1689TR_ResolvedMethod * resolvedMethod = resolvedSymbol ? 
resolvedSymbol->getResolvedMethod() : 0;16901691if ((comp()->performVirtualGuardNOPing() && comp()->isVirtualGuardNOPingRequired()))1692{1693TR_VirtualGuard * virtualGuard;16941695if (resolvedMethod &&1696!resolvedMethod->isInterpreted() &&1697!callNode->isTheVirtualCallNodeForAGuardedInlinedCall())1698{1699if (!resolvedMethod->virtualMethodIsOverridden() && !resolvedMethod->isAbstract())1700{17011702performGuardedDevirtualization = true;17031704// Build guarded devirtualization dispatch.1705//1706virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_NonoverriddenGuard,1707comp(), callNode);1708if (comp()->getOption(TR_TraceCG))1709{1710traceMsg(comp(), "Emit new Non-Overridden guard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,1711comp()->signature());1712}1713}1714else1715{1716TR_OpaqueClassBlock * thisClass = resolvedMethod->containingClass();1717TR_DevirtualizedCallInfo * devirtualizedCallInfo = comp()->findDevirtualizedCall(callNode);1718TR_OpaqueClassBlock * refinedThisClass = 0;17191720if (devirtualizedCallInfo)1721{1722refinedThisClass = devirtualizedCallInfo->_thisType;1723if (comp()->getOption(TR_TraceCG))1724{1725traceMsg(comp(), "Found refined this class info %x for call %x in %s\n", refinedThisClass, callNode,1726comp()->signature());1727}1728if (refinedThisClass)1729{1730thisClass = refinedThisClass;1731}1732}17331734TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();1735/* Devirtualization is not currently supported for AOT compilations */1736if (thisClass && TR::Compiler->cls.isAbstractClass(comp(), thisClass) && !comp()->compileRelocatableCode())1737{1738TR_ResolvedMethod * method = chTable->findSingleAbstractImplementer(thisClass, methodSymRef->getOffset(),1739methodSymRef->getOwningMethod(comp()), comp());1740if (method &&1741(comp()->isRecursiveMethodTarget(method) || !method->isInterpreted() || method->isJITInternalNative()))1742{1743performGuardedDevirtualization = 
true;1744resolvedMethod = method;1745virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_AbstractGuard,1746comp(), callNode);1747if (comp()->getOption(TR_TraceCG))1748{1749traceMsg(comp(), "Emit new ABSTRACT guard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,1750comp()->signature());1751}1752}1753}1754else if (refinedThisClass && !chTable->isOverriddenInThisHierarchy(resolvedMethod, refinedThisClass,1755methodSymRef->getOffset(), comp()))1756{1757if (resolvedMethod->virtualMethodIsOverridden())1758{1759TR_ResolvedMethod * calleeMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),1760refinedThisClass, methodSymRef->getOffset());1761if (calleeMethod &&1762(comp()->isRecursiveMethodTarget(calleeMethod) ||1763!calleeMethod->isInterpreted() ||1764calleeMethod->isJITInternalNative()))1765{1766performGuardedDevirtualization = true;1767resolvedMethod = calleeMethod;1768virtualGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HierarchyGuard,1769comp(), callNode);17701771if (comp()->getOption(TR_TraceCG))1772{1773traceMsg(comp(), "Emit new HierarchyGuardguard for call %s (%x) in %s\n", resolvedMethod->signature(trMemory()), callNode,1774comp()->signature());1775}1776}1777}1778}1779}1780if (performGuardedDevirtualization && virtualGuard)1781{1782virtualLabel = vcallLabel;1783generateVirtualGuardNOPInstruction(cg(), callNode, virtualGuard->addNOPSite(), NULL, virtualLabel);1784if (comp()->getOption(TR_EnableHCR))1785{1786if (cg()->supportsMergingGuards())1787{1788virtualGuard->setMergedWithHCRGuard();1789}1790else1791{1792TR_VirtualGuard* HCRGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HCRGuard, comp(), callNode);1793generateVirtualGuardNOPInstruction(cg(), callNode, HCRGuard->addNOPSite(), NULL, virtualLabel);1794}1795}1796}1797}1798}17991800if (!performGuardedDevirtualization &&1801!comp()->getOption(TR_DisableInterpreterProfiling) 
&&1802comp()->getOption(TR_enableProfiledDevirtualization) &&1803TR_ValueProfileInfoManager::get(comp()) && resolvedMethod1804)1805{1806TR_AddressInfo *valueInfo = NULL;1807if (!comp()->compileRelocatableCode())1808valueInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));18091810uintptr_t topValue = valueInfo ? valueInfo->getTopValue() : 0;18111812// Is the topValue valid?1813if( topValue )1814{1815if( valueInfo->getTopProbability() < MIN_PROFILED_CALL_FREQUENCY ||1816comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9) )1817{1818topValue = 0;1819}1820else1821{1822TR_OpaqueClassBlock *callSiteMethodClass = methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->classOfMethod();1823if (!cg()->isProfiledClassAndCallSiteCompatible((TR_OpaqueClassBlock *)topValue, callSiteMethodClass))1824{1825topValue = 0;1826}1827}1828}18291830if ( topValue )1831{1832TR_ResolvedMethod *profiledVirtualMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),1833(TR_OpaqueClassBlock *)topValue, methodSymRef->getOffset());1834if (profiledVirtualMethod)1835{1836if (comp()->getOption(TR_TraceCG))1837{1838traceMsg(comp(),1839"Profiled method {%s}\n",1840fej9->sampleSignature((TR_OpaqueMethodBlock *)(profiledVirtualMethod->getPersistentIdentifier()), 0, 0, comp()->trMemory()));1841}1842profiledMethod = profiledVirtualMethod;1843profiledClass = (TR_OpaqueClassBlock *)topValue;1844useProfiledValues = true;1845virtualLabel = vcallLabel;1846}1847}1848}18491850if (performGuardedDevirtualization || useProfiledValues)1851{1852if (comp()->getOption(TR_TraceCG))1853traceMsg(comp(), "Make direct call under devirtualization\n");18541855TR::SymbolReference * realMethodSymRef = methodSymRef;1856if (useProfiledValues || resolvedMethod != resolvedSymbol->getResolvedMethod())1857{1858realMethodSymRef= 
comp()->getSymRefTab()->findOrCreateMethodSymbol(methodSymRef->getOwningMethodIndex(),1859-1, (useProfiledValues)?profiledMethod:resolvedMethod, TR::MethodSymbol::Virtual);1860}18611862if (useProfiledValues)1863{1864TR::Instruction * unloadableConstInstr = generateRILInstruction(cg(), TR::InstOpCode::LARL, callNode, RegZero, reinterpret_cast<uintptr_t*>(profiledClass));1865if (fej9->isUnloadAssumptionRequired(profiledClass, comp()->getCurrentMethod()))1866{1867comp()->getStaticPICSites()->push_front(unloadableConstInstr);1868}1869generateS390CompareAndBranchInstruction(cg(), TR::InstOpCode::getCmpLogicalRegOpCode(), callNode, vftReg, RegZero, TR::InstOpCode::COND_BNE, virtualLabel);1870}18711872buildDirectCall(callNode, realMethodSymRef, dependencies, sizeOfArguments);18731874if (!virtualLabel)1875generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, doneVirtualLabel);1876}1877}18781879TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;18801881if ( virtualLabel )1882{1883traceMsg (comp(), "OOL vcall: generating Vcall dispatch sequence\n");1884//Using OOL but generating code manually1885outlinedSlowPath = new (cg()->trHeapMemory()) TR_S390OutOfLineCodeSection(vcallLabel,doneVirtualLabel,cg());1886cg()->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);1887outlinedSlowPath->swapInstructionListsWithCompilation();18881889TR::Instruction * temp = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, vcallLabel);1890if (debugObj)1891{1892debugObj->addInstructionComment(temp, "Denotes start of OOL vcall sequence");1893}1894}18951896// load class pointer1897TR::Register *classReg = vftReg;18981899// It should be impossible to have a offset that can't fit in 20bit given Java method table limitations.1900// We assert here to insure limitation/assumption remains true. 
If this fires we need to fix this code1901// and the _virtualUnresolvedHelper() code to deal with a new worst case scenario for patching.1902TR_ASSERT_FATAL(offset>MINLONGDISP, "JIT VFT offset does not fit in 20bits");1903TR_ASSERT_FATAL(offset!=0 || unresolvedSnippet, "Offset is 0 yet unresolvedSnippet is NULL");1904TR_ASSERT_FATAL(offset<=MAX_IMMEDIATE_VAL, "Offset is larger then MAX_IMMEDIATE_VAL");19051906// If unresolved/AOT, this instruction will be patched by _virtualUnresolvedHelper() with the correct offset1907cursor = generateRXInstruction(cg(), TR::InstOpCode::getExtendedLoadOpCode(), callNode, RegRA,1908generateS390MemoryReference(classReg, offset, cg()));19091910if (unresolvedSnippet)1911{1912((TR::S390VirtualUnresolvedSnippet *)unresolvedSnippet)->setPatchVftInstruction(cursor);1913}19141915// A load immediate into R0 instruction (LHI/LGFI) MUST be generated here because the "LA" instruction used by1916// the VM to find VFT table entries can't handle negative displacements. For unresolved/AOT targets we must assume1917// the worse case (offset can't fit in 16bits). VFT offset 0 means unresolved/AOT, otherwise offset is negative.1918// Some special cases have positive offsets i.e. 
java/lang/Object.newInstancePrototype()1919if (!unresolvedSnippet && offset >= MIN_IMMEDIATE_VAL && offset <= MAX_IMMEDIATE_VAL) // Offset fits in 16bits1920{1921cursor = generateRIInstruction(cg(), TR::InstOpCode::getLoadHalfWordImmOpCode(), callNode, RegZero, offset);1922}1923else // if unresolved || offset can't fit in 16bits1924{1925// If unresolved/AOT, this instruction will be patched by _virtualUnresolvedHelper() with the correct offset1926cursor = generateRILInstruction(cg(), TR::InstOpCode::LGFI, callNode, RegZero, static_cast<int32_t>(offset));1927}19281929gcPoint = new (trHeapMemory()) TR::S390RRInstruction(TR::InstOpCode::BASR, callNode, RegRA, RegRA, cg());1930gcPoint->setDependencyConditions(preDeps);19311932if (unresolvedSnippet != NULL)1933(static_cast<TR::S390VirtualUnresolvedSnippet *>(unresolvedSnippet))->setIndirectCallInstruction(gcPoint);19341935if (outlinedSlowPath)1936{1937TR::Instruction * temp = generateS390BranchInstruction(cg(),TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,callNode,doneVirtualLabel);1938if (debugObj)1939{1940debugObj->addInstructionComment(temp, "Denotes end of OOL vcall sequence: return to mainline");1941}1942// Done using OOL with manual code generation1943outlinedSlowPath->swapInstructionListsWithCompilation();19441945generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, doneVirtualLabel, postDeps);1946}1947else1948{1949gcPoint->setDependencyConditions(postDeps);1950}1951}1952else if (methodSymbol->isInterface())1953{1954int32_t i=0;1955TR::Register * thisClassRegister;1956TR::Register * methodRegister ;1957TR::RegisterPair * classMethodEPPairRegister;1958int32_t numInterfaceCallCacheSlots = comp()->getOptions()->getNumInterfaceCallCacheSlots();19591960if (comp()->getOption(TR_disableInterfaceCallCaching))1961{1962numInterfaceCallCacheSlots=0;1963}1964else if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))1965{1966numInterfaceCallCacheSlots=1;1967}19681969TR_ValueProfileInfoManager 
*valueProfileInfo = TR_ValueProfileInfoManager::get(comp());1970TR_AddressInfo *info = NULL;1971uint32_t numStaticPICs = 0;1972if (valueProfileInfo)1973info = static_cast<TR_AddressInfo*>(valueProfileInfo->getValueInfo(callNode->getByteCodeInfo(), comp(), AddressInfo));19741975TR::list<TR_OpaqueClassBlock*> * profiledClassesList = NULL;19761977bool isAddressInfo = info != NULL;1978uint32_t totalFreq = info ? info->getTotalFrequency() : 0;1979bool isAOT = cg()->needClassAndMethodPointerRelocations();1980bool callIsSafe = methodSymRef != comp()->getSymRefTab()->findObjectNewInstanceImplSymbol();1981if (!isAOT && callIsSafe && isAddressInfo &&1982(totalFreq!=0 && info->getTopProbability() > MIN_PROFILED_CALL_FREQUENCY))1983{19841985TR_ScratchList<TR_ExtraAddressInfo> allValues(comp()->trMemory());1986info->getSortedList(comp(), &allValues);19871988TR::SymbolReference *methodSymRef = callNode->getSymbolReference();1989TR_ResolvedMethod *owningMethod = methodSymRef->getOwningMethod(comp());19901991ListIterator<TR_ExtraAddressInfo> valuesIt(&allValues);19921993uint32_t maxStaticPICs = comp()->getOptions()->getNumInterfaceCallStaticSlots();19941995TR_ExtraAddressInfo *profiledInfo;1996profiledClassesList = new (trHeapMemory()) TR::list<TR_OpaqueClassBlock*>(getTypedAllocator<TR_OpaqueClassBlock*>(comp()->allocator()));1997for (profiledInfo = valuesIt.getFirst(); numStaticPICs < maxStaticPICs && profiledInfo != NULL; profiledInfo = valuesIt.getNext())1998{19992000float freq = (float) profiledInfo->_frequency / totalFreq;2001if (freq < MIN_PROFILED_CALL_FREQUENCY)2002continue;20032004TR_OpaqueClassBlock *clazz = (TR_OpaqueClassBlock *)profiledInfo->_value;2005if (comp()->getPersistentInfo()->isObsoleteClass(clazz, fej9))2006continue;20072008TR::SymbolReference *methodSymRef = callNode->getSymbolReference();2009TR_ResolvedMethod * profiledMethod = methodSymRef->getOwningMethod(comp())->getResolvedInterfaceMethod(comp(),2010(TR_OpaqueClassBlock *)clazz, 
methodSymRef->getCPIndex());20112012if (profiledMethod && !profiledMethod->isInterpreted())2013{2014numInterfaceCallCacheSlots++;2015numStaticPICs++;2016profiledClassesList->push_front(clazz);2017}2018}2019}20202021if (comp()->getOption(TR_TraceCG))2022{2023if (numStaticPICs != 0)2024traceMsg(comp(), "Interface dispatch with %d cache slots, added extra %d slot(s) for profiled classes.\n", numInterfaceCallCacheSlots, numStaticPICs);2025else2026traceMsg(comp(), "Interface dispatch with %d cache slots\n", numInterfaceCallCacheSlots);2027}20282029TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());2030TR::S390InterfaceCallSnippet * ifcSnippet = new (trHeapMemory()) TR::S390InterfaceCallSnippet(cg(), callNode,2031snippetLabel, sizeOfArguments, numInterfaceCallCacheSlots, virtualThunk, false);2032cg()->addSnippet(ifcSnippet);20332034if (numStaticPICs != 0)2035cg()->addPICsListForInterfaceSnippet(ifcSnippet->getDataConstantSnippet(), profiledClassesList);20362037if (numInterfaceCallCacheSlots == 0 )2038{2039//Disabled interface call caching2040TR::LabelSymbol * hitLabel = generateLabelSymbol(cg());2041TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());20422043// Make a copy of input deps, but add on 3 new slots.2044TR::RegisterDependencyConditions * postDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(dependencies, 0, 3, cg());2045postDeps->setAddCursorForPre(0); // Ignore all pre-deps that were copied.2046postDeps->setNumPreConditions(0, trMemory()); // Ignore all pre-deps that were copied.20472048gcPoint = generateSnippetCall(cg(), callNode, ifcSnippet, dependencies,methodSymRef);20492050// NOP is necessary so that the VM doesn't confuse Virtual Dispatch (expected to always use BASR2051// with interface dispatch (which must guarantee that RA-2 != 0x0D ie. 
BASR)2052//2053TR::Instruction * cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());20542055// Fool the snippet into setting up the return address to be after the NOP2056//2057gcPoint = cursor;2058((TR::S390CallSnippet *) ifcSnippet)->setBranchInstruction(gcPoint);2059cursor->setDependencyConditions(postDeps);2060}2061else2062{2063TR::Instruction * cursor = NULL;2064TR::LabelSymbol * paramSetupDummyLabel = generateLabelSymbol(cg());2065TR::LabelSymbol * returnLocationLabel = generateLabelSymbol(cg());2066TR::LabelSymbol * cacheFailLabel = generateLabelSymbol(cg());20672068TR::Register * RegEP = dependencies->searchPostConditionRegister(getEntryPointRegister());2069TR::Register * RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());2070TR::Register * RegThis = dependencies->searchPreConditionRegister(TR::RealRegister::GPR1);2071TR::Register * snippetReg = RegEP;207220732074// We split dependencies to make sure the RA doesn't insert any register motion code in the fixed2075// block sequence and to only enforce parameter setup on head of block.2076TR::RegisterDependencyConditions * preDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(2077dependencies->getPreConditions(), NULL, dependencies->getAddCursorForPre(), 0, cg());20782079// Make a copy of input deps, but add on 3 new slots.2080TR::RegisterDependencyConditions * postDeps = new (trHeapMemory()) TR::RegisterDependencyConditions(dependencies, 0, 5, cg());2081postDeps->setAddCursorForPre(0); // Ignore all pre-deps that were copied.2082postDeps->setNumPreConditions(0, trMemory()); // Ignore all pre-deps that were copied.20832084// Check the thisChild to see if anyone uses this object after the call (if not, we won't add it to post Deps)2085if (callNode->getChild(callNode->getFirstArgumentIndex())->getReferenceCount() > 0)2086postDeps->addPostCondition(RegThis, TR::RealRegister::AssignAny);20872088// Add this reg to post deps to ensure no 
reg motion2089postDeps->addPostConditionIfNotAlreadyInserted(vftReg, TR::RealRegister::AssignAny);20902091bool useCLFIandBRCL = false;20922093if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))2094{2095cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet->getDataConstantSnippet(), cg());20962097// Single dynamic slot case2098// we cache one class-method pair and atomically load it using LM/LPQ2099TR::Register * classRegister = cg()->allocateRegister();2100TR::Register * methodRegister = cg()->allocateRegister();2101classMethodEPPairRegister = cg()->allocateConsecutiveRegisterPair(methodRegister, classRegister);21022103postDeps->addPostCondition(classMethodEPPairRegister, TR::RealRegister::EvenOddPair);2104postDeps->addPostCondition(classRegister, TR::RealRegister::LegalEvenOfPair);2105postDeps->addPostCondition(methodRegister, TR::RealRegister::LegalOddOfPair);21062107//Load return address in RegRA2108cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, RegRA, returnLocationLabel, cursor, cg());21092110if (comp()->target().is64Bit())2111cursor = generateRXInstruction(cg(), TR::InstOpCode::LPQ, callNode, classMethodEPPairRegister,2112generateS390MemoryReference(snippetReg, ifcSnippet->getDataConstantSnippet()->getSingleDynamicSlotOffset(), cg()), cursor);2113else2114cursor = generateRSInstruction(cg(), TR::InstOpCode::LM, callNode, classMethodEPPairRegister,2115generateS390MemoryReference(snippetReg, ifcSnippet->getDataConstantSnippet()->getSingleDynamicSlotOffset(), cg()), cursor);21162117// We need a dummy label to hook dependencies onto2118cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, paramSetupDummyLabel, preDeps, cursor);21192120//check if cached classPtr matches the receiving object classPtr2121cursor = generateRXInstruction(cg(), TR::InstOpCode::getCmpLogicalOpCode(), callNode, 
classRegister,2122generateS390MemoryReference(RegThis, 0, cg()), cursor);21232124//Cache hit? then jumpto cached method entrypoint directly2125cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, methodRegister, cursor);2126((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BER);21272128cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet,cursor, cg());21292130// Cache miss... Too bad.. go to the slow path through the interface call snippet2131cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, snippetReg, cursor);2132((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);21332134// Added NOP so that the pattern matching code in jit2itrg icallVMprJavaSendPatchupVirtual2135cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());2136}2137else2138{2139useCLFIandBRCL = false && (comp()->target().is64Bit() && // Support for 64-bit2140TR::Compiler->om.generateCompressedObjectHeaders() // Classes are <2GB on CompressedRefs only.2141);21422143// Load the interface call data snippet pointer to register is required for non-CLFI / BRCL sequence.2144if (!useCLFIandBRCL)2145{2146cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet->getDataConstantSnippet(), cg());2147methodRegister = cg()->allocateRegister();2148}2149else2150{2151#if defined(TR_TARGET_64BIT)2152#if defined(J9ZOS390)2153if (comp()->getOption(TR_EnableRMODE64))2154#endif2155{2156// Reserve a trampoline for this interface call. 
Might not be used, but we only2157// sacrifice a little trampoline space for it (24-bytes).2158if (methodSymRef->getReferenceNumber() >= TR_S390numRuntimeHelpers)2159fej9->reserveTrampolineIfNecessary(comp(), methodSymRef, false);2160}2161#endif2162}21632164// 64 bit MultiSlot case21652166cursor = generateRILInstruction(cg(), TR::InstOpCode::LARL, callNode, RegRA, returnLocationLabel, cursor);21672168// We need a dummy label to hook dependencies.2169cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, paramSetupDummyLabel, preDeps, cursor);21702171if (useCLFIandBRCL)2172{2173// Update the IFC Snippet to note we are using CLFI/BRCL sequence.2174// This changes the format of the constants in the data snippet2175ifcSnippet->setUseCLFIandBRCL(true);21762177// We will generate CLFI / BRCL sequence to dispatch to target branches.2178// First CLFI/BRCL2179cursor = generateRILInstruction(cg(), TR::InstOpCode::CLFI, callNode, vftReg, 0x0, cursor); //compare against 021802181ifcSnippet->getDataConstantSnippet()->setFirstCLFI(cursor);21822183// BRCL2184cursor = generateRILInstruction(cg(), TR::InstOpCode::BRCL, callNode, static_cast<uint32_t>(0x0), reinterpret_cast<void*>(0x0), cursor);21852186for(i = 1; i < numInterfaceCallCacheSlots; i++)2187{2188// We will generate CLFI / BRCL sequence to dispatch to target branches.2189cursor = generateRILInstruction(cg(), TR::InstOpCode::CLFI, callNode, vftReg, 0x0, cursor); //compare against 021902191// BRCL2192cursor = generateRILInstruction(cg(), TR::InstOpCode::BRCL, callNode, static_cast<uint32_t>(0x0), reinterpret_cast<void*>(0x0), cursor);2193}2194}2195else2196{2197int32_t slotOffset = ifcSnippet->getDataConstantSnippet()->getFirstSlotOffset();2198for(i = 0; i < numInterfaceCallCacheSlots; i++)2199{2200TR::InstOpCode::Mnemonic cmpOp = TR::InstOpCode::getCmpLogicalOpCode();2201if (comp()->target().is64Bit() && TR::Compiler->om.generateCompressedObjectHeaders())2202cmpOp = TR::InstOpCode::CL;22032204//check 
if cached class matches the receiving object class2205cursor = generateRXInstruction(cg(), cmpOp, callNode, vftReg,2206generateS390MemoryReference(snippetReg, slotOffset, cg()), cursor);22072208//load cached methodEP from current cache slot2209cursor = generateRXInstruction(cg(), TR::InstOpCode::getLoadOpCode(), callNode, methodRegister,2210generateS390MemoryReference(snippetReg, slotOffset+TR::Compiler->om.sizeofReferenceAddress(), cg()), cursor);22112212cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, methodRegister, cursor);2213((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BER);22142215slotOffset += 2*TR::Compiler->om.sizeofReferenceAddress();2216}2217}22182219cursor = new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode, snippetReg, ifcSnippet,cursor, cg());22202221// Cache miss... Too bad.. go to the slow path through the interface call snippet2222cursor = generateS390RegInstruction(cg(), TR::InstOpCode::BCR, callNode, snippetReg, cursor);2223((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);22242225cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::dd, callNode,2226ifcSnippet->getDataConstantSnippet()->getSnippetLabel());22272228// Added NOP so that the pattern matching code in jit2itrg icallVMprJavaSendPatchupVirtual2229cursor = new (trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, callNode, cg());22302231if (!useCLFIandBRCL)2232postDeps->addPostCondition(methodRegister, TR::RealRegister::AssignAny);2233}22342235gcPoint = cursor;2236((TR::S390CallSnippet *) ifcSnippet)->setBranchInstruction(gcPoint);22372238cursor = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, returnLocationLabel, postDeps);22392240if (comp()->getOption(TR_enableInterfaceCallCachingSingleDynamicSlot))2241{2242cg()->stopUsingRegister(classMethodEPPairRegister);2243}2244else2245{2246if 
(!useCLFIandBRCL)2247cg()->stopUsingRegister(methodRegister);2248}2249}2250}2251else if (methodSymbol->isComputed())2252{2253TR::Register *targetAddress = cg()->evaluate(callNode->getFirstChild());2254if (targetAddress->getRegisterPair())2255targetAddress=targetAddress->getRegisterPair()->getLowOrder(); // on 31-bit, the top half doesn't matter, so discard it2256TR::Register *RegRA = dependencies->searchPostConditionRegister(getReturnAddressRegister());22572258gcPoint = generateRRInstruction(cg(), TR::InstOpCode::BASR, callNode, RegRA, targetAddress, dependencies);2259}2260else2261{2262TR_ASSERT(0, "Unknown methodSymbol kind");2263}22642265if (cg()->getSupportsRuntimeInstrumentation())2266TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, callNode);22672268TR_ASSERT( gcPoint, "Expected GC point for a virtual dispatch");2269gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());2270}22712272TR::Instruction *2273J9::Z::PrivateLinkage::buildDirectCall(TR::Node * callNode, TR::SymbolReference * callSymRef,2274TR::RegisterDependencyConditions * dependencies, int32_t argSize)2275{2276TR::Instruction * gcPoint = NULL;2277TR::MethodSymbol * callSymbol = callSymRef->getSymbol()->castToMethodSymbol();2278TR::ResolvedMethodSymbol * sym = callSymbol->getResolvedMethodSymbol();2279TR_ResolvedMethod * fem = (sym == NULL) ? NULL : sym->getResolvedMethod();2280bool myself;2281bool isJitInduceOSR = callSymRef->isOSRInductionHelper();2282myself = comp()->isRecursiveMethodTarget(fem);22832284TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());22852286#if defined(TR_TARGET_64BIT)2287#if defined(J9ZOS390)2288// Reserve a trampoline for this direct call only if it is not a helper call. 
It may not be used, but we only2289// sacrifice a little trampoline space for it.2290if (comp()->getOption(TR_EnableRMODE64))2291#endif2292{2293if (callSymRef->getReferenceNumber() >= TR_S390numRuntimeHelpers)2294{2295fej9->reserveTrampolineIfNecessary(comp(), callSymRef, false);2296}2297}2298#endif22992300if (comp()->getOption(TR_TraceCG))2301traceMsg(comp(), "Build Direct Call\n");23022303// generate call2304if (isJitInduceOSR)2305{2306TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg());2307TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg());23082309gcPoint = generateS390BranchInstruction(cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, snippetLabel, dependencies);2310TR::Snippet * snippet = new (trHeapMemory()) TR::S390HelperCallSnippet(cg(), callNode, snippetLabel,2311callSymRef?callSymRef:callNode->getSymbolReference(), reStartLabel, argSize);2312cg()->addSnippet(snippet);23132314auto* reStartInstruction = generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, reStartLabel);23152316// NOP is necessary due to confusion when resolving shared slots at a transition. The OSR infrastructure needs2317// to locate the GC map metadata for this transition point by examining the return address. The algorithm used2318// attempts to find the last instruction PC that is smaller than or equal to the return address. The reason we2319// do this is because under involuntary OSR we may generate the GC map on the return instruction itself. Several2320// of our snippets do this. As such we need to handle both cases, i.e. locating the GC map if its on the yield2321// point or if its on the return address. Hence a less than or equal to comparison is used. 
// We insert this NOP
// to avoid confusion as the instruction following this yield could also have a GC map registered and we must
// ensure we pick up the correct metadata.
      cg()->insertPad(callNode, reStartInstruction, 2, false);

      gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());

      return gcPoint;
      }

   if (!callSymRef->isUnresolved() && !callSymbol->isInterpreted() && ((comp()->compileRelocatableCode() && callSymbol->isHelper()) || !comp()->compileRelocatableCode()))
      {
      // direct call for resolved method

      gcPoint = generateDirectCall(cg(), callNode, myself ? true : false, callSymRef, dependencies);
      gcPoint->setDependencyConditions(dependencies);

      }
   else
      {
      if (cg()->getSupportsRuntimeInstrumentation())
         TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RIOFF, callNode);

      // call through snippet if the method is not resolved or not jitted yet
      TR::LabelSymbol * label = generateLabelSymbol(cg());
      TR::Snippet * snippet;

      if (callSymRef->isUnresolved() || (comp()->compileRelocatableCode() && !comp()->getOption(TR_UseSymbolValidationManager)))
         {
         snippet = new (trHeapMemory()) TR::S390UnresolvedCallSnippet(cg(), callNode, label, argSize);
         }
      else
         {
         snippet = new (trHeapMemory()) TR::S390J9CallSnippet(cg(), callNode, label, callSymRef, argSize);
         }

      cg()->addSnippet(snippet);

      gcPoint = generateSnippetCall(cg(), callNode, snippet, dependencies, callSymRef);

      if (cg()->getSupportsRuntimeInstrumentation())
         TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(cg(), TR::InstOpCode::RION, callNode);
      }

   if (comp()->getOption(TR_TraceCG))
      traceMsg(comp(), "\nGC Point at %p has preserved register map %x\n", gcPoint, getPreservedRegisterMapForGC());

   // The call is a GC point; record the preserved-register map for the stack walker.
   gcPoint->setNeedsGCMap(getPreservedRegisterMapForGC());
   return gcPoint;
   }

////////////////////////////////////////////////////////////////////////////////
// callPreJNICallOffloadCheck
//
// Emit a branch to an out-of-line helper-call snippet that invokes the
// TR_S390jitPreJNICallOffloadCheck runtime helper, then define the restart
// label the snippet returns to.  The branch carries an empty GC map.
// Callers use this before dispatching a user native (see the Java offload
// handling in buildDirectDispatch).
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::callPreJNICallOffloadCheck(TR::Node * callNode)
   {
   TR::CodeGenerator * codeGen = cg();
   TR::LabelSymbol * offloadOffRestartLabel = generateLabelSymbol(codeGen);
   TR::LabelSymbol * offloadOffSnippetLabel = generateLabelSymbol(codeGen);
   TR::SymbolReference * offloadOffSymRef = codeGen->symRefTab()->findOrCreateRuntimeHelper(TR_S390jitPreJNICallOffloadCheck);

   TR::Instruction *gcPoint = generateS390BranchInstruction(
      codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, offloadOffSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
      offloadOffSnippetLabel, offloadOffSymRef, offloadOffRestartLabel));
   generateS390LabelInstruction(codeGen, TR::InstOpCode::label, callNode, offloadOffRestartLabel);
   }

////////////////////////////////////////////////////////////////////////////////
// callPostJNICallOffloadCheck
//
// Counterpart of callPreJNICallOffloadCheck: emit a branch to a snippet that
// calls the TR_S390jitPostJNICallOffloadCheck helper after the native call
// returns, then define the restart label.
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::callPostJNICallOffloadCheck(TR::Node * callNode)
   {
   TR::CodeGenerator * codeGen = cg();
   TR::LabelSymbol * offloadOnRestartLabel = generateLabelSymbol(codeGen);
   TR::LabelSymbol * offloadOnSnippetLabel = generateLabelSymbol(codeGen);

   TR::Instruction *gcPoint = generateS390BranchInstruction(
      codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, callNode, offloadOnSnippetLabel);
   gcPoint->setNeedsGCMap(0);
   TR::SymbolReference * offloadOnSymRef = codeGen->symRefTab()->findOrCreateRuntimeHelper(TR_S390jitPostJNICallOffloadCheck);
   codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
      offloadOnSnippetLabel, offloadOnSymRef, offloadOnRestartLabel));
   generateS390LabelInstruction(codeGen, TR::InstOpCode::label, callNode, offloadOnRestartLabel);
   }

////////////////////////////////////////////////////////////////////////////////
// collapseJNIReferenceFrame
//
// After a JNI call returns: test the call-out frame flags on the Java stack
// against constJNIReferenceFrameAllocatedFlags (via an AND that sets the
// condition code) and, if the JNI reference pool was allocated, branch to a
// snippet that calls the TR_S390collapseJNIReferenceFrame helper to tear it
// down.  tempReg is clobbered as a scratch register.
////////////////////////////////////////////////////////////////////////////////
void J9::Z::PrivateLinkage::collapseJNIReferenceFrame(TR::Node * callNode,
   TR::RealRegister * javaStackPointerRealRegister,
   TR::Register * javaLitPoolVirtualRegister,
   TR::Register * tempReg)
   {
   // must check to see if the ref pool was used and clean them up if so--or we
   // leave a bunch of pinned garbage behind that screws up the gc quality forever
   TR::CodeGenerator * codeGen = cg();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   intptr_t flagValue = fej9->constJNIReferenceFrameAllocatedFlags();
   TR::LabelSymbol * refPoolRestartLabel = generateLabelSymbol(codeGen);
   TR::LabelSymbol * refPoolSnippetLabel = generateLabelSymbol(codeGen);

   genLoadAddressConstant(codeGen, callNode, flagValue, tempReg, NULL, NULL, javaLitPoolVirtualRegister);

   // AND the "allocated" mask with the frame flags slot; a non-zero result
   // (COND_BNE) means the reference pool exists and must be collapsed.
   generateRXInstruction(codeGen, TR::InstOpCode::getAndOpCode(), callNode, tempReg,
      new (trHeapMemory()) TR::MemoryReference(javaStackPointerRealRegister, (int32_t)fej9->constJNICallOutFrameFlagsOffset(), codeGen));
   TR::Instruction *gcPoint =
      generateS390BranchInstruction(codeGen, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, refPoolSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   TR::SymbolReference * collapseSymRef = cg()->symRefTab()->findOrCreateRuntimeHelper(TR_S390collapseJNIReferenceFrame);
   codeGen->addSnippet(new (trHeapMemory()) TR::S390HelperCallSnippet(codeGen, callNode,
      refPoolSnippetLabel, collapseSymRef, refPoolRestartLabel));
   generateS390LabelInstruction(cg(), TR::InstOpCode::label, callNode, refPoolRestartLabel);
   }

//JNI Callout frame
//
//           |-----|
//           |     |  <-- constJNICallOutFrameSpecialTag()  (For jni thunk, constJNICallOutFrameInvisibleTag())
//  16/32    |-----|
//           |     |  <-- savedPC ( we don't save anything here
//  12/24    |-----|
//           |     |  <-- return address for JNI call
//  8/16     |-----|
//           |     |  <-- constJNICallOutFrameFlags()
//  4/8      -----
//           |     |  <-- ramMethod for the native method
//           -----    <-- stack pointer
//

// release vm access - use hardware registers because of the control flow
// At this point: arguments for the native routine are all in place already, i.e., if there are
// more than 24 byte worth of arguments, some of them are on the
// stack. However,
// we potentially go out to call a helper before jumping to the native.
// but the helper call saves and restores all regs

////////////////////////////////////////////////////////////////////////////////
// setupJNICallOutFrame
//
// Build the JNI call-out frame on the Java stack:
//   1. Buy 5 slots (LAY javaSP, -5*sizeof(intptr_t)) and shift the long-disp
//      slot offset accordingly.
//   2. Seed the vmthread pc/literals/jitStackFrameFlags fields in one MVC from
//      the JNI call data snippet (the TR_ASSERT below depends on those three
//      fields being contiguous in that order).
//   3. Publish the new Java SP to the vmthread.
//   4. Record the frame contents (ramMethod, flags, return-address label,
//      savedPC, tag bits) in the snippet and MVC the whole block onto the
//      newly reserved stack slots.
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::setupJNICallOutFrame(TR::Node * callNode,
   TR::RealRegister * javaStackPointerRealRegister,
   TR::Register * methodMetaDataVirtualRegister,
   TR::LabelSymbol * returnFromJNICallLabel,
   TR::S390JNICallDataSnippet *jniCallDataSnippet)
   {
   TR::CodeGenerator * codeGen = cg();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   TR::ResolvedMethodSymbol * cs = callNode->getSymbol()->castToResolvedMethodSymbol();
   TR_ResolvedMethod * resolvedMethod = cs->getResolvedMethod();
   TR::Instruction * cursor = NULL;

   // Negative: the Java stack grows towards lower addresses.
   int32_t stackAdjust = (-5 * (int32_t)sizeof(intptr_t));

   cursor = generateRXInstruction(codeGen, TR::InstOpCode::LAY, callNode, javaStackPointerRealRegister, generateS390MemoryReference(javaStackPointerRealRegister, stackAdjust, codeGen), cursor);

   // stackAdjust is negative, so this moves the long-disp slot further away
   // from the new (lower) stack pointer.
   setOffsetToLongDispSlot( getOffsetToLongDispSlot() - stackAdjust );

   // set up Java Thread
   intptr_t constJNICallOutFrameType = fej9->constJNICallOutFrameType();
   TR_ASSERT( constJNICallOutFrameType < MAX_IMMEDIATE_VAL, "OMR::Z::Linkage::setupJNICallOutFrame constJNICallOutFrameType is too big for MVHI");

   TR_ASSERT((fej9->thisThreadGetJavaFrameFlagsOffset() == fej9->thisThreadGetJavaLiteralsOffset() + TR::Compiler->om.sizeofReferenceAddress()) &&
      fej9->thisThreadGetJavaLiteralsOffset() == fej9->thisThreadGetJavaPCOffset() + TR::Compiler->om.sizeofReferenceAddress()
      , "The vmthread field order should be pc,literals,jitStackFrameFlags\n");

   jniCallDataSnippet->setPC(constJNICallOutFrameType);
   jniCallDataSnippet->setLiterals(0);
   jniCallDataSnippet->setJitStackFrameFlags(0);

   // Copy pc/literals/jitStackFrameFlags (3 contiguous slots) from the snippet
   // into the vmthread in a single MVC (length operand is bytes-1).
   generateSS1Instruction(cg(), TR::InstOpCode::MVC, callNode, 3*(TR::Compiler->om.sizeofReferenceAddress()) - 1,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetJavaPCOffset(), codeGen),
      new (trHeapMemory()) TR::MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getPCOffset(), codeGen));

   // store out jsp
   generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetJavaSPOffset(), codeGen));

   // JNI Callout Frame setup
   // 0(sp) : RAM method for the native
   intptr_t ramMethod = (uintptr_t) resolvedMethod->resolvedMethodAddress();
   jniCallDataSnippet->setRAMMethod(ramMethod);

   // 4[8](sp) : flags
   intptr_t flags = fej9->constJNICallOutFrameFlags();
   jniCallDataSnippet->setJNICallOutFrameFlags(flags);

   // 8[16](sp) : return address (savedCP)
   jniCallDataSnippet->setReturnFromJNICall(returnFromJNICallLabel);

   // 12[24](sp) : savedPC
   jniCallDataSnippet->setSavedPC(0);

   // 16[32](sp) : tag bits (savedA0)
   intptr_t tagBits = fej9->constJNICallOutFrameSpecialTag();
   // if the current method is simply a wrapper for the JNI call, hide the call-out stack frame
   if (resolvedMethod == comp()->getCurrentMethod())
      {
      tagBits |= fej9->constJNICallOutFrameInvisibleTag();
      }

   jniCallDataSnippet->setTagBits(tagBits);

   // Copy the five prepared frame slots from the snippet onto the Java stack
   // (-stackAdjust == frame size in bytes; MVC length operand is bytes-1).
   generateSS1Instruction(cg(), TR::InstOpCode::MVC, callNode, -stackAdjust - 1,
      new (trHeapMemory()) TR::MemoryReference(javaStackPointerRealRegister, 0, codeGen),
      new (trHeapMemory()) TR::MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getJNICallOutFrameDataOffset(), codeGen));

   }

/**
 * release vm access - use hardware registers because of the control flow
 * At this point: arguments for the native routine are all in place already, i.e., if there are
 * more than 24 byte worth of arguments, some of them are on the stack.
 */
/*
 * However,
 * we potentially go out to call a helper before jumping to the native.
 * but the helper call saves and restores all regs
 */

////////////////////////////////////////////////////////////////////////////////
// releaseVMAccessMask
//
// Release VM access before branching to the native, using a compare-and-swap
// (CS) spin loop on vmthread->publicFlags:
//   - AND the current flags with constReleaseVMAccessOutOfLineMask; if any of
//     those bits are set, take the slow path snippet (release helper call).
//   - Otherwise AND with constReleaseVMAccessMask and CS the result back in;
//     on CS failure (flags changed concurrently) loop back to loopHead.
// The mask constants are published into the JNI call data snippet so the
// generated code can reference them via its base register.
// Register dependencies for methodMetaDataVirtualRegister, methodAddressReg
// and javaLitOffsetReg are mirrored from `deps` onto the internal control
// flow's end label so the register assigner keeps them stable across the loop.
////////////////////////////////////////////////////////////////////////////////
void J9::Z::JNILinkage::releaseVMAccessMask(TR::Node * callNode,
   TR::Register * methodMetaDataVirtualRegister, TR::Register * methodAddressReg, TR::Register * javaLitOffsetReg,
   TR::S390JNICallDataSnippet * jniCallDataSnippet, TR::RegisterDependencyConditions * deps)
   {
   TR::LabelSymbol * loopHead = generateLabelSymbol(self()->cg());
   // NOTE(review): longReleaseLabel appears unused in this function.
   TR::LabelSymbol * longReleaseLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * longReleaseSnippetLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(self()->cg());
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());

   intptr_t aValue = fej9->constReleaseVMAccessMask(); //0xfffffffffffdffdf
   jniCallDataSnippet->setConstReleaseVMAccessMask(aValue);

   // methodAddressReg holds the "old" publicFlags value for the CS below.
   generateRXInstruction(self()->cg(), TR::InstOpCode::getLoadOpCode(), callNode, methodAddressReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));

   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, loopHead);
   loopHead->setStartInternalControlFlow();

   aValue = fej9->constReleaseVMAccessOutOfLineMask(); //0x340001
   jniCallDataSnippet->setConstReleaseVMAccessOutOfLineMask(aValue);

   // Slow-path test: any out-of-line bit set in publicFlags -> helper call.
   generateRRInstruction(self()->cg(), TR::InstOpCode::getLoadRegOpCode(), callNode, javaLitOffsetReg, methodAddressReg);
   generateRXInstruction(self()->cg(), TR::InstOpCode::getAndOpCode(), callNode, javaLitOffsetReg,
      generateS390MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getConstReleaseVMAccessOutOfLineMaskOffset(), self()->cg()));

   TR::Instruction * gcPoint = (TR::Instruction *) generateS390BranchInstruction(
      self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, longReleaseSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   // Fast path: clear the access bits and try to publish with CS.
   generateRRInstruction(self()->cg(), TR::InstOpCode::getLoadRegOpCode(), callNode, javaLitOffsetReg, methodAddressReg);
   generateRXInstruction(self()->cg(), TR::InstOpCode::getAndOpCode(), callNode, javaLitOffsetReg,
      generateS390MemoryReference(jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet->getConstReleaseVMAccessMaskOffset(), self()->cg()));
   generateRSInstruction(self()->cg(), TR::InstOpCode::getCmpAndSwapOpCode(), callNode, methodAddressReg, javaLitOffsetReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));

   //get existing post conditions on the registers parameters and create a new post cond for the internal control flow
   TR::RegisterDependencyConditions * postDeps = new (self()->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, self()->cg());
   TR::RealRegister::RegNum realReg;
   int32_t regPos = deps->searchPostConditionRegisterPos(methodMetaDataVirtualRegister);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(methodMetaDataVirtualRegister, realReg);
      }
   else
      postDeps->addPostCondition(methodMetaDataVirtualRegister, TR::RealRegister::AssignAny);

   regPos = deps->searchPostConditionRegisterPos(methodAddressReg);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(methodAddressReg, realReg);
      }
   else
      postDeps->addPostCondition(methodAddressReg, TR::RealRegister::AssignAny);

   regPos = deps->searchPostConditionRegisterPos(javaLitOffsetReg);
   if (regPos >= 0)
      {
      realReg = deps->getPostConditions()->getRegisterDependency(regPos)->getRealRegister();
      postDeps->addPostCondition(javaLitOffsetReg, realReg);
      }
   else
      postDeps->addPostCondition(javaLitOffsetReg, TR::RealRegister::AssignAny);

   // CS failed (publicFlags changed under us): retry the whole sequence.
   generateS390BranchInstruction(self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, loopHead);

   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, cFlowRegionEnd, postDeps);
   cFlowRegionEnd->setEndInternalControlFlow();

   self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, longReleaseSnippetLabel,
      comp()->getSymRefTab()->findOrCreateReleaseVMAccessSymbolRef(comp()->getJittedMethodSymbol()), cFlowRegionEnd));
   // end of release vm access (spin lock)
   }

////////////////////////////////////////////////////////////////////////////////
// acquireVMAccessMask
//
// Re-acquire VM access after the native call: attempt a single CS on
// vmthread->publicFlags swapping 0 -> constAcquireVMAccessOutOfLineMask; if
// the CS fails (flags were not 0), branch to a snippet that calls the acquire
// helper.  No spin loop here — the helper handles contention.
////////////////////////////////////////////////////////////////////////////////
void J9::Z::JNILinkage::acquireVMAccessMask(TR::Node * callNode, TR::Register * javaLitPoolVirtualRegister,
   TR::Register * methodMetaDataVirtualRegister, TR::Register * methodAddressReg, TR::Register * javaLitOffsetReg)
   {
   // start of acquire vm access

   // WARNING:
   // As java stack is not yet restored , Make sure that no instruction in this function
   // should use stack.
   // If instruction uses literal pool, it must only be to do load, and such instruction's memory reference should be marked MemRefMustNotSpill
   // so that in case of long disp, we will reuse the target reg as a scratch reg

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());
   intptr_t aValue = fej9->constAcquireVMAccessOutOfLineMask();

   TR::Instruction * loadInstr = (TR::Instruction *) genLoadAddressConstant(self()->cg(), callNode, aValue, methodAddressReg, NULL, NULL, javaLitPoolVirtualRegister);
   // Per the warning above: if the constant load came out as an RX-form
   // instruction, its memory reference must never spill to the stack.
   switch (loadInstr->getKind())
      {
      case TR::Instruction::IsRX:
      case TR::Instruction::IsRXE:
      case TR::Instruction::IsRXY:
      case TR::Instruction::IsRXYb:
         ((TR::S390RXInstruction *)loadInstr)->getMemoryReference()->setMemRefMustNotSpill();
         break;
      default:
         break;
      }

   // Zero the comparand: CS succeeds only if publicFlags is currently 0.
   generateRRInstruction(self()->cg(), TR::InstOpCode::getXORRegOpCode(), callNode, javaLitOffsetReg, javaLitOffsetReg);

   // NOTE(review): longAcquireLabel appears unused in this function.
   TR::LabelSymbol * longAcquireLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * longAcquireSnippetLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * acquireDoneLabel = generateLabelSymbol(self()->cg());
   generateRSInstruction(cg(), TR::InstOpCode::getCmpAndSwapOpCode(), callNode, javaLitOffsetReg, methodAddressReg,
      generateS390MemoryReference(methodMetaDataVirtualRegister,
         (int32_t)fej9->thisThreadGetPublicFlagsOffset(), self()->cg()));
   TR::Instruction *gcPoint = (TR::Instruction *) generateS390BranchInstruction(self()->cg(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, callNode, longAcquireSnippetLabel);
   gcPoint->setNeedsGCMap(0);

   self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, longAcquireSnippetLabel,
      comp()->getSymRefTab()->findOrCreateAcquireVMAccessSymbolRef(comp()->getJittedMethodSymbol()), acquireDoneLabel));
   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, acquireDoneLabel);
   // end of acquire vm access
   }

#ifdef J9VM_INTERP_ATOMIC_FREE_JNI

/**
 * \brief
 *    Build the atomic-free release VM access sequence for JNI dispatch.
 *
 * \details
 *    This is the atomic-free JNI design and works in conjunction with VMAccess.cpp atomic-free JNI changes.
 *
 *    In the JNI dispatch sequence, a release-vm-access action is performed before the branch to native code; and an acquire-vm-access
 *    is done after the thread execution returns from the native call. Both of the actions require synchronization between the
 *    application thread and the GC thread. This was previously implemented with the atomic compare-and-swap (CS) instruction, which is slow in nature.
 *
 *    To speed up the JNI acquire and release access actions (the fast path), a store-load sequence is generated by this evaluator
 *    to replace the CS instruction. Normally, the fast path ST-LD are not serialized and can be done out-of-order for higher performance.
 */
/*
 * Synchronization
 * burden is offloaded to the slow path.
 *
 * The slow path is where a thread tries to acquire exclusive vm access. The slow path should be taken proportionally less often than the fast
 * path. Should the slow path be taken, that thread will be penalized by calling a slow flushProcessWriteBuffer() routine so that all threads
 * can momentarily synchronize memory writes. Having fast and slow paths makes the atomic-free JNI design asymmetric.
 *
 * Note that the z/OS currently does not support the asymmetric algorithm. Hence, a serialization instruction is required between the
 * store and the load.
 *
 */
void
J9::Z::JNILinkage::releaseVMAccessMaskAtomicFree(TR::Node * callNode,
   TR::Register * methodMetaDataVirtualRegister,
   TR::Register * tempReg1)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)fe();
   TR::CodeGenerator* cg = self()->cg();

   // Store a 1 into vmthread->inNative
   generateSILInstruction(cg, TR::InstOpCode::getMoveHalfWordImmOpCode(), callNode,
      generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg),
      1);

#if !defined(J9VM_INTERP_ATOMIC_FREE_JNI_USES_FLUSH)
   // Serialize the ST-LD pair when the platform cannot rely on the
   // asymmetric flushProcessWriteBuffer() scheme (see comment above).
   generateSerializationInstruction(cg, callNode, NULL);
#endif

   // Compare vmthread public flag with J9_PUBLIC_FLAGS_VM_ACCESS
   generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, tempReg1,
      generateS390MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetPublicFlagsOffset(), cg));

   TR::LabelSymbol * longReleaseSnippetLabel = generateLabelSymbol(cg);
   TR::LabelSymbol * longReleaseRestartLabel = generateLabelSymbol(cg);

   TR_ASSERT_FATAL(J9_PUBLIC_FLAGS_VM_ACCESS >= MIN_IMMEDIATE_BYTE_VAL && J9_PUBLIC_FLAGS_VM_ACCESS <= MAX_IMMEDIATE_BYTE_VAL, "VM access bit must be immediate");

   // Anything beyond the plain VM-access bit -> slow path helper call.
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, tempReg1, J9_PUBLIC_FLAGS_VM_ACCESS, TR::InstOpCode::COND_BNE, longReleaseSnippetLabel, false);

   cg->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(cg,
      callNode, longReleaseSnippetLabel,
      comp()->getSymRefTab()->findOrCreateReleaseVMAccessSymbolRef(comp()->getJittedMethodSymbol()),
      longReleaseRestartLabel));

   generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, longReleaseRestartLabel);
   }

/**
 * \brief
 *    Build the atomic-free acquire VM access sequence for JNI dispatch.
 *
 * */
void
J9::Z::JNILinkage::acquireVMAccessMaskAtomicFree(TR::Node * callNode,
   TR::Register * methodMetaDataVirtualRegister,
   TR::Register * tempReg1)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)fe();
   TR::CodeGenerator* cg = self()->cg();

   // Zero vmthread->inNative, which is a UDATA field
   generateSS1Instruction(cg, TR::InstOpCode::XC, callNode, TR::Compiler->om.sizeofReferenceAddress() - 1,
      generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg),
      generateS390MemoryReference(methodMetaDataVirtualRegister, offsetof(J9VMThread, inNative), cg));

#if !defined(J9VM_INTERP_ATOMIC_FREE_JNI_USES_FLUSH)
   generateSerializationInstruction(cg, callNode, NULL);
#endif

   // Compare vmthread public flag with J9_PUBLIC_FLAGS_VM_ACCESS
   generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, tempReg1,
      generateS390MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetPublicFlagsOffset(), cg));

   TR::LabelSymbol * longAcquireSnippetLabel = generateLabelSymbol(cg);
   TR::LabelSymbol * longAcquireRestartLabel = generateLabelSymbol(cg);

   TR_ASSERT_FATAL(J9_PUBLIC_FLAGS_VM_ACCESS >= MIN_IMMEDIATE_BYTE_VAL && J9_PUBLIC_FLAGS_VM_ACCESS <= MAX_IMMEDIATE_BYTE_VAL, "VM access bit must be immediate");

   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, tempReg1, J9_PUBLIC_FLAGS_VM_ACCESS, TR::InstOpCode::COND_BNE, longAcquireSnippetLabel, false);

   cg->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(cg,
      callNode, longAcquireSnippetLabel,
      comp()->getSymRefTab()->findOrCreateAcquireVMAccessSymbolRef(comp()->getJittedMethodSymbol()),
      longAcquireRestartLabel));

   generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, longAcquireRestartLabel);
   }
#endif

////////////////////////////////////////////////////////////////////////////////
// checkException
//
// After the native returns: load vmthread->currentException and, if non-zero,
// branch to a snippet that calls the throw-current-exception helper.  tempReg
// is clobbered as a scratch register.
////////////////////////////////////////////////////////////////////////////////
void J9::Z::JNILinkage::checkException(TR::Node * callNode,
   TR::Register * methodMetaDataVirtualRegister,
   TR::Register * tempReg)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   // check exception
   TR::LabelSymbol * exceptionRestartLabel = generateLabelSymbol(self()->cg());
   TR::LabelSymbol * exceptionSnippetLabel = generateLabelSymbol(self()->cg());
   generateRXInstruction(self()->cg(), TR::InstOpCode::getLoadOpCode(), callNode, tempReg,
      new (self()->trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, fej9->thisThreadGetCurrentExceptionOffset(), self()->cg()));

   TR::Instruction *gcPoint = generateS390CompareAndBranchInstruction(self()->cg(),
      TR::InstOpCode::getCmpOpCode(), callNode, tempReg, 0, TR::InstOpCode::COND_BNE, exceptionSnippetLabel, false, true);
   gcPoint->setNeedsGCMap(0);

   self()->cg()->addSnippet(new (self()->trHeapMemory()) TR::S390HelperCallSnippet(self()->cg(), callNode, exceptionSnippetLabel,
      comp()->getSymRefTab()->findOrCreateThrowCurrentExceptionSymbolRef(comp()->getJittedMethodSymbol()), exceptionRestartLabel));
   generateS390LabelInstruction(self()->cg(), TR::InstOpCode::label, callNode, exceptionRestartLabel);
   }

////////////////////////////////////////////////////////////////////////////////
// processJNIReturnValue
//
// Post-process the native's return value:
//   - Address returns (unless the method opts out via jniDoNotWrapObjects):
//     unwrap the JNI reference by loading through it when it is non-NULL.
//   - Boolean returns (Int8 + isReturnTypeBool): normalize any non-zero value
//     to 1, using a load-on-condition on z13+ or a compare-and-branch sequence
//     on older hardware.
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::JNILinkage::processJNIReturnValue(TR::Node * callNode,
   TR::CodeGenerator* cg,
   TR::Register* javaReturnRegister)
   {
   auto resolvedMethod = callNode->getSymbol()->castToResolvedMethodSymbol()->getResolvedMethod();
   auto returnType = resolvedMethod->returnType();
   const bool isUnwrapAddressReturnValue = !((TR_J9VMBase *)fe())->jniDoNotWrapObjects(resolvedMethod)
      && (returnType == TR::Address);

   TR::LabelSymbol *cFlowRegionStart = NULL, *cFlowRegionEnd = NULL;

   if (isUnwrapAddressReturnValue)
      {
      cFlowRegionStart = generateLabelSymbol(cg);
      cFlowRegionEnd = generateLabelSymbol(cg);

      generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionStart);
      cFlowRegionStart->setStartInternalControlFlow();
      // NULL stays NULL; otherwise dereference the JNI handle.
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, javaReturnRegister, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd);
      generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), callNode, javaReturnRegister,
         generateS390MemoryReference(javaReturnRegister, 0, cg));

      generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionEnd);
      cFlowRegionEnd->setEndInternalControlFlow();
      }
   else if ((returnType == TR::Int8) && comp()->getSymRefTab()->isReturnTypeBool(callNode->getSymbolReference()))
      {
      if (comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13))
         {
         // Branchless: compare with 0, then load-immediate-on-condition 1 if non-zero.
         generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), callNode, javaReturnRegister, 0);
         generateRIEInstruction(cg, comp()->target().is64Bit() ? TR::InstOpCode::LOCGHI : TR::InstOpCode::LOCHI,
            callNode, javaReturnRegister, 1, TR::InstOpCode::COND_BNE);
         }
      else
         {
         cFlowRegionStart = generateLabelSymbol(cg);
         cFlowRegionEnd = generateLabelSymbol(cg);

         generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionStart);
         cFlowRegionStart->setStartInternalControlFlow();
         generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), callNode, javaReturnRegister,
            0, TR::InstOpCode::COND_BE, cFlowRegionEnd);
         generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), callNode, javaReturnRegister, 1);
         generateS390LabelInstruction(cg, TR::InstOpCode::label, callNode, cFlowRegionEnd);
         cFlowRegionEnd->setEndInternalControlFlow();
         }
      }
   }

////////////////////////////////////////////////////////////////////////////////
// buildDirectDispatch
//
// Build the full JNI direct-dispatch sequence for a resolved native call:
// argument setup, call-out frame creation, VM access release/acquire, the
// native call itself, and all post-call bookkeeping.
////////////////////////////////////////////////////////////////////////////////
TR::Register * J9::Z::JNILinkage::buildDirectDispatch(TR::Node * callNode)
   {
   if (comp()->getOption(TR_TraceCG))
      traceMsg(comp(), "\nbuildDirectDispatch\n");

   TR::CodeGenerator * codeGen = cg();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
   TR::SystemLinkage * systemLinkage = (TR::SystemLinkage *) cg()->getLinkage(TR_System);
   TR::LabelSymbol * returnFromJNICallLabel = generateLabelSymbol(cg());
   TR::RegisterDependencyConditions * deps;

   // Extra dependency for killing volatile high registers (see KillVolHighRegs)
   int32_t numDeps = systemLinkage->getNumberOfDependencyGPRegisters() + 1;

   if (cg()->getSupportsVectorRegisters())
      numDeps += 32; //VRFs need to be spilled

   // 70896 Remove DEPEND instruction and merge glRegDeps to call deps
   // *Speculatively* increase numDeps for dependencies from glRegDeps
   // which is added right before callNativeFunction.
   // GlobalRegDeps should not add any more children after here.
   TR::RegisterDependencyConditions *glRegDeps;
   TR::Node *GlobalRegDeps;

   bool hasGlRegDeps = (callNode->getNumChildren() >= 1) &&
      (callNode->getChild(callNode->getNumChildren()-1)->getOpCodeValue() ==
TR::GlRegDeps);
   if(hasGlRegDeps)
      {
      GlobalRegDeps = callNode->getChild(callNode->getNumChildren()-1);
      numDeps += GlobalRegDeps->getNumChildren();
      }

   deps = generateRegisterDependencyConditions(numDeps, numDeps, cg());
   int64_t killMask = -1;
   TR::Register *vftReg = NULL;
   TR::S390JNICallDataSnippet * jniCallDataSnippet = NULL;
   TR::RealRegister * javaStackPointerRealRegister = getStackPointerRealRegister();
   TR::RealRegister * methodMetaDataRealRegister = getMethodMetaDataRealRegister();
   TR::RealRegister * javaLitPoolRealRegister = getLitPoolRealRegister();

   TR::Register * javaLitPoolVirtualRegister = javaLitPoolRealRegister;
   TR::Register * methodMetaDataVirtualRegister = methodMetaDataRealRegister;

   TR::Register * methodAddressReg = NULL;
   TR::Register * javaLitOffsetReg = NULL;
   intptr_t targetAddress = (intptr_t) 0;
   TR::DataType returnType = TR::NoType;
   int8_t numTempRegs = -1;
   comp()->setHasNativeCall();

   if (codeGen->getSupportsRuntimeInstrumentation())
      TR::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(codeGen, TR::InstOpCode::RIOFF, callNode);

   TR::ResolvedMethodSymbol * cs = callNode->getSymbol()->castToResolvedMethodSymbol();
   TR_ResolvedMethod * resolvedMethod = cs->getResolvedMethod();
   // Per-method JNI properties queried from the FE decide which parts of the
   // dispatch sequence are emitted below.
   bool isFastJNI = true;
   bool isPassJNIThread = !fej9->jniDoNotPassThread(resolvedMethod);
   bool isPassReceiver = !fej9->jniDoNotPassReceiver(resolvedMethod);
   bool isJNIGCPoint = !fej9->jniNoGCPoint(resolvedMethod);
   bool isJNICallOutFrame = !fej9->jniNoNativeMethodFrame(resolvedMethod);
   bool isReleaseVMAccess = !fej9->jniRetainVMAccess(resolvedMethod);
   bool isJavaOffLoadCheck = false;
   bool isAcquireVMAccess = isReleaseVMAccess;
   bool isCollapseJNIReferenceFrame = !fej9->jniNoSpecialTeardown(resolvedMethod);
   bool isCheckException = !fej9->jniNoExceptionsThrown(resolvedMethod);
   bool isKillAllUnlockedGPRs = isJNIGCPoint;

   killMask = killAndAssignRegister(killMask, deps, &methodAddressReg, (comp()->target().isLinux()) ? TR::RealRegister::GPR1 : TR::RealRegister::GPR9 , codeGen, true);
   killMask = killAndAssignRegister(killMask, deps, &javaLitOffsetReg, TR::RealRegister::GPR11, codeGen, true);

   targetAddress = (intptr_t) resolvedMethod->startAddressForJNIMethod(comp());
   returnType = resolvedMethod->returnType();

   static char * disablePureFn = feGetEnv("TR_DISABLE_PURE_FUNC_RECOGNITION");
   // Direct native calls skip all VM bookkeeping around the call.
   if (cs->canDirectNativeCall())
      {
      isReleaseVMAccess = false;
      isAcquireVMAccess = false;
      isKillAllUnlockedGPRs = false;
      isJNIGCPoint = false;
      isCheckException = false;
      isJNICallOutFrame = false;
      }
   // Recognized pure functions keep VM access and skip the exception check.
   if (cs->isPureFunction() && (disablePureFn == NULL))
      {
      isReleaseVMAccess=false;
      isAcquireVMAccess=false;
      isCheckException = false;
      }
   if ((fej9->isJavaOffloadEnabled() && static_cast<TR_ResolvedJ9Method *>(resolvedMethod)->methodIsNotzAAPEligible()) || (fej9->CEEHDLREnabled() && isJNICallOutFrame))
      isJavaOffLoadCheck = true;

   if (comp()->getOption(TR_TraceCG))
      // NOTE(review): isJNIGCPoint appears twice in this trace line; the last
      // field was likely meant to be a different flag (e.g. isCheckException)
      // — confirm before touching the format string.
      traceMsg(comp(), "isPassReceiver: %d, isPassJNIThread: %d, isJNIGCPoint: %d, isJNICallOutFrame:%d, isReleaseVMAccess: %d, isCollapseJNIReferenceFrame: %d, isJNIGCPoint: %d\n", isPassReceiver, isPassJNIThread, isJNIGCPoint, isJNICallOutFrame, isReleaseVMAccess, isCollapseJNIReferenceFrame, isJNIGCPoint);

   if (isPassJNIThread)
      {
      //First param for JNI call in JNIEnv pointer
      TR::Register * jniEnvRegister = cg()->allocateRegister();
      deps->addPreCondition(jniEnvRegister, systemLinkage->getIntegerArgumentRegister(0));
      generateRRInstruction(codeGen, TR::InstOpCode::getLoadRegOpCode(), callNode,
         jniEnvRegister, methodMetaDataVirtualRegister);
      }

   // JNI dispatch does not allow for any object references to survive in preserved registers as they are saved onto
   // the system stack, which the JVM stack walker has no awareness of. Hence we need to ensure that all object
   // references are evicted from preserved registers at the call site.
   TR::Register* tempReg = cg()->allocateRegister();

   deps->addPostCondition(tempReg, TR::RealRegister::KillVolHighRegs);
   cg()->stopUsingRegister(tempReg);

   setupRegisterDepForLinkage(callNode, TR_JNIDispatch, deps, killMask, systemLinkage, GlobalRegDeps, hasGlRegDeps, &methodAddressReg, javaLitOffsetReg);

   setupBuildArgForLinkage(callNode, TR_JNIDispatch, deps, isFastJNI, isPassReceiver, killMask, GlobalRegDeps, hasGlRegDeps, systemLinkage);

   // The JNI call data snippet holds the constants/frame data the generated
   // sequence references via GPR12 (LARL of the snippet address).
   if (isJNICallOutFrame || isReleaseVMAccess)
      {
      TR::Register * JNISnippetBaseReg = NULL;
      killMask = killAndAssignRegister(killMask, deps, &JNISnippetBaseReg, TR::RealRegister::GPR12, codeGen, true);
      jniCallDataSnippet = new (trHeapMemory()) TR::S390JNICallDataSnippet(cg(), callNode);
      cg()->addSnippet(jniCallDataSnippet);
      jniCallDataSnippet->setBaseRegister(JNISnippetBaseReg);
      new (trHeapMemory()) TR::S390RILInstruction(TR::InstOpCode::LARL, callNode,
         jniCallDataSnippet->getBaseRegister(), jniCallDataSnippet, codeGen);
      jniCallDataSnippet->setTargetAddress(targetAddress);
      }

   if (isJNICallOutFrame)
      {
      // Sets up PC, Stack pointer and literals offset slots.
      setupJNICallOutFrame(callNode, javaStackPointerRealRegister, methodMetaDataVirtualRegister,
         returnFromJNICallLabel, jniCallDataSnippet);
      }
   else
      {
      // store java stack pointer
      generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,
         new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));

      auto* literalOffsetMemoryReference = new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaLiteralsOffset(), codeGen);

      // Set up literal offset slot to zero
      generateSILInstruction(codeGen, TR::InstOpCode::getMoveHalfWordImmOpCode(), callNode, literalOffsetMemoryReference, 0);
      }

   if (isReleaseVMAccess)
      {
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
      releaseVMAccessMaskAtomicFree(callNode, methodMetaDataVirtualRegister, methodAddressReg);
#else
      releaseVMAccessMask(callNode, methodMetaDataVirtualRegister, methodAddressReg, javaLitOffsetReg, jniCallDataSnippet, deps);
#endif
      }

   //Turn off Java Offload if calling user native
   if (isJavaOffLoadCheck)
      {
      callPreJNICallOffloadCheck(callNode);
      }

   // Generate a call to the native function
   TR::Register * javaReturnRegister = systemLinkage->callNativeFunction(
      callNode, deps, targetAddress, methodAddressReg, javaLitOffsetReg, returnFromJNICallLabel,
      jniCallDataSnippet, isJNIGCPoint);

   // restore java stack pointer
   generateRXInstruction(codeGen, TR::InstOpCode::getLoadOpCode(), callNode, javaStackPointerRealRegister,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));

   //Turn on Java Offload
   if (isJavaOffLoadCheck)
      {
      callPostJNICallOffloadCheck(callNode);
      }

   if (isAcquireVMAccess)
      {
#ifdef J9VM_INTERP_ATOMIC_FREE_JNI
      acquireVMAccessMaskAtomicFree(callNode, methodMetaDataVirtualRegister, methodAddressReg);
#else
      acquireVMAccessMask(callNode, javaLitPoolVirtualRegister, methodMetaDataVirtualRegister, methodAddressReg, javaLitOffsetReg);
#endif
      }

   // Rebias the Java SP by the thread's literals offset.
   generateRXInstruction(codeGen, TR::InstOpCode::getAddOpCode(), callNode, javaStackPointerRealRegister,
      new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaLiteralsOffset(), codeGen));

   processJNIReturnValue(callNode, codeGen, javaReturnRegister);

   if (isCollapseJNIReferenceFrame)
      {
      collapseJNIReferenceFrame(callNode, javaStackPointerRealRegister, javaLitPoolVirtualRegister, methodAddressReg);
      }

   // Restore the JIT frame
   if (isJNICallOutFrame)
      {
      // Pop the 5-slot call-out frame and undo the long-disp slot adjustment
      // made in setupJNICallOutFrame.
      generateRXInstruction(codeGen, TR::InstOpCode::LA, callNode, javaStackPointerRealRegister,
         generateS390MemoryReference(javaStackPointerRealRegister, 5 * sizeof(intptr_t), codeGen));

      setOffsetToLongDispSlot(getOffsetToLongDispSlot() - (5 * (int32_t)sizeof(intptr_t)) );
      }

   if (isCheckException)
      {
      checkException(callNode, methodMetaDataVirtualRegister, methodAddressReg);
      }

   OMR::Z::Linkage::generateDispatchReturnLable(callNode, codeGen, deps, javaReturnRegister, hasGlRegDeps, GlobalRegDeps);
   return javaReturnRegister;
   }

////////////////////////////////////////////////////////////////////////////////
// J9::Z::PrivateLinkage::doNotKillSpecialRegsForBuildArgs - Do not kill
//  special regs (java stack ptr, system stack ptr, and method metadata reg)
////////////////////////////////////////////////////////////////////////////////
void
J9::Z::PrivateLinkage::doNotKillSpecialRegsForBuildArgs (TR::Linkage *linkage, bool isFastJNI, int64_t &killMask)
   {
   TR::SystemLinkage * systemLinkage = (TR::SystemLinkage *) cg()->getLinkage(TR_System);

   int32_t i;
   killMask &= ~(0x1L << REGINDEX(getStackPointerRegister()));

   if (systemLinkage->getStackPointerRealRegister()->getState() == TR::RealRegister::Locked)
      {
      killMask &= ~(0x1L << REGINDEX(getSystemStackPointerRegister()));
      }
   killMask &= ~(0x1L << REGINDEX(getMethodMetaDataRegister()));

   // Remove preserved registers from kill set
   if (isFastJNI)
      {
      // We kill all unlocked GPRs for JNI preserved or not,
      // so only need to worry about not killing preserved FPRs
      for (i = TR::RealRegister::FirstFPR; i <= TR::RealRegister::LastFPR; i++)
         {
         if (linkage->getPreserved(REGNUM(i)))
            killMask &= ~(0x1L << REGINDEX(i));
         }
      }
   else
      {
      // Non-JNI: preserve every preserved register, GPRs through FPRs.
      for (i = TR::RealRegister::FirstGPR; i <= TR::RealRegister::LastFPR; i++)
         {
         if
(linkage->getPreserved(REGNUM(i)))3086killMask &= ~(0x1L << REGINDEX(i));3087}3088}3089}30903091////////////////////////////////////////////////////////////////////////////////3092// J9::Z::PrivateLinkage::addSpecialRegDepsForBuildArgs - add special argument3093// register dependencies for buildArgs3094////////////////////////////////////////////////////////////////////////////////3095void3096J9::Z::PrivateLinkage::addSpecialRegDepsForBuildArgs(TR::Node * callNode, TR::RegisterDependencyConditions * dependencies, int32_t& from, int32_t step)3097{3098TR::Node * child;3099TR::RealRegister::RegNum specialArgReg = TR::RealRegister::NoReg;3100switch (callNode->getSymbol()->castToMethodSymbol()->getMandatoryRecognizedMethod())3101{3102// Note: special long args are still only passed in one GPR3103case TR::java_lang_invoke_ComputedCalls_dispatchJ9Method:3104specialArgReg = getJ9MethodArgumentRegister();3105break;3106case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:3107case TR::com_ibm_jit_JITHelpers_dispatchVirtual:3108specialArgReg = getVTableIndexArgumentRegister();3109break;3110}31113112if (specialArgReg != TR::RealRegister::NoReg)3113{3114child = callNode->getChild(from);3115TR::Register *specialArg = copyArgRegister(callNode, child, cg()->evaluate(child)); // TODO:JSR292: We don't need a copy of the highOrder reg on 31-bit3116if (specialArg->getRegisterPair())3117specialArg = specialArg->getLowOrder(); // on 31-bit, the top half doesn't matter, so discard it3118dependencies->addPreCondition(specialArg, specialArgReg );3119cg()->decReferenceCount(child);31203121if (comp()->getOption(TR_TraceCG))3122{3123traceMsg(comp(), "Special arg %s %s reg %s in %s\n",3124callNode->getOpCode().getName(),3125comp()->getDebug()->getName(callNode->getChild(from)),3126comp()->getDebug()->getName(callNode->getRegister()),3127comp()->getDebug()->getName(cg()->machine()->getRealRegister(specialArgReg)));3128}31293130from += 
step;3131}3132}31333134////////////////////////////////////////////////////////////////////////////////3135// J9::Z::PrivateLinkage::storeExtraEnvRegForBuildArgs - JNI specific,3136// account for extra env param. Return stackOffset.3137////////////////////////////////////////////////////////////////////////////////3138int32_t3139J9::Z::PrivateLinkage::storeExtraEnvRegForBuildArgs(TR::Node * callNode, TR::Linkage* linkage, TR::RegisterDependencyConditions * dependencies,3140bool isFastJNI, int32_t stackOffset, int8_t gprSize, uint32_t &numIntegerArgs)3141{3142//In XPLINK, when the called function has variable number of args, all args are passed on stack,3143//Because we have no way of knowing this, we will always store the args on stack and parm regs both.3144if (isFastJNI) // Account for extra parameter env3145{3146TR::Register * jniEnvRegister = dependencies->searchPreConditionRegister(getIntegerArgumentRegister(0));3147numIntegerArgs += 1;3148if (linkage->isAllParmsOnStack())3149{3150TR::Register *stackRegister = linkage->getStackRegisterForOutgoingArguments(callNode, dependencies); // delay (possibly) creating this till needed3151storeArgumentOnStack(callNode, TR::InstOpCode::getStoreOpCode(), jniEnvRegister, &stackOffset, stackRegister);3152}3153if (linkage->isXPLinkLinkageType()) // call specific3154{3155stackOffset += gprSize;3156}3157}3158return stackOffset;3159}31603161////////////////////////////////////////////////////////////////////////////////3162// J9::Z::PrivateLinkage::addFECustomizedReturnRegDependency - add extra3163// linkage specific return register dependency3164////////////////////////////////////////////////////////////////////////////////3165int64_t3166J9::Z::PrivateLinkage::addFECustomizedReturnRegDependency(int64_t killMask, TR::Linkage* linkage, TR::DataType resType,3167TR::RegisterDependencyConditions * dependencies)3168{3169TR::Register * javaResultReg;31703171//In zOS XPLink, return register(GPR3) is not same as privateLinkage 
(GPR2)3172// hence we need to add another dependency3173if (linkage->getIntegerReturnRegister() != getIntegerReturnRegister())3174{3175javaResultReg = (resType.isAddress())? cg()->allocateCollectedReferenceRegister() : cg()->allocateRegister();3176dependencies->addPostCondition(javaResultReg, getIntegerReturnRegister(),DefinesDependentRegister);3177killMask &= (~(0x1L << REGINDEX(getIntegerReturnRegister())));3178}3179return killMask;3180}31813182////////////////////////////////////////////////////////////////////////////////3183// J9::Z::PrivateLinkage::buildDirectDispatch - build direct function call3184// eg. Static, helpers... etc.3185////////////////////////////////////////////////////////////////////////////////3186TR::Register *3187J9::Z::PrivateLinkage::buildDirectDispatch(TR::Node * callNode)3188{3189TR::SymbolReference * callSymRef = callNode->getSymbolReference();3190TR::MethodSymbol * callSymbol = callSymRef->getSymbol()->castToMethodSymbol();3191int32_t argSize;3192TR::Register * returnRegister;3193TR::Register *vftReg = NULL;31943195if (comp()->getOption(TR_TraceCG))3196traceMsg(comp(), "\nbuildDirectDispatch\n");31973198// create register dependency conditions3199TR::RegisterDependencyConditions * dependencies = generateRegisterDependencyConditions(getNumberOfDependencyGPRegisters(),3200getNumberOfDependencyGPRegisters(), cg());32013202// setup arguments3203argSize = buildArgs(callNode, dependencies, false, -1, vftReg);32043205buildDirectCall(callNode, callSymRef, dependencies, argSize);32063207// set dependency on return register3208TR::Register * lowReg = NULL, * highReg;3209switch (callNode->getOpCodeValue())3210{3211case TR::icall:3212case TR::acall:3213returnRegister = dependencies->searchPostConditionRegister(getIntegerReturnRegister());3214break;3215case TR::lcall:3216{3217if (comp()->target().is64Bit())3218{3219returnRegister = dependencies->searchPostConditionRegister(getLongReturnRegister());3220}3221else3222{3223TR::Instruction *cursor = 
NULL;3224lowReg = dependencies->searchPostConditionRegister(getLongLowReturnRegister());3225highReg = dependencies->searchPostConditionRegister(getLongHighReturnRegister());32263227generateRSInstruction(cg(), TR::InstOpCode::SLLG, callNode, highReg, highReg, 32);3228cursor =3229generateRRInstruction(cg(), TR::InstOpCode::LR, callNode, highReg, lowReg);32303231TR::RegisterDependencyConditions * deps =3232new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg());3233deps->addPostCondition(lowReg, getLongLowReturnRegister(),DefinesDependentRegister);3234deps->addPostCondition(highReg, getLongHighReturnRegister(),DefinesDependentRegister);3235cursor->setDependencyConditions(deps);32363237cg()->stopUsingRegister(lowReg);3238returnRegister = highReg;3239}3240}3241break;3242case TR::fcall:3243case TR::dcall:3244returnRegister = dependencies->searchPostConditionRegister(getFloatReturnRegister());3245break;3246case TR::call:3247returnRegister = NULL;3248break;3249default:3250returnRegister = NULL;3251TR_ASSERT(0, "Unknown direct call Opcode %d.", callNode->getOpCodeValue());3252}32533254callNode->setRegister(returnRegister);32553256#if TODO // for live register - to do later3257cg()->freeAndResetTransientLongs();3258#endif3259dependencies->stopUsingDepRegs(cg(), lowReg == NULL ? returnRegister : highReg, lowReg);32603261return returnRegister;3262}32633264////////////////////////////////////////////////////////////////////////////////3265// J9::Z::PrivateLinkage::buildIndirectDispatch - build indirect function call.3266// This function handles the arguments setup and the return register. 
It will3267// buildVirtualDispatch() to handle the call sequence.3268////////////////////////////////////////////////////////////////////////////////3269TR::Register *3270J9::Z::PrivateLinkage::buildIndirectDispatch(TR::Node * callNode)3271{3272TR::RegisterDependencyConditions * dependencies = NULL;3273int32_t argSize = 0;3274TR::Register * returnRegister;3275TR::SymbolReference * methodSymRef = callNode->getSymbolReference();3276TR::MethodSymbol * methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();3277TR::Register *vftReg = NULL;3278//TR::S390SystemLinkage * systemLinkage = (TR::S390SystemLinkage *) cg()->getLinkage(TR_System);327932803281if (comp()->getOption(TR_TraceCG))3282traceMsg(comp(), "\nbuildIndirectDispatch\n");32833284// create register dependency conditions3285dependencies = generateRegisterDependencyConditions(getNumberOfDependencyGPRegisters(),3286getNumberOfDependencyGPRegisters(), cg());32873288argSize = buildArgs(callNode, dependencies, false, -1, vftReg);3289buildVirtualDispatch(callNode, dependencies, vftReg, argSize);32903291TR::Register * lowReg = NULL, * highReg;3292switch (callNode->getOpCodeValue())3293{3294case TR::icalli:3295case TR::acalli:3296returnRegister = dependencies->searchPostConditionRegister(getIntegerReturnRegister());3297break;3298case TR::lcalli:3299{3300if (comp()->target().is64Bit())3301{3302returnRegister = dependencies->searchPostConditionRegister(getLongReturnRegister());3303}3304else3305{3306TR::Instruction *cursor = NULL;3307lowReg = dependencies->searchPostConditionRegister(getLongLowReturnRegister());3308highReg = dependencies->searchPostConditionRegister(getLongHighReturnRegister());33093310generateRSInstruction(cg(), TR::InstOpCode::SLLG, callNode, highReg, highReg, 32);3311cursor =3312generateRRInstruction(cg(), TR::InstOpCode::LR, callNode, highReg, lowReg);33133314TR::RegisterDependencyConditions * deps =3315new (cg()->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, 
cg());3316deps->addPostCondition(lowReg, getLongLowReturnRegister(),DefinesDependentRegister);3317deps->addPostCondition(highReg, getLongHighReturnRegister(),DefinesDependentRegister);3318cursor->setDependencyConditions(deps);33193320cg()->stopUsingRegister(lowReg);3321returnRegister = highReg;3322}3323}3324break;3325case TR::fcalli:3326case TR::dcalli:3327returnRegister = dependencies->searchPostConditionRegister(getFloatReturnRegister());3328break;3329case TR::calli:3330returnRegister = NULL;3331break;3332default:3333returnRegister = NULL;3334TR_ASSERT( 0, "Unknown indirect call Opcode.");3335}33363337callNode->setRegister(returnRegister);3338#if TODO // for live register - to do later3339cg()->freeAndResetTransientLongs();3340#endif3341dependencies->stopUsingDepRegs(cg(), lowReg == NULL ? returnRegister : highReg, lowReg);3342return returnRegister;3343}33443345void3346J9::Z::PrivateLinkage::setupBuildArgForLinkage(TR::Node * callNode, TR_DispatchType dispatchType, TR::RegisterDependencyConditions * deps, bool isFastJNI,3347bool isPassReceiver, int64_t & killMask, TR::Node * GlobalRegDeps, bool hasGlRegDeps, TR::SystemLinkage * systemLinkage)3348{3349TR::CodeGenerator * codeGen = cg();3350TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());3351// call base class3352OMR::Z::Linkage::setupBuildArgForLinkage(callNode, dispatchType, deps, isFastJNI, isPassReceiver, killMask, GlobalRegDeps, hasGlRegDeps, systemLinkage);335333543355// omr todo: this should be cleaned up once the logic of other linkage related method is cleaned up3356// basically JNIDispatch will perform the stuff after this statement and hence returning here3357// to avoid executing stuff twice...should be fixed in conjunction with JNIDispatch3358if (dispatchType == TR_JNIDispatch) return;335933603361J9::Z::PrivateLinkage * privateLinkage = (J9::Z::PrivateLinkage *) cg()->getLinkage(TR_Private);3362TR::RealRegister * javaStackPointerRealRegister = privateLinkage->getStackPointerRealRegister();3363TR::Register * 
methodMetaDataVirtualRegister = privateLinkage->getMethodMetaDataRealRegister();33643365// store java stack pointer3366generateRXInstruction(codeGen, TR::InstOpCode::getStoreOpCode(), callNode, javaStackPointerRealRegister,3367new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetJavaSPOffset(), codeGen));33683369}33703371void3372J9::Z::PrivateLinkage::setupRegisterDepForLinkage(TR::Node * callNode, TR_DispatchType dispatchType,3373TR::RegisterDependencyConditions * &deps, int64_t & killMask, TR::SystemLinkage * systemLinkage,3374TR::Node * &GlobalRegDeps, bool &hasGlRegDeps, TR::Register ** methodAddressReg, TR::Register * &javaLitOffsetReg)3375{3376// call base class3377OMR::Z::Linkage::setupRegisterDepForLinkage(callNode, dispatchType, deps, killMask, systemLinkage, GlobalRegDeps, hasGlRegDeps, methodAddressReg, javaLitOffsetReg);337833793380TR::CodeGenerator * codeGen = cg();33813382if (dispatchType == TR_SystemDispatch)3383{3384killMask = killAndAssignRegister(killMask, deps, methodAddressReg, (comp()->target().isLinux()) ? TR::RealRegister::GPR14 : TR::RealRegister::GPR8 , codeGen, true);3385killMask = killAndAssignRegister(killMask, deps, &javaLitOffsetReg, (comp()->target().isLinux()) ? 
TR::RealRegister::GPR8 : TR::RealRegister::GPR14 , codeGen, true);3386}33873388/*****************/33893390TR::RealRegister * systemStackRealRegister = systemLinkage->getStackPointerRealRegister();3391TR::Register * systemStackVirtualRegister = systemStackRealRegister;33923393if (comp()->target().isZOS())3394{33953396TR::RealRegister::RegNum systemStackPointerRegister;3397TR::RealRegister::RegNum systemCAAPointerRegister = ((TR::S390zOSSystemLinkage *)systemLinkage)->getCAAPointerRegister();3398TR::Register * systemCAAVirtualRegister = NULL;33993400killMask = killAndAssignRegister(killMask, deps, &systemCAAVirtualRegister, systemCAAPointerRegister, codeGen, true);34013402if (systemStackRealRegister->getState() != TR::RealRegister::Locked)3403{3404systemStackPointerRegister = ((TR::S390zOSSystemLinkage *)systemLinkage)->getStackPointerRegister();3405systemStackVirtualRegister = NULL;3406killMask = killAndAssignRegister(killMask, deps, &systemStackVirtualRegister, systemStackPointerRegister, codeGen, true);3407deps->addPreCondition(systemStackVirtualRegister,systemStackPointerRegister);3408}3409}34103411/*****************/3412J9::Z::PrivateLinkage * privateLinkage = (J9::Z::PrivateLinkage *) cg()->getLinkage(TR_Private);341334143415TR::RealRegister * javaLitPoolRealRegister = privateLinkage->getLitPoolRealRegister();3416TR::Register * javaLitPoolVirtualRegister = javaLitPoolRealRegister;34173418if (codeGen->isLiteralPoolOnDemandOn())3419{3420javaLitPoolVirtualRegister = NULL;3421killMask = killAndAssignRegister(killMask, deps, &javaLitPoolVirtualRegister, javaLitPoolRealRegister, codeGen, true);3422generateLoadLiteralPoolAddress(codeGen, callNode, javaLitPoolVirtualRegister);3423}342434253426/*****************/3427TR::Register * methodMetaDataVirtualRegister = privateLinkage->getMethodMetaDataRealRegister();342834293430// This logic was originally in OMR::Z::Linkage::buildNativeDispatch and the condition is cg()->supportsJITFreeSystemStackPointer().3431// The original 
condition is only true for J9 and only on zos, so replacing it with comp()->target().isZOS().3432if ( comp()->target().isZOS() )3433{3434TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());3435generateRXInstruction(codeGen, TR::InstOpCode::getLoadOpCode(), callNode, systemStackVirtualRegister,3436new (trHeapMemory()) TR::MemoryReference(methodMetaDataVirtualRegister, (int32_t)fej9->thisThreadGetSystemSPOffset(), codeGen));3437}34383439}344034413442TR::RealRegister::RegNum3443J9::Z::PrivateLinkage::getSystemStackPointerRegister()3444{3445return cg()->getLinkage(TR_System)->getStackPointerRegister();3446}344734483449J9::Z::JNILinkage::JNILinkage(TR::CodeGenerator * cg, TR_LinkageConventions elc)3450:J9::Z::PrivateLinkage(cg, elc)3451{3452}345334543455