// Path: blob/master/runtime/compiler/z/codegen/J9CodeGenerator.cpp
/*******************************************************************************
 * Copyright (c) 2000, 2022 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/

// On zOS XLC linker can't handle files with same name at link time.
// This workaround with pragma is needed. What this does is essentially
// give a different name to the codesection (csect) for this file.
So it25//doesn't conflict with another file with same name.2627#pragma csect(CODE,"TRJ9ZCGBase#C")28#pragma csect(STATIC,"TRJ9ZCGBase#S")29#pragma csect(TEST,"TRJ9ZCGBase#T")3031#include <algorithm>32#include "env/CompilerEnv.hpp"33#include "codegen/AheadOfTimeCompile.hpp"34#include "codegen/CodeGenerator.hpp"35#include "codegen/CodeGenerator_inlines.hpp"36#include "codegen/ConstantDataSnippet.hpp"37#include "codegen/Linkage_inlines.hpp"38#include "codegen/S390CHelperLinkage.hpp"39#include "codegen/S390PrivateLinkage.hpp"40#include "env/VMJ9.h"41#include "env/jittypes.h"42#include "il/Node.hpp"43#include "il/Node_inlines.hpp"44#include "z/codegen/J9SystemLinkageLinux.hpp"45#include "z/codegen/J9SystemLinkagezOS.hpp"46#include "z/codegen/S390GenerateInstructions.hpp"47#include "z/codegen/S390Recompilation.hpp"48#include "z/codegen/S390Register.hpp"49#include "z/codegen/ReduceSynchronizedFieldLoad.hpp"5051#define OPT_DETAILS "O^O CODE GENERATION: "5253extern void TEMPORARY_initJ9S390TreeEvaluatorTable(TR::CodeGenerator *cg);5455//Forward declarations56bool nodeMightClobberAccumulatorBeforeUse(TR::Node *);5758J9::Z::CodeGenerator::CodeGenerator(TR::Compilation *comp) :59J9::CodeGenerator(comp)60{61/**62* Do not add CodeGenerator initialization logic here.63* Use the \c initialize() method instead.64*/65}6667void68J9::Z::CodeGenerator::initialize()69{70self()->J9::CodeGenerator::initialize();7172TR::CodeGenerator *cg = self();73TR::Compilation *comp = cg->comp();74TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());7576cg->setAheadOfTimeCompile(new (cg->trHeapMemory()) TR::AheadOfTimeCompile(cg));7778// Java specific runtime helpers79cg->symRefTab()->createSystemRuntimeHelper(TR_S390jitMathHelperConvertLongToFloat);80cg->symRefTab()->createSystemRuntimeHelper(TR_S390induceRecompilation);8182// Enable Direct to JNI calls unless we're mimicking interpreter stack frames.83if (!comp->getOption(TR_FullSpeedDebug))84cg->setSupportsDirectJNICalls();8586if 
(cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDStringCaseConv))87cg->setSupportsInlineStringCaseConversion();8889if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableFastStringIndexOf) &&90!TR::Compiler->om.canGenerateArraylets())91{92cg->setSupportsInlineStringIndexOf();93}9495if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDStringHashCode) &&96!TR::Compiler->om.canGenerateArraylets())97{98cg->setSupportsInlineStringHashCode();99}100101if (cg->getSupportsVectorRegisters() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14))102{103cg->setSupportsInlineStringLatin1Inflate();104}105106// See comment in `handleHardwareReadBarrier` implementation as to why we cannot support CTX under CS107if (cg->getSupportsTM() && TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)108{109cg->setSupportsInlineConcurrentLinkedQueue();110}111112// Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as113// TRTO allocate lookup tables in persistent memory that cannot be relocated.114if (comp->isOutOfProcessCompilation())115{116cg->resetSupportsArrayTranslateTRxx();117}118119static char *disableInlineEncodeASCII = feGetEnv("TR_disableInlineEncodeASCII");120if (comp->fej9()->isStringCompressionEnabledVM() && cg->getSupportsVectorRegisters() && !TR::Compiler->om.canGenerateArraylets() && !disableInlineEncodeASCII)121{122cg->setSupportsInlineEncodeASCII();123}124125// Let's turn this on. There is more work needed in the opt126// to catch the case where the BNDSCHK is inserted after127//128cg->setDisableNullCheckOfArrayLength();129130// Enable Range splitter by default.131if (!comp->getOption(TR_DisableLiveRangeSplitter))132comp->setOption(TR_EnableRangeSplittingGRA);133134// Disable SS Optimization that generates better SS instruction memory references.135// Issue in Java because of symref in AOT case. 
See RTC 31738 for details.136comp->setOption(TR_DisableSSOpts);137138// Invoke Class.newInstanceImpl() from the JIT directly139cg->setSupportsNewInstanceImplOpt();140141// Still being set in the S390CodeGenerator constructor, as zLinux sTR requires this.142//cg->setSupportsJavaFloatSemantics();143144// Enable this only on Java, as there is a possibility that optimizations driven by this145// flag will generate calls to helper routines.146#if defined(J9VM_OPT_JITSERVER)147// The TRT instruction generated by the arrayTranslateAndTestEvaluator is not relocatable. Thus, to148// attain functional correctness we don't enable this support for remote compilations.149if (!comp->isOutOfProcessCompilation())150#endif /* defined(J9VM_OPT_JITSERVER) */151{152cg->setSupportsArrayTranslateAndTest();153}154155// Enable compaction of local stack slots. i.e. variables with non-overlapping live ranges156// can share the same slot.157cg->setSupportsCompactedLocals();158159// Enable Implicit NULL Checks on zLinux. 
On zOS, page zero is readable, so we need explicit checks.160cg->setSupportsImplicitNullChecks(comp->target().isLinux() && cg->getHasResumableTrapHandler() && !comp->getOption(TR_DisableZImplicitNullChecks));161162// Enable Monitor cache lookup for monent/monexit163static char *disableMonitorCacheLookup = feGetEnv("TR_disableMonitorCacheLookup");164if (!disableMonitorCacheLookup)165comp->setOption(TR_EnableMonitorCacheLookup);166167// Enable high-resolution timer168cg->setSupportsCurrentTimeMaxPrecision();169170// Defect 109299 : PMR 14649,999,760 / CritSit AV8426171// Turn off use of hardware clock on zLinux for calculating currentTimeMillis() as user can adjust time on their system.172//173// Hardware clock, however, can be used for calculating System.nanoTime() on zLinux174// since java/lang/System.nanoTime() returns an arbitrary number, rather than the current time175// (see the java/lang/System.nanoTime() spec for details).176if (comp->target().isZOS())177cg->setSupportsMaxPrecisionMilliTime();178179// Support BigDecimal Long Lookaside versioning optimizations.180if (!comp->getOption(TR_DisableBDLLVersioning))181cg->setSupportsBigDecimalLongLookasideVersioning();182183// RI support184if (comp->getOption(TR_HWProfilerDisableRIOverPrivateLinkage)185&& comp->getPersistentInfo()->isRuntimeInstrumentationEnabled()186&& comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)187&& comp->target().cpu.supportsFeature(OMR_FEATURE_S390_RI))188{189cg->setSupportsRuntimeInstrumentation();190cg->setEnableRIOverPrivateLinkage(false); // Disable RI over private linkage, since RION/OFF will be controlled over J2I / I2J.191}192193/*194* "Statically" initialize the FE-specific tree evaluator functions.195* This code only needs to execute once per JIT lifetime.196*/197static bool initTreeEvaluatorTable = false;198if (!initTreeEvaluatorTable)199{200TEMPORARY_initJ9S390TreeEvaluatorTable(cg);201initTreeEvaluatorTable = 
true;202}203204cg->getS390Linkage()->initS390RealRegisterLinkage();205206if (comp->fej9()->hasFixedFrameC_CallingConvention())207{208cg->setHasFixedFrameC_CallingConvention();209}210211static bool disableIntegerToChars = (feGetEnv("TR_DisableIntegerToChars") != NULL);212if (cg->getSupportsVectorRegisters() && !TR::Compiler->om.canGenerateArraylets() && !disableIntegerToChars && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZNEXT))213{214cg->setSupportsIntegerToChars();215cg->setSupportsIntegerStringSize();216}217218cg->setIgnoreDecimalOverflowException(false);219}220221bool222J9::Z::CodeGenerator::callUsesHelperImplementation(TR::Symbol *sym)223{224return sym && (!self()->comp()->getOption(TR_DisableInliningOfNatives) &&225sym->castToMethodSymbol()->getMandatoryRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchJ9Method);226}227228TR::Linkage *229J9::Z::CodeGenerator::createLinkage(TR_LinkageConventions lc)230{231TR::Linkage * linkage;232switch (lc)233{234case TR_CHelper:235linkage = new (self()->trHeapMemory()) J9::Z::CHelperLinkage(self());236break;237case TR_Helper:238linkage = new (self()->trHeapMemory()) J9::Z::HelperLinkage(self());239break;240241case TR_Private:242linkage = new (self()->trHeapMemory()) J9::Z::PrivateLinkage(self());243break;244245case TR_J9JNILinkage:246linkage = new (self()->trHeapMemory()) J9::Z::JNILinkage(self());247break;248249case TR_System:250if (self()->comp()->target().isLinux())251linkage = new (self()->trHeapMemory()) J9::Z::zLinuxSystemLinkage(self());252else253linkage = new (self()->trHeapMemory()) J9::Z::zOSSystemLinkage(self());254break;255256default :257TR_ASSERT(0, "\nTestarossa error: Illegal linkage convention %d\n", lc);258}259260self()->setLinkage(lc, linkage);261return linkage;262}263264bool265J9::Z::CodeGenerator::doInlineAllocate(TR::Node *node)266{267TR_OpaqueClassBlock * classInfo = 0;268if (self()->comp()->suppressAllocationInlining()) return false;269TR::ILOpCodes opCode = 
node->getOpCodeValue();270271if ((opCode!=TR::anewarray) && (opCode!=TR::newarray) && (opCode!=TR::New))272return false;273274275int32_t objectSize = self()->comp()->canAllocateInline(node, classInfo);276if (objectSize < 0) return false;277278return true;279}280281bool282J9::Z::CodeGenerator::constLoadNeedsLiteralFromPool(TR::Node *node)283{284if (node->isClassUnloadingConst() || node->getType().isIntegral() || node->getType().isAddress())285{286return false;287}288else289{290return true; // Floats/Doubles require literal pool291}292}293294TR::Recompilation *295J9::Z::CodeGenerator::allocateRecompilationInfo()296{297TR::Compilation *comp = self()->comp();298if(comp->getJittedMethodSymbol()->isJNI() &&299!comp->getOption(TR_FullSpeedDebug))300{301traceMsg(comp, "\n====== THIS METHOD IS VIRTUAL JNI THUNK. IT WILL NOT BE RECOMPILED====\n");302return NULL;303}304else305{306return TR_S390Recompilation::allocate(comp);307}308}309310void311J9::Z::CodeGenerator::lowerTreesPreChildrenVisit(TR::Node* parent, TR::TreeTop * treeTop, vcount_t visitCount)312{313J9::CodeGenerator::lowerTreesPreChildrenVisit(parent, treeTop, visitCount);314315if (parent->getOpCodeValue() == TR::BCDCHK)316{317// sometimes TR::pdModifyPrecision will be inserted318// just under BCDCHK, we have to remove it.319TR::Node * chkChild = parent->getFirstChild();320if (chkChild->getOpCodeValue() == TR::pdModifyPrecision)321{322TR::Node * pdopNode = chkChild->getFirstChild();323pdopNode->incReferenceCount();324chkChild->recursivelyDecReferenceCount();325parent->setChild(0, pdopNode);326}327}328}329330void331J9::Z::CodeGenerator::lowerTreesPostChildrenVisit(TR::Node * parent, TR::TreeTop * treeTop, vcount_t visitCount)332{333J9::CodeGenerator::lowerTreesPostChildrenVisit(parent, treeTop, visitCount);334335// J9, Z336//337if (self()->codegenSupportsLoadlessBNDCheck() &&338parent->getOpCode().isBndCheck() &&339(parent->getFirstChild()->getOpCode().isLoadVar() 
||340parent->getSecondChild()->getOpCode().isLoadVar()))341{342TR::Node * memChild = parent->getFirstChild()->getOpCode().isLoadVar()?parent->getFirstChild():parent->getSecondChild();343344if (memChild->getVisitCount() != self()->comp()->getVisitCount() && memChild->getReferenceCount() > 1 && performTransformation(self()->comp(), "%sRematerializing memref child %p from BNDCheck node\n", OPT_DETAILS, memChild))345{346memChild->decReferenceCount();347TR::Node *newNode = TR::Node::copy(memChild);348newNode->setReferenceCount(1);349parent->setChild(parent->findChildIndex(memChild), newNode);350}351}352}353354355void356J9::Z::CodeGenerator::lowerTreeIfNeeded(357TR::Node *node,358int32_t childNumberOfNode,359TR::Node *parent,360TR::TreeTop *tt)361{362TR::Compilation *comp = self()->comp();363J9::CodeGenerator::lowerTreeIfNeeded(node, childNumberOfNode, parent, tt);364365if (self()->yankIndexScalingOp() &&366(node->getOpCodeValue() == TR::aiadd || node->getOpCodeValue() == TR::aladd ) )367{368// 390 sees a lot of scaling ops getting stuck between BNDSchk and array read/write369// causing heavy AGIs. 
This transformation pulls the scaling opp up a tree to unpin it.370//371372// Looking for trees that look like this:373374// BNDCHK / BNDCHKwithSpineCHK375// iiload376// ==>aRegLoad377// iiload378// ==>aRegLoad379380// iaload381// aiadd <===== You are here382// ==>aRegLoad383// isub384// imul <=== Find this node and anchor it up above the BNDCHK385// ==>iiload386// iconst 4387// iconst -16388389TR::TreeTop* prevPrevTT = NULL;390TR::TreeTop* prevTT = tt->getPrevTreeTop();391392while ( prevTT &&393(prevTT->getNode()->getOpCodeValue() == TR::iRegStore ||394prevTT->getNode()->getOpCodeValue() == TR::aRegStore ||395prevTT->getNode()->getOpCodeValue() == TR::asynccheck ||396((prevTT->getNode()->getOpCodeValue() == TR::treetop) &&397(!prevTT->getNode()->getFirstChild()->getOpCode().hasSymbolReference() ||398prevTT->getNode()->getFirstChild()->getOpCode().isLoad()))))399{400prevTT = prevTT->getPrevTreeTop();401}402403// Pull scaling op up above the arrayStoreCheck as performing the scaling op right before the store is a horrible AGI.404if (tt->getPrevTreeTop() &&405tt->getNode()->getOpCodeValue() == TR::ArrayStoreCHK &&406node->getSecondChild()->getNumChildren() >= 2)407{408// The general tree that we are matching is:409// aladd <===== You are here410// ==>iaload411// lsub412// lmul <===== Find this node and anchor it up above the ArrayStoreCHK413// i2l414// ==>iRegLoad415//416// However, with internal pointers, there may or may not be an isub/lsub for arrayheader. 
If there is no417// arrayheader isub/lsub, we will see a tree as such:418//419// aladd (internal ptr) <===== You are here420// ==>iaload421// lshl <===== Find this node and anchor it up above the ArrayStoreCHK422// i2l423// ==>iRegLoad424//425// As such, we will check the second child of the aiadd/aladd, and see if it's the mul/shift operation.426// If not, we'll get the subsequent first child.427TR::Node* mulNode = node->getSecondChild();428429if (mulNode->getOpCodeValue() != TR::imul && mulNode->getOpCodeValue() != TR::ishl &&430mulNode->getOpCodeValue() != TR::lmul && mulNode->getOpCodeValue() != TR::lshl)431mulNode = node->getSecondChild()->getFirstChild();432433if ((mulNode->getOpCodeValue() == TR::imul || mulNode->getOpCodeValue() == TR::ishl || mulNode->getOpCodeValue() == TR::lmul || mulNode->getOpCodeValue() == TR::lshl) &&434(performTransformation(comp, "%sYank mul above ArrayStoreChk [%p] \n", OPT_DETAILS, node)))435{436TR::TreeTop * ttNew = TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, mulNode));437tt->getPrevTreeTop()->insertAfter(ttNew);438}439}440else if (prevTT &&441(prevPrevTT = prevTT->getPrevTreeTop()) &&442prevTT->getNode()->getOpCode().isBndCheck() &&443node->getSecondChild()->getNumChildren() >= 2 )444{445// The general tree that we are matching is:446// aladd <===== You are here447// ==>iaload448// lsub449// lmul <===== Find this node and anchor it up above the BNDCHK450// i2l451// ==>iRegLoad452//453// However, with internal pointers, there may or may not be an isub/lsub for arrayheader. 
If there is no454// arrayheader isub/lsub, we will see a tree as such:455//456// aladd (internal ptr) <===== You are here457// ==>iaload458// lshl <===== Find this node and anchor it up above the BNDCHK459// i2l460// ==>iRegLoad461//462// As such, we will check the second child of the aiadd/aladd, and see if it's the mul/shift operation.463// If not, we'll get the subsequent first child.464TR::Node* mulNode = node->getSecondChild();465466if (mulNode->getOpCodeValue() != TR::imul && mulNode->getOpCodeValue() != TR::ishl &&467mulNode->getOpCodeValue() != TR::lmul && mulNode->getOpCodeValue() != TR::lshl)468mulNode = node->getSecondChild()->getFirstChild();469470TR::Node *prevNode = prevTT->getNode();471TR::Node *bndchkIndex = prevNode->getOpCode().isSpineCheck() ?472prevNode->getChild(3) : // TR::BNDCHKwithSpineCHK473prevNode->getSecondChild(); // TR::BNDCHK474475bool doIt = false;476477doIt |= ((mulNode->getOpCodeValue() == TR::imul || mulNode->getOpCodeValue() == TR::ishl) &&478(mulNode->getFirstChild() == bndchkIndex)); // Make sure the BNDCHK is for this ind var479480doIt |= ((mulNode->getOpCodeValue() == TR::lmul || mulNode->getOpCodeValue() == TR::lshl) &&481(mulNode->getFirstChild()->getOpCodeValue() == TR::i2l && // 64-bit memrefs have an extra iu2l482// Make sure the BNDCHKxxx is for this ind var483(mulNode->getFirstChild() == bndchkIndex ||484mulNode->getFirstChild()->getFirstChild() == bndchkIndex ||485(bndchkIndex->getNumChildren() >= 1 &&486mulNode->getFirstChild() == bndchkIndex->getFirstChild())) ));487488if (doIt && performTransformation(comp, "%sYank mul [%p] \n", OPT_DETAILS, node))489{490TR::TreeTop * ttNew = TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, mulNode));491prevPrevTT->insertAfter(ttNew);492}493}494495}496497// J9, Z498//499// On zseries, convert aconst to iaload of aconst 0 and move it to its own new treetop500if (comp->target().cpu.isZ() && !self()->profiledPointersRequireRelocation() &&501node->getOpCodeValue() == 
TR::aconst && node->isClassUnloadingConst())502{503TR::Node * dummyNode = TR::Node::create(node, TR::aconst, 0);504TR::Node *constCopy;505TR::SymbolReference *intShadow;506507dumpOptDetails(comp, "transforming unloadable aconst %p \n", node);508509constCopy =TR::Node::copy(node);510intShadow = self()->symRefTab()->findOrCreateGenericIntShadowSymbolReference((intptr_t)constCopy);511intShadow->setLiteralPoolAddress();512513TR::Node::recreate(node, TR::aloadi);514node->setNumChildren(1);515node->setSymbolReference(intShadow);516node->setAndIncChild(0,dummyNode);517518519tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(comp,TR::Node::create(TR::treetop, 1, node)));520node->decReferenceCount();521parent->setAndIncChild(childNumberOfNode, node);522}523524// J9, Z525//526if (comp->target().cpu.isZ() && node->getOpCodeValue() == TR::aloadi && node->isUnneededIALoad())527{528ListIterator<TR_Pair<TR::Node, int32_t> > listIter(&_ialoadUnneeded);529TR_Pair<TR::Node, int32_t> *ptr;530uintptr_t temp;531int32_t updatedTemp;532for (ptr = listIter.getFirst(); ptr; ptr = listIter.getNext())533{534temp = (uintptr_t)ptr->getValue();535updatedTemp = (int32_t) temp;536if (ptr->getKey() == node && temp != node->getReferenceCount())537{538node->setUnneededIALoad(false);539break;540}541}542}543544}545546TR::S390EyeCatcherDataSnippet *547J9::Z::CodeGenerator::CreateEyeCatcher(TR::Node * node)548{549// 88448: Cold Eyecatcher is used for padding of endPC so that Return Address for exception snippets will never equal the endPC.550TR::S390EyeCatcherDataSnippet * eyeCatcherSnippet = new (self()->trHeapMemory()) TR::S390EyeCatcherDataSnippet(self(),node);551_snippetDataList.push_front(eyeCatcherSnippet);552return eyeCatcherSnippet;553}554555/**556* Input reg can be NULL (when called for a store node or other type that does not return a register)557*/558void559J9::Z::CodeGenerator::widenUnicodeSignLeadingSeparate(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, 
TR::MemoryReference *targetMR)560{561TR_ASSERT(node->getType().isAnyUnicode(),"widenUnicodeSignLeadingSeparate is only valid for unicode types (type = %s)\n",node->getDataType().toString());562TR_ASSERT( targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenUnicodeSignLeadingSeparate is only valid for aligned memory references\n");563if (bytesToClear > 0)564{565TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;566if (self()->traceBCDCodeGen())567traceMsg(self()->comp(),"\twidenUnicodeSignLeadingSeparate: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",568node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);569targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);570if (self()->traceBCDCodeGen())571traceMsg(self()->comp(),"\tgen MVC of size 2 to move unicode leading separate sign code left by %d bytes to the widened left aligned position\n",bytesToClear);572TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);573int32_t mvcSize = 2;574generateSS1Instruction(self(), TR::InstOpCode::MVC, node,575mvcSize-1,576targetMR,577originalSignCodeMR);578579self()->genZeroLeftMostUnicodeBytes(node, reg, endByte - TR::DataType::getUnicodeSignSize(), bytesToClear, targetMR);580}581}582583#define TR_MAX_UNPKU_SIZE 64584/**585* Input reg can be NULL (when called for a store node or other type that does not return a register)586*/587void588J9::Z::CodeGenerator::genZeroLeftMostUnicodeBytes(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)589{590TR_ASSERT(node->getType().isAnyUnicode(),"genZeroLeftMostUnicodeDigits is only valid for unicode types (type = %d)\n",node->getDataType().toString());591TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostUnicodeBytes is only 
valid for aligned memory references\n");592593bool evaluatedPaddingAnchor = false;594TR::Node *paddingAnchor = NULL;595if (bytesToClear > 0)596{597TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;598if (self()->traceBCDCodeGen())599traceMsg(self()->comp(),"\tgenZeroLeftMostUnicodeBytes: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",600node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);601602// zero 16 bytes (the fixed UNPKU source size) followed by a left aligned UNPKU of bytesToClear length to get 0030 repeated as the left most digits.603// less efficient than the MVC literal copy above but doesn't require any extra storage as it is in-place604int32_t tempSize = self()->getPackedToUnicodeFixedSourceSize();605TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(tempSize, self()->comp());606tempStorageReference->setTemporaryReferenceCount(1);607TR::MemoryReference *tempMR = generateS390LeftAlignedMemoryReference(node, tempStorageReference, self(), tempSize, true, true); // enforceSSLimits=true, isNewTemp=true608609TR_ASSERT(bytesToClear <= TR_MAX_UNPKU_SIZE,"expecting bytesToClear (%d) <= TR_MAX_UNPKU_SIZE (%d)\n",bytesToClear,TR_MAX_UNPKU_SIZE);610self()->genZeroLeftMostPackedDigits(node, NULL, tempSize, tempSize*2, tempMR);611612int32_t unpkuCount = ((bytesToClear-1)/TR_MAX_UNPKU_SIZE)+1;613for (int32_t i = 0; i < unpkuCount; i++)614{615int32_t unpkuSize = std::min(bytesToClear,TR_MAX_UNPKU_SIZE);616int32_t destOffset = i*TR_MAX_UNPKU_SIZE;617if (self()->traceBCDCodeGen())618traceMsg(self()->comp(),"\tgen %d of %d UNPKUs with dest size of %d destOffset of %d and fixed source size %d\n",i+1,unpkuCount,unpkuSize,destOffset,tempSize);619generateSS1Instruction(self(), TR::InstOpCode::UNPKU, node,620unpkuSize-1,621generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset, self(), 
endByte),622generateS390LeftAlignedMemoryReference(*tempMR, node, 0, self(), tempSize));623bytesToClear-=unpkuSize;624}625tempStorageReference->decrementTemporaryReferenceCount();626}627if (!evaluatedPaddingAnchor)628self()->processUnusedNodeDuringEvaluation(paddingAnchor);629}630631/**632* Input reg can be NULL (when called for a store node or other type that does not return a register)633*/634void635J9::Z::CodeGenerator::widenZonedSignLeadingSeparate(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)636{637TR_ASSERT(node->getDataType() == TR::ZonedDecimalSignLeadingSeparate,638"widenZonedSignLeadingSeparate is only valid for TR::ZonedDecimalSignLeadingSeparate (type=%s)\n",node->getDataType().toString());639TR_ASSERT( targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenZonedSignLeadingSeparate is only valid for aligned memory references\n");640if (bytesToClear > 0)641{642TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;643if (self()->traceBCDCodeGen())644traceMsg(self()->comp(),"\twidenZonedSignLeadingSeparate: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",645node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);646targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);647if (self()->traceBCDCodeGen())648traceMsg(self()->comp(),"\tgen MVC of size 1 to move zoned leading separate sign code left by %d bytes to the widened left aligned position\n",bytesToClear);649TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);650int32_t mvcSize = 1;651generateSS1Instruction(self(), TR::InstOpCode::MVC, node,652mvcSize-1,653targetMR,654originalSignCodeMR);655self()->genZeroLeftMostZonedBytes(node, reg, endByte - TR::DataType::getZonedSignSize(), bytesToClear, 
targetMR);656}657}658659/**660* Input reg can be NULL (when called for a store node or other type that does not return a register)661*/662void663J9::Z::CodeGenerator::widenZonedSignLeadingEmbedded(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)664{665TR_ASSERT(node->getDataType() == TR::ZonedDecimalSignLeadingEmbedded,666"widenZonedSignLeadingEmbedded is only valid for TR::ZonedDecimalSignLeadingEmbedded (type=%s)\n",node->getDataType().toString());667TR_ASSERT( targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenZonedSignLeadingEmbedded is only valid for aligned memory references\n");668if (bytesToClear > 0)669{670TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;671if (self()->traceBCDCodeGen())672traceMsg(self()->comp(),"\twidenZonedSignLeadingEmbedded: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",673node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);674self()->genZeroLeftMostZonedBytes(node, reg, endByte, bytesToClear, targetMR);675targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);676if (self()->traceBCDCodeGen())677traceMsg(self()->comp(),"\tgen MVZ of size 1 to move leading sign code left by %d bytes to the widened left aligned position\n",bytesToClear);678TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);679int32_t mvzSize = 1;680generateSS1Instruction(self(), TR::InstOpCode::MVZ, node,681mvzSize-1,682targetMR,683generateS390LeftAlignedMemoryReference(*originalSignCodeMR, node, 0, self(), originalSignCodeMR->getLeftMostByte()));684{685if (self()->traceBCDCodeGen()) traceMsg(self()->comp(),"\tgenerate OI 0xF0 to force original leading sign code at offset=bytesToClear=%d\n",bytesToClear);686generateSIInstruction(self(), TR::InstOpCode::OI, node, 
originalSignCodeMR, TR::DataType::getZonedCode());687}688}689}690691void692J9::Z::CodeGenerator::genZeroLeftMostZonedBytes(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)693{694TR_ASSERT(node->getType().isAnyZoned(),"genZeroLeftMostZonedBytes is only valid for zoned types (type = %s)\n",node->getDataType().toString());695TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostZonedBytes is only valid for aligned memory references\n");696TR::Node *paddingAnchor = NULL;697bool evaluatedPaddingAnchor = false;698if (bytesToClear > 0)699{700TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;701if (self()->traceBCDCodeGen())702traceMsg(self()->comp(),"\tgenZeroLeftMostZoneBytes: (%s) %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",703node->getOpCode().getName(),node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);704705{706targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);707708generateSIInstruction(self(), TR::InstOpCode::MVI, node, targetMR, TR::DataType::getZonedZeroCode());709if (bytesToClear > 2)710{711int32_t overlapMVCSize = bytesToClear-1;712generateSS1Instruction(self(), TR::InstOpCode::MVC, node,713overlapMVCSize-1,714generateS390LeftAlignedMemoryReference(*targetMR, node, 1, self(), targetMR->getLeftMostByte()),715generateS390LeftAlignedMemoryReference(*targetMR, node, 0, self(), targetMR->getLeftMostByte()));716}717}718if (reg)719reg->addRangeOfZeroBytes(endByte-bytesToClear, endByte);720}721722if (!evaluatedPaddingAnchor)723self()->processUnusedNodeDuringEvaluation(paddingAnchor);724}725726bool727J9::Z::CodeGenerator::alwaysGeneratesAKnownCleanSign(TR::Node *node)728{729switch (node->getOpCodeValue())730{731case TR::ud2pd:732return true;733default:734return false;735}736return 
false;737}738739bool740J9::Z::CodeGenerator::alwaysGeneratesAKnownPositiveCleanSign(TR::Node *node)741{742switch (node->getOpCodeValue())743{744case TR::ud2pd:745return true;746default:747return false;748}749return false;750}751752TR_RawBCDSignCode753J9::Z::CodeGenerator::alwaysGeneratedSign(TR::Node *node)754{755switch (node->getOpCodeValue())756{757case TR::ud2pd:758return raw_bcd_sign_0xc;759default:760return raw_bcd_sign_unknown;761}762return raw_bcd_sign_unknown;763}764765TR_OpaquePseudoRegister *766J9::Z::CodeGenerator::allocateOpaquePseudoRegister(TR::DataType dt)767{768TR_OpaquePseudoRegister *temp = new (self()->trHeapMemory()) TR_OpaquePseudoRegister(dt, self()->comp());769self()->addAllocatedRegister(temp);770if (self()->getDebug())771self()->getDebug()->newRegister(temp);772return temp;773}774775776TR_OpaquePseudoRegister *777J9::Z::CodeGenerator::allocateOpaquePseudoRegister(TR_OpaquePseudoRegister *reg)778{779TR_OpaquePseudoRegister *temp = new (self()->trHeapMemory()) TR_OpaquePseudoRegister(reg, self()->comp());780self()->addAllocatedRegister(temp);781if (self()->getDebug())782self()->getDebug()->newRegister(temp);783return temp;784}785786787TR_PseudoRegister *788J9::Z::CodeGenerator::allocatePseudoRegister(TR_PseudoRegister *reg)789{790TR_PseudoRegister *temp = new (self()->trHeapMemory()) TR_PseudoRegister(reg, self()->comp());791self()->addAllocatedRegister(temp);792if (self()->getDebug())793self()->getDebug()->newRegister(temp);794return temp;795}796797/**798* OPR in this context is OpaquePseudoRegister799*/800TR_OpaquePseudoRegister *801J9::Z::CodeGenerator::evaluateOPRNode(TR::Node * node)802{803bool isBCD = node->getType().isBCD();804bool isAggr = node->getType().isAggregate();805TR_ASSERT(isBCD || isAggr,"evaluateOPRNode node %s (%p) must be BCD/Aggr type\n",node->getOpCode().getName(),node);806TR::Register *reg = isBCD ? 
self()->evaluateBCDNode(node) : self()->evaluate(node);
   TR_OpaquePseudoRegister *opaquePseudoReg = reg->getOpaquePseudoRegister();
   TR_ASSERT(opaquePseudoReg,"reg must be some type of opaquePseudoRegister on node %s (%p)\n",node->getOpCode().getName(),node);
   return opaquePseudoReg;
   }

/**
 * If \p node carries a temporary-based storage-reference hint that was never
 * used (temporary reference count of zero) then release the variable-size
 * temporary back to the pending-free list so it can be reused.
 */
void
J9::Z::CodeGenerator::freeUnusedTemporaryBasedHint(TR::Node *node)
   {
   TR_StorageReference *hint = node->getOpCode().canHaveStorageReferenceHint() ? node->getStorageReferenceHint() : NULL;
   if (hint && hint->isTemporaryBased() && hint->getTemporaryReferenceCount() == 0)
      {
      self()->pendingFreeVariableSizeSymRef(hint->getTemporarySymbolReference());
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tfreeing (pending) unused hint symRef #%d (%s) on %s (%p)\n",
            hint->getReferenceNumber(),
            self()->getDebug()->getName(hint->getTemporarySymbol()),
            node->getOpCode().getName(),
            node);
      }
   }

/**
 * Determine whether two storage references refer to the same storage.
 *
 * Node-based references match when both nodes are loadVar/store nodes whose
 * addresses match and whose sizes agree; temporary-based references match
 * when they share the same symbol reference.  Anything else is conservatively
 * reported as not matching.
 */
bool
J9::Z::CodeGenerator::storageReferencesMatch(TR_StorageReference *ref1, TR_StorageReference *ref2)
   {
   bool refMatch = false;
   if (ref1->isNodeBased() && (ref1->getNode()->getOpCode().isLoadVar() || ref1->getNode()->getOpCode().isStore()) &&
       ref2->isNodeBased() && (ref2->getNode()->getOpCode().isLoadVar() || ref2->getNode()->getOpCode().isStore()) &&
       self()->loadOrStoreAddressesMatch(ref1->getNode(), ref2->getNode()))
      {
      if (ref1->getNode()->getSize() != ref2->getNode()->getSize())
         {
         // Same address but different extents -- treat as distinct.
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tnode based storageRefs match = false : ref1 (#%d) and ref2 (#%d) addresses match but node1 %s (%p) size=%d != node2 %s (%p) size=%d\n",
               ref1->getReferenceNumber(),ref2->getReferenceNumber(),
               ref1->getNode()->getOpCode().getName(),ref1->getNode(),ref1->getNode()->getSize(),
               ref2->getNode()->getOpCode().getName(),ref2->getNode(),ref2->getNode()->getSize());
         refMatch = false;
         }
      else
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tnode based storageRefs match = true : ref1 \
(#%d) %s (%p) == ref2 (#%d) %s (%p)\n",
               ref1->getReferenceNumber(),ref1->getNode()->getOpCode().getName(),ref1->getNode(),
               ref2->getReferenceNumber(),ref2->getNode()->getOpCode().getName(),ref2->getNode());
         refMatch = true;
         }
      }
   else if (ref1->isTemporaryBased() &&
            ref2->isTemporaryBased() &&
            ref1->getSymbolReference() == ref2->getSymbolReference())
      {
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\ttemp based storageRefs match = true : ref1 (#%d) == ref2 (#%d) match\n",ref1->getReferenceNumber(),ref2->getReferenceNumber());
      refMatch = true;
      }
   return refMatch;
   }

/**
 * Bookkeeping for a node-based storage reference that is going away unused:
 * decrement its node reference count and, when this was the last node use,
 * recursively process the address child of the reference's node so its
 * reference counts are not leaked.
 */
void
J9::Z::CodeGenerator::processUnusedStorageRef(TR_StorageReference *ref)
   {
   if (ref == NULL || !ref->isNodeBased())
      return;

   if (ref->getNodeReferenceCount() == 0)
      return;

   TR::Node *refNode = ref->getNode();
   TR::Node *addrChild = NULL;
   // Indirect loads/stores (and constant node-based refs with children) hang
   // their address expression off the first child.
   if (refNode->getOpCode().isIndirect() ||
       (ref->isConstantNodeBased() && refNode->getNumChildren() > 0))
      {
      addrChild = refNode->getFirstChild();
      }

   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tprocessUnusedStorageRef ref->node %s (%p) with addrChild %s (%p)\n",
         refNode->getOpCode().getName(),refNode,addrChild?addrChild->getOpCode().getName():"NULL",addrChild);

   if (addrChild)
      {
      TR_ASSERT(addrChild->getType().isAddress(),"addrChild %s (%p) not an address type\n",addrChild->getOpCode().getName(),addrChild);
      if (ref->getNodeReferenceCount() == 1)
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\t\tstorageRef->nodeRefCount %d == 1 so processUnusedAddressNode %s (%p) (refCount %d)\n",
               ref->getNodeReferenceCount(),addrChild->getOpCode().getName(),addrChild,addrChild->getReferenceCount());
         self()->processUnusedNodeDuringEvaluation(addrChild);
         }
      else if (self()->traceBCDCodeGen())
         {
         traceMsg(self()->comp(),"\t\tstorageRef->nodeRefCount %d > 1 so do not decRefCounts of unusedAddressNode %s (%p) (refCount \
%d)\n",
            ref->getNodeReferenceCount(),addrChild->getOpCode().getName(),addrChild,addrChild->getReferenceCount());
         }
      }

   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tdec storageRef->nodeRefCount %d->%d\n",
         ref->getNodeReferenceCount(),ref->getNodeReferenceCount()-1);

   ref->decrementNodeReferenceCount();
   }

/**
 * Allocate a new BCD pseudo register of data type \p dt and register it with
 * the code generator (and with the debug object when tracing).
 */
TR_PseudoRegister *
J9::Z::CodeGenerator::allocatePseudoRegister(TR::DataType dt)
   {
   TR_PseudoRegister *temp = new (self()->trHeapMemory()) TR_PseudoRegister(dt, self()->comp());
   self()->addAllocatedRegister(temp);
   if (self()->getDebug())
      self()->getDebug()->newRegister(temp);
   return temp;
   }

// Upper bound on the number of nodes examined under a single store before
// canUseSingleStoreAsAnAccumulator gives up (see budget check below).
#define TR_ACCUMULATOR_NODE_BUDGET 50

/// canUseSingleStoreAsAnAccumulator does not use visitCounts (as they are
/// already in use at this point) but instead the slightly less exact
/// getRegister() == NULL checks
///
/// In a pathological case, such as doubly commoned nodes under the same store
/// there is a potential for an exponential number of nodes to be visited.
To928/// guard against this maintain a count of nodes visited under one store and929/// compare against the budget below.930///931/// \note Today, it should be relatively easy to insert a Checklist, which932/// addresses the concern about visit counts above.933template <class TR_AliasSetInterface>934bool935J9::Z::CodeGenerator::canUseSingleStoreAsAnAccumulator(TR::Node *parent, TR::Node *node, TR::Node *store,TR_AliasSetInterface &storeAliases, TR::list<TR::Node*> *conflictingAddressNodes, bool justLookForConflictingAddressNodes, bool isChainOfFirstChildren, bool mustCheckAllNodes)936{937TR::Compilation *comp = self()->comp();938939// A note on isChainOfFirstChildren:940// In RTC 75858, we saw the following trees for the following COBOL statements, where X is packed decimal:941// COMPUTE X = X - 2.942// COMPUTE X = 999 - X.943//944// pdstore "X"945// pdsub946// pdconst +999947// pdsub948// pdload "X"949// pdconst 2950//951// In this case, canUseSingleStoreAsAnAccumulator is returning true because the pdload of X is the first child of its parent, but it's missing952// the fact that the parent pdsub is itself a second child. This is resulting in the value of X getting clobbered with +999.953//954// To solve this, isChainOfFirstChildren is used. It is set to true initially, and it will only remain true when called for a node's first child955// if it was already true. 
In the example above, it would be true for the pdsub and the pdconst +999 and false for any other nodes.956LexicalTimer foldTimer("canUseSingleStore", comp->phaseTimer());957958if (self()->traceBCDCodeGen())959traceMsg(comp,"\t\texamining node %s (%p) (usage/budget = %d/%d)\n",node->getOpCode().getName(),node,self()->getAccumulatorNodeUsage(),TR_ACCUMULATOR_NODE_BUDGET);960961if (self()->getAccumulatorNodeUsage() > TR_ACCUMULATOR_NODE_BUDGET)962{963if (self()->traceBCDCodeGen())964traceMsg(comp,"\t\ta^a : disallow useAccum=false as node budget %d exceeded for store %s (%p)\n",965TR_ACCUMULATOR_NODE_BUDGET,store->getOpCode().getName(),store);966return false;967}968969if (!mustCheckAllNodes)970{971if (self()->endAccumulatorSearchOnOperation(node))972{973if (self()->traceBCDCodeGen())974traceMsg(comp,"\t\t\tallow -- found node %s (%p) with endSearch = yes\n",node->getOpCode().getName(),node);975if (conflictingAddressNodes->empty())976{977return true;978}979else980{981// do not have to worry about overlaps but still must descend to look for conflictingAddressNodes982if (self()->traceBCDCodeGen())983traceMsg(comp,"\t\tconflictingAddressNodes list is not empty so continue searching for conflictingAddressNodes\n");984justLookForConflictingAddressNodes = true;985}986}987else if (!justLookForConflictingAddressNodes && nodeMightClobberAccumulatorBeforeUse(node))988{989// RTC 75966: In general, we want to check all nodes until we hit a node for which endAccumulatorSearchOnOperation is true990// (eg. zd2pd; we won't accumulate across a type change). However, if we have already done something that might clobber the991// destination, we still need to search all nodes. So, mustCheckAllNodes is initially false but will be set to true when we992// first encounter any node for which endAccumulatorSearchOnOperation is false. 
If we've already hit such a node, and we're993// continuing the search to find conflicting address nodes, then mustCheckAllNodes can remain false.994//995// pdstore "a"996// pdsub997// pdconst998// zd2pd999// zdload "a"1000//1001// Previously, the code would hit the zd2pd and stop, incorrectly accumulating into "a" and potentially clobbering "a" before1002// the pdload was evaluated. Now, we'll set mustCheckAllNodes to true when we hit the pdsub, and the code that won't let us1003// accumulate because the pdload "a" isn't on a chain of first children will kick in, and we won't accumulate to "a".1004if (!mustCheckAllNodes && self()->traceBCDCodeGen())1005traceMsg(comp,"\t\tFound a node that could clobber the accumulator before use; must check all children\n");10061007mustCheckAllNodes = true;1008}1009}10101011TR::Node *nodeForAliasing = NULL;1012if (!justLookForConflictingAddressNodes)1013{1014// An already evaluated OpaquePseudoRegister may have had its storageReference updated to point to1015// memory different from that on the node itself (e.g. 
updated by skipCopyOnStore checks in pdstoreEvaluator1016// or to a temp by ssrClobberEvaluate)1017// It is this updated memory that will be used to generate the actual instructions/memoryReferences therefore it is1018// this memory that must be used for the overlap tests1019if (node->getOpaquePseudoRegister())1020{1021TR_StorageReference *storageRef = node->getOpaquePseudoRegister()->getStorageReference();1022if (self()->traceBCDCodeGen())1023traceMsg(comp,"\t\tfound evaluated reg %s : storageRef #%d ",self()->getDebug()->getName(node->getOpaquePseudoRegister()),storageRef->getReferenceNumber());1024if (storageRef->isTemporaryBased())1025{1026if (self()->traceBCDCodeGen()) traceMsg(comp,"(tempBased)\n");1027TR::SymbolReference *tempSymRef = storageRef->getTemporarySymbolReference();1028// the rest of the code below expects a node but there is not one for tempBased storageRefs so construct/reuse one on the fly1029if (_dummyTempStorageRefNode == NULL)1030{1031_dummyTempStorageRefNode = TR::Node::createWithSymRef(node, comp->il.opCodeForDirectLoad(node->getDataType()), 0, tempSymRef);1032}1033else1034{1035TR::Node::recreate(_dummyTempStorageRefNode, comp->il.opCodeForDirectLoad(node->getDataType()));1036_dummyTempStorageRefNode->setSymbolReference(tempSymRef);1037}1038if (node->getType().isBCD())1039_dummyTempStorageRefNode->setDecimalPrecision(node->getDecimalPrecision());1040else1041TR_ASSERT(false,"unexpected type on node %s (%p)\n",node->getOpCode().getName(),node);1042nodeForAliasing = _dummyTempStorageRefNode;1043}1044else if (storageRef->isNonConstantNodeBased())1045{1046if (self()->traceBCDCodeGen()) traceMsg(comp,"(nodeBased storageRefNode %s (%p))\n",storageRef->getNode()->getOpCode().getName(),storageRef->getNode());1047TR_ASSERT(storageRef->getNode()->getOpCode().hasSymbolReference(),"storageRef node %s (%p) should have a symRef\n",1048storageRef->getNode()->getOpCode().getName(),storageRef->getNode());1049nodeForAliasing = 
storageRef->getNode();1050}1051else1052{1053if (self()->traceBCDCodeGen()) traceMsg(comp,"(constNodeBased storageRefNode %s (%p))\n",storageRef->getNode()->getOpCode().getName(),storageRef->getNode());1054TR_ASSERT(storageRef->isConstantNodeBased(),"expecting storageRef #%d to be constant node based\n",storageRef->getReferenceNumber());1055}1056}1057else if (node->getOpCodeValue() != TR::loadaddr && // no aliasing implications to a simple loadaddr (it is not a deref)1058node->getOpCode().hasSymbolReference())1059{1060nodeForAliasing = node;1061}10621063}10641065TR::SymbolReference *symRefForAliasing = NULL;1066if (nodeForAliasing)1067symRefForAliasing = nodeForAliasing->getSymbolReference();10681069if (self()->traceBCDCodeGen() && nodeForAliasing && symRefForAliasing)1070traceMsg(comp,"\t\tgot nodeForAliasing %s (%p), symRefForAliasing #%d\n",1071nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing?symRefForAliasing->getReferenceNumber():-1);10721073bool useAliasing = true;1074if (self()->traceBCDCodeGen() && useAliasing && !storeAliases.isZero(comp) && symRefForAliasing)1075{1076if (comp->getOption(TR_TraceAliases) && !symRefForAliasing->getUseDefAliases().isZero(comp))1077{1078traceMsg(comp, "\t\t\taliases for #%d: ",symRefForAliasing->getReferenceNumber());1079TR::SparseBitVector aliases(comp->allocator());1080symRefForAliasing->getUseDefAliases().getAliases(aliases);1081(*comp) << aliases << "\n";1082}1083traceMsg(comp,"\t\t\tsymRefForAliasing #%d isSet in storeAliases = %s\n",1084symRefForAliasing->getReferenceNumber(),storeAliases.contains(symRefForAliasing->getReferenceNumber(), comp) ? 
"yes":"no");1085}10861087if (symRefForAliasing &&1088loadAndStoreMayOverlap(store, store->getSize(), nodeForAliasing, nodeForAliasing->getSize(), storeAliases)) // if aliases are present node can be of any node type (a call for example)1089{1090// allow expressions like a=a+b but not a=b+a1091if (parent &&1092nodeForAliasing->getOpCode().isLoadVar() &&1093(parent->getOpCode().isBasicPackedArithmetic()) &&1094parent->getFirstChild() == nodeForAliasing &&1095isChainOfFirstChildren &&1096self()->loadOrStoreAddressesMatch(store, nodeForAliasing))1097{1098if (self()->traceBCDCodeGen())1099traceMsg(comp,"\t\t\tallow hint (loadVar case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",1100parent->getOpCode().getName(),parent,1101store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,1102nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);11031104return true;1105}1106else if (parent &&1107node->getOpaquePseudoRegister() &&1108nodeForAliasing->getOpCode().isStore() &&1109(parent->getOpCode().isBasicPackedArithmetic()) &&1110parent->getFirstChild() == node &&1111isChainOfFirstChildren &&1112self()->loadOrStoreAddressesMatch(store, nodeForAliasing))1113{1114// zdstoreA #y1115// zdTrMultipleA1116// zdload #y1117//1118// zdstoreB #y <- store1119// zdTrMultipleB <- parent1120// ==>zdTrMultipleA <- node with nodeForAliasing zdstoreA1121if (self()->traceBCDCodeGen())1122traceMsg(comp,"\t\t\tallow hint (storeVar case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",1123parent->getOpCode().getName(),parent,1124store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,1125nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);11261127return true;1128}1129// Catch this case1130// pdstore #y1131// pdshr1132// pdload #y1133// where the store is to the leading bytes of the load. 
See RTC 950731134else if (self()->isAcceptableDestructivePDShiftRight(store, nodeForAliasing))1135{1136if (self()->traceBCDCodeGen())1137traceMsg(comp,"\t\t\tallow hint (pdshr in place case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",1138parent->getOpCode().getName(),parent,1139store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,1140nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);11411142return true;1143}1144else if (self()->isAcceptableDestructivePDModPrecision(store, nodeForAliasing))1145{1146if (self()->traceBCDCodeGen())1147traceMsg(comp,"\t\t\tallow hint (pdMod in place case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",1148parent->getOpCode().getName(),parent,1149store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,1150nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);11511152return true;1153}1154else1155{1156if (useAliasing && // checking useAliasing here because in the no info case the above loadAndStoreMayOverlap already did the pattern match1157self()->storageMayOverlap(store, store->getSize(), nodeForAliasing, nodeForAliasing->getSize()) == TR_NoOverlap)1158{1159// get a second opinion -- the aliasing says the operations overlap but perhaps it is too conservative1160// so do pattern matching based test to see if the operations are actually disjoint1161if (self()->traceBCDCodeGen())1162traceMsg(comp,"\t\t\tcheck children -- useAccum=true aliasing test failed but pattern match passed for nodeForAliasing %s (%p) with symRefForAliasing #%d\n",1163nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing->getReferenceNumber());1164}1165else1166{1167if (self()->traceBCDCodeGen())1168traceMsg(comp,"\t\t\tdisallow -- useAccum=false for nodeForAliasing %s (%p) with symRefForAliasing 
#%d\n",1169nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing->getReferenceNumber());1170return false;1171}1172}1173}11741175// no need to descend below a load if loadAndStoreMayOverlap already has returned false -- we have our answer and there1176// is no overlap -- unless mustCheckAllNodes is true (something higher up could clobber the accumulator before it's used,1177// so make sure no one uses it). Never any need to descend below a node that's already been evaluated.1178if (node->getOpCode().isLoad())1179{1180if (self()->traceBCDCodeGen())1181traceMsg(comp,"\t\t\t%s -- found load %s (%p) under store %s (%p)\n", (mustCheckAllNodes ? "check children" : "allow"),1182node->getOpCode().getName(),node,store->getOpCode().getName(),store);1183if (!mustCheckAllNodes)1184return true;1185}1186else if (node->getRegister())1187{1188if (self()->traceBCDCodeGen())1189traceMsg(comp,"\t\t\tallow -- found base case evaluated reg %s on node %s (%p) under store %s (%p)\n",1190self()->getDebug()->getName(node->getRegister()),node->getOpCode().getName(),node,store->getOpCode().getName(),store);1191return true;1192}1193// Check conflicting address nodes on the parent node too1194if (self()->foundConflictingNode(node, conflictingAddressNodes))1195{1196// If the same unevaluated BCD/Aggr node is present in the address child and the value child then prevent the accum flag from being set1197// The problem is that if the store is used an accum then there will be a circular evaluation as the value child will have to evaluate1198// the address child in order to the get accumulated store address1199if (self()->traceBCDCodeGen())1200traceMsg(comp,"\t\t\ta^a: disallow -- useAccum=false because node %s (%p) was found commoned from address tree on %s (%p)\n",1201node->getOpCode().getName(),node,store->getOpCode().getName(),store);1202return false;1203}12041205for (int32_t i = node->getNumChildren() - 1; i >= 0; --i) // recurse from original node and not 
{
      TR::Node *child = node->getChild(i);
      if (self()->foundConflictingNode(child, conflictingAddressNodes))
         {
         // If the same unevaluated BCD/Aggr node is present in the address child and the value child then prevent the accum flag from being set
         // The problem is that if the store is used an accum then there will be a circular evaluation as the value child will have to evaluate
         // the address child in order to the get accumulated store address
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\ta^a: disallow -- useAccum=false because node %s (%p) was found commoned from address tree on %s (%p)\n",
               child->getOpCode().getName(),child,store->getOpCode().getName(),store);
         return false;
         }
      else
         {
         // If so far we have an unbroken chain of first children, the chain continues if this node is the value child.
         // If this isn't the value child (eg. second operand of an arith op), or the chain was broken, then we definitely
         // can't continue the chain.
         bool continueChainOfFirstChildren = false;
         if (child == node->getValueChild() && isChainOfFirstChildren)
            continueChainOfFirstChildren = true;

         self()->incAccumulatorNodeUsage();
         if (!canUseSingleStoreAsAnAccumulator(node, child, store, storeAliases, conflictingAddressNodes, justLookForConflictingAddressNodes, continueChainOfFirstChildren, mustCheckAllNodes))
            {
            if (!justLookForConflictingAddressNodes && self()->endHintOnOperation(node))
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\t\t\ta^a: : endHint mismatch -- node %s (%p)\n",node->getOpCode().getName(),node);
               }
            return false;
            }
         }
      }

   return true;
   }


// Z
/**
 * Recognize the in-place packed shift right pattern
 *   pdstore #y
 *     pdshr
 *       pdload #y
 * where a non-rounding shift stores back over the leading bytes of its own
 * source, which is safe to accumulate.  When \p nodeForAliasing is non-NULL
 * it must be the pdshr's source load.
 */
bool
J9::Z::CodeGenerator::isAcceptableDestructivePDShiftRight(TR::Node *storeNode, TR::Node * nodeForAliasing)
   {
   TR::Node *shiftNode = NULL;
   TR::Node *loadNode = NULL;

   if (storeNode->getOpCodeValue() != TR::pdstore && storeNode->getOpCodeValue() != TR::pdstorei)
      return
false;

   if (storeNode->getValueChild()->getOpCodeValue() == TR::pdshr)
      shiftNode = storeNode->getValueChild();

   if (!shiftNode)
      return false;

   // only non-rounding shifts are accepted
   if (shiftNode->getDecimalRound() != 0)
      return false;

   if (shiftNode->getChild(0)->getOpCode().isLoadVar())
      loadNode = shiftNode->getChild(0);

   if (!loadNode)
      return false;

   if (nodeForAliasing && loadNode != nodeForAliasing)
      return false;

   return self()->loadOrStoreAddressesMatch(storeNode, loadNode);

   }


///
/// pdstorei s=5
///   addr+3
///   pdModPrec s=5
///     pdX (with nodeForAliasing address : addr) s=8
///
/// The above IL is truncating the sourceNode (pdX) and storing the result back
/// to the same field right aligned. In this case it is ok to accumulate as an
/// exact right aligned subfield of the source is being operated on
///
bool
J9::Z::CodeGenerator::isAcceptableDestructivePDModPrecision(TR::Node *storeNode, TR::Node *nodeForAliasing)
   {
   return false; // currently disabled as this leads to a completely overlapping MVC that is even slower than going thru a temp
                 // should be re-enabled when redundant MVC removal is complete
                 // (everything below this early return is intentionally unreachable)

   if (storeNode->getOpCodeValue() != TR::pdstore && storeNode->getOpCodeValue() != TR::pdstorei)
      return false;

   if (!nodeForAliasing->getOpCode().isIndirect())
      return false;

   if (storeNode->getValueChild()->getOpCodeValue() != TR::pdModifyPrecision)
      return false;

   TR::Node *modPrecNode = storeNode->getValueChild();
   TR::Node *sourceNode = modPrecNode->getFirstChild();

   bool matchSourceAndAliasingNode = false;
   if (sourceNode == nodeForAliasing)
      {
      matchSourceAndAliasingNode = true;
      }
   else if (sourceNode->getOpaquePseudoRegister() &&
            sourceNode->getOpaquePseudoRegister()->getStorageReference()->isNonConstantNodeBased() &&
            sourceNode->getOpaquePseudoRegister()->getStorageReference()->getNode() ==
nodeForAliasing)
      {
      matchSourceAndAliasingNode = true;
      }

   if (!matchSourceAndAliasingNode)
      return false;

   int32_t storePrec = storeNode->getDecimalPrecision();
   int32_t modPrec = modPrecNode->getDecimalPrecision();
   int32_t sourcePrec = sourceNode->getDecimalPrecision();

   if (storePrec != modPrec)
      return false;

   if (sourceNode->getSize() != nodeForAliasing->getSize())
      return false;

   if (modPrec >= sourcePrec) // only handling truncations and this is not a truncation
      return false;

   int32_t truncatedBytes = nodeForAliasing->getSize() - storeNode->getSize();

   return self()->validateAddressOneToAddressOffset(truncatedBytes,
      nodeForAliasing->getFirstChild(),
      nodeForAliasing->getSymbolReference()->getOffset(),
      storeNode->getFirstChild(),
      storeNode->getSymbolReference()->getOffset(),
      NULL,
      self()->traceBCDCodeGen()); // _baseLoadsThatAreNotKilled = NULL (not tracking here)
   }

// Z
/**
 * Verify that addr2 (+addr2ExtraOffset) is exactly \p expectedOffset bytes
 * past addr1 (+addr1ExtraOffset).  Returns false when the offset cannot be
 * computed or does not equal the expected value.
 */
bool
J9::Z::CodeGenerator::validateAddressOneToAddressOffset(int32_t expectedOffset,
   TR::Node *addr1,
   int64_t addr1ExtraOffset,
   TR::Node *addr2,
   int64_t addr2ExtraOffset,
   TR::list<TR::Node*> *_baseLoadsThatAreNotKilled,
   bool trace) // _baseLoadsThatAreNotKilled can be NULL
   {
   TR_ASSERT(addr1->getType().isAddress(),"addr1 %s (%p) must an address type\n",addr1->getOpCode().getName(),addr1);
   TR_ASSERT(addr2->getType().isAddress(),"addr2 %s (%p) must an address type\n",addr2->getOpCode().getName(),addr2);

   bool canGetOffset = false;
   int32_t addrOffset = 0;
   self()->getAddressOneToAddressTwoOffset(&canGetOffset, addr1, addr1ExtraOffset, addr2, addr2ExtraOffset, &addrOffset, _baseLoadsThatAreNotKilled, trace);
   if (!canGetOffset)
      {
      if (trace)
         traceMsg(self()->comp(),"\tvalidateAddressOneToAddressOffset = false : could not compute offset between addr1 %s (%p) (+%lld) and addr2 %s (%p) \
(+%lld)\n",
            addr1->getOpCode().getName(),addr1,addr1ExtraOffset,addr2->getOpCode().getName(),addr2,addr2ExtraOffset);
      return false;
      }

   // some examples:
   // pdstorei (highDigitsStore or lowDigitsStore) p=15,s=8
   //    addr1
   //    ...
   // The addr2 could be from an MVO:
   // tt
   //   mvo
   //     dstAddr
   //       addr2 = addr1 + expectedOffset (3)
   //
   if (addrOffset != expectedOffset)
      {
      if (trace)
         traceMsg(self()->comp(),"\tvalidateAddressOneToAddressOffset = false : addrOffset %d not the expected value of %d between addr1 %s (%p) (+%lld) and addr2 %s (%p) (+%lld)\n",
            addrOffset,expectedOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,addr2->getOpCode().getName(),addr2,addr2ExtraOffset);
      return false;
      }
   return true;
   }

// _baseLoadsThatAreNotKilled is if the caller is doing its own tracking of loads that are not killed between treetops
// For these loads syntactic address matching of the loads is allowed even if the node pointers themselves are not the same
// That is
//
// load1 "A"
//
// intervening treetops checked by caller not to kill "A"
//
// load2 "A"
//
// load1 and load2 can be matched for symRef and other properties as the caller has checked that "A" is not killed in between the loads
//
// resultOffset = address2 - address1 or equivalently address2 = address1 + resultOffset
//
// Z
void
J9::Z::CodeGenerator::getAddressOneToAddressTwoOffset(bool *canGetOffset,
   TR::Node *addr1,
   int64_t addr1ExtraOffset,
   TR::Node *addr2,
   int64_t addr2ExtraOffset,
   int32_t *offset,
   TR::list<TR::Node*> *_baseLoadsThatAreNotKilled,
   bool trace) // _baseLoadsThatAreNotKilled can be NULL
   {
   TR::Compilation *comp = self()->comp();
   int64_t offset64 = 0;
   *canGetOffset = false;
   *offset=0;
   bool foundOffset = false;

   // Case 1: the addresses match syntactically -- the offset is just the
   // difference of the extra offsets.
   if (!foundOffset &&
       self()->addressesMatch(addr1, addr2))
      {
      foundOffset = true;
      offset64 = (addr2ExtraOffset - addr1ExtraOffset);
      if (trace)
         traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : node matches case\n",
            addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,offset64);
      }

   // Case 2: both addresses are add-of-integral-constant off matching bases.
   if (!foundOffset &&
       self()->isSupportedAdd(addr1) &&
       self()->isSupportedAdd(addr2) &&
       self()->addressesMatch(addr1->getFirstChild(), addr2->getFirstChild()))
      {
      if (addr1->getSecondChild()->getOpCode().isIntegralConst() &&
          addr2->getSecondChild()->getOpCode().isIntegralConst())
         {
         foundOffset = true;
         int64_t addr1Offset = addr1->getSecondChild()->get64bitIntegralValue() + addr1ExtraOffset;
         int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
         offset64 = (addr2Offset - addr1Offset);
         if (trace)
            traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : both adds case\n",
               addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,offset64);
         }
      }

   // Case 3: addr2 is addr1 plus an integral constant, e.g.
   // =>i2a
   //
   // aiadd
   //    =>i2a
   //    iconst 8
   //
   if (!foundOffset &&
       self()->isSupportedAdd(addr2) &&
       addr2->getSecondChild()->getOpCode().isIntegralConst() &&
       self()->addressesMatch(addr1, addr2->getFirstChild()))
      {
      foundOffset = true;
      int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
      offset64 = addr2Offset;
      if (trace)
         traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = addr1 %s (%p) + offset (%lld) : 2nd add case\n",
            addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,offset64);
      }

   // Case 4: both are add-of-constant off base loads that the caller has
   // proven are not killed between the treetops (syntactic load matching is
   // then allowed -- see the block comment above).
   if (!foundOffset &&
       _baseLoadsThatAreNotKilled &&
       !_baseLoadsThatAreNotKilled->empty() &&
       self()->isSupportedAdd(addr1) &&
       self()->isSupportedAdd(addr2) &&
       addr1->getSecondChild()->getOpCode().isIntegralConst() &&
       addr2->getSecondChild()->getOpCode().isIntegralConst())
      {
      TR::Node *baseLoad1 = self()->getAddressLoadVar(addr1->getFirstChild(), trace);
      TR::Node *baseLoad2 = self()->getAddressLoadVar(addr2->getFirstChild(), trace);

      if (baseLoad1 != NULL && baseLoad2 != NULL &&
          (std::find(_baseLoadsThatAreNotKilled->begin(),_baseLoadsThatAreNotKilled->end(), baseLoad1) !=
           _baseLoadsThatAreNotKilled->end()) &&
          self()->directLoadAddressMatch(baseLoad1, baseLoad2, trace))
         {
         foundOffset = true;
         int64_t addr1Offset = addr1->getSecondChild()->get64bitIntegralValue() + addr1ExtraOffset;
         int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
         offset64 = (addr2Offset - addr1Offset);
         if (trace)
            traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : baseLoad1 %s (%p) in notKilledList, both adds case\n",
               addr2->getOpCode().getName(),addr2,addr2ExtraOffset,
               addr1->getOpCode().getName(),addr1,addr1ExtraOffset,
               offset64,
               baseLoad1->getOpCode().getName(),baseLoad1);
         }
      }

   // Reject offsets that fall outside the positive 32-bit range.
   if (!foundOffset ||
       self()->isOutOf32BitPositiveRange(offset64, trace))
      {
      return;
      }

   *canGetOffset = true;
   *offset = (int32_t)offset64;

   return;
   }

/**
 * Strip a non-truncating i2a/l2a cast and return the underlying loadVar node,
 * or NULL when \p node is not (a cast of) a loadVar.
 */
TR::Node *
J9::Z::CodeGenerator::getAddressLoadVar(TR::Node *node, bool trace)
   {
   // allow a non truncating address cast from integral types
   if ((node->getOpCodeValue() == TR::i2a || node->getOpCodeValue() == TR::l2a) &&
       (node->getSize() == node->getFirstChild()->getSize()))
      {
      node = node->getFirstChild();
      }

   if (node->getOpCode().isLoadVar())
      return node;
   else
      return NULL;
   }

/**
 * Per-treetop entry point for assigning storage-reference hints: first decide
 * whether the store can be used as an accumulator, then walk the tree pairing
 * intermediate BCD results with the best available storage reference.
 */
void
J9::Z::CodeGenerator::addStorageReferenceHints(TR::Node *node)
   {
   TR::list<TR::Node*> leftMostNodesList(getTypedAllocator<TR::Node*>(self()->comp()->allocator()));
   TR::StackMemoryRegion stackMemoryRegion(*self()->trMemory());

   self()->markStoreAsAnAccumulator(node);

   TR::Node *bestNode = NULL;
   if
// Z
//
// Recursive walk that chooses, for each left-most path in the subtree, the "bestNode"
// whose storage reference will become the hint for the BCD operations on that path.
// parent/node is the edge currently examined; bestNode, storeSize and leftMostNodesList
// are walk state threaded through by reference and consumed by processNodeList().
void
J9::Z::CodeGenerator::examineNode(
      TR::Node *parent,
      TR::Node *node,
      TR::Node *&bestNode,
      int32_t &storeSize,
      TR::list<TR::Node*> &leftMostNodesList)
   {
   TR::Compilation *comp = self()->comp();
   TR::Node *checkNode = node;
   bool isAccumStore = node->getOpCode().canUseStoreAsAnAccumulator();
   bool isLoad = node->getOpCode().isLoad();
   bool endHintOnNode = self()->endHintOnOperation(node) || isLoad;
   bool isConversionToNonAggrOrNonBCD = node->getOpCode().isBCDToNonBCDConversion();

   if (isAccumStore)
      storeSize = node->getSize();

   if (!node->hasBeenVisitedForHints()) // check other nodes using hasBeenVisitedForHints
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tvisiting node - %s (%p), bestNode - %s (%p) (endHintOnNode=%s)\n",
            node->getOpCode().getName(),node,bestNode?bestNode->getOpCode().getName():"NULL",bestNode,endHintOnNode?"true":"false");

      node->setHasBeenVisitedForHints();

      bool nodeCanHaveHint = node->getOpCode().canHaveStorageReferenceHint();
      bool isInterestingStore = nodeCanHaveHint || isAccumStore || isConversionToNonAggrOrNonBCD;
      bool isNonOverflowPDShift = node->getOpCode().isPackedShift() && node->getOpCodeValue() != TR::pdshlOverflow;
      bool isSafeWideningConversion =
         TR::ILOpCode::isPackedConversionToWiderType(node->getOpCodeValue()) && node->getDecimalPrecision() <= node->getFirstChild()->getDecimalPrecision();

      if (isInterestingStore &&
          (isNonOverflowPDShift ||
           isSafeWideningConversion ||
           node->getOpCodeValue() == TR::pdModifyPrecision) &&
          (node->getFirstChild()->getReferenceCount() == 1))
         {
         // pdshl/pdModPrec nodes take care of the zeroing the top nibble in the pad byte for the final shifted value (so we can skip clearing
         // the nibble in the intermediate arithmetic result.
         // non-widening pd2zd nodes only select the exact number of digits so the top nibble will be ignored for even precision values
         // If the child has a refCount > 1 then subsequent uses may not also be have a pdshl/pdModPrec/pd2zd parent so we must conservatively clear the nibble right
         // after the arithmetic operation.
         // TODO: if all subsequent uses are also under truncating pdshl/pdModPrec nodes then the clearing can also be skipped -- but finding this out will
         // require more analysis
         node->getFirstChild()->setSkipPadByteClearing(true);
         }

      if (nodeCanHaveHint &&
          bestNode &&
          node->getStorageReferenceSize() > bestNode->getStorageReferenceSize() && // end hint before
          endHintOnNode && // end hint after
          !leftMostNodesList.empty())
         {
         // when the current node will end a hint before and after then tag any nodes above this node with the store hint so it can store into the final receiver
         //    pdstore <- hint
         //       pdshr <- tag this list node with the pdstore hint
         //          ud2pd <- node (endHintOnNode=true and ud2pd size > store size) -- alloc a new temp
         //
         // when the current node only ends the hint after (such as a zd2pd) then delay calling processNodeList so the zd2pd will also get the store hint
         //    pdstore <- hint
         //       pdshr <- tag this list node with the pdstore hint
         //          zd2pd <- node (endHintOnNode=true and ud2pd size <= store size) <- tag this list node with the pdstore hint
         //
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tendHintOnNode=true so call processNodeList before examining ending hint node %p\n",node);
         // processNodeList will reset storeSize so save and restore the current storeSize value so it will persist for the current node
         //    pd2ud <-sets storeSize to 16
         //       zd2pd <-node (should also use storeSize=16)
         // by persisting it for the zd2pd node this operation can initialize up to 16 bytes for its parent
         int32_t savedStoreSize = storeSize;
         self()->processNodeList(bestNode, storeSize, leftMostNodesList);
         storeSize = savedStoreSize;
         }

      TR::ILOpCodes opCode = node->getOpCodeValue();
      if (isInterestingStore)
         {
         // TODO: if a pdstore is to an auto then continually increase the size of this auto so it is the biggest on the left
         //       most subtree (i.e. force it to be the bestNode)
         if ((bestNode == NULL) ||
             (node->getStorageReferenceSize() > bestNode->getStorageReferenceSize()) ||
             (self()->nodeRequiresATemporary(node) && bestNode->getOpCode().isStore() && !self()->isAcceptableDestructivePDShiftRight(bestNode, NULL /* let the function find the load node */)))
            {
            if (!isAccumStore || node->useStoreAsAnAccumulator())
               {
               bestNode = node;
               if (self()->traceBCDCodeGen())
                  {
                  if (isAccumStore)
                     traceMsg(comp,"\t\tfound new store (canUse = %s) bestNode - %s (%p) with actual size %d and storageRefResultSize %d\n",
                        bestNode->useStoreAsAnAccumulator() ? "yes":"no", bestNode->getOpCode().getName(),bestNode, bestNode->getSize(),bestNode->getStorageReferenceSize());
                  else
                     traceMsg(comp,"\t\tfound new non-store bestNode - %s (%p) (isConversionToNonAggrOrNonBCD=%s, isForcedTemp=%s) with actual size %d and storageRefResultSize %d\n",
                        bestNode->getOpCode().getName(),bestNode,isConversionToNonAggrOrNonBCD?"yes":"no",self()->nodeRequiresATemporary(node)?"yes":"no",bestNode->getSize(),bestNode->getStorageReferenceSize());
                  }
               }
            }

         if (!isAccumStore && !isConversionToNonAggrOrNonBCD && !isLoad) // don't add stores or bcd2x or load nodes to the list
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\tadd node - %s (%p) to list\n",node->getOpCode().getName(),node);
            leftMostNodesList.push_front(node);
            }
         }
      // end hints on some nodes so
      // 1) the same storageReference isn't used for both sides of a zd2pd or pd2zd conversion
      // 2) a storageReference for a commoned node is not used 'across' a conversion:
      //    pdadd
      //       i2pd   :: end hint here so the commoned pdshr storageReference is not used for the i2pd/pdadd subexpression
      //          iadd
      //             pd2i  :: start new hint here
      //                ==>pdshr
      if (endHintOnNode)
         {
         self()->processNodeList(bestNode, storeSize, leftMostNodesList);
         switch (node->getOpCodeValue())
            {
            case TR::pd2ud:
            case TR::pd2udsl:
            case TR::pd2udst:
               storeSize = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision()); // i.e. the size of the result in packed bytes (node->getSize is in ud bytes)
               break;
            default:
               break;
            }

         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tendHintOnNode=true for node - %s (%p) setting storeSize to %d\n",node->getOpCode().getName(),node,storeSize);
         }

      // visit value child first for indirect stores so the possible store hint is not lost on the address child
      if (node->getOpCode().isStoreIndirect())
         {
         int32_t valueChildIndex = node->getOpCode().isIndirect() ? 1 : 0;
         self()->examineNode(node, node->getChild(valueChildIndex), bestNode, storeSize, leftMostNodesList);
         for (int32_t i = 0; i < node->getNumChildren(); i++)
            {
            if (i != valueChildIndex)
               self()->examineNode(node, node->getChild(i), bestNode, storeSize, leftMostNodesList);
            }
         }
      else
         {
         for (int32_t i = 0; i < node->getNumChildren(); i++)
            self()->examineNode(node, node->getChild(i), bestNode, storeSize, leftMostNodesList);
         }
      }
   else
      {
      checkNode = parent;
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tnot descending node - %s (%p) because it has been visited already\n",node->getOpCode().getName(),node);
      TR_OpaquePseudoRegister *reg = node->getOpCodeValue() == TR::BBStart ? NULL : node->getOpaquePseudoRegister();
      if (reg)
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tnode - %s (%p) with reg %s is an already evaluated bcd node with refCount=%d\n",
               node->getOpCode().getName(),node,self()->getDebug()->getName(static_cast<TR::Register*>(reg)),node->getReferenceCount());

         if (!reg->getStorageReference()->isTemporaryBased())
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d is not a temp so do not update bestNode with node - %s (%p) but do reset reg %s isInit to false\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,self()->getDebug()->getName(reg));
            // setting to false here forces the commoned expression to re-initialize the register to the new hint for one of two reasons:
            // 1) functionally required for non-temps as these storage references can not be clobbered (they are program variables or constants)
            // 2) for perf to avoid a clobber evaluate (temp to temp move) of the already initialized reg -- instead begin using the store hint and leave the temp alone
            reg->setIsInitialized(false);
            }
         else if (bestNode && bestNode->getOpCode().isStore() && node->getReferenceCount() >= 1) // use >= 1 so useNewStoreHint can always be used for ZAP widening on initializations
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d with a store bestNode so do not update bestNode with node - %s (%p) refCount=%d\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,node->getReferenceCount());
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d is a final-use (refCount==1) temp so set bestNode to node - %s (%p) reg->isInit=%s (and reuse temp storageRef))\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,reg->isInitialized()?"yes":"no");
            if (bestNode)
               storeSize = bestNode->getSize();
            bestNode = node;
            }
         }
      }

   if ((leftMostNodesList.empty()) || (checkNode == leftMostNodesList.front())) // just finished with a left most path and there are nodes to tag with hint
                                                                                // just finished with a left most but there are no nodes to tag with a hint
      {
      if (self()->traceBCDCodeGen())
         {
         traceMsg(comp,"\t\tdetected the end of a left most path because ");
         if ((!leftMostNodesList.empty()) && (checkNode == leftMostNodesList.front()))
            traceMsg(comp,"checkNode - %s (%p) matches head of list %p\n",checkNode?checkNode->getOpCode().getName():"NULL",checkNode,leftMostNodesList.front());
         else if (leftMostNodesList.empty()) // i.e. bestNode is your only node so you haven't seen any other higher up nodes to add to the list
            traceMsg(comp,"bestNode - %s (%p) is set and the head of list is NULL for node - %s (%p)\n",
               (bestNode ? bestNode->getOpCode().getName():"NULL"),bestNode,node->getOpCode().getName(),node);
         else
            traceMsg(comp,"of an unknown reason for node - %s (%p) (FIXME - add a reason) \n",node->getOpCode().getName(),node);
         }
      if (leftMostNodesList.empty())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tleftMostNodesList is empty so clear bestNode - %s (%p->NULL) for current node - %s (%p)\n",
               bestNode?bestNode->getOpCode().getName():"NULL",bestNode,node->getOpCode().getName(),node);
         bestNode = NULL;
         storeSize = 0;
         }
      else
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tcalling processNodeList with bestNode - %s (%p) because leftMostNodesList is not empty for current node - %s (%p)\n",
               bestNode?bestNode->getOpCode().getName():"NULL",bestNode,node->getOpCode().getName(),node);
         self()->processNodeList(bestNode, storeSize, leftMostNodesList);
         }
      }
   }
// Z
//
// Materialize the storage reference hint implied by bestNode and attach it to every
// node collected in leftMostNodesList, then reset the walk state (bestNode, storeSize,
// list) for the next left-most path.
void
J9::Z::CodeGenerator::processNodeList(
      TR::Node *&bestNode,
      int32_t &storeSize,
      TR::list<TR::Node*> &leftMostNodesList)
   {
   TR::Compilation *comp = self()->comp();

   if (bestNode)
      {
      bool keepTrackOfSharedNodes = false;
      TR::SymbolReference *memSlotHint = NULL;
      TR_StorageReference *storageRefHint = NULL;
      if (bestNode->getOpaquePseudoRegister())
         {
         // bestNode was already evaluated: reuse (and possibly grow) its register's storage reference.
         TR_OpaquePseudoRegister *reg = bestNode->getOpaquePseudoRegister();
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tbestNode - %s (%p) already has a register (%s) so use reg->getStorageReference #%d and %s\n",
               bestNode->getOpCode().getName(),bestNode,self()->getDebug()->getName(reg),reg->getStorageReference()->getReferenceNumber(),
               self()->getDebug()->getName(reg->getStorageReference()->getSymbol()));
         if (reg->getStorageReference()->isTemporaryBased() &&
             storeSize > reg->getLiveSymbolSize())
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\treg->getStorageReference #%d is tempBased and requested storeSize %d > regLiveSymSize %d so increase tempSize\n",
                  reg->getStorageReference()->getReferenceNumber(),storeSize,reg->getLiveSymbolSize());
            reg->increaseTemporarySymbolSize(storeSize-reg->getLiveSymbolSize());
            }
         storageRefHint = reg->getStorageReference();
         }
      else if (bestNode->getOpCode().isStore())
         {
         // The store itself becomes the hint: accumulate directly into the store location.
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tbestNode - %s (%p) is a store so create a new node based storage reference #%d\n",
               bestNode->getOpCode().getName(),bestNode,bestNode->getSymbolReference()->getReferenceNumber());
         storageRefHint = TR_StorageReference::createNodeBasedHintStorageReference(bestNode, comp);
         }
      else
         {
         if (!leftMostNodesList.empty())
            {
            int32_t bestNodeSize = bestNode->getStorageReferenceSize();
            int32_t tempSize = std::max(storeSize, bestNodeSize);
            if (self()->traceBCDCodeGen())
               {
               traceMsg(comp,"\t\tbestNode - %s (%p) is a BCD arithmetic or conversion op (isBCDToNonBCDConversion %s) and list is not empty so allocate a new temporary based storage reference\n",
                  bestNode->getOpCode().getName(),bestNode,bestNode->getOpCode().isBCDToNonBCDConversion()?"yes":"no");
               traceMsg(comp,"\t\tsize of temp is max(storeSize,bestNodeSize) = max(%d,%d) = %d\n", storeSize, bestNodeSize, tempSize);
               }
            storageRefHint = TR_StorageReference::createTemporaryBasedStorageReference(tempSize, comp);
            if (tempSize == bestNodeSize)
               {
               keepTrackOfSharedNodes=true;
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\t\tsetting keepTrackOfSharedNodes=true because hintSize is based on a non-store operation (bestNode %s - %p)\n",
                     bestNode->getOpCode().getName(),bestNode);
               }
            }
         else if (self()->traceBCDCodeGen())
            {
            traceMsg(comp,"\t\tbestNode %p is a BCD arithmetic or conversion op but list is empty so do not allocate a new temporary based storage reference\n",bestNode);
            }
         }
      for (auto listIt = leftMostNodesList.begin(); listIt != leftMostNodesList.end(); ++listIt)
         {
         TR_ASSERT(!(*listIt)->getOpCode().isStore(),"stores should not be in the list\n");
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\ttag (*listIt) - %s (%p) with storageRefHint #%d (%s)\n",
               (*listIt)->getOpCode().getName(),*listIt,storageRefHint->getReferenceNumber(),self()->getDebug()->getName(storageRefHint->getSymbol()));
         (*listIt)->setStorageReferenceHint(storageRefHint);
         if (keepTrackOfSharedNodes)
            storageRefHint->addSharedNode(*listIt);

         // If a child node has lower precision than the storage hint make sure its skipPadByteClearing is off
         if (TR::ILOpCode::isPackedConversionToWiderType((*listIt)->getOpCodeValue()))
            {
            TR::Node *firstChild = (*listIt)->getFirstChild();
            if (firstChild->chkSkipPadByteClearing() &&
                storageRefHint->getSymbolSize() > TR::DataType::getSizeFromBCDPrecision((*listIt)->getDataType(), firstChild->getDecimalPrecision()))
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\tUnset skipPadByteClearing on node %s (%p): storage ref hint has size %d and converted node has size %d\n",
                     firstChild->getOpCode().getName(),firstChild,storageRefHint->getSymbolSize(),TR::DataType::getSizeFromBCDPrecision((*listIt)->getDataType(), firstChild->getDecimalPrecision()));
               firstChild->setSkipPadByteClearing(false);
               }
            }
         }
      }

   // Reset walk state for the next left-most path.
   storeSize = 0;
   bestNode = NULL;
   leftMostNodesList.clear();
   }
// Z
//
// Decide whether the given store node may be used as an accumulator (i.e. whether the
// value subtree can be evaluated directly into the store's location) and, if the
// transformation is approved, flag the node with setUseStoreAsAnAccumulator.
void
J9::Z::CodeGenerator::markStoreAsAnAccumulator(TR::Node *node)
   {
   TR::Compilation *comp = self()->comp();
   LexicalTimer foldTimer("markStoreAsAccumulator", comp->phaseTimer());

   if (!node->getOpCode().isStore())
      return;

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"markStoreAsAnAccumulator for node %s (%p) - useAliasing=%s\n",node->getOpCode().getName(),node,"yes");

   TR::list<TR::Node*> conflictingAddressNodes(getTypedAllocator<TR::Node*>(comp->allocator()));

   if (node->getOpCode().canUseStoreAsAnAccumulator())
      {
      // Aliases of the store's symbol: any use of an aliased symbol in the value subtree
      // could read the location we would be clobbering early.
      TR_UseDefAliasSetInterface aliases = node->getSymbolReference()->getUseDefAliases();

      if (self()->traceBCDCodeGen())
         {
         traceMsg(comp, "\nUseAsAnAccumulator check for store %s (%p) #%d",node->getOpCode().getName(),node,node->getSymbolReference()->getReferenceNumber());
         if (comp->getOption(TR_TraceAliases) && !aliases.isZero(comp))
            {
            traceMsg(comp, ", storeAliases : ");
            TR::SparseBitVector printAliases(comp->allocator());
            aliases.getAliases(printAliases);
            (*comp) << printAliases;
            }
         traceMsg(comp,"\n");
         }

      if (node->getOpCode().isIndirect())
         {
         conflictingAddressNodes.clear();
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tlook for conflicting nodes in address subtree starting at %s (%p)\n",node->getFirstChild()->getOpCode().getName(),node->getFirstChild());
         self()->collectConflictingAddressNodes(node, node->getFirstChild(), &conflictingAddressNodes);
         }

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\n\texamine nodes in value subtree starting at %s [%s]\n",node->getValueChild()->getOpCode().getName(),node->getValueChild()->getName(comp->getDebug()));

      // The accumulator node usage counter is bounded by TR_ACCUMULATOR_NODE_BUDGET;
      // reset it around the check so each store is examined with a fresh budget.
      self()->setAccumulatorNodeUsage(0);
      // parent=NULL, justLookForConflictingAddressNodes=false, isChainOfFirstChildren=true, mustCheckAllNodes=false
      bool canUse = self()->canUseSingleStoreAsAnAccumulator(NULL, node->getValueChild(), node, aliases, &conflictingAddressNodes, false, true, false);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tfinal accumulatorNodeUsage = %d/%d\n",self()->getAccumulatorNodeUsage(),TR_ACCUMULATOR_NODE_BUDGET);
      self()->setAccumulatorNodeUsage(0);

      if (canUse &&
          performTransformation(comp, "%sset new UseStoreAsAnAccumulator=true on %s [%s]\n", OPT_DETAILS, node->getOpCode().getName(),node->getName(comp->getDebug())))
         {
         node->setUseStoreAsAnAccumulator(canUse);
         }
      }
   }
overwrite an accumulator by evaluating one child before loading1924/// the value from another, if we choose to accumulate. (Accumulation may still be safe, but we'll need1925/// to investigate all child nodes to be sure).1926/// eg.1927//1928/// pdstore "a"1929/// pdsub1930/// pdconst1931/// zd2pd1932/// zdload "a"1933///1934/// Accumulating to "a" is incorrect here because the pdconst will get evaluated into "a" before the1935/// zdload is evaluated, so when we encounter the pdsub, we need to check all children.1936///1937bool nodeMightClobberAccumulatorBeforeUse(TR::Node *node)1938{1939TR_ASSERT(node != NULL, "NULL node in nodeMightClobberAccumulatorBeforeUse\n");19401941if (!node->getType().isBCD())1942return false;19431944if (node->getOpCode().isAnyBCDArithmetic())1945return true;19461947if (node->getNumChildren() == 1)1948return false;19491950if (node->getOpCode().isShift()1951|| node->getOpCode().isConversion()1952|| node->getOpCode().isSetSign()1953|| node->getOpCode().isSetSignOnNode()1954|| node->getOpCode().isExponentiation())1955return false;19561957return true;1958}19591960void1961J9::Z::CodeGenerator::correctBadSign(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, TR::MemoryReference *memRef)1962{1963if (reg && reg->hasKnownBadSignCode())1964{1965int32_t sign = 0xf; // can choose any valid sign here but 0xf will be the cheapest to set1966if (self()->traceBCDCodeGen())1967traceMsg(self()->comp(),"\tcorrectBadSign node %p: reg %s hasKnownBadSignCode()=true so force sign to a valid sign 0x%x\n",node,self()->getDebug()->getName(reg),sign);1968self()->genSignCodeSetting(node, NULL, endByte, generateS390RightAlignedMemoryReference(*memRef, node, 0, self()), sign, reg, 0, false); // numericNibbleIsZero=false1969}1970}19711972int32_t1973J9::Z::CodeGenerator::genSignCodeSetting(TR::Node *node, TR_PseudoRegister *targetReg, int32_t endByte, TR::MemoryReference *targetMR, int32_t sign, TR_PseudoRegister *srcReg, int32_t digitsToClear, bool 
numericNibbleIsZero)1974{1975TR::CodeGenerator *cg = self();1976TR::Compilation *comp = cg->comp();1977int32_t digitsCleared = 0;1978int32_t signCodeOffset = TR::DataType::getSignCodeOffset(node->getDataType(), endByte);19791980TR_ASSERT(sign == TR::DataType::getIgnoredSignCode() || (sign >= TR::DataType::getFirstValidSignCode() && sign <= TR::DataType::getLastValidSignCode()),"unexpected sign of 0x%x in genSignCodeSetting\n",sign);19811982if (sign == TR::DataType::getIgnoredSignCode())1983{1984if (self()->traceBCDCodeGen())1985traceMsg(comp,"\tgenSignCodeSetting: node=%p, sign==ignored case srcReg %s, targetReg %s, srcReg->isSignInit %d, targetReg->isSignInit %d\n",1986node,srcReg?cg->getDebug()->getName(srcReg):"NULL",targetReg?cg->getDebug()->getName(targetReg):"NULL",srcReg?srcReg->signStateInitialized():0,targetReg?targetReg->signStateInitialized():0);1987if (targetReg != srcReg)1988{1989if (targetReg)1990{1991if (srcReg)1992{1993targetReg->transferSignState(srcReg, true); // digitsLost=true -- conservatively set as this may be part of a truncation1994}1995else1996{1997targetReg->setHasKnownBadSignCode();1998if (cg->traceBCDCodeGen())1999traceMsg(comp,"\tsign==ignored case and srcReg==NULL so setHasKnownBadSignCode=true on targetReg %s\n",cg->getDebug()->getName(targetReg));2000}2001}2002}2003return digitsCleared;2004}20052006int32_t srcSign = TR::DataType::getInvalidSignCode();2007if (srcReg)2008{2009if (srcReg->hasKnownOrAssumedSignCode())2010srcSign = srcReg->getKnownOrAssumedSignCode();2011else if (srcReg->hasTemporaryKnownSignCode())2012srcSign = srcReg->getTemporaryKnownSignCode();2013}20142015sign = (sign&0xF);2016bool isEffectiveNop = (srcSign == sign);20172018if (self()->traceBCDCodeGen())2019traceMsg(comp,"\tgenSignCodeSetting: node=%p, endByte=%d, sign=0x%x, signCodeOffset=%d, srcReg=%s, digitsToClear=%d, numericNibbleIsZero=%s (srcSign=0x%x, hasCleanSign=%s, hasPrefSign=%s, isEffectiveNop=%s)\n",2020node,endByte,sign,signCodeOffset,srcReg ? 
self()->getDebug()->getName(srcReg):"NULL",digitsToClear,numericNibbleIsZero ?"yes":"no",2021srcSign,srcReg && srcReg->hasKnownOrAssumedCleanSign()?"true":"false",2022srcReg && srcReg->hasKnownOrAssumedPreferredSign()?"true":"false",isEffectiveNop?"yes":"no");20232024if (isEffectiveNop)2025{2026if (srcReg && targetReg)2027targetReg->transferSignState(srcReg, true); // digitsLost=true -- conservatively set as this may be part of a truncation2028if (targetReg->signStateInitialized() == false) // when srcSign is from getTemporaryKnownSignCode()2029targetReg->setKnownSignCode(srcSign);2030return digitsCleared;2031}20322033TR::MemoryReference *signCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, 0, cg, endByte-signCodeOffset);20342035// If the sign code is 0xc,0xd,0xe or 0xf then the top two bits are already set so an initial OI is not required and only an NI is required for some sign values2036bool topTwoBitsSet = false;2037bool knownSignIs0xC = false;2038bool knownSignIs0xF = false;2039if (srcReg)2040{2041topTwoBitsSet = srcReg->hasKnownOrAssumedCleanSign() || srcReg->hasKnownOrAssumedPreferredSign();2042if (srcSign != TR::DataType::getInvalidSignCode())2043{2044if (srcSign >= 0xc && srcSign <= 0xf)2045topTwoBitsSet = true;2046knownSignIs0xC = (srcSign == 0xc);2047knownSignIs0xF = (srcSign == 0xf);2048}2049}20502051TR::DataType dt = node->getDataType();2052TR_ASSERT(dt == TR::PackedDecimal || dt == TR::ZonedDecimal || dt == TR::ZonedDecimalSignLeadingEmbedded,2053"genSignCodeSetting only valid for embedded sign types and not type %s\n",dt.toString());2054bool isPacked = (dt == TR::PackedDecimal);20552056intptr_t litPoolOffset;2057switch (dt)2058{2059case TR::PackedDecimal:2060case TR::ZonedDecimal:2061case TR::ZonedDecimalSignLeadingEmbedded:2062{2063if (isPacked && digitsToClear >= 3)2064{2065int32_t bytesToSet = (digitsToClear+1)/2;2066int32_t leftMostByte = 0;2067TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;2068switch (bytesToSet)2069{2070case 
2:2071case 3:2072op = TR::InstOpCode::MVHHI;2073digitsCleared = 3;2074leftMostByte = 2;2075break;2076case 4:2077case 5:2078case 6:2079case 7:2080op = TR::InstOpCode::MVHI;2081digitsCleared = 7;2082leftMostByte = 4;2083break;2084default:2085TR_ASSERT(bytesToSet >= 8,"unexpected bytesToSet value (%d) -- should be >= 8\n",bytesToSet);2086op = TR::InstOpCode::MVGHI;2087digitsCleared = 15;2088leftMostByte = 8;2089break;2090}2091signCodeMR->setLeftMostByte(leftMostByte);2092generateSILInstruction(cg, op, node, signCodeMR, sign);2093if (self()->traceBCDCodeGen())2094traceMsg(comp,"\t\tusing %d byte move imm (%s) for sign setting : set digitsCleared=%d\n",2095leftMostByte,leftMostByte==8?"MVGHI":(leftMostByte==4)?"MVHI":"MVHHI",digitsCleared);2096}2097else if (numericNibbleIsZero || digitsToClear >= 1)2098{2099generateSIInstruction(cg, TR::InstOpCode::MVI, node, signCodeMR, isPacked ? sign : sign << 4);2100digitsCleared = 1;2101if (self()->traceBCDCodeGen()) traceMsg(comp,"\t\tusing MVI for sign setting : set digitsCleared=1\n");2102}2103else2104{2105if (knownSignIs0xF)2106{2107generateSIInstruction(cg, TR::InstOpCode::NI, node, signCodeMR, isPacked ? (0xF0 | sign) : (0x0F | (sign<<4)));2108}2109else if (topTwoBitsSet && sign == 0xc)2110{2111generateSIInstruction(cg, TR::InstOpCode::NI, node, signCodeMR, isPacked ? 0xFC : 0xCF);2112}2113else if (knownSignIs0xC && sign == 0xd)2114{2115generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x01 : 0x10);2116}2117else if (sign == 0xf)2118{2119generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x0F : 0xF0);2120}2121else2122{2123{2124generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x0F : 0xF0);2125generateSIInstruction(cg, TR::InstOpCode::NI, node, generateS390LeftAlignedMemoryReference(*signCodeMR,2126node,21270,2128cg,2129signCodeMR->getLeftMostByte()),2130isPacked ? 
(0xF0 | sign) : (0x0F | (sign<<4)));2131}2132}2133}2134}2135break;2136default:2137TR_ASSERT(false,"dt %s not handled yet in genSignCodeSetting\n",node->getDataType().toString());2138}21392140if (targetReg)2141targetReg->setKnownSignCode(sign);21422143return digitsCleared;2144}21452146/**2147* Input reg can be NULL (when called for a store node or other type that does not return a register)2148*/2149void2150J9::Z::CodeGenerator::widenBCDValue(TR::Node *node, TR_PseudoRegister *reg, int32_t startByte, int32_t endByte, TR::MemoryReference *targetMR)2151{2152TR_ASSERT(node->getType().isBCD(),2153"widenBCDValue is only valid for BCD types (type=%s)\n",node->getDataType().toString()); TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenBCDValue is only valid for aligned memory references\n");2154TR_ASSERT(endByte >= startByte,"endByte (%d) >= startByte (%d) in widenBCDValue\n",endByte,startByte);21552156int32_t bytesToClear = endByte - startByte;2157if (bytesToClear > 0)2158{2159switch (node->getDataType())2160{2161case TR::PackedDecimal:2162self()->genZeroLeftMostPackedDigits(node, reg, endByte, bytesToClear*2, targetMR);2163break;2164case TR::ZonedDecimal:2165case TR::ZonedDecimalSignTrailingSeparate:2166self()->genZeroLeftMostZonedBytes(node, reg, endByte, bytesToClear, targetMR);2167break;2168case TR::ZonedDecimalSignLeadingEmbedded:2169self()->widenZonedSignLeadingEmbedded(node, reg, endByte, bytesToClear, targetMR);2170break;2171case TR::ZonedDecimalSignLeadingSeparate:2172self()->widenZonedSignLeadingSeparate(node, reg, endByte, bytesToClear, targetMR);2173break;2174case TR::UnicodeDecimal:2175case TR::UnicodeDecimalSignTrailing:2176self()->genZeroLeftMostUnicodeBytes(node, reg, endByte, bytesToClear, targetMR);2177break;2178case TR::UnicodeDecimalSignLeading:2179self()->widenUnicodeSignLeadingSeparate(node, reg, endByte, bytesToClear, targetMR);2180break;2181default:2182TR_ASSERT(false,"unsupported dataType %s in 
widenBCDValue\n",node->getDataType().toString());2183}2184}2185}218621872188/**2189* Input reg can be NULL (when called for a store node or other type that does not return a register)2190*/2191void2192J9::Z::CodeGenerator::widenBCDValueIfNeeded(TR::Node *node, TR_PseudoRegister *reg, int32_t startByte, int32_t endByte, TR::MemoryReference *targetMR)2193{2194TR_ASSERT(node->getType().isBCD(),2195"widenBCDValue is only valid for BCD types (type=%s)\n",node->getDataType().toString());2196TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenBCDValue is only valid for aligned memory references\n");2197TR_ASSERT(endByte >= startByte,"endByte (%d) >= startByte (%d) in widenBCDValue\n",endByte,startByte);21982199int32_t bytesToClear = endByte - startByte;2200if (self()->traceBCDCodeGen())2201traceMsg(self()->comp(),"\twidenBCDValue for node %s (%p) : %d->%d (%d bytes)\n",node->getOpCode().getName(),node,startByte,endByte,bytesToClear);2202if (bytesToClear > 0)2203{2204if (reg && reg->trackZeroDigits())2205self()->clearByteRangeIfNeeded(node, reg, targetMR, startByte, endByte);2206else2207self()->widenBCDValue(node, reg, startByte, endByte, targetMR);2208}2209}22102211void2212J9::Z::CodeGenerator::genZeroLeftMostDigitsIfNeeded(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t digitsToClear, TR::MemoryReference *targetMR, bool widenOnLeft)2213{2214TR_ASSERT(reg->trackZeroDigits(),"genZeroLeftMostDigitsIfNeeded only valid for types where trackZeroDigits=true (dt %s)\n",reg->getDataType().toString());2215TR_ASSERT(endByte > 0,"genZeroLeftMostDigitsIfNeeded: endByte %d should be > 0\n",endByte);2216TR_ASSERT(digitsToClear >= 0,"genZeroLeftMostDigitsIfNeeded: digitsToClear %d should be >= 0\n",digitsToClear);2217TR_ASSERT(reg->getDataType() == node->getDataType(),"reg dt (%s) should match node dt (%s) in genZeroLeftMostDigitsIfNeeded\n",reg->getDataType().toString(),node->getDataType().toString());22182219if (digitsToClear <= 
0)2220return;22212222TR_StorageReference *storageReference = reg->getStorageReference();2223TR_ASSERT(storageReference,"storageReference should be non-null at this point\n");2224int32_t endDigit = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), endByte);2225int32_t startDigit = endDigit-digitsToClear;2226// -1 is the sign code position and it can be cleared. The caller is responsible for generating code to set a new and valid sign code.2227if (self()->traceBCDCodeGen())2228traceMsg(self()->comp(),"\tgenZeroLeftMostDigitsIfNeeded %s #%d for node %p: digitsToClear = %d, endByte = %d (digit range is %d->%d), widenOnLeft=%s\n",2229self()->getDebug()->getName(reg),storageReference->getReferenceNumber(),node,digitsToClear,endByte,startDigit,endDigit,widenOnLeft?"yes":"no");2230TR_ASSERT(startDigit >= -1,"genZeroLeftMostDigitsIfNeeded: startDigit %d should be >= -1\n",startDigit);22312232// If requested (widenOnLeft=true) then attempt to clear up to the live symbol size to save separate clears being needed later on2233// this would not be legal, for example, when this routine is called to clear an intermediate digit range only2234// where some left most digits have to be preserved -- such as in pdshlEvaluator (via clearAndSetSign) when the moved over sign code is cleared.2235int32_t actualDigitsToClear = reg->getDigitsToClear(startDigit, endDigit);2236int32_t origEndDigit = endDigit;2237// only respect widenOnLeft if the actualDigitsToClear exceeds the widenOnLeftThreshold2238int32_t widenOnLeftThreshold = 0;2239if (node->getType().isAnyPacked())2240{2241// for the half byte type do not increase a single digit clear (i.e. 
avoid NI -> XC/NI -- just do the NI and leave the XC until later if needed)2242widenOnLeftThreshold = 1;2243}2244else if (node->getType().isAnyZoned() || node->getType().isAnyUnicode())2245{2246// the full byte types use an MVC for the clear so always attempt to widen on the left2247widenOnLeftThreshold = 0;2248}2249else2250{2251TR_ASSERT(false,"unsupported datatype %s in genZeroLeftMostDigitsIfNeededA\n",node->getDataType().toString());2252}2253if (widenOnLeft &&2254actualDigitsToClear > widenOnLeftThreshold &&2255reg->getLiveSymbolSize() > endByte)2256{2257int32_t origEndByte = endByte;2258endByte = reg->getLiveSymbolSize();2259endDigit = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), endByte);2260if (self()->traceBCDCodeGen())2261traceMsg(self()->comp(),"\ttargetMR->getStorageReference() #%d liveSymSize %d > endByte %d so increase endByte %d->%d (endDigit %d->%d) and retrieve the actualDigitsToClear based on this new endDigit\n",2262targetMR->getStorageReference()->getReferenceNumber(),reg->getLiveSymbolSize(),origEndByte,origEndByte,endByte,origEndDigit,endDigit);2263}22642265if (origEndDigit != endDigit)2266actualDigitsToClear = reg->getDigitsToClear(startDigit, endDigit);22672268if (actualDigitsToClear)2269{2270int32_t offset = reg->getByteOffsetFromLeftForClear(startDigit, endDigit, actualDigitsToClear, endByte); // might modify actualDigitsToClear2271switch (node->getDataType())2272{2273case TR::PackedDecimal:2274self()->genZeroLeftMostPackedDigits(node,2275reg,2276endByte,2277actualDigitsToClear,2278targetMR,2279offset);2280break;2281case TR::ZonedDecimal:2282self()->genZeroLeftMostZonedBytes(node,2283reg,2284endByte-offset,2285actualDigitsToClear,2286targetMR);2287break;2288default:2289TR_ASSERT(false,"unsupported datatype %s in 
genZeroLeftMostDigitsIfNeededB\n",node->getDataType().toString());2290break;2291}2292}2293else2294{2295self()->processUnusedNodeDuringEvaluation(NULL);2296}2297}22982299void2300J9::Z::CodeGenerator::clearByteRangeIfNeeded(TR::Node *node, TR_PseudoRegister *reg, TR::MemoryReference *targetMR, int32_t startByte, int32_t endByte, bool widenOnLeft)2301{2302TR_ASSERT(startByte <= endByte,"clearByteRangeIfNeeded: invalid range of %d->%d\n",startByte,endByte);2303if (startByte >= endByte) return;2304int32_t clearDigits = TR::DataType::bytesToDigits(node->getDataType(), endByte-startByte);2305return self()->genZeroLeftMostDigitsIfNeeded(node, reg, endByte, clearDigits, targetMR, widenOnLeft);2306}23072308void2309J9::Z::CodeGenerator::genZeroLeftMostPackedDigits(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t digitsToClear, TR::MemoryReference *targetMR, int32_t memRefOffset)2310{2311TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostPackedDigits is only valid for aligned memory references\n");23122313TR_StorageReference *storageRef = reg ? 
/**
 * Explicitly zero the left most \p digitsToClear digits of a packed decimal value
 * located at \p targetMR: full bytes are cleared with an XC (destructive overlap
 * xor-to-self) and, for an odd digit count, the remaining left most nibble is
 * cleared with an NI using mask 0x0F. The cleared digit range is recorded on
 * \p reg so later queries can avoid redundant clears.
 *
 * @param node          node the clearing is generated for (used for instruction bookkeeping)
 * @param reg           pseudo register tracking the value's zero-digit state (may be NULL)
 * @param endByte       left aligned end byte of the region being cleared
 * @param digitsToClear number of left most packed digits to zero
 * @param targetMR      aligned memory reference for the packed value
 * @param memRefOffset  additional byte offset applied to the generated memory references
 */
void
J9::Z::CodeGenerator::genZeroLeftMostPackedDigits(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t digitsToClear, TR::MemoryReference *targetMR, int32_t memRefOffset)
   {
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostPackedDigits is only valid for aligned memory references\n");

   TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
   targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);

   if (digitsToClear)
      {
      // two packed digits per byte; an odd count leaves one nibble for the NI below
      int32_t fullBytesToClear = digitsToClear/2;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgenZeroLeftMostPackedDigits: node %p, reg %s targetMemSlot #%d, endByte %d, digitsToClear %d (fullBytesToClear %d), memRefOffset %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",reg?reg->getStorageReference()->getReferenceNumber():0,endByte,digitsToClear,fullBytesToClear,memRefOffset);
      if (fullBytesToClear)
         {
         int32_t destOffset = 0;
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\t\tgen XC with size %d and mr offset %d (destOffset %d + memRefOffset %d)\n",fullBytesToClear,destOffset+memRefOffset,destOffset,memRefOffset);
         // XC of a location with itself zeros fullBytesToClear bytes (length is encoded as length-1)
         generateSS1Instruction(self(), TR::InstOpCode::XC, node,
                                fullBytesToClear-1,
                                generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset+memRefOffset, self(), targetMR->getLeftMostByte()), // left justified
                                generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset+memRefOffset, self(), targetMR->getLeftMostByte())); // left justified
         }
      if (digitsToClear&0x1)
         {
         int32_t destOffset = 0;
            {
            if (self()->traceBCDCodeGen())
               traceMsg(self()->comp(),"\tgen NI for odd clear digits with mr offset %d (fullBytesToClear %d + destOffset %d + memRefOffset %d)\n",fullBytesToClear+destOffset+memRefOffset,fullBytesToClear,destOffset,memRefOffset);
            // NI with 0x0F keeps the right nibble and zeros the left most (odd) digit
            generateSIInstruction(self(), TR::InstOpCode::NI, node,
                                  generateS390LeftAlignedMemoryReference(*targetMR, node, fullBytesToClear+destOffset+memRefOffset, self(), targetMR->getLeftMostByte()),
                                  0x0F);
            }
         }
      int32_t endDigit = (endByte*2)-(memRefOffset*2)-1; // -1 for the sign code
      if (reg)
         reg->addRangeOfZeroDigits(endDigit-digitsToClear, endDigit);
      }
   }
/**
 * Initialize the storage behind \p destMR with the value owned by \p srcReg.
 * When the source and destination storage references differ this generates an
 * MVC/memcpy of min(sourceSize, destSize) bytes (grown to destSize when the
 * source's upper bytes are already known to be clear). For BCD values an
 * explicit widening to destSize can then be requested, and a conservative
 * decimal precision is set on the destination pseudo register.
 *
 * @param node                    node being initialized
 * @param destReg                 destination register (may be NULL); if non-NULL it must already have a storage reference
 * @param destMR                  memory reference for the destination storage
 * @param destSize                destination size in bytes
 * @param srcNode                 node that produced the source value
 * @param srcReg                  source register (must be non-NULL with a storage reference)
 * @param sourceMR                source memory reference (generated here when NULL)
 * @param sourceSize              source size in bytes
 * @param performExplicitWidening when true (and BCD, and destSize > sourceSize) widen the value after the copy
 * @param alwaysLegalToCleanSign  unused in this body -- presumably consumed by callees in other paths; TODO confirm
 * @param trackSignState          only traced here -- TODO confirm downstream use
 */
void
J9::Z::CodeGenerator::initializeStorageReference(TR::Node *node,
                                                 TR_OpaquePseudoRegister *destReg,
                                                 TR::MemoryReference *destMR,
                                                 int32_t destSize,
                                                 TR::Node *srcNode,
                                                 TR_OpaquePseudoRegister *srcReg,
                                                 TR::MemoryReference *sourceMR,
                                                 int32_t sourceSize,
                                                 bool performExplicitWidening,
                                                 bool alwaysLegalToCleanSign,
                                                 bool trackSignState)
   {
   TR::Compilation *comp = self()->comp();
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\tinitializeStorageReference for %s (%p), destReg %s, srcReg %s, sourceSize %d, destSize %d, performExplicitWidening=%s, trackSignState=%s\n",
         node->getOpCode().getName(),node,
         destReg ? self()->getDebug()->getName(destReg):"NULL",srcReg ? self()->getDebug()->getName(srcReg):"NULL",sourceSize,destSize,performExplicitWidening?"yes":"no",trackSignState?"yes":"no");

   TR_ASSERT( srcReg,"expecting a non-null srcReg in initializeStorageReference\n");
   TR_ASSERT( srcReg->getStorageReference(),"expecting a non-null srcReg->storageRef in initializeStorageReference\n");

   TR::CodeGenerator *cg = self();
   // if a non-null destReg does not have a memory slot set then the addRangeOfZeroBytes/addRangeOfZeroDigits calls will
   // not be able to query the symbol size
   TR_ASSERT( !destReg || destReg->getStorageReference(),"a non-null destReg must have a storageReference set\n");
   bool isBCD = node->getType().isBCD();
   TR_ASSERT(!isBCD || sourceSize <= TR_MAX_MVC_SIZE,"sourceSize %d > max %d for node %p\n",sourceSize,TR_MAX_MVC_SIZE,node);
   TR_ASSERT(!isBCD || destSize <= TR_MAX_MVC_SIZE,"destSize %d > max %d for node %p\n",destSize,TR_MAX_MVC_SIZE,node);
   TR_PseudoRegister *srcPseudoReg = srcReg->getPseudoRegister();
   TR_PseudoRegister *destPseudoReg = destReg ? destReg->getPseudoRegister() : NULL;

   // widening and truncations only supported for pseudoRegisters
   TR_ASSERT(srcPseudoReg || destSize == sourceSize,"destSize %d != sourceSize %d for opaquePseudoReg on node %p\n",destSize,sourceSize,node);
   TR_ASSERT(destPseudoReg == NULL || srcPseudoReg == NULL || (srcPseudoReg && destPseudoReg),"both src and dest must be pseudoRegisters for node %p\n",node);
   TR_ASSERT(!isBCD || srcPseudoReg,"srcPseudoReg should be set for BCD node %p\n",node);

   if (sourceMR == NULL)
      {
      sourceMR = isBCD ?
         generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg) :
         generateS390MemRefFromStorageRef(srcNode, srcReg->getStorageReference(), cg);
      }

   int32_t mvcSize = std::min(sourceSize, destSize);
   TR_StorageReference *dstStorageRef = destMR->getStorageReference();
   TR_StorageReference *srcStorageRef = sourceMR->getStorageReference();
   TR_ASSERT(dstStorageRef,"dstStorageRef should be non-NULL\n");
   TR_ASSERT(srcStorageRef,"srcStorageRef should be non-NULL\n");

   // no copy is needed when source and destination already share the same storage
   if (!self()->storageReferencesMatch(dstStorageRef, srcStorageRef))
      {
      int32_t bytesToClear = (destSize > sourceSize) ? srcReg->getBytesToClear(sourceSize, destSize) : 0;
      bool srcCastedToBCD = srcReg->getStorageReference()->isNodeBased() && srcReg->getStorageReference()->getNode()->castedToBCD();

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\tnode %p : srcReg %s (hasBadSign %s) on srcNode %p has bytes %d->%d %salready clear (bytesToClear=%d), srcCastedToBCD=%d\n",
            node,self()->getDebug()->getName(srcReg),srcPseudoReg ? (srcPseudoReg->hasKnownBadSignCode()?"yes":"no") : "no",
            srcNode,sourceSize,destSize,bytesToClear==0?"":"not ",bytesToClear,srcCastedToBCD);

      // if the source's upper bytes are already clear, copying them too is free
      // widening: grow the copy to destSize and record the zero range on destReg
      if (destSize > sourceSize &&
          bytesToClear == 0)
         {
         mvcSize = destSize;
         if (destReg)
            destReg->addRangeOfZeroBytes(sourceSize,destSize);
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tincrease mvcSize %d->%d to account for already cleared %d bytes\n",sourceSize,mvcSize,bytesToClear);
         }

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\tgen MVC/memcpy to initialize storage reference with size = %d\n",mvcSize);

      TR::MemoryReference *initDstMR = NULL;
      TR::MemoryReference *initSrcMR = NULL;
      if (isBCD)
         {
         initDstMR = generateS390RightAlignedMemoryReference(*destMR, node, 0, cg);
         initSrcMR = generateS390RightAlignedMemoryReference(*sourceMR, srcNode, 0, cg);
         }
      else
         {
         initDstMR = generateS390MemoryReference(*destMR, 0, cg);
         initSrcMR = generateS390MemoryReference(*sourceMR, 0, cg);
         }
      self()->genMemCpy(initDstMR, node, initSrcMR, srcNode, mvcSize);
      }

   if (isBCD && performExplicitWidening && (destSize > sourceSize))
      self()->widenBCDValueIfNeeded(node, destPseudoReg, sourceSize, destSize, destMR);

   if (destPseudoReg)
      {
      TR_ASSERT(srcPseudoReg,"srcPseudoReg must be non-NULL if destPseudoReg is non-NULL on node %p\n",node);
      // the destReg can be refined further by the caller but for now set it to a conservative value
      int32_t targetPrecision = 0;
      if (destSize >= sourceSize)
         targetPrecision = srcPseudoReg->getDecimalPrecision();
      else
         targetPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize);
      destPseudoReg->setDecimalPrecision(targetPrecision);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tset destReg targetPrecision to %d (from %s for node dt %s)\n",
            targetPrecision,destSize >= sourceSize?"srcReg precision":"destSize",node->getDataType().toString());
      }
   if (destReg)
      destReg->setIsInitialized();
   }
/**
 * Allocate a fresh temporary-based storage reference of \p destSize bytes and
 * initialize it from \p srcReg via initializeStorageReference. When \p destReg
 * is non-NULL it is re-pointed at the new temporary; otherwise the temporary's
 * reference count is pinned to 1 for the duration of the initialization (so it
 * is not reclaimed) and reset to 0 before returning.
 *
 * @return the newly created temporary storage reference
 */
TR_StorageReference *
J9::Z::CodeGenerator::initializeNewTemporaryStorageReference(TR::Node *node,
                                                             TR_OpaquePseudoRegister *destReg,
                                                             int32_t destSize,
                                                             TR::Node *srcNode,
                                                             TR_OpaquePseudoRegister *srcReg,
                                                             int32_t sourceSize,
                                                             TR::MemoryReference *sourceMR,
                                                             bool performExplicitWidening,
                                                             bool alwaysLegalToCleanSign,
                                                             bool trackSignState)
   {
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tinitializeNewTemporaryStorageReference for node %p, destReg %s, srcNode %p, srcReg %s (with size %d), sourceSize %d, destSize %d\n",
         node,destReg ? self()->getDebug()->getName(destReg):"NULL",srcNode,srcReg ? self()->getDebug()->getName(srcReg):"NULL",srcReg?srcReg->getSize():0,sourceSize,destSize);

   TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, self()->comp());
   if (destReg)
      destReg->setStorageReference(tempStorageReference, node);
   else
      tempStorageReference->setTemporaryReferenceCount(1); // keep the temp alive while it is initialized below

   TR_ASSERT(srcReg,"expecting a non-null srcReg in initializeNewTemporaryStorageReference for srcNode %p\n",srcNode);

   // BCD (pseudo register) sources use right aligned memory references; opaque sources use plain ones
   TR::MemoryReference *destMR = NULL;
   if (srcReg->getPseudoRegister())
      destMR = generateS390RightAlignedMemoryReference(node, tempStorageReference, self(), true, true); // enforceSSLimits=true, isNewTemp=true
   else
      destMR = generateS390MemRefFromStorageRef(node, tempStorageReference, self());

   self()->initializeStorageReference(node,
                                      destReg,
                                      destMR,
                                      destSize,
                                      srcNode,
                                      srcReg,
                                      sourceMR,
                                      sourceSize,
                                      performExplicitWidening,
                                      alwaysLegalToCleanSign,
                                      trackSignState);
   if (destReg == NULL)
      tempStorageReference->setTemporaryReferenceCount(0); // drop the artificial pin taken above
   return tempStorageReference;
   }
/**
 * Copy the value backing \p reg (reg->getSize() bytes, or \p sizeOverride bytes
 * when non-zero) into a fresh single-use temporary and return a newly allocated
 * register bound to that temporary. A new register is allocated (rather than
 * reusing \p reg) so storage-reference dependent state such as
 * leftAlignedZeroDigits is not carried over to the copy. Callers use this when
 * the original storage may be overwritten before all uses of the value are
 * evaluated (see privatizePseudoRegisterIfNeeded).
 */
TR_OpaquePseudoRegister *
J9::Z::CodeGenerator::privatizePseudoRegister(TR::Node *node, TR_OpaquePseudoRegister *reg, TR_StorageReference *storageRef, size_t sizeOverride)
   {
   TR::CodeGenerator *cg = self();
   TR::Compilation *comp = cg->comp();
   size_t regSize = reg->getSize();
   if (self()->traceBCDCodeGen())
      {
      if (sizeOverride != 0 && sizeOverride != regSize)
         traceMsg(comp,"\tsizeOverride=%d : use this as the size for privatizing reg %s (instead of regSize %d)\n",sizeOverride,cg->getDebug()->getName(reg),reg->getSize());
      else
         traceMsg(comp,"\tsizeOverride=0 : use reg %s regSize %d as the size for privatizing\n",cg->getDebug()->getName(reg),reg->getSize());
      }
   size_t size = sizeOverride == 0 ? regSize : sizeOverride;
   bool isBCD = node->getType().isBCD();
   TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(size, comp);
   tempStorageReference->setIsSingleUseTemporary();
   TR::MemoryReference *origSrcMR = NULL;
   TR::MemoryReference *copyMR = NULL;
   if (isBCD)
      {
      origSrcMR = generateS390RightAlignedMemoryReference(node, storageRef, cg);
      copyMR = generateS390RightAlignedMemoryReference(node, tempStorageReference, cg, true, true); // enforceSSLimits=true, isNewTemp=true
      }
   else
      {
      origSrcMR = generateS390MemRefFromStorageRef(node, storageRef, cg);
      copyMR = generateS390MemRefFromStorageRef(node, tempStorageReference, cg); // enforceSSLimits=true
      }

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\ta^a : gen memcpy of size = %d to privatize node %s (%p) with storageRef #%d (%s) to #%d (%s) on line_no=%d\n",
         size,node->getOpCode().getName(),node,
         storageRef->getReferenceNumber(),self()->getDebug()->getName(storageRef->getSymbol()),
         tempStorageReference->getReferenceNumber(),self()->getDebug()->getName(tempStorageReference->getSymbol()),
         comp->getLineNumber(node));

   // allocate a new register so any storageRef dep state (like leftAlignedZeroDigits) is cleared (as the mempcy isn't going transfer these over to copyMR)
   TR_OpaquePseudoRegister *tempRegister = isBCD ? cg->allocatePseudoRegister(reg->getPseudoRegister()) : cg->allocateOpaquePseudoRegister(reg);
   tempRegister->setStorageReference(tempStorageReference, NULL); // node==NULL as the temp refCounts are explicitly being managed as the temp will only live for this evaluator
   tempRegister->setIsInitialized();

   cg->genMemCpy(copyMR, node, origSrcMR, node, size);

   return tempRegister;
   }
/**
 * Decide whether \p childReg must be copied to a temporary before \p parent
 * (which carries a node-based storage reference hint) can safely reuse its
 * storage. A copy is generated only when all of the following hold: the child's
 * storage reference is non-constant node based, the hint node has a symbol
 * reference, a privatization trigger applies (isInMemoryCopyProp, the child is
 * using another store's storage reference, or the child register has dead or
 * ignored right aligned bytes), and the aliasing overlap query says the two
 * storages may overlap (with a pattern-matching "second opinion" able to cancel
 * an overly conservative aliasing answer).
 *
 * @return the (possibly privatized) register to use in place of \p childReg
 */
TR_OpaquePseudoRegister*
J9::Z::CodeGenerator::privatizePseudoRegisterIfNeeded(TR::Node *parent, TR::Node *child, TR_OpaquePseudoRegister *childReg)
   {
   TR::Compilation *comp = self()->comp();
   TR_OpaquePseudoRegister *outReg = childReg;
   TR_StorageReference *hint = parent->getStorageReferenceHint();
   if (hint && hint->isNodeBased())
      {
      TR::Node *hintNode = hint->getNode();
      TR_StorageReference *childStorageRef = childReg->getStorageReference();
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tprivatizePseudoRegisterIfNeeded for %s (%p) with hint %s (%p) (isInMemoryCopyProp=%s) and child %s (%p) (child storageRef isNonConstNodeBased=%s)\n",
            parent->getOpCode().getName(),parent,
            hintNode->getOpCode().getName(),hintNode,hintNode->isInMemoryCopyProp()?"yes":"no",
            child->getOpCode().getName(),child,
            childStorageRef ? (childStorageRef->isNonConstantNodeBased() ? "yes":"no") : "null");
      if (childStorageRef &&
          childStorageRef->isNonConstantNodeBased() &&
          hintNode->getOpCode().hasSymbolReference())
         {
         TR::Node *childStorageRefNode = childStorageRef->getNode();
         // see comment in pdstoreEvaluator for isUsingStorageRefFromAnotherStore and childRegHasDeadOrIgnoredBytes
         bool isUsingStorageRefFromAnotherStore = childStorageRefNode->getOpCode().isStore() && childStorageRefNode != hintNode;
         bool childRegHasDeadOrIgnoredBytes = childReg->getRightAlignedIgnoredBytes() > 0;
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tisInMemoryCopyProp=%s, isUsingStorageRefFromAnotherStore=%s, childRegHasDeadOrIgnoredBytes=%s : childStorageRef %s (%p), hintNode %s (%p)\n",
               hintNode->isInMemoryCopyProp() ? "yes":"no",
               isUsingStorageRefFromAnotherStore ? "yes":"no",
               childRegHasDeadOrIgnoredBytes ? "yes":"no",
               childStorageRefNode->getOpCode().getName(),childStorageRefNode,
               hintNode->getOpCode().getName(),hintNode);
         if (hintNode->isInMemoryCopyProp() || isUsingStorageRefFromAnotherStore || childRegHasDeadOrIgnoredBytes)
            {
            bool useAliasing = true;
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tcheck overlap between store hint %s (%p) and childStorageRefNode %s (%p)\n",
                  hintNode->getOpCode().getName(),hintNode,childStorageRefNode->getOpCode().getName(),childStorageRefNode);
            if (self()->loadAndStoreMayOverlap(hintNode,
                                               hintNode->getSize(),
                                               childStorageRefNode,
                                               childStorageRefNode->getSize()))
               {
               bool needsPrivitization = true;
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\toverlap=true (from %s test) -- privatize the source memref to a temp memref\n",useAliasing?"aliasing":"pattern");
               if (useAliasing && // checking useAliasing here because in the no info case the above loadAndStoreMayOverlap already did the pattern match
                   self()->storageMayOverlap(hintNode, hintNode->getSize(), childStorageRefNode, childStorageRefNode->getSize()) == TR_NoOverlap)
                  {
                  // get a second opinion -- the aliasing says the operations overlap but perhaps it is too conservative
                  // so do pattern matching based test to see if the operations are actually disjoint
                  if (self()->traceBCDCodeGen())
                     traceMsg(comp,"\t\t but overlap=false (from 2nd opinion pattern test) -- set needsPrivitization to false\n");
                  needsPrivitization = false;
                  }

               if (needsPrivitization)
                  {
                  if (self()->traceBCDCodeGen())
                     {
                     if (hintNode->isInMemoryCopyProp())
                        traceMsg(comp,"\ta^a : privatize needed due to isInMemoryCopyProp hintNode %s (%p) on line_no=%d (privatizeCase)\n",
                           hintNode->getOpCode().getName(),hintNode,comp->getLineNumber(hintNode));
                     if (isUsingStorageRefFromAnotherStore)
                        traceMsg(comp,"\ta^a : privatize needed due to isUsingStorageRefFromAnotherStore childStorageRefNode %s (%p) on line_no=%d (privatizeCase)\n",
                           childStorageRefNode->getOpCode().getName(),childStorageRefNode,comp->getLineNumber(hintNode));
                     if (childRegHasDeadOrIgnoredBytes)
                        traceMsg(comp,"\ta^a : privatize needed due to childRegHasDeadOrIgnoredBytes valueReg %s child %s (%p) on line_no=%d (privatizeCase)\n",
                           self()->getDebug()->getName(childReg),child->getOpCode().getName(),child,comp->getLineNumber(hintNode));
                     }

                  outReg = self()->privatizePseudoRegister(child, childReg, childStorageRef);
                  // perf assert only: flags that an overlap copy was generated when TR_EnablePerfAsserts is on
                  TR_ASSERT(!comp->getOption(TR_EnablePerfAsserts),"gen overlap copy for hintNode %s (%p) on line_no=%d (privatePseudoCase)\n",
                     hintNode->getOpCode().getName(),hintNode,comp->getLineNumber(hintNode));
                  }
               }
            else
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\toverlap=false (from %s test) -- do not privatize the source memref\n",useAliasing?"aliasing":"pattern");
               }
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"y^y : temp copy saved isInMemoryCopyProp = false on %s (%p) (privatizeCase)\n",hintNode->getOpCode().getName(),hintNode);
            }
         }
      }
   return outReg;
   }
outReg;2641}26422643TR_PseudoRegister*2644J9::Z::CodeGenerator::privatizeBCDRegisterIfNeeded(TR::Node *parent, TR::Node *child, TR_OpaquePseudoRegister *childReg)2645{2646TR_OpaquePseudoRegister *reg = self()->privatizePseudoRegisterIfNeeded(parent, child, childReg);2647TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();2648TR_ASSERT(pseudoReg,"pseudoReg should not be NULL after privatizing of child %p\n",child);2649return pseudoReg;2650}26512652TR_StorageReference *2653J9::Z::CodeGenerator::privatizeStorageReference(TR::Node *node, TR_OpaquePseudoRegister *reg, TR::MemoryReference *memRef)2654{2655TR::Compilation *comp = self()->comp();26562657// Copy a node-based storageReference with a refCount > 1 to a temporary as the underlying symRef may be killed before the next commoned reference2658// to the node.2659// The flag skipCopyOnLoad is set in lowerTrees to prevent unnecessary copies when the symRef is known not to be killed for any commoned reference.2660TR_StorageReference *storageRef = reg->getStorageReference();2661TR_StorageReference *tempStorageRef = NULL;2662bool isPassThruCase = node != storageRef->getNode();2663if (self()->traceBCDCodeGen())2664traceMsg(comp,"privatizeStorageReference: %s (%p) refCount %d :: storageRef #%d, storageRefNode %s (%p) nodeRefCount %d, isNodeBased %s\n",2665node->getOpCode().getName(),2666node,2667node->getReferenceCount(),2668storageRef->getReferenceNumber(),2669storageRef->getNode()?storageRef->getNode()->getOpCode().getName():"NULL",2670storageRef->getNode(),2671storageRef->isNodeBased()?storageRef->getNodeReferenceCount():-99,2672storageRef->isNodeBased()?"yes":"no");26732674bool force = comp->getOption(TR_ForceBCDInit) && node->getOpCode().isBCDLoad();2675if (force ||2676(storageRef->isNodeBased() &&2677node->getReferenceCount() > 1 &&2678!node->skipCopyOnLoad()))2679{2680if (self()->traceBCDCodeGen())2681{2682traceMsg(comp,"\tnode %p (%s) with skipCopyOnLoad=false does need to be privatized for node based 
/**
 * Copy a node-based storage reference with a node refCount > 1 to a temporary,
 * since the underlying symRef may be killed before the next commoned reference
 * to the node. Returns the new temporary storage reference, or NULL when no
 * privatization was required (temporary based, refCount <= 1, or
 * skipCopyOnLoad was set in lowerTrees because the symRef is known not to be
 * killed for any commoned reference). TR_ForceBCDInit forces the copy for BCD
 * loads regardless.
 */
TR_StorageReference *
J9::Z::CodeGenerator::privatizeStorageReference(TR::Node *node, TR_OpaquePseudoRegister *reg, TR::MemoryReference *memRef)
   {
   TR::Compilation *comp = self()->comp();

   // Copy a node-based storageReference with a refCount > 1 to a temporary as the underlying symRef may be killed before the next commoned reference
   // to the node.
   // The flag skipCopyOnLoad is set in lowerTrees to prevent unnecessary copies when the symRef is known not to be killed for any commoned reference.
   TR_StorageReference *storageRef = reg->getStorageReference();
   TR_StorageReference *tempStorageRef = NULL;
   bool isPassThruCase = node != storageRef->getNode(); // NOTE(review): computed but not read in this body
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"privatizeStorageReference: %s (%p) refCount %d :: storageRef #%d, storageRefNode %s (%p) nodeRefCount %d, isNodeBased %s\n",
         node->getOpCode().getName(),
         node,
         node->getReferenceCount(),
         storageRef->getReferenceNumber(),
         storageRef->getNode()?storageRef->getNode()->getOpCode().getName():"NULL",
         storageRef->getNode(),
         storageRef->isNodeBased()?storageRef->getNodeReferenceCount():-99,
         storageRef->isNodeBased()?"yes":"no");

   bool force = comp->getOption(TR_ForceBCDInit) && node->getOpCode().isBCDLoad();
   if (force ||
       (storageRef->isNodeBased() &&
        node->getReferenceCount() > 1 &&
        !node->skipCopyOnLoad()))
      {
      if (self()->traceBCDCodeGen())
         {
         traceMsg(comp,"\tnode %p (%s) with skipCopyOnLoad=false does need to be privatized for node based storageRef node %p (%s-based) (force=%s)\n",
            node,node->getOpCode().getName(),storageRef->getNode(),storageRef->getNode()->getOpCode().isStore()?"store":"load",force?"yes":"no");
         traceMsg(comp,"\tb^b : gen memcpy of size = %d to privatizeStorageReference node %s (%p) with storageRef #%d (%s) on line_no=%d\n",
            reg->getSize(),node->getOpCode().getName(),node,
            storageRef->getReferenceNumber(),self()->getDebug()->getName(storageRef->getSymbol()),
            comp->getLineNumber(node));
         }

      if (force && storageRef->getNodeReferenceCount() == 1)
         storageRef->incrementNodeReferenceCount(); // prevent nodeRefCount underflow (dec'd for init and on setStorageRef call)

      if (memRef == NULL)
         {
         if (reg->getPseudoRegister())
            memRef = generateS390RightAlignedMemoryReference(node, storageRef, self());
         else
            memRef = generateS390MemRefFromStorageRef(node, storageRef, self());
         }

      if (reg->getSize() == 0)
         {
         // should not happen -- recover by deriving the size from the node
         TR_ASSERT(false,"register should have its size initialized before calling privatizeStorageReference\n");

         if (reg->getPseudoRegister())
            reg->getPseudoRegister()->setDecimalPrecision(node->getDecimalPrecision());
         else
            reg->setSize(node->getSize());
         }
      tempStorageRef = self()->initializeNewTemporaryStorageReference(node, reg, reg->getSize(), node, reg, reg->getSize(), memRef, false, false, false); // performExplicitWidening=false, alwaysLegalToCleanSign=false, trackSignState=false
      }
   else if (self()->traceBCDCodeGen())
      {
      traceMsg(comp,"\t%s (%p) does NOT need to be privatised because isTemp (%s) and/or refCount %d <= 1 and/or skipCopyOnLoad=true (flag is %s)\n",
         node->getOpCode().getName(),node,storageRef->isTemporaryBased()?"yes":"no",node->getReferenceCount(),node->skipCopyOnLoad()?"true":"false");
      }
   return tempStorageRef;
   }
/**
 * A binary coded decimal value may have had its storageReference size reduced
 * (by a pdshr for example) and/or have implied left most zeroes. This routine
 * will ensure the storageReference is at least resultSize and zero digits are
 * explicitly generated up and including clearSize. This full materialization
 * is required in several cases such as before calls or when used in an
 * instruction that requires a fixed size temp (like UNPKU in pd2ud or
 * CVB/CVBG)
 *
 * @param node                   the BCD node being materialized
 * @param reg                    in/out: the node's pseudo register (may be replaced by a clobber evaluate)
 * @param resultSize             minimum size in bytes of the returned storage
 * @param clearSize              bytes (from the right) that must be explicitly zeroed; 0 means clear to resultSize
 * @param updateStorageReference when true, re-point \p reg at any newly created temporary
 * @param alwaysEnforceSSLimits  passed through to memory reference generation
 * @return a right aligned memory reference for the materialized value
 */
TR::MemoryReference *
J9::Z::CodeGenerator::materializeFullBCDValue(TR::Node *node,
                                              TR_PseudoRegister *&reg,
                                              int32_t resultSize,
                                              int32_t clearSize,
                                              bool updateStorageReference,
                                              bool alwaysEnforceSSLimits)
   {
   TR::Compilation *comp = self()->comp();

   int32_t regSize = reg->getSize();
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\tmaterializeFullBCDValue evaluated %s (%p) (nodeSize %d, requested resultSize %d) to reg %s (regSize %d), clearSize=%d, updateStorageReference=%s\n",
         node->getOpCode().getName(),node,node->getStorageReferenceSize(),resultSize,self()->getDebug()->getName(reg),regSize,clearSize,updateStorageReference?"yes":"no");

   TR_ASSERT(clearSize >= 0,"invalid clearSize %d for node %p\n",clearSize,node);
   if (clearSize == 0)
      {
      clearSize = resultSize;
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tspecific clearSize not requested : set clearSize=resultSize=%d\n",resultSize);
      }
   else
      {
      // enforce this condition : regSize <= clearSize <= resultSize
      TR_ASSERT(clearSize <= resultSize,"clearSize %d should be <= resultSize %d on node %p\n",clearSize,resultSize,node);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tupdate clearSize %d to max(clearSize, regSize) = max(%d,%d) = %d\n",clearSize,clearSize,regSize,std::max(clearSize, regSize));
      clearSize = std::max(clearSize, regSize);
      }

   TR::MemoryReference *memRef = NULL;
   // fast path: the live symbol is already big enough and the widening bytes are known clear
   if (regSize < resultSize &&
       reg->getLiveSymbolSize() >= resultSize &&
       reg->getBytesToClear(regSize, clearSize) == 0)
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tbytes regSize->clearSize (%d->%d) are already clear -- no work to do to materializeFullBCDValue\n",regSize,clearSize);
      memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), alwaysEnforceSSLimits);
      }
   else if (regSize < resultSize)
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\treg->getSize() < resultSize (%d < %d) so check liveSymSize on reg\n",regSize,resultSize);
      int32_t liveSymSize = reg->getLiveSymbolSize();
      int32_t bytesToClear = clearSize-regSize;
      bool enforceSSLimitsForClear = alwaysEnforceSSLimits || bytesToClear > 1;

      if (reg->isInitialized() &&
          reg->getStorageReference()->isReadOnlyTemporary() &&
          liveSymSize > regSize &&
          reg->getBytesToClear(regSize, clearSize) > 0)
         {
         // 1 pd2i
         // 1    pdModPrec p=3,s=2   <- (node) passThrough + initialized (setAsReadOnly due to lazy clobber evaluate)
         // 2       pdX p=8,s=5      <- initialized and refCount > 1 (used again)
         //
         // Have to clobber evaluate in this case so the clearing of firstRegSize (2) to sourceSize (8) does not destroy
         // the 6 upper bytes required by the commoned reference to pdX
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         TR_OpaquePseudoRegister *opaqueReg = self()->ssrClobberEvaluate(node, memRef);
         reg = opaqueReg->getPseudoRegister();
         TR_ASSERT(reg,"reg should be set for node %p\n",node);
         }

      if (reg->isInitialized() && reg->trackZeroDigits() && liveSymSize >= resultSize)
         {
         // symbol is already big enough: just clear whatever bytes still need it
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\treg->getLiveSymbolSize() >= resultSize (%d >= %d) so call clearByteRangeIfNeeded\n",liveSymSize,resultSize);
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         self()->clearByteRangeIfNeeded(node, reg, memRef, regSize, clearSize);
         }
      else if (reg->isInitialized() && reg->trackZeroDigits() && reg->getStorageReference()->isTemporaryBased())
         {
         // temporary-based symbol too small: grow it in place, then clear only the bytes not already known zero
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\treg->getLiveSymbolSize() < resultSize (%d < %d) so call increaseTemporarySymbolSize but first check for already cleared bytes\n",liveSymSize,resultSize);
         //int32_t bytesToClear = clearSize-regSize; // e.g. clearSize=16, regSize=3 so bytesToClear=13, liveSymSize=15
         int32_t alreadyClearedBytes = 0;
         int32_t endByteForClearCheck = 0;
         if (clearSize > liveSymSize) // 16 > 15
            endByteForClearCheck = liveSymSize;    // endByteForClearCheck = 15
         else
            endByteForClearCheck = clearSize;

         if (reg->getBytesToClear(regSize, endByteForClearCheck) == 0) // increaseTemporarySymbolSize resets leftAlignedZeroDigits so check cleared bytes first
            alreadyClearedBytes = endByteForClearCheck-regSize; // endByteForClearCheck=15,regSize=3 so alreadyClearedBytes=12

         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tfound %d alreadyClearedBytes : adjust bytesToClear %d -> %d\n",alreadyClearedBytes,bytesToClear,bytesToClear-alreadyClearedBytes);
         bytesToClear-=alreadyClearedBytes; // bytesToClear = bytesToClear-alreadyClearedBytes = 13-12 = 1
         if (bytesToClear < 0)
            {
            TR_ASSERT(false,"bytesToClear should always be >=0 and not %d\n",bytesToClear);
            bytesToClear = clearSize-regSize;
            }
         int32_t savedLeftAlignedZeroDigits = reg->getLeftAlignedZeroDigits();
         reg->increaseTemporarySymbolSize(resultSize - liveSymSize); // also resets leftAlignedZeroDigits

         // create memRef after temp size increase so correct TotalSizeForAlignment is set
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         int32_t startByte = clearSize-bytesToClear;
         int32_t endByte = clearSize;
         self()->widenBCDValue(node, reg, startByte, endByte, memRef);
         if (clearSize == resultSize)
            {
            // bytesToClear may have been reduced to less than resultSize-regSize if the source already had some cleared bytes
            // in this case the already cleared bytes should also be transferred to the size increased temporary
            int32_t newLeftAlignedZeroDigits = TR::DataType::bytesToDigits(reg->getDataType(), resultSize-regSize); // (16-3)*2 = 26
            if (TR::DataType::getDigitSize(reg->getDataType()) == HalfByteDigit && reg->isEvenPrecision() && reg->isLeftMostNibbleClear())
               newLeftAlignedZeroDigits++;
            reg->setLeftAlignedZeroDigits(newLeftAlignedZeroDigits);
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tset leftAlignedZeroDigits to %d after temporarySymbolSize increase\n",newLeftAlignedZeroDigits);
            }
         else // if not clearing all the new bytes than the zero digits will not be left aligned
            {
            // TODO: when actual zero ranges are tracked can transfer the range on the reg from before the increaseTemporarySymbolSize
            // to now in the clearSize < resultSize case
            if (self()->traceBCDCodeGen() && savedLeftAlignedZeroDigits > 0)
               traceMsg(comp,"x^x : missed transferring savedLeftAlignedZeroDigits %d on matFull, node %p\n",savedLeftAlignedZeroDigits,node);
            }
         }
      else
         {
         // cannot grow the current symbol: materialize into a brand new temporary of resultSize
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tstorageReference #%d is not tempBased (or is not packed) and reg->getLiveSymbolSize() < resultSize (%d < %d) so alloc a new temporary reference\n",
               reg->getStorageReference()->getReferenceNumber(),liveSymSize,resultSize);
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         TR_PseudoRegister *destReg = NULL;
         if (updateStorageReference)
            destReg = reg;
         bool clearWidenedBytes = clearSize == resultSize;
         TR_StorageReference *tempStorageRef = self()->initializeNewTemporaryStorageReference(node,
                                                                                              destReg,
                                                                                              resultSize,
                                                                                              node,
                                                                                              reg,
                                                                                              reg->getSize(),
                                                                                              memRef,
                                                                                              clearWidenedBytes, // performExplicitWidening
                                                                                              false,             // alwaysLegalToCleanSign
                                                                                              false);            // trackSignState=false
         if (destReg == NULL)
            tempStorageRef->setTemporaryReferenceCount(1);

         // pass in isNewTemp=true for the memref gen below so any deadBytes on the node's register are *not* counted for this new temporary
         // (these deadBytes should only be counted for the source memRef created just above)
         memRef = generateS390RightAlignedMemoryReference(node, tempStorageRef, self(), true, true); // enforceSSLimits=true, isNewTemp=true

         if (!clearWidenedBytes && clearSize > regSize)
            self()->widenBCDValue(node, destReg, regSize, clearSize, memRef);

         if (destReg == NULL)
            tempStorageRef->setTemporaryReferenceCount(0);
         self()->pendingFreeVariableSizeSymRef(tempStorageRef->getTemporarySymbolReference()); // free after this treetop has been evaluated if the refCount is still 0 at that point
         }
      }
   memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), alwaysEnforceSSLimits);
   return memRef;
   }
NULL)2872tempStorageRef->setTemporaryReferenceCount(1);28732874// pass in isNewTemp=true for the memref gen below so any deadBytes on the node's register are *not* counted for this new temporary2875// (these deadBytes should only be counted for the source memRef created just above)2876memRef = generateS390RightAlignedMemoryReference(node, tempStorageRef, self(), true, true); // enforceSSLimits=true, isNewTemp=true28772878if (!clearWidenedBytes && clearSize > regSize)2879self()->widenBCDValue(node, destReg, regSize, clearSize, memRef);28802881if (destReg == NULL)2882tempStorageRef->setTemporaryReferenceCount(0);2883self()->pendingFreeVariableSizeSymRef(tempStorageRef->getTemporarySymbolReference()); // free after this treetop has been evaluated if the refCount is still 0 at that point2884}2885}2886memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), alwaysEnforceSSLimits);2887return memRef;2888}28892890bool topBitIsZero(uint8_t byte)2891{2892return (byte & 0x80) == 0;2893}28942895bool topBitIsOne(uint8_t byte)2896{2897return (byte & 0x80) == 0x80;2898}28992900#define TR_TWO_BYTE_TABLE_SIZE 172901static uint8_t zeroTable[TR_TWO_BYTE_TABLE_SIZE] =2902{29030,29040,29050,29060,29070,29080,29090,29100,29110,29120,29130,29140,29150,29160,29170,29180,291902920};29212922static uint8_t oneTable[TR_TWO_BYTE_TABLE_SIZE] =2923{29240xFF,29250xFF,29260xFF,29270xFF,29280xFF,29290xFF,29300xFF,29310xFF,29320xFF,29330xFF,29340xFF,29350xFF,29360xFF,29370xFF,29380xFF,29390xFF,29400xFF2941};29422943TR::MemoryReference *getNextMR(TR::MemoryReference *baseMR, TR::Node *node, intptr_t offset, size_t destLeftMostByte, bool isBCD, TR::CodeGenerator *cg)2944{2945if (isBCD)2946return generateS390LeftAlignedMemoryReference(*baseMR, node, offset, cg, destLeftMostByte);2947else2948return generateS390MemoryReference(*baseMR, offset, cg);2949}29502951bool checkMVHI(char *lit, int32_t offset)2952{2953if (memcmp(lit+offset,zeroTable,2) == 0 && 
topBitIsZero(lit[offset+2])) // zero extend 0x7FFF to lit value of 0x00007FFF2954return true;2955else if (memcmp(lit+offset,oneTable,2) == 0 && topBitIsOne(lit[offset+2])) // sign extend 0xFFF to lit value of 0xffffFFFF2956return true;2957else2958return false;2959}29602961bool checkMVGHI(char *lit, int32_t offset)2962{2963if (memcmp(lit+offset,zeroTable,6) == 0 && topBitIsZero(lit[offset+6])) // zero extend 0x7FFF to lit value of 0x00000000 00007FFF2964return true;2965else if (memcmp(lit+offset,oneTable,6) == 0 && topBitIsOne(lit[offset+6])) // sign extend 0xFFFF to lit value of 0xffffFFFF ffffFFFF2966return true;2967else2968return false;2969}29702971void genMVI(TR::MemoryReference *destMR, TR::Node *node, uint8_t value, TR::CodeGenerator *cg)2972{2973if (cg->traceBCDCodeGen())2974traceMsg(cg->comp(),"\tgen MVI 0x%02x\n",value);2975generateSIInstruction(cg, TR::InstOpCode::MVI, node, destMR, value);2976}29772978void genMVHHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)2979{2980if (cg->traceBCDCodeGen())2981traceMsg(cg->comp(),"\tgen MVHHI 0x%04x\n",(uint16_t)value);2982generateSILInstruction(cg, TR::InstOpCode::MVHHI, node, destMR, value);2983}29842985void genMVHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)2986{2987if (cg->traceBCDCodeGen())2988traceMsg(cg->comp(),"\tgen MVHI 0x%04x\n",(uint16_t)value);2989generateSILInstruction(cg, TR::InstOpCode::MVHI, node, destMR, value);2990}29912992void genMVGHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)2993{2994if (cg->traceBCDCodeGen())2995traceMsg(cg->comp(),"\tgen MVGHI 0x%04x\n",(uint16_t)value);2996generateSILInstruction(cg, TR::InstOpCode::MVGHI, node, destMR, value);2997}2998299930003001/**3002* This method must be kept in sync with cases handled by useMoveImmediateCommon below3003*/3004bool3005J9::Z::CodeGenerator::canCopyWithOneOrTwoInstrs(char *lit, size_t size)3006{3007if (size < 1 || size >= 
TR_TWO_BYTE_TABLE_SIZE)3008{3009return false;3010}30113012bool canCopy = false;3013switch (size)3014{3015case 0:3016canCopy = false;3017break;3018case 1: // MVI3019case 2: // MVI/MVI or MVHHI3020case 3: // MVHHI/MVI3021canCopy = true;3022break;3023case 4: // MVHHI/MVHHI (always) or MVHI (value <= 0x7FFF)3024canCopy = true;3025break;3026case 5: // MVHI/MVI (MVHI 0,1,2,3 bytes value <= 0x7FFF) or MVI/MVHI (MVHI 1,2,3,4 bytes value <= 0x7FFF)3027if (checkMVHI(lit,0) || checkMVHI(lit,1))3028canCopy = true;3029break;3030case 6: // MVHI/MVHHI (MVHI 0,1,2,3 bytes value <= 0x7FFF) or MVHHI/MVHI (MVHI 2,3,4,5 bytes value <= 0x7FFF)3031if (checkMVHI(lit,0) || checkMVHI(lit,2))3032canCopy = true;3033break;3034case 7:3035canCopy = false;3036break;3037case 8: // MVGHI (value <= 0x7FFF) or MVHI/MVHI (e.g. 0x00007FFF FFFFffff or vice-versa)3038if (checkMVGHI(lit,0))3039canCopy = true;3040else if (checkMVHI(lit,0) && checkMVHI(lit,4))3041canCopy = true;3042break;3043case 9: // MVGHI/MVI (MVGHI <= 0x7FFF)3044case 10: // MVGHI/MVHHI (MVGHI <= 0x7FFF)3045if (checkMVGHI(lit,0))3046canCopy = true;3047break;3048case 11:3049canCopy = false;3050break;3051case 12: // MVGHI/MVHI (MVGHI and MVHI value both <= 0x7FFF)3052if (checkMVGHI(lit,0) && checkMVHI(lit,8))3053{3054canCopy = true;3055}3056break;3057case 13:3058case 14:3059case 15:3060canCopy=false;3061break;3062case 16: // MVGHI/MVGHI (both MVGHI values <= 0x7FFF)3063if (checkMVGHI(lit,0) && checkMVGHI(lit,8))3064{3065canCopy = true;3066}3067break;3068default:3069canCopy = false;3070break;3071}3072return canCopy;3073}307430753076/**3077* This method must be kept in sync with cases handled by canCopyWithOneOrTwoInstrs above3078*/3079bool3080J9::Z::CodeGenerator::useMoveImmediateCommon(TR::Node *node,3081char *srcLiteral,3082size_t srcSize,3083TR::Node *srcNode,3084size_t destSize,3085intptr_t destBaseOffset,3086size_t destLeftMostByte,3087TR::MemoryReference *inputDestMR)3088{3089TR::CodeGenerator *cg = self();3090size_t size = 
destSize;3091char *lit = srcLiteral;3092bool isBCD = node->getType().isBCD();30933094TR::MemoryReference *destMR = getNextMR(inputDestMR, node, destBaseOffset, destLeftMostByte, isBCD, cg);30953096switch (size)3097{3098case 0:3099TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);3100break;3101case 1: // MVI3102genMVI(destMR, node, lit[0], cg);3103break;3104case 2: // MVI/MVI or MVHHI3105{3106genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);3107break;3108}3109case 3: // MVHHI/MVI3110genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);3111genMVI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, lit[2], cg);3112break;3113case 4: // MVHHI/MVHHI (always) or MVHI (value <= 0x7FFF)3114if (checkMVHI(lit,0))3115{3116genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);3117}3118else3119{3120genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);3121genMVHHI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, (lit[2]<<8)|lit[3], cg);3122}3123break;3124case 5:3125if (checkMVHI(lit,0))3126{3127// MVHI/MVI (MVHI 0,1,2,3 bytes value <= 0x7FFF)3128genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);3129genMVI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, lit[4], cg);3130}3131else3132{3133// MVI/MVHI (MVHI 1,2,3,4 bytes value <= 0x7FFF)3134TR_ASSERT(checkMVHI(lit,1),"checkMVHI should be true\n");3135genMVI(destMR, node, lit[0], cg);3136genMVHI(getNextMR(destMR, node, 1, destLeftMostByte, isBCD, cg), node, (lit[3]<<8)|lit[4], cg);3137}3138break;3139case 6:3140if (checkMVHI(lit,0))3141{3142// MVHI/MVHHI (MVHI 0,1,2,3 bytes value <= 0x7FFF)3143genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);3144genMVHHI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, (lit[4]<<8)|lit[5], cg);3145}3146else3147{3148// MVHHI/MVHI (MVHI 2,3,4,5 bytes value <= 0x7FFF)3149TR_ASSERT(checkMVHI(lit,2),"checkMVHI should be true\n");3150genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);3151genMVHI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, 
(lit[4]<<8)|lit[5], cg);3152}3153break;3154case 7:3155TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);3156break;3157case 8: // MVGHI (value <= 0x7FFF)3158if (checkMVGHI(lit,0))3159{3160genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);3161}3162else3163{3164TR_ASSERT(checkMVHI(lit,0) && checkMVHI(lit,4),"checkMVHI+checkMVHI should be true\n");3165genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);3166genMVHI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, (lit[6]<<8)|lit[7], cg);3167}3168break;3169case 9: // MVGHI/MVI (MVGHI <= 0x7FFF)3170genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);3171genMVI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, lit[8], cg);3172break;3173case 10: // MVGHI/MVHHI (MVGHI <= 0x7FFF)3174genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);3175genMVHHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[8]<<8)|lit[9], cg);3176break;3177case 11:3178TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);3179break;3180case 12: // MVGHI/MVHI (MVGHI and MVHI value both <= 0x7FFF)3181genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);3182genMVHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[10]<<8)|lit[11], cg);3183break;3184case 13:3185case 14:3186case 15:3187TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);3188break;3189case 16: // MVGHI/MVGHI (both MVGHI values <= 0x7FFF)3190genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);3191genMVGHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[14]<<8)|lit[15], cg);3192break;3193default:3194TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);3195break;3196}31973198return true;3199}32003201bool3202J9::Z::CodeGenerator::inlineSmallLiteral(size_t srcSize, char *srcLiteral, size_t destSize, bool trace)3203{3204TR::Compilation *comp = self()->comp();32053206bool inlineLiteral = false;3207if (srcSize != destSize)3208{3209inlineLiteral = false;3210if 
(trace)3211traceMsg(comp,"\t\tinlineLiteral=false : srcSize %d != destSize %d\n",srcSize,destSize);3212}3213else if (srcSize == 1)3214{3215inlineLiteral = true;3216if (trace)3217traceMsg(comp,"\t\tinlineLiteral=true : srcSize == 1 (destSize %d)\n",destSize);3218}3219else if (destSize <= 2)3220{3221inlineLiteral = true;3222if (trace)3223traceMsg(comp,"\t\tinlineLiteral=true : destSize %d <= 2 (srcSize %d)\n",destSize,srcSize);3224}3225else if (self()->canCopyWithOneOrTwoInstrs(srcLiteral, srcSize))3226{3227inlineLiteral = true;3228if (trace)3229traceMsg(comp,"\t\tinlineLiteral=true : canCopyWithOneOrTwoInstrs = true (srcSize %d, destSize %d)\n",srcSize,destSize);3230}3231else3232{3233inlineLiteral = false;3234if (trace)3235traceMsg(comp,"\t\tinlineSmallLiteral=false : unhandled case (srcSize %d, destSize %d)\n",srcSize,destSize);3236}3237return inlineLiteral;3238}323932403241bool3242J9::Z::CodeGenerator::checkFieldAlignmentForAtomicLong()3243{3244TR_OpaqueClassBlock * classBlock = self()->comp()->fej9()->getSystemClassFromClassName("java/util/concurrent/atomic/AtomicLong", 38, true);32453246// TR_J9SharedCacheVM::getSystemClassFromClassName can return 0 when it's impossible to relocate a J9Class later for AOT loads.3247if (!classBlock)3248return false;32493250char* fieldName = "value";3251int32_t fieldNameLen = 5;3252char * fieldSig = "J";3253int32_t fieldSigLen = 1;3254int32_t intOrBoolOffset = self()->fe()->getObjectHeaderSizeInBytes() + self()->fej9()->getInstanceFieldOffset(classBlock, fieldName, fieldNameLen, fieldSig, fieldSigLen);3255return (intOrBoolOffset & 0x3) == 0;3256}325732583259TR_PseudoRegister *3260J9::Z::CodeGenerator::evaluateBCDNode(TR::Node * node)3261{3262TR_ASSERT(node->getType().isBCD(),"evaluateBCDNode only valid for binary coded decimal types\n");3263bool isFirstTime = node->getRegister() == NULL;3264TR::Register *reg = self()->evaluate(node);3265TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();3266TR_ASSERT(pseudoReg,"pseudoReg 
should not be NULL after evaluation of node %p\n",node);3267if (isFirstTime)3268{3269if (node->getOpCode().canHaveStorageReferenceHint() &&3270node->getStorageReferenceHint() &&3271node->getStorageReferenceHint()->isTemporaryBased())3272{3273if (self()->traceBCDCodeGen())3274traceMsg(self()->comp(),"evaluateBCDNode: found temp based hint #%d on %s (%p)\n",3275node->getStorageReferenceHint()->getReferenceNumber(),3276node->getOpCode().getName(),3277node);3278node->getStorageReferenceHint()->removeSharedNode(node);3279}3280// to prevent refCount underflow on the padding address node can only use this tree on the first reference to a node3281if (node->getOpCode().canHavePaddingAddress())3282{3283if (self()->traceBCDCodeGen())3284traceMsg(self()->comp(),"evaluateBCDNode: set UsedPaddingAnchorAddress flag to true on %s (%p)\n",3285node->getOpCode().getName(),3286node);3287}3288}3289// TR_ASSERT(pseudoReg->signStateInitialized(),"sign state for node %p register not initialized\n",node);3290return pseudoReg;3291}32923293void3294J9::Z::CodeGenerator::addAllocatedRegister(TR_PseudoRegister * temp)3295{3296uint32_t idx = _registerArray.add(temp);3297temp->setIndex(idx);3298self()->startUsingRegister(temp);3299}330033013302/**3303* These routines return the minimum precision and size values for a packed arithmetic node so the corresponding3304* hardware instruction (AP,SP,MP,DP) can be legally encode3305*/3306uint32_t3307J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul, TR_PseudoRegister *multiplicand, TR_PseudoRegister *multiplier)3308{3309TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type %s\n",pdmul->getDataType().toString());3310return multiplicand->getSize() + multiplier->getSize();3311}33123313uint32_t3314J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul)3315{3316TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type 
%s\n",pdmul->getDataType().toString());3317return pdmul->getFirstChild()->getSize() + pdmul->getSecondChild()->getSize();3318}33193320uint32_t3321J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul, int32_t exponent)3322{3323TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type %s\n",pdmul->getDataType().toString());3324return pdmul->getFirstChild()->getSize() * exponent;3325}33263327int32_t3328J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul, TR_PseudoRegister *multiplicand, TR_PseudoRegister *multiplier)3329{3330TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());3331return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul, multiplicand, multiplier));3332}33333334int32_t3335J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul)3336{3337TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());3338return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul));3339}33403341int32_t3342J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul, int32_t exponent)3343{3344TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());3345return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul, exponent));3346}33473348/**3349* Motivating example for the packedAddSubSize3350* pdsub p=3,s=2 // correct answer is 12111-345 truncated to 3 digits = (11)7663351* pdload p=5,s=3 // 121113352* pdload p=3,s=3 // 3453353* If an SP of size=2 is used then the answer will be 111-345 = -234 instead of 766 as SP/AP are destructive operations3354* so for AP/SP the encoded firstOp/result size must be at least as big as the first 
operand.3355*/3356uint32_t3357J9::Z::CodeGenerator::getPDAddSubEncodedSize(TR::Node *node)3358{3359TR_ASSERT( node->getType().isAnyPacked() && node->getFirstChild()->getType().isAnyPacked(),"getPackedAddSubSize only valid for packed types\n");3360return std::max(node->getSize(), node->getFirstChild()->getSize());3361}33623363int32_t3364J9::Z::CodeGenerator::getPDAddSubEncodedPrecision(TR::Node *node)3365{3366TR_ASSERT( node->getType().isAnyPacked() && node->getFirstChild()->getType().isAnyPacked(),"getPackedAddSubPrecision only valid for packed types\n");3367return std::max(node->getDecimalPrecision(), node->getFirstChild()->getDecimalPrecision());3368}33693370uint32_t3371J9::Z::CodeGenerator::getPDAddSubEncodedSize(TR::Node *node, TR_PseudoRegister *firstReg)3372{3373TR_ASSERT( node->getType().isAnyPacked() && firstReg->getDataType().isAnyPacked(),"getPackedAddSubSize only valid for packed types\n");3374return std::max<uint32_t>(node->getSize(), firstReg->getSize());3375}33763377int32_t3378J9::Z::CodeGenerator::getPDAddSubEncodedPrecision(TR::Node *node, TR_PseudoRegister *firstReg)3379{3380TR_ASSERT( node->getType().isAnyPacked() && firstReg->getDataType().isAnyPacked(),"getPackedAddSubPrecision only valid for packed types\n");3381return std::max<int32_t>(node->getDecimalPrecision(), firstReg->getDecimalPrecision());3382}33833384bool3385J9::Z::CodeGenerator::supportsPackedShiftRight(int32_t resultPrecision, TR::Node *shiftSource, int32_t shiftAmount)3386{3387bool isSupported = false;3388int32_t maxPrecision = TR::DataType::getMaxPackedDecimalPrecision();3389int32_t sourceDigits = shiftSource->getDecimalPrecision();3390int32_t shiftedPrecision = sourceDigits - shiftAmount;3391if (resultPrecision <= maxPrecision)3392{3393isSupported = true; // fits in an MVO or SRP (and all SS2/SS3 instructions)3394}3395else if (shiftedPrecision <= maxPrecision)3396{3397isSupported = true; // fits in an MVO or SRP (and all SS2/SS3 instructions)3398}3399else if 
(isEven(shiftAmount))3400{3401isSupported = true; // uses MVN to move just the sign code so no restriction on length3402}34033404if (self()->traceBCDCodeGen())3405traceMsg(self()->comp(),"%ssupportsPackedShiftRight = %s : shiftSource %s (%p) p=%d by shiftAmount=%d -> shiftedPrec=%d (resultPrec %d) on line_no=%d (offset=%06X)\n",3406isSupported?"":"t^t : ",isSupported?"yes":"no",shiftSource->getOpCode().getName(),shiftSource,3407sourceDigits,shiftAmount,shiftedPrecision,resultPrecision,3408self()->comp()->getLineNumber(shiftSource),self()->comp()->getLineNumber(shiftSource));34093410return isSupported;3411}34123413int32_t3414J9::Z::CodeGenerator::getPDDivEncodedPrecision(TR::Node *node)3415{3416TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,3417"getPackedDividendPrecision only valid for pddiv/pdrem\n");3418return self()->getPDDivEncodedPrecisionCommon(node,3419node->getFirstChild()->getDecimalPrecision(),3420node->getSecondChild()->getDecimalPrecision(),3421node->getSecondChild()->isEvenPrecision());3422}34233424int32_t3425J9::Z::CodeGenerator::getPDDivEncodedPrecision(TR::Node *node, TR_PseudoRegister *dividendReg, TR_PseudoRegister *divisorReg)3426{3427TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,3428"getPackedDividendPrecision only valid for pddiv/pdrem\n");3429return self()->getPDDivEncodedPrecisionCommon(node,3430dividendReg->getDecimalPrecision(),3431divisorReg->getDecimalPrecision(),3432divisorReg->isEvenPrecision());3433}343434353436int32_t3437J9::Z::CodeGenerator::getPDDivEncodedPrecisionCommon(TR::Node *node, int32_t dividendPrecision, int32_t divisorPrecision, bool isDivisorEvenPrecision)3438{3439int32_t basePrecision = dividendPrecision;3440int32_t quotientAdjust = 1; // always subtract off second sign code when computing the quotient precision3441if (isDivisorEvenPrecision)3442quotientAdjust++; // adjust for the pad nibble3443return 
basePrecision+divisorPrecision+quotientAdjust;3444}34453446uint32_t3447J9::Z::CodeGenerator::getPDDivEncodedSize(TR::Node *node)3448{3449TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,3450"getPDDivEncodedSize only valid for pddiv/pdrem\n");3451return TR::DataType::packedDecimalPrecisionToByteLength(self()->getPDDivEncodedPrecision(node));3452}34533454uint32_t3455J9::Z::CodeGenerator::getPDDivEncodedSize(TR::Node *node, TR_PseudoRegister *dividendReg, TR_PseudoRegister *divisorReg)3456{3457TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,3458"getPDDivEncodedSize only valid for pddiv/pdrem\n");3459return TR::DataType::packedDecimalPrecisionToByteLength(self()->getPDDivEncodedPrecision(node, dividendReg, divisorReg));3460}34613462bool3463J9::Z::CodeGenerator::canGeneratePDBinaryIntrinsic(TR::ILOpCodes opCode, TR::Node * op1PrecNode, TR::Node * op2PrecNode, TR::Node * resultPrecNode)3464{3465if(!op2PrecNode->getOpCode().isLoadConst() || !op1PrecNode->getOpCode().isLoadConst() || !resultPrecNode->getOpCode().isLoadConst())3466return false;34673468int32_t max = TR::DataType::getMaxPackedDecimalPrecision();34693470int32_t op1Prec = op1PrecNode->getInt();3471int32_t op2Prec = op2PrecNode->getInt();3472int32_t resultPrec = resultPrecNode->getInt();34733474if(op1Prec > max || op2Prec > max || resultPrec > max)3475return false;34763477int32_t op1Size = TR::DataType::packedDecimalPrecisionToByteLength(op1Prec);3478int32_t op2Size = TR::DataType::packedDecimalPrecisionToByteLength(op2Prec);3479int32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(resultPrec);34803481switch(opCode)3482{3483case TR::pdadd:3484case TR::pdsub:3485case TR::pdmul:3486if(op2Prec > 15)3487return false;3488if(resultSize < (op1Size + op2Size))3489return false;3490break;3491case TR::pddiv:3492case TR::pdrem:3493if(op2Size >= op1Size)3494return false;3495if(op2Prec > 15 || op1Prec > 31 || (op1Prec-op2Prec) > 
29)3496return false;3497break;3498default:3499TR_ASSERT(0, "not implemented yet");3500return false;3501}35023503return true;3504}35053506void3507J9::Z::CodeGenerator::incRefCountForOpaquePseudoRegister(TR::Node * node)3508{3509if (node->getOpaquePseudoRegister())3510{3511TR_OpaquePseudoRegister *reg = node->getOpaquePseudoRegister();3512TR_StorageReference *ref = reg->getStorageReference();3513if (ref && ref->isNodeBased() && ref->getNodeReferenceCount() > 0)3514{3515if (self()->traceBCDCodeGen())3516self()->comp()->getDebug()->trace("\tnode %s (%p) with storageRef #%d (%s): increment nodeRefCount %d->%d when artificially incrementing ref count\n",3517node->getOpCode().getName(),node,ref->getReferenceNumber(),self()->comp()->getDebug()->getName(ref->getSymbol()),ref->getNodeReferenceCount(),ref->getNodeReferenceCount()+1);3518ref->incrementNodeReferenceCount();3519}3520}3521}35223523TR::Instruction* J9::Z::CodeGenerator::generateVMCallHelperSnippet(TR::Instruction* cursor, TR::LabelSymbol* vmCallHelperSnippetLabel)3524{3525TR::Compilation* comp = self()->comp();35263527// Associate all generated instructions with the first node3528TR::Node* node = comp->getStartTree()->getNode();35293530cursor = generateS390LabelInstruction(self(), TR::InstOpCode::label, node, vmCallHelperSnippetLabel, cursor);35313532TR::Instruction* vmCallHelperSnippetLabelInstruction = cursor;35333534// Store all arguments to the stack for access by the interpreted method3535J9::Z::PrivateLinkage *privateLinkage = static_cast<J9::Z::PrivateLinkage *>(self()->getLinkage());3536cursor = static_cast<TR::Instruction*>(privateLinkage->saveArguments(cursor, false, true));35373538// Load the EP register with the address of the next instruction3539cursor = generateRRInstruction(self(), TR::InstOpCode::BASR, node, self()->getEntryPointRealRegister(), self()->machine()->getRealRegister(TR::RealRegister::GPR0), cursor);35403541TR::Instruction* basrInstruction = cursor;35423543// Displacement will be 
/**
 * Generate the out-of-line snippet that transfers control to the VM call
 * helper (j2iTransition) so the method can be executed by the interpreter.
 *
 * Layout generated (all instructions associated with the method's first node):
 *   label; save arguments to stack; BASR to capture the EP register;
 *   load J9Method address; load helper address; BCR to the helper;
 *   data constants for the helper address and the J9Method address;
 *   padding to a reference-size boundary.
 *
 * \param cursor                   instruction after which the snippet is emitted
 * \param vmCallHelperSnippetLabel label bound at the start of the snippet
 * \return the last instruction generated
 */
TR::Instruction* J9::Z::CodeGenerator::generateVMCallHelperSnippet(TR::Instruction* cursor, TR::LabelSymbol* vmCallHelperSnippetLabel)
   {
   TR::Compilation* comp = self()->comp();

   // Associate all generated instructions with the first node
   TR::Node* node = comp->getStartTree()->getNode();

   cursor = generateS390LabelInstruction(self(), TR::InstOpCode::label, node, vmCallHelperSnippetLabel, cursor);

   TR::Instruction* vmCallHelperSnippetLabelInstruction = cursor;

   // Store all arguments to the stack for access by the interpreted method
   J9::Z::PrivateLinkage *privateLinkage = static_cast<J9::Z::PrivateLinkage *>(self()->getLinkage());
   cursor = static_cast<TR::Instruction*>(privateLinkage->saveArguments(cursor, false, true));

   // Load the EP register with the address of the next instruction
   cursor = generateRRInstruction(self(), TR::InstOpCode::BASR, node, self()->getEntryPointRealRegister(), self()->machine()->getRealRegister(TR::RealRegister::GPR0), cursor);

   TR::Instruction* basrInstruction = cursor;

   // Displacement will be updated later once we know the offset
   TR::MemoryReference* j9MethodAddressMemRef = generateS390MemoryReference(self()->getEntryPointRealRegister(), 0, self());

   // Load the address of the J9Method corresponding to this JIT compilation
   cursor = generateRXInstruction(self(), TR::InstOpCode::getLoadOpCode(), node, self()->machine()->getRealRegister(TR::RealRegister::GPR1), j9MethodAddressMemRef, cursor);

   // Displacement will be updated later once we know the offset
   TR::MemoryReference* vmCallHelperAddressMemRef = generateS390MemoryReference(self()->getEntryPointRealRegister(), 0, self());

   // Load the address of the VM call helper
   cursor = generateRXInstruction(self(), TR::InstOpCode::getLoadOpCode(), node, self()->getEntryPointRealRegister(), vmCallHelperAddressMemRef, cursor);

   // Call the VM call helper
   cursor = generateS390BranchInstruction(self(), TR::InstOpCode::BCR, node, TR::InstOpCode::COND_BCR, self()->getEntryPointRealRegister(), cursor);

   // The data constants follow the BCR; their displacement is measured from the
   // instruction after the BASR (the value captured in the EP register)
   const int32_t offsetFromEPRegisterValueToVMCallHelperAddress = CalcCodeSize(basrInstruction->getNext(), cursor);

   vmCallHelperAddressMemRef->setOffset(offsetFromEPRegisterValueToVMCallHelperAddress);

   TR::ResolvedMethodSymbol* methodSymbol = comp->getJittedMethodSymbol();

   TR::SymbolReference* helperSymRef = self()->symRefTab()->findOrCreateRuntimeHelper(TR_j2iTransition);

   // AOT relocation for the helper address
   TR::S390EncodingRelocation* encodingRelocation = new (self()->trHeapMemory()) TR::S390EncodingRelocation(TR_AbsoluteHelperAddress, helperSymRef);

   AOTcgDiag3(comp, "Add encodingRelocation = %p reloType = %p symbolRef = %p\n", encodingRelocation, encodingRelocation->getReloType(), encodingRelocation->getSymbolReference());

   const intptr_t vmCallHelperAddress = reinterpret_cast<intptr_t>(helperSymRef->getMethodAddress());

   // Encode the address of the VM call helper (split into two 4-byte constants on 64-bit)
   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, UPPER_4_BYTES(vmCallHelperAddress), cursor);
      cursor->setEncodingRelocation(encodingRelocation);

      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, LOWER_4_BYTES(vmCallHelperAddress), cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, vmCallHelperAddress, cursor);
      cursor->setEncodingRelocation(encodingRelocation);
      }

   // Now that the helper address constant is emitted the J9Method constant's
   // displacement from the EP register value is known
   const int32_t offsetFromEPRegisterValueToJ9MethodAddress = CalcCodeSize(basrInstruction->getNext(), cursor);

   j9MethodAddressMemRef->setOffset(offsetFromEPRegisterValueToJ9MethodAddress);
   TR::SymbolReference *methodSymRef = new (self()->trHeapMemory()) TR::SymbolReference(self()->symRefTab(), methodSymbol);
   encodingRelocation = new (self()->trHeapMemory()) TR::S390EncodingRelocation(TR_RamMethod, methodSymRef);

   AOTcgDiag2(comp, "Add encodingRelocation = %p reloType = %p\n", encodingRelocation, encodingRelocation->getReloType());

   const intptr_t j9MethodAddress = reinterpret_cast<intptr_t>(methodSymbol->getResolvedMethod()->resolvedMethodAddress());

   // Encode the address of the J9Method (split into two 4-byte constants on 64-bit)
   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, UPPER_4_BYTES(j9MethodAddress), cursor);
      cursor->setEncodingRelocation(encodingRelocation);

      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, LOWER_4_BYTES(j9MethodAddress), cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, j9MethodAddress, cursor);
      cursor->setEncodingRelocation(encodingRelocation);
      }

   // HCR can redefine the method, so record the J9Method constant as a patch site
   if (comp->getOption(TR_EnableHCR))
      {
      comp->getStaticHCRPICSites()->push_front(cursor);
      }

   int32_t padSize = CalcCodeSize(vmCallHelperSnippetLabelInstruction, cursor) % TR::Compiler->om.sizeofReferenceAddress();

   if (padSize != 0)
      {
      padSize = TR::Compiler->om.sizeofReferenceAddress() - padSize;
      }

   // Align to the size of the reference field to ensure alignment of subsequent sections for atomic patching
   cursor = self()->insertPad(node, cursor, padSize, false);

   return cursor;
   }

/**
 * Relative-long (RIL-format) instructions cannot be used for JITServer
 * (out-of-process) compilations since the final code location is not known at
 * compile time; otherwise defer to the OMR-level implementation.
 */
bool J9::Z::CodeGenerator::canUseRelativeLongInstructions(int64_t value)
   {
   if (self()->comp()->isOutOfProcessCompilation())
      {
      return false;
      }
   return OMR::CodeGeneratorConnector::canUseRelativeLongInstructions(value);
   }

/**
 * Generate the pre-prologue: the VM call helper snippet, a branch to it, a
 * 4-byte save slot used by runtime patching, and a pad for the body info
 * address (sizes chosen to keep the offsets in PreprologueConst.hpp constant).
 *
 * \param cursor instruction after which the pre-prologue is emitted
 * \return the last instruction generated
 */
TR::Instruction* J9::Z::CodeGenerator::generateVMCallHelperPrePrologue(TR::Instruction* cursor)
   {
   TR::Compilation* comp = self()->comp();

   // Associate all generated instructions with the first node
   TR::Node* node = comp->getStartTree()->getNode();

   TR::LabelSymbol* vmCallHelperSnippetLabel = generateLabelSymbol(self());

   cursor = self()->generateVMCallHelperSnippet(cursor, vmCallHelperSnippetLabel);

   cursor = generateS390BranchInstruction(self(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, vmCallHelperSnippetLabel, cursor);

   // The following 4 bytes are used for various patching sequences that overwrite the JIT entry point with a 4 byte
   // branch (BRC) to some location. Before patching in the branch we must save the 4 bytes at the JIT entry point
   // to this location so that we can later reverse the patching at JIT entry point if needed.
   cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0xdeafbeef, cursor);

   // Generated a pad for the body info address to keep offsets in PreprologueConst.hpp constant for simplicity
   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      }

   return cursor;
   }
TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement ||3705method == TR::java_util_concurrent_atomic_AtomicInteger_getAndSet ||3706method == TR::java_util_concurrent_atomic_AtomicInteger_addAndGet ||3707method == TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet ||3708method == TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet ||3709method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_incrementAndGet ||3710method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_decrementAndGet ||3711method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_addAndGet ||3712method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndIncrement ||3713method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndDecrement ||3714method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndAdd)3715{3716return true;3717}37183719// Transactional Memory3720if (self()->getSupportsInlineConcurrentLinkedQueue())3721{3722if (method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer ||3723method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmPoll ||3724method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmEnabled)3725{3726return true;3727}3728}37293730return false;3731}37323733#define IS_OBJ true3734#define IS_NOT_OBJ false37353736bool isKnownMethod(TR::MethodSymbol * methodSymbol)3737{3738return methodSymbol &&3739(methodSymbol->getRecognizedMethod() == TR::java_lang_Math_sqrt ||3740methodSymbol->getRecognizedMethod() == TR::java_lang_StrictMath_sqrt ||3741methodSymbol->getRecognizedMethod() == TR::java_lang_Class_isAssignableFrom);3742}37433744bool3745J9::Z::CodeGenerator::inlineDirectCall(3746TR::Node *node,3747TR::Register *&resultReg)3748{3749TR::CodeGenerator *cg = self();3750TR::Compilation *comp = cg->comp();3751TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());37523753TR::MethodSymbol * methodSymbol = node->getSymbol()->getMethodSymbol();37543755// If the method to 
be called is marked as an inline method, see if it can
// actually be generated inline.
//

   // Non-helper symbols are dispatched before recognized-method checks: these
   // are codegen-injected symbols that must always be expanded inline.
   if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::encodeASCIISymbol))
      {
      TR::TreeEvaluator::inlineEncodeASCII(node, cg);
      return true;
      }
   else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::currentTimeMaxPrecisionSymbol))
      {
      resultReg = TR::TreeEvaluator::inlineCurrentTimeMaxPrecision(cg, node);
      return true;
      }
   else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::singlePrecisionSQRTSymbol))
      {
      resultReg = TR::TreeEvaluator::inlineSinglePrecisionSQRT(node, cg);
      return true;
      }
   else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::synchronizedFieldLoadSymbol))
      {
      ReduceSynchronizedFieldLoad::inlineSynchronizedFieldLoad(node, cg);
      return true;
      }

   // NOTE(review): enableTRTRE is read from the environment but is not
   // referenced anywhere in the visible remainder of this function -- confirm
   // whether it is still needed.
   static const char * enableTRTRE = feGetEnv("TR_enableTRTRE");
   switch (methodSymbol->getRecognizedMethod())
      {
      case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
         // In Java9 this can be either the jdk.internal JNI method or the sun.misc Java wrapper.
         // In Java8 it will be sun.misc which will contain the JNI directly.
         // We only want to inline the JNI methods, so add an explicit test for isNative().
         if (!methodSymbol->isNative())
            break;

         if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
            {
            resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CS, IS_NOT_OBJ);
            return true;
            }
         // NOTE(review): when the fast path above is not taken, control falls
         // through into the compareAndSwapLong case below -- verify this is
         // intentional (a `break;` may be missing here).

      case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
         // As above, we only want to inline the JNI methods, so add an explicit test for isNative()
         if (!methodSymbol->isNative())
            break;

         if (comp->target().is64Bit() && (!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
            {
            // CSG is the 64-bit compare-and-swap form; only fast-pathed on 64-bit targets.
            resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CSG, IS_NOT_OBJ);
            return true;
            }
         // Too risky to do Long-31bit version now.
         break;

      case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
         // As above, we only want to inline the JNI methods, so add an explicit test for isNative()
         if (!methodSymbol->isNative())
            break;

         if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
            {
            // Compressed refs are 32-bit slots, so CS suffices; otherwise use the
            // pointer-width compare-and-swap opcode.
            resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, (comp->useCompressedPointers() ? TR::InstOpCode::CS : TR::InstOpCode::getCmpAndSwapOpCode()), IS_OBJ);
            return true;
            }
         break;

      case TR::java_util_concurrent_atomic_Fences_reachabilityFence:
      case TR::java_util_concurrent_atomic_Fences_orderAccesses:
      case TR::java_util_concurrent_atomic_Fences_orderReads:
      case TR::java_util_concurrent_atomic_Fences_orderWrites:
         // Only the argument's reference count is released here; no code is
         // generated inline for the fence methods.
         cg->decReferenceCount(node->getChild(0));
         break;

      case TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet:
      case TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement:
      case TR::java_util_concurrent_atomic_AtomicInteger_getAndSet:
      case TR::java_util_concurrent_atomic_AtomicInteger_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet:
         // 4-byte atomic read-modify-write operations on a field.
         resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 4, methodSymbol);
         return true;
         break;

      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndDecrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndSet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_decrementAndGet:
         // Same 4-byte operations, array element form (final `true` argument).
         resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 4, methodSymbol, true);
         return true;
         break;

      case TR::java_util_concurrent_atomic_AtomicLong_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicLong_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicLong_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndDecrement:
         if (cg->checkFieldAlignmentForAtomicLong() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
            {
            // TODO: I'm not sure we need the z196 restriction here given that the function already checks for z196 and
            // has a compare and swap fallback path
            resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 8, methodSymbol);
            return true;
            }
         break;

      case TR::java_util_concurrent_atomic_AtomicLongArray_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicLongArray_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicLongArray_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndDecrement:
         if (cg->checkFieldAlignmentForAtomicLong() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
            {
            // TODO: I'm not sure we need the z196 restriction here given that the function already checks for z196 and
            // has a compare and swap fallback path
            // NOTE(review): unlike the AtomicIntegerArray cases above, the array
            // flag is not passed here -- confirm inlineAtomicOps handles the
            // 8-byte array forms without it.
            resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 8, methodSymbol);
            return true;
            }
         break;

      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndDecrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndAdd:
         if (cg->getSupportsAtomicLoadAndAdd())
            {
            resultReg = TR::TreeEvaluator::inlineAtomicFieldUpdater(node, cg, methodSymbol);
            return true;
            }
         break;

      case TR::java_nio_Bits_keepAlive:
      case TR::java_lang_ref_Reference_reachabilityFence:
         resultReg = TR::TreeEvaluator::inlineKeepAlive(node, cg);
         return true;

      case TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer:
         if (cg->getSupportsInlineConcurrentLinkedQueue())
            {
            resultReg = TR::TreeEvaluator::inlineConcurrentLinkedQueueTMOffer(node, cg);
            return true;
            }
         break;

      case TR::java_util_concurrent_ConcurrentLinkedQueue_tmPoll:
         if (cg->getSupportsInlineConcurrentLinkedQueue())
            {
            resultReg = TR::TreeEvaluator::inlineConcurrentLinkedQueueTMPoll(node, cg);
            return true;
            }
         break;

      // HashCode routine for Compressed and Decompressed String Shares lot of code so combining them.
      case TR::java_lang_String_hashCodeImplDecompressed:
         if (cg->getSupportsInlineStringHashCode())
            {
            // The assignment's value (the register pointer) is converted to the
            // bool return: true only when a register was produced.
            return resultReg = TR::TreeEvaluator::inlineStringHashCode(node, cg, false);
            }
         break;

      case TR::java_lang_String_hashCodeImplCompressed:
         if (cg->getSupportsInlineStringHashCode())
            {
            return resultReg = TR::TreeEvaluator::inlineStringHashCode(node, cg, true);
            }
         break;

      case TR::java_lang_StringLatin1_inflate:
         if (cg->getSupportsInlineStringLatin1Inflate())
            {
            resultReg = TR::TreeEvaluator::inlineStringLatin1Inflate(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:
         // NOTE(review): the TR_DisableUTF16BEEncoder option selects the SIMD
         // path here, which looks inverted at first glance -- confirm the
         // intended polarity of this option.
         return resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg)
                                                                      : TR::TreeEvaluator::inlineUTF16BEEncode (node, cg);
         break;

      case TR::java_lang_Integer_stringSize:
      case TR::java_lang_Long_stringSize:
         if (cg->getSupportsIntegerStringSize())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerStringSize(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_Integer_getChars:
      case TR::java_lang_Long_getChars:
         if (cg->getSupportsIntegerToChars())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerToCharsForLatin1Strings(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_StringUTF16_getChars_Integer:
      case TR::java_lang_StringUTF16_getChars_Long:
      case TR::java_lang_Integer_getChars_charBuffer:
      case TR::java_lang_Long_getChars_charBuffer:
         if (cg->getSupportsIntegerToChars())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerToCharsForUTF16Strings(node, cg);
            return resultReg != NULL;
            }
         break;

      default:
         break;

      }

   // Bit-manipulation intrinsics: these are inlined unconditionally for the
   // recognized Integer/Long methods.
   switch (methodSymbol->getRecognizedMethod())
      {
      case TR::java_lang_Integer_highestOneBit:
         resultReg = TR::TreeEvaluator::inlineHighestOneBit(node, cg, false);
         return true;
      case TR::java_lang_Integer_numberOfLeadingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfLeadingZeros(node, cg, false);
         return true;
      case TR::java_lang_Integer_numberOfTrailingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfTrailingZeros(node, cg, 32);
         return true;
      case TR::java_lang_Long_highestOneBit:
         resultReg = TR::TreeEvaluator::inlineHighestOneBit(node, cg, true);
         return true;
      case TR::java_lang_Long_numberOfLeadingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfLeadingZeros(node, cg, true);
         return true;
      case TR::java_lang_Long_numberOfTrailingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfTrailingZeros(node, cg, 64);
         return true;
      default:
         break;
      }

#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
   // Crypto acceleration intrinsics (only compiled in when the option is enabled).
   if (self()->inlineCryptoMethod(node, resultReg))
      {
      return true;
      }
#endif

   if (cg->getSupportsInlineStringCaseConversion())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicUTF16:
            resultReg = TR::TreeEvaluator::toUpperIntrinsic(node, cg, false);
            return true;
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicLatin1:
            resultReg = TR::TreeEvaluator::toUpperIntrinsic(node, cg, true);
            return true;
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicUTF16:
            resultReg = TR::TreeEvaluator::toLowerIntrinsic(node, cg, false);
            return true;
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicLatin1:
            resultReg = TR::TreeEvaluator::toLowerIntrinsic(node, cg, true);
            return true;
         default:
            break;
         }
      }

   if (cg->getSupportsInlineStringIndexOf())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
            resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, true);
            return true;
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
            resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, false);
            return true;
         case TR::java_lang_StringLatin1_indexOf:
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:
            resultReg = TR::TreeEvaluator::inlineVectorizedStringIndexOf(node, cg, false);
            return resultReg != NULL;
         case TR::java_lang_StringUTF16_indexOf:
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringUTF16:
            resultReg = TR::TreeEvaluator::inlineVectorizedStringIndexOf(node, cg, true);
            return resultReg != NULL;
         default:
            break;
         }
      }

   if (!comp->getOption(TR_DisableSIMDDoubleMaxMin) &&
cg->getSupportsVectorRegisters())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::java_lang_Math_max_D:
            resultReg = TR::TreeEvaluator::inlineDoubleMax(node, cg);
            return true;
         case TR::java_lang_Math_min_D:
            resultReg = TR::TreeEvaluator::inlineDoubleMin(node, cg);
            return true;
         default:
            break;
         }
      }

   if (cg->getSupportsVectorRegisters())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::java_lang_Math_fma_D:
         case TR::java_lang_StrictMath_fma_D:
            resultReg = TR::TreeEvaluator::inlineMathFma(node, cg);
            return true;

         case TR::java_lang_Math_fma_F:
         case TR::java_lang_StrictMath_fma_F:
            // The single-precision fma is only inlined when vector facility
            // enhancement 1 is available on the target CPU.
            if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_1))
               {
               resultReg = TR::TreeEvaluator::inlineMathFma(node, cg);
               return true;
               }
            break;
         default:
            break;
         }
      }

   // Fall back to the VM-internal-native fast path for internal natives and
   // other known methods.
   TR::MethodSymbol * symbol = node->getSymbol()->castToMethodSymbol();
   if ((symbol->isVMInternalNative() || symbol->isJITInternalNative()) || isKnownMethod(methodSymbol))
      {
      if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))
         {
         resultReg = node->getRegister();
         return true;
         }
      }

   // No method specialization was done.
   //
   resultReg = NULL;
   return false;
   }

/**
 * Check if arithmetic operations with a constant requires entry in the literal pool.
 *
 * Returns true when the node's integral value lies outside the
 * [GE_MIN_IMMEDIATE_VAL, GE_MAX_IMMEDIATE_VAL] immediate range and therefore
 * cannot be encoded directly in the instruction.
 */
bool
J9::Z::CodeGenerator::arithmeticNeedsLiteralFromPool(TR::Node *node)
   {
   int64_t value = getIntegralValue(node);
   return value > GE_MAX_IMMEDIATE_VAL || value < GE_MIN_IMMEDIATE_VAL;
   }

/**
 * Traps inside a transactional-memory region are only permitted when
 * targeting z/OS.
 */
bool
J9::Z::CodeGenerator::supportsTrapsInTMRegion()
   {
   return self()->comp()->target().isZOS();
   }