// Path: blob/master/runtime/compiler/optimizer/DataAccessAccelerator.cpp
// 6000 views
/*******************************************************************************1* Copyright (c) 2000, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "optimizer/DataAccessAccelerator.hpp"2324#include <algorithm>25#include <limits.h>26#include <math.h>27#include <stddef.h>28#include <stdint.h>29#include <stdio.h>30#include <stdlib.h>31#include <string.h>32#include "codegen/CodeGenerator.hpp"33#include "env/FrontEnd.hpp"34#include "codegen/RecognizedMethods.hpp"35#include "codegen/RegisterConstants.hpp"36#include "compile/Compilation.hpp"37#include "compile/Method.hpp"38#include "compile/ResolvedMethod.hpp"39#include "compile/SymbolReferenceTable.hpp"40#include "control/Options.hpp"41#include "control/Options_inlines.hpp"42#include "control/Recompilation.hpp"43#include "control/RecompilationInfo.hpp"44#include "env/CompilerEnv.hpp"45#include "env/StackMemoryRegion.hpp"46#include "env/TRMemory.hpp"47#include 
"env/jittypes.h"48#include "env/VMJ9.h"49#include "il/Block.hpp"50#include "il/DataTypes.hpp"51#include "il/ILOpCodes.hpp"52#include "il/ILOps.hpp"53#include "il/MethodSymbol.hpp"54#include "il/Node.hpp"55#include "il/NodePool.hpp"56#include "il/Node_inlines.hpp"57#include "il/ParameterSymbol.hpp"58#include "il/ResolvedMethodSymbol.hpp"59#include "il/StaticSymbol.hpp"60#include "il/Symbol.hpp"61#include "il/SymbolReference.hpp"62#include "il/TreeTop.hpp"63#include "il/TreeTop_inlines.hpp"64#include "infra/Assert.hpp"65#include "infra/Cfg.hpp"66#include "infra/Stack.hpp"67#include "infra/TRCfgEdge.hpp"68#include "infra/TRCfgNode.hpp"69#include "optimizer/Optimization.hpp"70#include "optimizer/Optimization_inlines.hpp"71#include "optimizer/OptimizationManager.hpp"72#include "optimizer/Optimizations.hpp"73#include "optimizer/Optimizer.hpp"74#include "optimizer/OSRGuardRemoval.hpp"75#include "optimizer/Structure.hpp"76#include "optimizer/TransformUtil.hpp"77#include "ras/Debug.hpp"7879#define IS_VARIABLE_PD2I(callNode) (!isChildConst(callNode, 2) || !isChildConst(callNode, 3))8081TR_DataAccessAccelerator::TR_DataAccessAccelerator(TR::OptimizationManager* manager)82:83TR::Optimization(manager)84{85// Void86}8788int32_t TR_DataAccessAccelerator::perform()89{90int32_t result = 0;9192if (!comp()->getOption(TR_DisableIntrinsics) &&93!comp()->getOption(TR_MimicInterpreterFrameShape) &&9495// We cannot handle arraylets because hardware intrinsics act on contiguous memory96!comp()->generateArraylets()&& !TR::Compiler->om.useHybridArraylets())97{9899// A vector to keep track of variable packed decimal calls100TR::StackMemoryRegion stackMemoryRegion(*(comp()->trMemory()));101TreeTopContainer variableCallTreeTops(stackMemoryRegion);102103for (TR::AllBlockIterator iter(optimizer()->getMethodSymbol()->getFlowGraph(), comp());104iter.currentBlock() != NULL;105++iter)106{107TR::Block* block = iter.currentBlock();108109result += performOnBlock(block, 
&variableCallTreeTops);110}111112result += processVariableCalls(&variableCallTreeTops);113}114115if (result != 0)116{117optimizer()->setUseDefInfo(NULL);118optimizer()->setValueNumberInfo(NULL);119optimizer()->setAliasSetsAreValid(false);120}121122return result;123}124125int32_t126TR_DataAccessAccelerator::processVariableCalls(TreeTopContainer* variableCallTreeTops)127{128int32_t result = 0;129130// Process variable precision calls after iterating through all the nodes131for(int i = 0; i < variableCallTreeTops->size(); ++i)132{133TR::TreeTop* treeTop = variableCallTreeTops->at(i);134TR::Node* callNode = treeTop->getNode()->getChild(0);135TR::ResolvedMethodSymbol* callSymbol = callNode->getSymbol()->getResolvedMethodSymbol();136if (callSymbol != NULL)137{138if (!comp()->getOption(TR_DisablePackedDecimalIntrinsics))139{140switch (callSymbol->getRecognizedMethod())141{142// DAA Packed Decimal <-> Integer143case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:144{145if (generatePD2IVariableParameter(treeTop, callNode, true, false))146{147++result;148}149continue;150}151case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:152{153if (generatePD2IVariableParameter(treeTop, callNode, true, true))154{155++result;156}157continue;158}159160// DAA Packed Decimal <-> Long161case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:162{163if (generatePD2IVariableParameter(treeTop, callNode, false, false))164{165++result;166}167continue;168}169case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:170{171if (generatePD2IVariableParameter(treeTop, callNode, false, true))172{173++result;174}175continue;176}177default:178break;179}180}181}182}183184return result;185}186187const char *188TR_DataAccessAccelerator::optDetailString() const throw()189{190return "O^O DATA ACCESS ACCELERATOR: ";191}192193int32_t TR_DataAccessAccelerator::performOnBlock(TR::Block* block, TreeTopContainer* 
variableCallTreeTops)194{195int32_t blockResult = 0;196bool requestOSRGuardRemoval = false;197198for (TR::TreeTopIterator iter(block->getEntry(), comp()); iter != block->getExit(); ++iter)199{200TR::Node* currentNode = iter.currentNode();201if (currentNode->getOpCodeValue() == TR::treetop)202{203currentNode = currentNode->getChild(0);204}205206if (currentNode != NULL && currentNode->getOpCode().isCall())207{208int32_t result = 0;209bool matched = false;210211TR::TreeTop* treeTop = iter.currentTree();212213TR::Node* callNode = currentNode;214215TR::Node* returnNode = NULL;216217TR::ResolvedMethodSymbol* callSymbol = callNode->getSymbol()->getResolvedMethodSymbol();218219if (callSymbol != NULL)220{221if (!comp()->getOption(TR_DisableMarshallingIntrinsics))222{223switch (callSymbol->getRecognizedMethod())224{225// ByteArray Marshalling methods226case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeShort_:227returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 2, 2);228break;229case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeShortLength_:230returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 2, 0);231break;232case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeInt_:233returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 4, 4);234break;235case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeIntLength_:236returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 4, 0);237break;238case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeLong_:239returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 8, 8);240break;241case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeLongLength_:242returnNode = insertIntegerSetIntrinsic(treeTop, callNode, 8, 0);243break;244245case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeFloat_:246returnNode = insertDecimalSetIntrinsic(treeTop, callNode, 4, 4);247break;248case TR::com_ibm_dataaccess_ByteArrayMarshaller_writeDouble_:249returnNode = insertDecimalSetIntrinsic(treeTop, callNode, 8, 8);250break;251252// ByteArray 
Unmarshalling methods253case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readShort_:254returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 2, 2);255break;256case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readShortLength_:257returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 0, 2);258break;259case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readInt_:260returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 4, 4);261break;262case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readIntLength_:263returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 0, 4);264break;265case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readLong_:266returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 8, 8);267break;268case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readLongLength_:269returnNode = insertIntegerGetIntrinsic(treeTop, callNode, 0, 8);270break;271272case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readFloat_:273returnNode = insertDecimalGetIntrinsic(treeTop, callNode, 4, 4);274break;275case TR::com_ibm_dataaccess_ByteArrayUnmarshaller_readDouble_:276returnNode = insertDecimalGetIntrinsic(treeTop, callNode, 8, 8);277break;278279default:280break;281}282283if (returnNode)284{285result = 1;286matched = true;287288printInliningStatus(true, callNode);289for (int i=callNode->getNumChildren();i>0;i--)290callNode->getChild(i-1)->recursivelyDecReferenceCount();291callNode->setNumChildren(returnNode->getNumChildren());292callNode->setSymbolReference(NULL);293TR::Node::recreate(callNode, returnNode->getOpCodeValue());294if (callNode->getOpCode().hasSymbolReference())295callNode->setSymbolReference(returnNode->getSymbolReference());296for (int i=callNode->getNumChildren();i>0;i--)297callNode->setChild(i-1, returnNode->getChild(i-1));298}299}300301bool isZLinux = comp()->target().cpu.isZ() && comp()->target().isLinux();302bool isZOS = comp()->target().isZOS();303304if (!matched && (isZOS || isZLinux) 
&&305!comp()->getOption(TR_DisablePackedDecimalIntrinsics))306{307matched = true;308switch (callSymbol->getRecognizedMethod())309{310// DAA Packed Decimal Check311case TR::com_ibm_dataaccess_PackedDecimal_checkPackedDecimal_:312if (inlineCheckPackedDecimal(treeTop, callNode))313{314++result;315}316break;317318// DAA Packed Decimal <-> Unicode Decimal319case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToUnicodeDecimal_:320if (generatePD2UD(treeTop, callNode, true))321{322++result;323}324break;325case TR::com_ibm_dataaccess_DecimalData_convertUnicodeDecimalToPackedDecimal_:326if (generateUD2PD(treeTop, callNode, true))327{328++result;329}330break;331332// DAA Packed Decimal <-> External Decimal333case TR::com_ibm_dataaccess_DecimalData_convertExternalDecimalToPackedDecimal_:334if (generateUD2PD(treeTop, callNode, false))335{336++result;337}338break;339case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToExternalDecimal_:340if (generatePD2UD(treeTop, callNode, false))341{342++result;343}344break;345346default:347matched = false;348break;349}350}351352if (!matched && (isZOS || isZLinux) &&353!block->isCold() &&354!comp()->getOption(TR_DisablePackedDecimalIntrinsics))355{356matched = true;357comp()->cg()->setUpStackSizeForCallNode(callNode);358switch (callSymbol->getRecognizedMethod())359{360// DAA Packed Decimal arithmetic methods361case TR::com_ibm_dataaccess_PackedDecimal_addPackedDecimal_:362if (genArithmeticIntrinsic(treeTop, callNode, TR::pdadd))363{364++result;365}366break;367case TR::com_ibm_dataaccess_PackedDecimal_subtractPackedDecimal_:368if (genArithmeticIntrinsic(treeTop, callNode, TR::pdsub))369{370++result;371}372break;373case TR::com_ibm_dataaccess_PackedDecimal_multiplyPackedDecimal_:374if (genArithmeticIntrinsic(treeTop, callNode, TR::pdmul))375{376++result;377}378break;379case TR::com_ibm_dataaccess_PackedDecimal_dividePackedDecimal_:380if (genArithmeticIntrinsic(treeTop, callNode, TR::pddiv))381{382++result;383}384break;385case 
TR::com_ibm_dataaccess_PackedDecimal_remainderPackedDecimal_:386if (genArithmeticIntrinsic(treeTop, callNode, TR::pdrem))387{388++result;389}390break;391392// DAA Packed Decimal shift methods393case TR::com_ibm_dataaccess_PackedDecimal_shiftLeftPackedDecimal_:394if (genShiftLeftIntrinsic(treeTop, callNode))395{396++result;397}398break;399case TR::com_ibm_dataaccess_PackedDecimal_shiftRightPackedDecimal_:400if (genShiftRightIntrinsic(treeTop, callNode))401{402++result;403}404break;405406// DAA Packed Decimal comparison methods407case TR::com_ibm_dataaccess_PackedDecimal_lessThanPackedDecimal_:408if (genComparisionIntrinsic(treeTop, callNode, TR::pdcmplt))409{410++result;411}412break;413case TR::com_ibm_dataaccess_PackedDecimal_lessThanOrEqualsPackedDecimal_:414if (genComparisionIntrinsic(treeTop, callNode, TR::pdcmple))415{416++result;417}418break;419case TR::com_ibm_dataaccess_PackedDecimal_greaterThanPackedDecimal_:420if (genComparisionIntrinsic(treeTop, callNode, TR::pdcmpgt))421{422++result;423}424break;425case TR::com_ibm_dataaccess_PackedDecimal_greaterThanOrEqualsPackedDecimal_:426if (genComparisionIntrinsic(treeTop, callNode, TR::pdcmpge))427{428++result;429}430break;431case TR::com_ibm_dataaccess_PackedDecimal_equalsPackedDecimal_:432if (genComparisionIntrinsic(treeTop, callNode, TR::pdcmpeq))433{434++result;435}436break;437438// DAA Packed Decimal <-> Integer439case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:440{441if (IS_VARIABLE_PD2I(callNode))442{443variableCallTreeTops->push_back(treeTop);444}445else446{447if (generatePD2I(treeTop, callNode, true, false))448{449++result;450}451}452break;453}454case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:455{456if (IS_VARIABLE_PD2I(callNode))457{458variableCallTreeTops->push_back(treeTop);459}460else461{462if (generatePD2I(treeTop, callNode, true, true))463{464++result;465}466}467break;468}469case 
TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_:470if (generateI2PD(treeTop, callNode, true, false))471{472++result;473}474break;475case TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_ByteBuffer_:476if (generateI2PD(treeTop, callNode, true, true))477{478++result;479}480break;481482// DAA Packed Decimal <-> Long483case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:484{485if (IS_VARIABLE_PD2I(callNode))486{487variableCallTreeTops->push_back(treeTop);488}489else490{491if (generatePD2I(treeTop, callNode, false, false))492{493++result;494}495}496break;497}498case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:499{500if (IS_VARIABLE_PD2I(callNode))501{502variableCallTreeTops->push_back(treeTop);503}504else505{506if (generatePD2I(treeTop, callNode, false, true))507{508++result;509}510}511break;512}513case TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_:514if (generateI2PD(treeTop, callNode, false, false))515{516++result;517}518break;519case TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_ByteBuffer_:520if (generateI2PD(treeTop, callNode, false, true))521{522++result;523}524break;525526default:527matched = false;528break;529}530}531532if (matched && result533&& !requestOSRGuardRemoval534&& TR_OSRGuardRemoval::findMatchingOSRGuard(comp(), treeTop))535requestOSRGuardRemoval = true;536537blockResult += result;538}539}540}541542// If yields to the VM have been removed, it is possible to remove OSR guards as well543//544if (requestOSRGuardRemoval)545requestOpt(OMR::osrGuardRemoval);546547return blockResult;548}549550bool TR_DataAccessAccelerator::isChildConst(TR::Node* node, int32_t child)551{552return node->getChild(child)->getOpCode().isLoadConst();553}554555TR::Node* TR_DataAccessAccelerator::insertDecimalGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)556{557if (targetNumBytes != 4 && targetNumBytes != 
8)558{559printInliningStatus (false, callNode, "targetNumBytes is invalid. Valid targetNumBytes values are 4 or 8.");560return NULL;561}562563if (sourceNumBytes != 4 && sourceNumBytes != 8)564{565printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 4 or 8.");566return NULL;567}568569if (sourceNumBytes > targetNumBytes)570{571printInliningStatus (false, callNode, "sourceNumBytes is out of bounds.");572return NULL;573}574575TR::Node* byteArrayNode = callNode->getChild(0);576TR::Node* offsetNode = callNode->getChild(1);577TR::Node* bigEndianNode = callNode->getChild(2);578579if (!bigEndianNode->getOpCode().isLoadConst())580{581printInliningStatus (false, callNode, "bigEndianNode is not constant.");582return NULL;583}584585// Determines whether a TR::ByteSwap needs to be inserted before the store to the byteArray586bool requiresByteSwap = comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());587588if (requiresByteSwap && !comp()->cg()->supportsByteswap())589{590printInliningStatus (false, callNode, "Unmarshalling is not supported because ByteSwap IL evaluators are not implemented.");591return NULL;592}593594if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: insertDecimalGetIntrinsic on callNode %p\n", callNode))595{596insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);597598insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);599insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, sourceNumBytes - 1);600601TR::DataType sourceDataType = TR::NoType;602TR::DataType targetDataType = TR::NoType;603604// Default case is impossible due to previous checks605switch (sourceNumBytes)606{607case 4: sourceDataType = TR::Float; break;608case 8: sourceDataType = TR::Double; break;609}610611TR::ILOpCodes op = TR::BadILOp;612613// Default case is impossible due to previous checks614switch (sourceNumBytes)615{616case 4: op = requiresByteSwap ? 
TR::iloadi : TR::floadi; break;617case 8: op = requiresByteSwap ? TR::lloadi : TR::dloadi; break;618}619620// Default case is impossible due to previous checks621switch (targetNumBytes)622{623case 4: targetDataType = TR::Float; break;624case 8: targetDataType = TR::Double; break;625}626627TR::Node* valueNode = TR::Node::createWithSymRef(op, 1, 1, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));628629if (requiresByteSwap)630{631// Default case is impossible due to previous checks632switch (sourceNumBytes)633{634case 4: valueNode = TR::Node::create(TR::ibits2f, 1, TR::Node::create(TR::ibyteswap, 1, valueNode)); break;635case 8: valueNode = TR::Node::create(TR::lbits2d, 1, TR::Node::create(TR::lbyteswap, 1, valueNode)); break;636}637}638639if (sourceNumBytes != targetNumBytes)640{641valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false), 1, valueNode);642}643644return valueNode;645}646647return NULL;648}649650TR::Node* TR_DataAccessAccelerator::insertDecimalSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)651{652if (sourceNumBytes != 4 && sourceNumBytes != 8)653{654printInliningStatus (false, callNode, "sourceNumBytes is invalid. Valid sourceNumBytes values are 4 or 8.");655return NULL;656}657658if (targetNumBytes != 4 && targetNumBytes != 8)659{660printInliningStatus (false, callNode, "targetNumBytes is invalid. 
Valid targetNumBytes values are 4 or 8.");661return NULL;662}663664if (targetNumBytes > sourceNumBytes)665{666printInliningStatus (false, callNode, "targetNumBytes is out of bounds.");667return NULL;668}669670TR::Node* valueNode = callNode->getChild(0);671TR::Node* byteArrayNode = callNode->getChild(1);672TR::Node* offsetNode = callNode->getChild(2);673TR::Node* bigEndianNode = callNode->getChild(3);674675if (!bigEndianNode->getOpCode().isLoadConst())676{677printInliningStatus (false, callNode, "bigEndianNode is not constant.");678return NULL;679}680681// Determines whether a TR::ByteSwap needs to be inserted before the store to the byteArray682bool requiresByteSwap = comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());683684if (requiresByteSwap && !comp()->cg()->supportsByteswap())685{686printInliningStatus (false, callNode, "Unmarshalling is not supported because ByteSwap IL evaluators are not implemented.");687return NULL;688}689690if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: insertDecimalSetIntrinsic on callNode %p\n", callNode))691{692insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);693694insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);695insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, targetNumBytes - 1);696697TR::DataType sourceDataType = TR::NoType;698TR::DataType targetDataType = TR::NoType;699700// Default case is impossible due to previous checks701switch (sourceNumBytes)702{703case 4: sourceDataType = TR::Float; break;704case 8: sourceDataType = TR::Double; break;705}706707// Default case is impossible due to previous checks708switch (targetNumBytes)709{710case 4: targetDataType = TR::Float; break;711case 8: targetDataType = TR::Double; break;712}713714TR::ILOpCodes op = TR::BadILOp;715716// Default case is impossible due to previous checks717switch (targetNumBytes)718{719case 4: op = requiresByteSwap ? 
TR::istorei : TR::fstorei; break;720case 8: op = requiresByteSwap ? TR::lstorei : TR::dstorei; break;721}722723// Create the proper conversion if the source and target sizes are different724if (sourceNumBytes != targetNumBytes)725{726valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false), 1, valueNode);727}728729if (requiresByteSwap)730{731// Default case is impossible due to previous checks732switch (targetNumBytes)733{734case 4: valueNode = TR::Node::create(TR::ibyteswap, 1, TR::Node::create(TR::fbits2i, 1, valueNode)); break;735case 8: valueNode = TR::Node::create(TR::lbyteswap, 1, TR::Node::create(TR::dbits2l, 1, valueNode)); break;736}737}738739return TR::Node::createWithSymRef(op, 2, 2, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), valueNode, comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));740}741742return NULL;743}744745bool TR_DataAccessAccelerator::inlineCheckPackedDecimal(TR::TreeTop* callTreeTop, TR::Node* callNode)746{747TR::Node* byteArrayNode = callNode->getChild(0);748TR::Node* offsetNode = callNode->getChild(1);749TR::Node* precisionNode = callNode->getChild(2);750TR::Node* ignoreHighNibbleForEvenPrecisionNode = callNode->getChild(3);751TR::Node* canOverwriteHighNibbleForEvenPrecisionNode = callNode->getChild(4);752int32_t precision = precisionNode->getInt();753char* failMsg = NULL;754755if (!precisionNode->getOpCode().isLoadConst())756failMsg = "precisionNode is not constant.";757else if(precision < 1 || precision > 31)758failMsg = "precisionNode is out of bounds.";759else if (!ignoreHighNibbleForEvenPrecisionNode->getOpCode().isLoadConst())760failMsg = "ignoreHighNibbleForEvenPrecisionNode is not constant.";761else if (!canOverwriteHighNibbleForEvenPrecisionNode->getOpCode().isLoadConst())762failMsg = "canOverwriteHighNibbleForEvenPrecisionNode is not constant.";763764if 
(failMsg)765{766TR::DebugCounter::incStaticDebugCounter(comp(),767TR::DebugCounter::debugCounterName(comp(),768"DAA/rejected/chkPacked"));769770return printInliningStatus (false, callNode, failMsg);771}772773if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: inlineCheckPackedDecimal on callNode %p\n", callNode))774{775TR::DebugCounter::incStaticDebugCounter(comp(),776TR::DebugCounter::debugCounterName(comp(),777"DAA/inlined/chkPacked"));778779insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);780781int32_t precisionSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision);782783insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);784insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, precisionSizeInNumberOfBytes - 1);785786TR::SymbolReference* packedDecimalSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, NULL, precisionSizeInNumberOfBytes, fe());787788TR::Node* pdchkChild0Node = TR::Node::createWithSymRef(TR::pdloadi, 1, 1, constructAddressNode(callNode, byteArrayNode, offsetNode), packedDecimalSymbolReference);789790// The size argument passed to create an array shadow symbol reference is the size in number of bytes that this PackedDecimal represents.791// Unfortunately when a Node is constructed with this symbol reference we extract the size from the symbol reference and convert it to a792// precision via a helper function. Because this conversion is not injective we may not get back the original precision we calculated793// above. 
This is why we must explicitly set the precision on the Node after creation.794795pdchkChild0Node->setDecimalPrecision(precision);796797if (precision % 2 == 0)798{799const bool ignoreHighNibbleForEvenPrecision = static_cast <bool> (ignoreHighNibbleForEvenPrecisionNode->getInt());800const bool canOverwriteHighNibbleForEvenPrecision = static_cast <bool> (canOverwriteHighNibbleForEvenPrecisionNode->getInt());801802if (ignoreHighNibbleForEvenPrecision || canOverwriteHighNibbleForEvenPrecision)803{804// Increase the precision of the pdload by 1 to pretend that we have an extra digit, then create a new parent on top of the pdload805// which will truncate Packed Decimal by modifying its precision to the desired value. This has the effect of creating a new temporary806// Packed Decimal value which properly ignores the high nibble if the precision is even, and more over it has a value of 0 in the high nibble.807808pdchkChild0Node->setDecimalPrecision(precision + 1);809810pdchkChild0Node = TR::Node::create(TR::pdModifyPrecision, 1, pdchkChild0Node);811812pdchkChild0Node->setDecimalPrecision(precision);813814// If we are allowed to overwrite the high nibble if the precision is even then we need to store temporary Packed Decimal we just815// created back into the original byte array. 
We once again pretend that we have an extra digit when doing this store because we also want to816// store out the extra 0 digit which is guaranteed to be present due to the above computation.817818if (canOverwriteHighNibbleForEvenPrecision)819{820int32_t precisionSizeInNumberOfBytes = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision + 1);821822TR::SymbolReference* packedDecimalSymbolReference = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, NULL, precisionSizeInNumberOfBytes, fe());823824//this node should be inserted after callNode825TR::Node * pdstoreNode = TR::Node::createWithSymRef(TR::pdstorei, 2, 2, constructAddressNode(callNode, byteArrayNode, offsetNode), pdchkChild0Node, packedDecimalSymbolReference);826827pdstoreNode->setDecimalPrecision(precision + 1);828829callTreeTop->insertAfter(TR::TreeTop::create(comp(), pdstoreNode));830}831}832}833834// We will be recreating the callNode so decrement the reference count of all it's children835for (auto i = 0; i < callNode->getNumChildren(); ++i)836{837callNode->getChild(i)->decReferenceCount();838}839840TR::Node::recreateWithoutProperties(callNode, TR::pdchk, 1, pdchkChild0Node);841842return true;843}844845return false;846}847848TR::Node* TR_DataAccessAccelerator::insertIntegerGetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)849{850if (targetNumBytes != 1 && targetNumBytes != 2 && targetNumBytes != 4 && targetNumBytes != 8)851{852printInliningStatus (false, callNode, "targetNumBytes is invalid. 
Valid targetNumBytes values are 1, 2, 4, or 8.");853return NULL;854}855856TR::Node* byteArrayNode = callNode->getChild(0);857TR::Node* offsetNode = callNode->getChild(1);858TR::Node* bigEndianNode = callNode->getChild(2);859TR::Node* numBytesNode = NULL;860TR::Node* signExtendNode = NULL;861862if (!bigEndianNode->getOpCode().isLoadConst())863{864printInliningStatus (false, callNode, "bigEndianNode is not constant.");865return NULL;866}867868bool needUnsignedConversion = false;869870// This check indicates that the sourceNumBytes value is specified on the callNode, so we must extract it871if (sourceNumBytes == 0)872{873numBytesNode = callNode->getChild(3);874875if (!numBytesNode->getOpCode().isLoadConst())876{877printInliningStatus (false, callNode, "numBytesNode is not constant.");878return NULL;879}880881sourceNumBytes = numBytesNode->getInt();882883if (sourceNumBytes != 1 && sourceNumBytes != 2 && sourceNumBytes != 4 && sourceNumBytes != 8)884{885printInliningStatus (false, callNode, "sourceNumBytes is invalid. 
Valid targetNumBytes values are 1, 2, 4, or 8.");886return NULL;887}888889if (sourceNumBytes > targetNumBytes)890{891printInliningStatus (false, callNode, "sourceNumBytes is out of bounds.");892return NULL;893}894895signExtendNode = callNode->getChild(4);896897if (!signExtendNode->getOpCode().isLoadConst())898{899printInliningStatus (false, callNode, "signExtendNode is not constant.");900return NULL;901}902903needUnsignedConversion = sourceNumBytes < targetNumBytes && static_cast <bool> (signExtendNode->getInt() != 1);904}905else906{907sourceNumBytes = targetNumBytes;908}909910// Determines whether a TR::ByteSwap needs to be inserted before the store to the byteArray911bool requiresByteSwap = sourceNumBytes != 1 && comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());912913if (requiresByteSwap && !comp()->cg()->supportsByteswap())914{915printInliningStatus (false, callNode, "Unmarshalling is not supported because ByteSwap IL evaluators are not implemented.");916return NULL;917}918919if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: genSimpleGetBinary call: %p inlined.\n", callNode))920{921insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);922923insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);924insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, sourceNumBytes - 1);925926TR::DataType sourceDataType = TR::NoType;927TR::DataType targetDataType = TR::NoType;928929// Default case is impossible due to previous checks930switch (sourceNumBytes)931{932case 1: sourceDataType = TR::Int8; break;933case 2: sourceDataType = TR::Int16; break;934case 4: sourceDataType = TR::Int32; break;935case 8: sourceDataType = TR::Int64; break;936}937938TR::ILOpCodes op = TR::BadILOp;939TR::ILOpCodes byteswapOp = TR::BadILOp;940941// Default case is impossible due to previous checks942switch (sourceNumBytes)943{944case 1: op = TR::bloadi; break;945case 2: op = TR::sloadi; byteswapOp = 
TR::sbyteswap; break;946case 4: op = TR::iloadi; byteswapOp = TR::ibyteswap; break;947case 8: op = TR::lloadi; byteswapOp = TR::lbyteswap; break;948}949950// Default case is impossible due to previous checks951switch (targetNumBytes)952{953case 1: targetDataType = TR::Int32; break;954case 2: targetDataType = TR::Int32; break;955case 4: targetDataType = TR::Int32; break;956case 8: targetDataType = TR::Int64; break;957}958959TR::Node* valueNode = TR::Node::createWithSymRef(op, 1, 1, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));960961if (requiresByteSwap)962{963valueNode = TR::Node::create(byteswapOp, 1, valueNode);964}965966if (sourceDataType != targetDataType)967{968valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, needUnsignedConversion), 1, valueNode);969}970971return valueNode;972}973974return NULL;975}976977TR::Node* TR_DataAccessAccelerator::insertIntegerSetIntrinsic(TR::TreeTop* callTreeTop, TR::Node* callNode, int32_t sourceNumBytes, int32_t targetNumBytes)978{979if (sourceNumBytes != 1 && sourceNumBytes != 2 && sourceNumBytes != 4 && sourceNumBytes != 8)980{981printInliningStatus (false, callNode, "sourceNumBytes is invalid. 
Valid sourceNumBytes values are 1, 2, 4, or 8.");982return NULL;983}984985TR::Node* valueNode = callNode->getChild(0);986TR::Node* byteArrayNode = callNode->getChild(1);987TR::Node* offsetNode = callNode->getChild(2);988TR::Node* bigEndianNode = callNode->getChild(3);989TR::Node* numBytesNode = NULL;990991if (!bigEndianNode->getOpCode().isLoadConst())992{993printInliningStatus (false, callNode, "bigEndianNode is not constant.");994return NULL;995}996997// This check indicates that the targetNumBytes value is specified on the callNode, so we must extract it998if (targetNumBytes == 0)999{1000numBytesNode = callNode->getChild(4);10011002if (!numBytesNode->getOpCode().isLoadConst())1003{1004printInliningStatus (false, callNode, "numBytesNode is not constant.");1005return NULL;1006}10071008targetNumBytes = numBytesNode->getInt();10091010if (targetNumBytes != 1 && targetNumBytes != 2 && targetNumBytes != 4 && targetNumBytes != 8)1011{1012printInliningStatus (false, callNode, "targetNumBytes is invalid. 
Valid targetNumBytes values are 1, 2, 4, or 8.");1013return NULL;1014}10151016if (targetNumBytes > sourceNumBytes)1017{1018printInliningStatus (false, callNode, "targetNumBytes is out of bounds.");1019return NULL;1020}1021}1022else1023{1024targetNumBytes = sourceNumBytes;1025}10261027// Determines whether a TR::ByteSwap needs to be inserted before the store to the byteArray1028bool requiresByteSwap = targetNumBytes != 1 && comp()->target().cpu.isBigEndian() != static_cast <bool> (bigEndianNode->getInt());10291030if (requiresByteSwap && !comp()->cg()->supportsByteswap())1031{1032printInliningStatus (false, callNode, "Marshalling is not supported because ByteSwap IL evaluators are not implemented.");1033return NULL;1034}10351036if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: genSimplePutBinary call: %p inlined.\n", callNode))1037{1038insertByteArrayNULLCHK(callTreeTop, callNode, byteArrayNode);10391040insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, 0);1041insertByteArrayBNDCHK(callTreeTop, callNode, byteArrayNode, offsetNode, targetNumBytes - 1);10421043TR::DataType sourceDataType = TR::NoType;1044TR::DataType targetDataType = TR::NoType;10451046// Default case is impossible due to previous checks1047switch (sourceNumBytes)1048{1049case 1: sourceDataType = TR::Int32; break;1050case 2: sourceDataType = TR::Int32; break;1051case 4: sourceDataType = TR::Int32; break;1052case 8: sourceDataType = TR::Int64; break;1053}10541055// Default case is impossible due to previous checks1056switch (targetNumBytes)1057{1058case 1: targetDataType = TR::Int8; break;1059case 2: targetDataType = TR::Int16; break;1060case 4: targetDataType = TR::Int32; break;1061case 8: targetDataType = TR::Int64; break;1062}10631064TR::ILOpCodes op = TR::BadILOp;1065TR::ILOpCodes byteswapOp = TR::BadILOp;10661067// Default case is impossible due to previous checks1068switch (targetNumBytes)1069{1070case 1: op = TR::bstorei; break;1071case 2: op = TR::sstorei; 
byteswapOp = TR::sbyteswap; break;1072case 4: op = TR::istorei; byteswapOp = TR::ibyteswap; break;1073case 8: op = TR::lstorei; byteswapOp = TR::lbyteswap; break;1074}10751076// Create the proper conversion if the source and target sizes are different1077if (sourceDataType != targetDataType)1078{1079valueNode = TR::Node::create(TR::ILOpCode::getProperConversion(sourceDataType, targetDataType, false), 1, valueNode);1080}10811082if (requiresByteSwap)1083{1084valueNode = TR::Node::create(byteswapOp, 1, valueNode);1085}10861087return TR::Node::createWithSymRef(op, 2, 2, createByteArrayElementAddress(callTreeTop, callNode, byteArrayNode, offsetNode), valueNode, comp()->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));1088}10891090return NULL;1091}10921093TR::Node* TR_DataAccessAccelerator::constructAddressNode(TR::Node* callNode, TR::Node* arrayNode, TR::Node* offsetNode)1094{1095TR::Node * arrayAddressNode;1096TR::Node * headerConstNode;1097TR::Node * totalOffsetNode;10981099TR::Node * pdBufAddressNode = NULL;1100TR::Node * pdBufPositionNode = NULL;110111021103if (callNode->getSymbol()->getResolvedMethodSymbol())1104{1105if (callNode->getSymbol()->getResolvedMethodSymbol()->getRecognizedMethod())1106{1107bool isByteBuffer = false;11081109if ((callNode->getSymbol()->getResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_dataaccess_DecimalData_convertIntegerToPackedDecimal_ByteBuffer_)1110|| (callNode->getSymbol()->getResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_dataaccess_DecimalData_convertLongToPackedDecimal_ByteBuffer_))1111{1112isByteBuffer = true;1113pdBufAddressNode = callNode->getChild(5);1114pdBufPositionNode = callNode->getChild(7);1115}1116else if ((callNode->getSymbol()->getResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_)1117|| (callNode->getSymbol()->getResolvedMethodSymbol()->getRecognizedMethod() == 
TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_))1118{1119isByteBuffer = true;1120pdBufAddressNode = callNode->getChild(4);1121pdBufPositionNode = callNode->getChild(6);1122}11231124if (isByteBuffer)1125{1126TR::Node* offset = TR::Node::create(TR::i2l, 1, TR::Node::create(TR::iadd, 2, pdBufPositionNode, offsetNode));1127TR::Node* address = TR::Node::create(TR::ladd, 2, pdBufAddressNode, offset);1128return TR::Node::create(TR::l2a, 1, address);1129}1130}1131}11321133if (comp()->target().is64Bit())1134{1135headerConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);1136headerConstNode->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());1137totalOffsetNode = TR::Node::create(TR::ladd, 2, headerConstNode, TR::Node::create(TR::i2l, 1, offsetNode));1138arrayAddressNode = TR::Node::create(TR::aladd, 2, arrayNode, totalOffsetNode);1139}1140else1141{1142headerConstNode = TR::Node::create(callNode, TR::iconst, 0,1143TR::Compiler->om.contiguousArrayHeaderSizeInBytes());11441145totalOffsetNode = TR::Node::create(TR::iadd, 2, headerConstNode, offsetNode);1146arrayAddressNode = TR::Node::create(TR::aiadd, 2, arrayNode, totalOffsetNode);1147}1148arrayAddressNode->setIsInternalPointer(true);1149return arrayAddressNode;1150}11511152bool TR_DataAccessAccelerator::genComparisionIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes ops)1153{1154if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5))1155{1156return printInliningStatus(false, callNode, "Child (2|5) is not constant");1157}11581159TR_ASSERT(callNode->getNumChildren() == 6, "Expecting BCD cmp call with 6 children.");11601161TR::Node * op1Node = callNode->getChild(0);1162TR::Node * offset1Node = callNode->getChild(1);1163TR::Node * prec1Node = callNode->getChild(2);1164TR::Node * op2Node = callNode->getChild(3);1165TR::Node * offset2Node = callNode->getChild(4);1166TR::Node * prec2Node = callNode->getChild(5);11671168int precision1 = prec1Node->getInt();1169int 
precision2 = prec2Node->getInt();11701171if (precision1 > 31 || precision2 > 31 || precision1 < 1 || precision2 < 1)1172{1173return printInliningStatus(false, callNode, "Invalid precisions. Valid precisions are in range [1, 31]");1174}11751176if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genComparison call: %p, Comparison type: %d inlined.\n", callNode, ops))1177{1178return false;1179}11801181//create loading1182// loading The first operand1183TR::Node * arrayAddressNode1 = constructAddressNode(callNode, op1Node, offset1Node);1184TR::SymbolReference * symRef1 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode1, 8, fe());1185symRef1->setOffset(0);11861187TR::Node * pdload1 = TR::Node::create(TR::pdloadi, 1, arrayAddressNode1);1188pdload1->setSymbolReference(symRef1);1189pdload1->setDecimalPrecision(precision1);11901191//load the second operand1192TR::Node * arrayAddressNode2 = constructAddressNode(callNode, op2Node, offset2Node);1193TR::SymbolReference * symRef2 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode2, 8, fe());1194symRef2->setOffset(0);11951196TR::Node * pdload2 = TR::Node::create(TR::pdloadi, 1, arrayAddressNode2);1197pdload2->setSymbolReference(symRef2);1198pdload2->setDecimalPrecision(precision2);11991200//create the BCDCHK:1201TR::Node * pdOpNode = callNode;1202TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();1203TR::Node * bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 7, 7,1204pdOpNode,1205callNode->getChild(0), callNode->getChild(1),1206callNode->getChild(2), callNode->getChild(3),1207callNode->getChild(4), callNode->getChild(5),1208bcdChkSymRef);12091210pdOpNode->setNumChildren(2);1211pdOpNode->setAndIncChild(0, pdload1);1212pdOpNode->setAndIncChild(1, pdload2);1213pdOpNode->setSymbolReference(NULL);12141215// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API 
can1216// correctly compute a new CP to relocate DAA OOL calls.1217bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());12181219//instead of creating comparison operation, re use the callNode:1220TR::Node::recreate(pdOpNode, ops);12211222treeTop->setNode(bcdchkNode);12231224pdOpNode->decReferenceCount();1225op1Node->decReferenceCount();1226op2Node->decReferenceCount();1227offset1Node->decReferenceCount();1228offset2Node->decReferenceCount();1229prec1Node->decReferenceCount();1230prec2Node->decReferenceCount();12311232return printInliningStatus(true, callNode);1233}12341235bool TR_DataAccessAccelerator::generateI2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isI2PD, bool isByteBuffer)1236{1237int precision = callNode->getChild(3)->getInt();1238char* failMsg = NULL;12391240if (!isChildConst(callNode, 3) || !isChildConst(callNode, 4))1241failMsg = "Child (3|4) is not constant";1242else if (precision < 1 || precision > 31)1243failMsg = "Invalid precision. Valid precision is in range [1, 31]";12441245if (failMsg)1246{1247TR::DebugCounter::incStaticDebugCounter(comp(),1248TR::DebugCounter::debugCounterName(comp(),1249"DAA/rejected/%s",1250isI2PD ? 
"i2pd" : "l2pd"));1251return printInliningStatus(false, callNode, failMsg);1252}12531254TR::Node* intNode = NULL;1255TR::Node* pdNode = NULL;1256TR::Node* offsetNode = NULL;1257TR::Node* precNode = NULL;1258TR::Node* errorCheckingNode = NULL;12591260// Backing storage info for ByteBuffer1261TR::Node * pdBufAddressNodeCopy = NULL;1262TR::Node * pdBufCapacityNode = NULL;1263TR::Node * pdBufPositionNode = NULL;12641265TR::TreeTop *slowPathTreeTop = NULL;1266TR::TreeTop *fastPathTreeTop = NULL;1267TR::Node *slowPathNode = NULL;12681269bool needsBCDCHK = (isI2PD && (precision < 10)) || (!isI2PD && (precision < 19));12701271//still need to check bounds of pdNode1272if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: %s call: %p inlined.\n", (isI2PD)?"generateI2PD":"generateL2PD", callNode))1273{1274TR::DebugCounter::incStaticDebugCounter(comp(),1275TR::DebugCounter::debugCounterName(comp(),1276"DAA/inlined/%s",1277isI2PD ? "i2pd" : "l2pd"));12781279if (isByteBuffer)1280{1281/* We will be creating a precision diamond for the fast / slow path and eliminating the original call.1282* Because we are about to split the CFG we would have to store the original parameters of the call into1283* temp slots as we will be duplicating the call node in the precision diamond but we don't need to since1284* createConditionalBlocksBeforeTree takes care of it. 
createConditionalBlocksBeforeTree calls block::split1285* with true for the option fixupCommoning and so it will break the commoning and add any necessary temps for you.1286*/12871288pdBufAddressNodeCopy = TR::Node::copy(callNode->getChild(5));1289pdBufAddressNodeCopy->setReferenceCount(0);1290pdBufCapacityNode = callNode->getChild(6);1291pdBufPositionNode = callNode->getChild(7);1292}12931294intNode = callNode->getChild(0);1295pdNode = callNode->getChild(1);1296offsetNode = callNode->getChild(2);1297precNode = callNode->getChild(3);1298errorCheckingNode = callNode->getChild(4);12991300//create a TR::i2pd node and an pdstore.1301//this will not cause an exception, so it is safe to remove BCDCHK1302TR::Node * i2pdNode = TR::Node::create((isI2PD)?TR::i2pd:TR::l2pd, 1, intNode);1303i2pdNode->setDecimalPrecision(precision);13041305/**1306* Create separate address nodes for BCDCHK and pdstorei because BCDCHK can GC-and-Return like a call.1307*1308* Having separate address nodes also allows AddrNode2 and AddrNode3 commoning, which then makes1309* copy propagations possible.1310*1311* AddrNode1 could still be commoned with address nodes before the BCDCHK. Hence, the need for1312* UncommonBCDCHKAddressNode codegen pass. AddrNode1 is special in that it has to be rematerialized and used1313* at the end of the BCDCHK OOL path's GC point. 
No commoning of this node should happen.1314*1315* Example:1316*1317* BCDCHK1318* pdshlOverflow <prec=9 (len=5) adj=0 round=0>1319* ....1320* aladd (internalPtr sharedMemory ) AddrNode11321* ==>newarray1322* lconst 8 (highWordZero X!=0 X>=0 )1323* ....1324* pdstorei <array-shadow>[#490 Shadow] <prec=9 (len=5)>1325* aladd (internalPtr sharedMemory ) AddrNode21326* ==>newarray1327* ==>lconst 81328* ==>pdshlOverflow <prec=9 (len=5)1329* zdsleStorei <array-shadow>[#492 Shadow] <prec=9 (len=9)>1330* ....1331* zd2zdsle <prec=9 (len=9)>1332* pd2zd <prec=9 (len=9)>1333* pdloadi <prec=9 (len=5) adj=0 round=0>1334* aladd (internalPtr sharedMemory ) AddrNode31335* ==>newarray1336* ==>lconst 81337*1338* In the example above, AddrNode 1 to 3 have the same children.1339*1340* AddrNode1, the second child of the BCDCHK node, is meant to be rematerialized for OOL post-call data copy back.1341* See BCDCHKEvaluatorImpl() for BCDCHK tree structure and intended use of its children.1342*1343* 'outOfLineCopyBackAddr' and 'storeAddressNode' correspond to AddrNode1 and AddrNode2, respectively. They1344* are created as separate nodes so that LocalCSE is able to common up AddrNode2 and AddrNode3. 
If1345* AddrNode1 and AddrNode2 were the same node, the LocalCSE would not consider AddrNode1 an alternative replacement1346* of AddrNode3 because the BCDCHK's symbol canGCAndReturn().1347*1348* With AddrNode2 and AddrNode3 commoned up, the LocalCSE is able to copy propagate pdshlOverflow to the pd2zd1349* tree and replace its pdloadi.1350*/1351TR::Node * outOfLineCopyBackAddr = constructAddressNode(callNode, pdNode, offsetNode);1352TR::Node * storeAddressNode = constructAddressNode(callNode, pdNode, offsetNode);13531354TR::TreeTop * nextTT = treeTop->getNextTreeTop();1355TR::TreeTop * prevTT = treeTop->getPrevTreeTop();13561357TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);13581359TR::Node * pdstore = NULL;1360TR::Node * bcdchkNode = NULL;1361if (needsBCDCHK)1362{1363i2pdNode->setDecimalPrecision((isI2PD)? 10:19);1364TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, i2pdNode, TR::Node::create(callNode, TR::iconst, 0));1365pdshlNode->setDecimalPrecision(precision);13661367/* Attaching all the original callNode's children as the children to BCDCHK.1368* We don't want to attach the callNode as a child to BCDCHK since it would be an aberration to the1369* definition of a BCDCHK node. BCDCHK node is already a special type of node, and all optimizations expect the1370* call (i2pd) to be inside the first child of BCDCHK. Attaching another call could cause many things to1371* break as all optimizations such as Value Propagation don't expect it to be there. Attaching the callNode's children1372* to BCDCHK would be safe. 
We would whip up the call with these attached children during codegen1373* for the fallback of the fastpath.1374*/1375TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();13761377if (isByteBuffer)1378{1379bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 10, 10,1380pdshlNode, outOfLineCopyBackAddr,1381callNode->getChild(0), callNode->getChild(1),1382callNode->getChild(2), callNode->getChild(3),1383callNode->getChild(4), callNode->getChild(5),1384callNode->getChild(6), callNode->getChild(7),1385bcdChkSymRef);1386}1387else1388{1389bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 7, 7,1390pdshlNode, outOfLineCopyBackAddr,1391callNode->getChild(0), callNode->getChild(1),1392callNode->getChild(2), callNode->getChild(3),1393callNode->getChild(4),1394bcdChkSymRef);1395}13961397// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can1398// correctly compute a new CP to relocate DAA OOL calls.1399bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());14001401pdstore = TR::Node::create(op, 2, storeAddressNode, pdshlNode);1402}1403else1404{1405pdstore = TR::Node::create(op, 2, storeAddressNode, i2pdNode);1406}14071408TR::TreeTop* pdstoreTT = TR::TreeTop::create(comp(), pdstore);14091410if (isByteBuffer)1411{1412TR::CFG *cfg = comp()->getFlowGraph();1413TR::Block *callBlock = treeTop->getEnclosingBlock();14141415// Generate the slow path1416slowPathTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());1417slowPathNode = slowPathTreeTop->getNode()->getFirstChild();14181419// Generate the tree to check if the ByteBuffer has a valid address or not1420TR::Node* nullNode = TR::Node::create(TR::lconst, 0, 0);1421TR::Node *isValidAddrNode = TR::Node::createif(TR::iflcmpeq, pdBufAddressNodeCopy, nullNode, treeTop);1422TR::TreeTop *isValidAddrTreeTop = TR::TreeTop::create(comp(), isValidAddrNode, NULL, NULL);14231424fastPathTreeTop = pdstoreTT;14251426/* Create the diamond in CFG1427* 
if (ByteBuffer.address != NULL)1428* fastpath (CVD instruction executed by HW)1429* else1430* slowpath (call to Java method: convertIntegerToPackedDecimal_)1431* */1432callBlock->createConditionalBlocksBeforeTree(treeTop, isValidAddrTreeTop, slowPathTreeTop, fastPathTreeTop, cfg, false, true);1433}14341435TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());1436pdstore->setSymbolReference(symRef);1437pdstore->setDecimalPrecision(precision);14381439TR::TreeTop* bcdchktt = NULL;1440if (needsBCDCHK)1441bcdchktt = TR::TreeTop::create(comp(), bcdchkNode);14421443if (isByteBuffer)1444{1445// the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores.1446if (needsBCDCHK)1447{1448pdstoreTT->insertBefore(bcdchktt);1449}1450}1451else1452{1453if (needsBCDCHK)1454{1455prevTT->join(bcdchktt);1456bcdchktt->join(pdstoreTT);1457}1458else1459{1460prevTT->join(pdstoreTT);1461}1462pdstoreTT->join(nextTT);14631464// we'll be removing the callNode, update its refcount.1465callNode->recursivelyDecReferenceCount();1466}14671468return true;1469}1470return false;1471}14721473void TR_DataAccessAccelerator::createPrecisionDiamond(TR::Compilation* comp,1474TR::TreeTop* treeTop,1475TR::TreeTop* fastTree,1476TR::TreeTop* slowTree,1477bool isPD2I,1478uint32_t numPrecisionNodes,1479...)1480{1481// Create precision guards1482const uint8_t precisionMin = 1;1483const uint8_t precisionMax = isPD2I ? 
15 : 31;14841485uint32_t numGuards = numPrecisionNodes * 2;14861487TR::StackMemoryRegion stackMemoryRegion(*(comp->trMemory()));14881489BlockContainer testBlocks(stackMemoryRegion);1490TreeTopContainer testTTs(stackMemoryRegion);14911492va_list precisionNodeList;1493va_start(precisionNodeList, numPrecisionNodes);1494for(uint32_t i = 0; i < numPrecisionNodes; ++i)1495{1496TR::Node* precisionNode = va_arg(precisionNodeList, TR::Node*);1497TR_ASSERT(precisionNode, "Precision node should not be null");1498TR::Node* node1 = TR::Node::createif(TR::ificmpgt, precisionNode->duplicateTree(), TR::Node::iconst(precisionMax));1499TR::Node* node2 = TR::Node::createif(TR::ificmplt, precisionNode->duplicateTree(), TR::Node::iconst(precisionMin));15001501testTTs.push_back(TR::TreeTop::create(comp, node1));1502testTTs.push_back(TR::TreeTop::create(comp, node2));1503}1504va_end(precisionNodeList);15051506// Split blocks, 1 for each precision test block1507TR::CFG* cfg = comp->getFlowGraph();15081509// We will be updating the CFG so invalidate the structure1510cfg->setStructure(0);15111512testBlocks.push_back(treeTop->getEnclosingBlock(false));1513for(uint32_t i = 1; i < numGuards; ++i)1514{1515testBlocks.push_back(testBlocks[i-1]->split(treeTop, cfg, true));1516}15171518TR::Block* firstTestBlock = testBlocks.front();1519TR::Block* lastTestBlock = testBlocks.back();15201521// This block will contain everything AFTER tree1522TR::Block * otherBlock = lastTestBlock->split(treeTop, cfg, true);15231524// Append tree tops1525for(int i = 0; i < numGuards; ++i)1526{1527testBlocks[i]->append(testTTs[i]);1528}15291530TR::Node* node = treeTop->getNode();15311532// Remove the original tree from the other block1533node->removeAllChildren();15341535TR::TreeTop* prevTT = treeTop->getPrevTreeTop();1536TR::TreeTop* nextTT = treeTop->getNextTreeTop();15371538prevTT->join(nextTT);15391540TR::Block * fastBlock = TR::Block::createEmptyBlock(node, comp, firstTestBlock->getFrequency());1541TR::Block * 
slowBlock = TR::Block::createEmptyBlock(node, comp, UNKNOWN_COLD_BLOCK_COUNT);15421543TR::TreeTop* slowEntry = slowBlock->getEntry();1544TR::TreeTop* fastEntry = fastBlock->getEntry();1545TR::TreeTop* slowExit = slowBlock->getExit();1546TR::TreeTop* fastExit = fastBlock->getExit();15471548// Fast block is a fall-through of the second if test1549lastTestBlock->getExit()->join(fastEntry);15501551cfg->addNode(fastBlock);1552cfg->addNode(slowBlock);15531554TR::Block * bestBlock = otherBlock;15551556// Find the best place for the slow block1557while (bestBlock && bestBlock->canFallThroughToNextBlock())1558{1559bestBlock = bestBlock->getNextBlock();1560}15611562if (bestBlock)1563{1564TR::TreeTop* bestExit = bestBlock->getExit();1565TR::TreeTop* bestNext = bestBlock->getExit()->getNextTreeTop();15661567bestExit->join(slowEntry);1568slowExit->join(bestNext);1569}1570else1571{1572cfg->findLastTreeTop()->join(slowBlock->getEntry());1573}15741575fastBlock->append(fastTree);1576slowBlock->append(slowTree);15771578// Jump back to other block after slow path1579slowBlock->append(TR::TreeTop::create(comp, TR::Node::create(node, TR::Goto, 0, otherBlock->getEntry())));1580for(int i = 0; i < numGuards; ++i)1581{1582testTTs[i]->getNode()->setBranchDestination(slowEntry);1583}15841585// Other block is a fall-through of the fast block1586fastExit->join(otherBlock->getEntry());15871588cfg->addEdge(TR::CFGEdge::createEdge(lastTestBlock, fastBlock, trMemory()));1589cfg->addEdge(TR::CFGEdge::createEdge(fastBlock, otherBlock, trMemory()));1590cfg->addEdge(TR::CFGEdge::createEdge(slowBlock, otherBlock, trMemory()));1591for(int i = 0; i < numGuards; ++i)1592{1593cfg->addEdge(TR::CFGEdge::createEdge(testBlocks[i], slowBlock, trMemory()));1594}15951596// We introduced fastBlock in between these two, so it is not needed anymore1597cfg->removeEdge(lastTestBlock, 
otherBlock);15981599fastBlock->setIsExtensionOfPreviousBlock(false);1600slowBlock->setIsExtensionOfPreviousBlock(false);1601otherBlock->setIsExtensionOfPreviousBlock(false);16021603cfg->copyExceptionSuccessors(firstTestBlock, fastBlock);1604cfg->copyExceptionSuccessors(firstTestBlock, slowBlock);1605}16061607bool1608TR_DataAccessAccelerator::generatePD2IConstantParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)1609{1610TR::Node* pdInputNode = callNode->getChild(0);1611TR::Node* offsetNode = callNode->getChild(1);1612TR::Node* precisionNode = callNode->getChild(2);1613TR::Node* overflowNode = callNode->getChild(3);16141615// Backing storage info for ByteBuffer1616TR::Node * pdBufAddressNodeCopy = NULL;1617TR::Node * pdBufCapacityNode = NULL;1618TR::Node * pdBufPositionNode = NULL;16191620TR::TreeTop *slowPathTreeTop = NULL;1621TR::TreeTop *fastPathTreeTop = NULL;1622TR::Node *slowPathNode = NULL;1623TR::Node *pd2iNode = NULL;1624TR::TreeTop *copiedCallNodeTreeTop = NULL;1625TR::Node *copiedCallNode = NULL;1626TR::SymbolReference *newSymbolReference = 0;1627TR::TreeTop *bcdchkTreeTop = NULL;1628int precision = precisionNode->getInt();1629int overflow = overflowNode->getInt();1630char* failMsg = NULL;16311632if (precision < 1)1633failMsg = "Invalid precision. Precision can not be less than 1";1634else if (isPD2i && precision > 10)1635failMsg = "Invalid precision. Precision can not be greater than 10";1636else if (!isPD2i && precision > 31)1637failMsg = "Invalid precision. Precision can not be greater than 31";16381639if (failMsg)1640{1641TR::DebugCounter::incStaticDebugCounter(comp(),1642TR::DebugCounter::debugCounterName(comp(),1643"DAA/rejected/%s",1644isPD2i ? 
"pd2i" : "pd2l"));1645return printInliningStatus(false, callNode, failMsg);1646}16471648if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: %s call: %p inlined.\n", (isPD2i)?"generatePD2I":"generatePD2L", callNode))1649{1650TR::DebugCounter::incStaticDebugCounter(comp(),1651TR::DebugCounter::debugCounterName(comp(),1652"DAA/inlined/%s",1653isPD2i ? "pd2i" : "pd2l"));1654if (isByteBuffer)1655{1656/* We will be creating a precision diamond for the fast / slow path and eliminating the original call.1657* Because we are about to split the CFG we would have to store the original parameters of the call into1658* temp slots as we will be duplicating the call node in the precision diamond but we don't need to since1659* createConditionalBlocksBeforeTree takes care of it. createConditionalBlocksBeforeTree calls block::split1660* with true for the option fixupCommoning and so it will break the commoning and add any necessary temps for you.1661*/16621663pdBufAddressNodeCopy = TR::Node::copy(callNode->getChild(4));1664pdBufAddressNodeCopy->setReferenceCount(0);1665pdBufCapacityNode = callNode->getChild(5);1666pdBufPositionNode = callNode->getChild(6);1667}16681669//create the packed decimals16701671TR::Node * arrayAddressNode = constructAddressNode(callNode, pdInputNode, offsetNode);16721673int32_t size = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, precision) - 1;1674TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());1675TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);1676pdload->setSymbolReference(symRef);1677pdload->setDecimalPrecision(precision);16781679TR::TreeTop * prevTT = treeTop->getPrevTreeTop();1680TR::TreeTop * nextTT = treeTop->getNextTreeTop();1681TR::Node *bcdchk = NULL;1682TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();16831684/* Attaching all the original callNode's children as the children to BCDCHK.1685* We 
don't want to attach the callNode as a child to BCDCHK since it would be an aberration to the1686* definition of a BCDCHK node. BCDCHK node is already a special type of node, and all optimizations expect the1687* call (pd2i) to be in its first child. Attaching another call could cause many things to break as all1688* optimizations such as Value Propagation don't expect it to be there. Attaching the callNode's children1689* to BCDCHK would be safe. We would whip up the call with these attached children during codegen1690* for the fallback of the fastpath.1691*/1692if (isByteBuffer)1693{1694// Tree with pd2i() under BCDCHK node1695copiedCallNodeTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());1696copiedCallNode = copiedCallNodeTreeTop->getNode()->getFirstChild();1697bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 8, 8,1698copiedCallNode,1699callNode->getChild(0), callNode->getChild(1),1700callNode->getChild(2), callNode->getChild(3),1701callNode->getChild(4), callNode->getChild(5),1702callNode->getChild(6),1703bcdChkSymRef);17041705/*1706* BCDCHK would look something like this for ByteBuffer:1707*1708* n2975n BCDCHK [#959] ()1709n2958n pd2i ()1710n2949n pdloadi <array-shadow>[#497 Shadow] [flags 0x80000618 0x0 ] <prec=10 (len=6) adj=0> sign=hasState1711n2948n l2a1712n2947n ladd1713n2941n lload <temp slot 29>[#949 Auto] [flags 0x4 0x0 ] // address of ByteBuffer1714n2946n i2l1715n2945n iadd1716n2943n ==>iload // position1717n2938n ==>iload // offset1718n2937n ==>aload // ByteBuffer1719n2938n ==>iload // offset1720n2939n iload <temp slot 27>[#947 Auto] [flags 0x3 0x0 ] // precision1721n2940n iload <temp slot 28>[#948 Auto] [flags 0x3 0x0 ] // checkOverflow1722n2941n ==>lload // address of ByteBuffer1723n2942n ==>iload // capacity of ByteBuffer1724n2943n ==>iload // position of ByteBuffer1725*/1726}1727else1728{1729bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 5, 5,1730callNode,1731callNode->getChild(0), 
callNode->getChild(1),1732callNode->getChild(2), callNode->getChild(3),1733bcdChkSymRef);1734/*1735* BCDCHK would look something like this for byte[]:1736*1737* n2937n BCDCHK [#990] ()1738n990n pd2i ()1739n2929n pdloadi <array-shadow>[#486 Shadow] [flags 0xffffffff80000612 0x0 ] <prec=10 (len=6) adj=0> sign=hasState vn=- sti=- udi=- nc=11740n2928n aladd (internalPtr sharedMemory )1741n986n ==>aload1742n2927n aladd1743n2925n lconst 81744n2926n i2l1745n1001n ==>iconst 01746n986n ==>aload // byte[]1747n1001n ==>iconst 0 // offset1748n1002n iconst 10 (X!=0 X>=0 ) // precision1749n1003n iconst 0 (X==0 X>=0 X<=0 ) // checkOverflow1750*/1751}17521753// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can1754// correctly compute a new CP to relocate DAA OOL calls.1755bcdchk->setInlinedSiteIndex(callNode->getInlinedSiteIndex());17561757TR::DataType dataType = callNode->getDataType();1758if (isByteBuffer)1759{1760TR::CFG *cfg = comp()->getFlowGraph();1761TR::Block *callBlock = treeTop->getEnclosingBlock();17621763// Generate the slow path1764slowPathTreeTop = TR::TreeTop::create(comp(),treeTop->getNode()->duplicateTree());1765slowPathNode = slowPathTreeTop->getNode()->getFirstChild();17661767// Generate the tree to check if the ByteBuffer has a valid address or not1768TR::Node* nullNode = TR::Node::create(TR::lconst, 0, 0);1769TR::Node *isValidAddrNode = TR::Node::createif(TR::iflcmpeq, pdBufAddressNodeCopy, nullNode, treeTop);1770TR::TreeTop *isValidAddrTreeTop = TR::TreeTop::create(comp(), isValidAddrNode, NULL, NULL);17711772bcdchkTreeTop = TR::TreeTop::create(comp(), bcdchk);1773fastPathTreeTop = bcdchkTreeTop;17741775if (callNode->getReferenceCount() > 1)1776{1777newSymbolReference = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);1778TR::Node::recreate(callNode, 
comp()->il.opCodeForDirectLoad(dataType));1779callNode->setSymbolReference(newSymbolReference);1780callNode->removeAllChildren();1781}17821783callNode = copiedCallNode;17841785/* Create the diamond in CFG1786* if (ByteBuffer.address != NULL)1787* fastpath (CVB instruction executed by HW)1788* else1789* slowpath (call to Java method: convertPackedDecimalToInteger_)1790* */1791callBlock->createConditionalBlocksBeforeTree(treeTop, isValidAddrTreeTop, slowPathTreeTop, fastPathTreeTop, cfg, false, true);1792}17931794// we'll be removing the callNode, update its refcount before replacing its fields.1795// The callNode may have more than 1 reference (treetop and i/lstore), so we need to scan through its list of children.1796for (int32_t childCount = callNode->getNumChildren()-1; childCount >= 0; childCount--)1797callNode->getChild(childCount)->recursivelyDecReferenceCount();17981799// Replacing TT with BCDCHK, so losing one reference.1800callNode->decReferenceCount();18011802//create pd2i:1803pd2iNode = callNode;1804pd2iNode->setNumChildren(1);1805pd2iNode->setAndIncChild(0, pdload);1806if (!isByteBuffer)1807{1808pd2iNode->setSymbolReference(NULL);1809}18101811if (isPD2i)1812{1813if (!overflow)1814TR::Node::recreate(pd2iNode, TR::pd2i);1815else1816TR::Node::recreate(pd2iNode, TR::pd2iOverflow);1817}1818else1819{1820if (!overflow)1821TR::Node::recreate(pd2iNode, TR::pd2l);1822else1823TR::Node::recreate(pd2iNode, TR::pd2lOverflow);1824}182518261827if (isByteBuffer)1828{1829// the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores.18301831if (newSymbolReference)1832{1833/* Storing the result to a temp slot so that it can be loaded from there later1834* We would need to store the result to the same temp slot for both fast and slow path so we that1835* we get the same result irrespective of the path taken.1836* For slowpath: storing the result of icall to temp slot1837* For fastpath: storing the result of 
pd2i() to temp slot1838*/18391840TR::Node *slowStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, slowPathTreeTop->getNode()->getFirstChild(), newSymbolReference);1841TR::TreeTop *slowStoreTree = TR::TreeTop::create(comp(), slowStoreNode);18421843slowPathTreeTop->insertAfter(slowStoreTree);18441845treeTop->setNode(bcdchk);1846TR::Node *fastStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, bcdchkTreeTop->getNode()->getFirstChild(), newSymbolReference);1847TR::TreeTop *fastStoreTree = TR::TreeTop::create(comp(), fastStoreNode);18481849bcdchkTreeTop->insertAfter(fastStoreTree);1850}1851else1852{1853treeTop->setNode(bcdchk);1854}1855}1856else1857{1858treeTop->setNode(bcdchk);1859prevTT->join(treeTop);1860treeTop->join(nextTT);1861}18621863return true;1864}1865return false;1866}18671868TR::Node*1869TR_DataAccessAccelerator::restructureVariablePrecisionCallNode(TR::TreeTop* treeTop, TR::Node* callNode)1870{1871uint32_t numCallParam = callNode->getNumChildren();1872TR::SymbolReferenceTable* symRefTab = comp()->getSymRefTab();1873TR::ResolvedMethodSymbol* methodSym = comp()->getMethodSymbol();18741875for(uint32_t i = 0; i < numCallParam; ++i)1876{1877TR::Node* child = callNode->getChild(i);1878TR::SymbolReference* newTemp = symRefTab->createTemporary(methodSym, child->getDataType());1879treeTop->insertBefore(TR::TreeTop::create(comp(), TR::Node::createStore(newTemp, child)));1880child->decReferenceCount();1881callNode->setAndIncChild(i, TR::Node::createLoad(child, newTemp));1882}18831884return callNode;1885}18861887bool1888TR_DataAccessAccelerator::generatePD2IVariableParameter(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)1889{1890TR::Node* precisionNode = callNode->getChild(2);18911892if (comp()->getOption(TR_DisableVariablePrecisionDAA) ||1893!performTransformation(comp(), "O^O TR_DataAccessAccelerator: [DAA] Generating variable %s for node %p \n", isPD2i ? 
"PD2I" : "PD2L", callNode))1894{1895TR::DebugCounter::incStaticDebugCounter(comp(),1896TR::DebugCounter::debugCounterName(comp(),1897"DAA/rejected/%s",1898isPD2i ? "var-pd2i" : "var-pd2l"));1899return false;1900}19011902TR::DebugCounter::incStaticDebugCounter(comp(),1903TR::DebugCounter::debugCounterName(comp(),1904"DAA/inlined/%s",1905isPD2i ? "var-pd2i" : "var-pd2l"));1906// We will be creating a precision diamond for the fast / slow path and eliminating the original call.1907// Because we are about to split the CFG we must store the original parameters of the call into temp slots1908// as we will be duplicating the call node in the precision diamond. We cannot duplicate the parameters1909// because tree duplication breaks commoning, and thus we want to avoid a situation where a commoned reference1910// to a newarray node gets duplicated and uncommoned.1911callNode = restructureVariablePrecisionCallNode(treeTop, callNode);19121913// Duplicate two trees for the precision diamond1914TR::Node* slowNode = callNode->duplicateTree();1915TR::Node* fastNode = callNode->duplicateTree();19161917// Create the corresponding treetops1918TR::TreeTop* slowTT = TR::TreeTop::create(comp(), TR::Node::create(TR::treetop, 1, slowNode));1919TR::TreeTop* fastTT = TR::TreeTop::create(comp(), TR::Node::create(TR::treetop, 1, fastNode));19201921// We mark the slow path with a flag to prevent this optimization to recurse on the slow path1922slowNode->setDAAVariableSlowCall(true);19231924// Create the precision test diamond1925createPrecisionDiamond(comp(), treeTop, fastTT, slowTT, isPD2i, 1, precisionNode);19261927// Fix up any references to the original call1928if (callNode->getReferenceCount() > 0)1929{1930// Create a temp variable to store the result of the call1931TR::SymbolReference* temp = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(),1932callNode->getDataType());19331934TR::TreeTop* slowStore = TR::TreeTop::create(comp(), TR::Node::createStore(temp, 
slowNode));1935TR::TreeTop* fastStore = TR::TreeTop::create(comp(), TR::Node::createStore(temp, fastNode));19361937slowStore->join(slowTT->getNextTreeTop());1938fastStore->join(fastTT->getNextTreeTop());19391940slowTT->join(slowStore);1941fastTT->join(fastStore);19421943// Replacing original call with a load, so remove all children1944callNode->removeAllChildren();19451946// Update the op code to the correct type and make it reference the temp variable1947TR::Node::recreate(callNode, comp()->il.opCodeForDirectLoad(callNode->getDataType()));1948callNode->setSymbolReference(temp);1949}19501951// Create BCDCHK node1952TR::SymbolReference* bcdChkSymRef = fastNode->getSymbolReference();1953TR::Node* pdAddressNode = constructAddressNode(fastNode, fastNode->getChild(0), fastNode->getChild(1));1954TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 2, 2, fastNode, pdAddressNode, bcdChkSymRef);1955fastTT->setNode(bcdchkNode);19561957// TreeTop replaced by BCDCHK, so we lose 1 reference1958fastNode->decReferenceCount();19591960return true;1961}19621963bool TR_DataAccessAccelerator::generatePD2I(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2i, bool isByteBuffer)1964{1965// Check if the current call node is the slow part of a previous variable precision DAA optimization1966if (callNode->isDAAVariableSlowCall())1967{1968return false;1969}19701971TR_ASSERT(!IS_VARIABLE_PD2I(callNode), "Variable PD2I should not be handled here.");19721973return generatePD2IConstantParameter(treeTop, callNode, isPD2i, isByteBuffer);1974}19751976bool TR_DataAccessAccelerator::genArithmeticIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode, TR::ILOpCodes opCode)1977{1978if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||1979!isChildConst(callNode, 8) || !isChildConst(callNode, 9))1980{1981return printInliningStatus(false, callNode, "Child (2|5|8|9) is not constant");1982}19831984TR_ASSERT(callNode->getNumChildren() == 10, "Expecting BCD arithmetics call with 10 
children.");19851986TR::Node* resultNode = callNode->getChild(0);1987TR::Node* resOffsetNode = callNode->getChild(1);1988TR::Node* resPrecNode = callNode->getChild(2);1989TR::Node* input1Node = callNode->getChild(3);1990TR::Node* offset1Node = callNode->getChild(4);1991TR::Node* prec1Node = callNode->getChild(5);1992TR::Node* input2Node = callNode->getChild(6);1993TR::Node* offset2Node = callNode->getChild(7);1994TR::Node* prec2Node = callNode->getChild(8);1995TR::Node* overflowNode = callNode->getChild(9);19961997int precision1 = prec1Node->getInt();1998int precision2 = prec2Node->getInt();1999int precisionResult = resPrecNode->getInt();2000int isCheckOverflow = overflowNode->getInt();20012002if (precision1 < 1 || precision1 > 15 ||2003precision2 < 1 || precision2> 15 ||2004precisionResult < 1 || precisionResult > 15)2005{2006return printInliningStatus(false, callNode, "Invalid precisions. Valid precisions are in range [1, 15]");2007}20082009if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: genArithmetics call: %p inlined, with opcode:%d \n", callNode, opCode))2010{2011//create loading2012// loading The first operand2013TR::Node * arrayAddressNodeA = constructAddressNode(callNode, input1Node, offset1Node);2014TR::SymbolReference * symRef1 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNodeA, 8, fe());2015symRef1->setOffset(0);20162017TR::Node * pdloadA = TR::Node::create(TR::pdloadi, 1, arrayAddressNodeA);2018pdloadA->setSymbolReference(symRef1);2019pdloadA->setDecimalPrecision(precision1);20202021//load the second operand2022TR::Node * arrayAddressNodeB = constructAddressNode(callNode, input2Node, offset2Node);2023TR::SymbolReference * symRef2 = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNodeB, 8, fe());2024symRef2->setOffset(0);20252026TR::Node * pdloadB = TR::Node::create(TR::pdloadi, 1, 
arrayAddressNodeB);2027pdloadB->setSymbolReference(symRef2);2028pdloadB->setDecimalPrecision(precision2);20292030// create actual arithmetic operation.2031TR::Node * operationNode = TR::Node::create(opCode, 2, pdloadA, pdloadB);20322033switch(opCode)2034{2035case TR::pdadd:2036case TR::pdsub:2037operationNode->setDecimalPrecision(((precision1 > precision2) ? precision1 : precision2) + 1);2038break;2039case TR::pdmul:2040operationNode->setDecimalPrecision(precision1 + precision2); //TODO: check +1. +1 because pdshlOverflow will do the overflow check2041break;2042case TR::pddiv:2043operationNode->setDecimalPrecision(precision1);2044break;2045case TR::pdrem:2046operationNode->setDecimalPrecision((precision1 < precision2) ? precision1 : precision2);2047break;2048default:2049TR_ASSERT(0, "Unsupported DAA arithmetics opCode");2050break;2051}20522053/*2054* Resulting tree2055*2056* BCDCHK2057* pdshlOverflow2058* operationNode2059* pdLoadA2060* pdLoadB2061* iconst 02062* arrayAddressNode2063* call-param-12064* ...2065* call-param-92066*2067* pdstorei2068* pdStoreAddressNode2069* => pdshlOverflow2070*2071* Create separate address nodes for BCDCHK and pdstorei. 
See generateI2PD() for an explanation to this.2072*/2073TR::Node* outOfLineCopyBackAddr = constructAddressNode(callNode, resultNode, resOffsetNode);2074TR::Node* pdStoreAddressNode = constructAddressNode(callNode, resultNode, resOffsetNode);20752076TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);2077TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());2078TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, resPrecNode->getInt()));2079symStore->setArrayShadowSymbol();2080symRefPdstore->setSymbol(symStore);20812082TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, operationNode,2083TR::Node::create(callNode, TR::iconst, 0, 0));2084pdshlNode->setDecimalPrecision(resPrecNode->getInt());2085TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddressNode, pdshlNode);20862087TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();2088TR::Node * bcdchk = TR::Node::createWithSymRef(TR::BCDCHK, 12, 12,2089pdshlNode, outOfLineCopyBackAddr,2090callNode->getChild(0), callNode->getChild(1),2091callNode->getChild(2), callNode->getChild(3),2092callNode->getChild(4), callNode->getChild(5),2093callNode->getChild(6), callNode->getChild(7),2094callNode->getChild(8), callNode->getChild(9),2095bcdChkSymRef);20962097// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can2098// correctly compute a new CP to relocate DAA OOL calls.2099bcdchk->setInlinedSiteIndex(callNode->getInlinedSiteIndex());21002101pdstore->setSymbolReference(symRefPdstore);2102pdstore->setDecimalPrecision(resPrecNode->getInt());21032104TR::TreeTop * ttPdstore = TR::TreeTop::create(comp(), pdstore);21052106// Join treetops:2107TR::TreeTop * nextTT = treeTop->getNextTreeTop();2108TR::TreeTop * prevTT = 
treeTop->getPrevTreeTop();2109treeTop->setNode(bcdchk);2110prevTT->join(treeTop);2111treeTop->join(ttPdstore);2112ttPdstore->join(nextTT);21132114callNode->recursivelyDecReferenceCount();21152116return printInliningStatus(true, callNode);2117}2118return false;2119}21202121bool TR_DataAccessAccelerator::genShiftRightIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode)2122{2123TR::Node * dstNode = callNode->getChild(0);2124TR::Node * dstOffsetNode = callNode->getChild(1);2125TR::Node * dstPrecNode = callNode->getChild(2);21262127TR::Node * srcNode = callNode->getChild(3);2128TR::Node * srcOffsetNode = callNode->getChild(4);2129TR::Node * srcPrecNode = callNode->getChild(5);21302131TR::Node * shiftNode = callNode->getChild(6);2132TR::Node * roundNode = callNode->getChild(7);2133TR::Node * overflowNode = callNode->getChild(8);21342135int srcPrec = srcPrecNode->getInt();2136int dstPrec = dstPrecNode->getInt();21372138int shiftAmount = shiftNode->getInt();2139int isRound = roundNode->getInt();2140int isCheckOverflow = overflowNode->getInt();2141char* failMsg = NULL;21422143if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||2144!isChildConst(callNode, 7) || !isChildConst(callNode, 8))2145failMsg = "Child (2|5|7|8) is not constant";2146else if (srcPrec < 1)2147failMsg = "Invalid precision. Source precision can not be less than 1";2148else if (dstPrec < 1)2149failMsg = "Invalid precision. Destination precision can not be less than 1";2150else if (srcPrec > 15)2151failMsg = "Invalid precision. Source precision can not be greater than 15";2152else if (dstPrec > 15)2153failMsg = "Invalid precision. Destination precision can not be greater than 15";2154else if (dstPrec < srcPrec - shiftAmount)2155failMsg = "Invalid shift amount. 
Precision is too low to contain shifted Packed Decimal";21562157if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genShiftRight call: %p inlined.\n", callNode) && !failMsg)2158failMsg = "Not allowed";21592160if (failMsg)2161{2162TR::DebugCounter::incStaticDebugCounter(comp(),2163TR::DebugCounter::debugCounterName(comp(),2164"DAA/rejected/shr"));2165return printInliningStatus(false, callNode, failMsg);2166}21672168TR::DebugCounter::incStaticDebugCounter(comp(),2169TR::DebugCounter::debugCounterName(comp(),2170"DAA/inlined/shr"));21712172//gen source pdload2173TR::Node * arrayAddressNode = constructAddressNode(callNode, srcNode, srcOffsetNode);21742175TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());2176symRef->setOffset(0);21772178//gen pdshr:2179TR::Node * roundValueNode = TR::Node::create(callNode, TR::iconst, 0, isRound ? 5 : 0);2180TR::Node * outOfLineCopyBackAddr = constructAddressNode(callNode, dstNode, dstOffsetNode);2181TR::Node * pdStoreAddressNode = constructAddressNode(callNode, dstNode, dstOffsetNode);21822183TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);2184pdload->setSymbolReference(symRef);2185pdload->setDecimalPrecision(srcPrec);21862187TR::Node * pdshrNode = TR::Node::create(TR::pdshr, 3, pdload, shiftNode, roundValueNode);2188pdshrNode->setDecimalPrecision(dstPrec);21892190TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();2191TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 11, 11,2192pdshrNode, outOfLineCopyBackAddr,2193callNode->getChild(0), callNode->getChild(1),2194callNode->getChild(2), callNode->getChild(3),2195callNode->getChild(4), callNode->getChild(5),2196callNode->getChild(6), callNode->getChild(7),2197callNode->getChild(8),2198bcdChkSymRef);21992200// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can2201// correctly compute a 
new CP to relocate DAA OOL calls.2202bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());22032204TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);2205TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());2206TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, dstPrec));2207symStore->setArrayShadowSymbol();2208symRefPdstore->setSymbol(symStore);22092210TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddressNode, pdshrNode);22112212pdstore->setSymbolReference(symRefPdstore);2213pdstore->setDecimalPrecision(dstPrec);22142215TR::TreeTop * pdstoreNodeTT = TR::TreeTop::create(comp(), pdstore);22162217//link them together:2218TR::TreeTop * prevTT = treeTop->getPrevTreeTop();2219TR::TreeTop * nextTT = treeTop->getNextTreeTop();22202221prevTT->join(treeTop);2222treeTop->setNode(bcdchkNode);2223treeTop->join(pdstoreNodeTT);2224pdstoreNodeTT->join(nextTT);22252226callNode->recursivelyDecReferenceCount();2227return printInliningStatus(true, callNode);2228}22292230bool TR_DataAccessAccelerator::genShiftLeftIntrinsic(TR::TreeTop* treeTop, TR::Node* callNode)2231{223222332234TR::Node * dstNode = callNode->getChild(0);2235TR::Node * dstOffsetNode = callNode->getChild(1);2236TR::Node * dstPrecNode = callNode->getChild(2);22372238TR::Node * srcNode = callNode->getChild(3);2239TR::Node * srcOffsetNode = callNode->getChild(4);2240TR::Node * srcPrecNode = callNode->getChild(5);22412242TR::Node * shiftNode = callNode->getChild(6);22432244int srcPrec = srcPrecNode->getInt();2245int dstPrec = dstPrecNode->getInt();2246int shiftAmount = shiftNode->getInt();2247char* failMsg = NULL;22482249if (!isChildConst(callNode, 2) || !isChildConst(callNode, 5) ||2250!isChildConst(callNode, 6) || !isChildConst(callNode, 7))2251failMsg = "Child (2|5|6|7) is not 
constant";2252else if (srcPrec < 1)2253failMsg = "Invalid precision. Source precision can not be less than 1";2254else if (dstPrec < 1)2255failMsg = "Invalid precision. Destination precision can not be less than 1";2256else if (srcPrec > 15)2257failMsg = "Invalid precision. Source precision can not be greater than 15";2258else if (dstPrec > 15)2259failMsg = "Invalid precision. Destination precision can not be greater than 15";2260else if (shiftAmount < 0)2261failMsg = "Invalid shift amount. Shift amount can not be less than 0";22622263if (!performTransformation(comp(), "O^O TR_DataAccessAccelerator: genShiftLeft call: %p inlined.\n", callNode) && !failMsg)2264failMsg = "Not allowed";22652266if (failMsg)2267{2268TR::DebugCounter::incStaticDebugCounter(comp(),2269TR::DebugCounter::debugCounterName(comp(),2270"DAA/rejected/shl"));2271return printInliningStatus(false, callNode, failMsg);2272}22732274TR::DebugCounter::incStaticDebugCounter(comp(),2275TR::DebugCounter::debugCounterName(comp(),2276"DAA/inlined/shl"));22772278TR::Node* srcAddrNode = constructAddressNode(callNode, srcNode, srcOffsetNode);2279TR::Node* outOfLineCopyBackAddr = constructAddressNode(callNode, dstNode, dstOffsetNode);2280TR::Node* pdStoreAddrNode = constructAddressNode(callNode, dstNode, dstOffsetNode);22812282//pdload:2283TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, srcAddrNode);2284TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, srcAddrNode, 8, fe());2285symRef->setOffset(0);2286pdload->setSymbolReference(symRef);2287pdload->setDecimalPrecision(srcPrec);22882289// Always use BCDCHK node for exception handling (invalid digits/sign).2290TR::Node * pdshlNode = TR::Node::create(TR::pdshlOverflow, 2, pdload, shiftNode);2291pdshlNode->setDecimalPrecision(dstPrec);22922293TR::SymbolReference* bcdChkSymRef = callNode->getSymbolReference();2294TR::Node* bcdchkNode = TR::Node::createWithSymRef(TR::BCDCHK, 10, 10,2295pdshlNode, 
outOfLineCopyBackAddr,2296callNode->getChild(0), callNode->getChild(1),2297callNode->getChild(2), callNode->getChild(3),2298callNode->getChild(4), callNode->getChild(5),2299callNode->getChild(6), callNode->getChild(7),2300bcdChkSymRef);23012302// Set inlined site index to make sure AOT TR_RelocationRecordConstantPool::computeNewConstantPool() API can2303// correctly compute a new CP to relocate DAA OOL calls.2304bcdchkNode->setInlinedSiteIndex(callNode->getInlinedSiteIndex());23052306//following pdstore2307TR::ILOpCodes op = comp()->il.opCodeForIndirectStore(TR::PackedDecimal);2308TR::SymbolReference * symRefPdstore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, outOfLineCopyBackAddr, 8, fe());2309TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, dstPrec));2310symStore->setArrayShadowSymbol();2311symRefPdstore->setSymbol(symStore);23122313TR::Node * pdstore = TR::Node::create(op, 2, pdStoreAddrNode, pdshlNode);2314pdstore->setSymbolReference(symRefPdstore);2315pdstore->setDecimalPrecision(dstPrec);23162317//gen treeTop tops2318TR::TreeTop * nextTT = treeTop->getNextTreeTop();2319TR::TreeTop * prevTT = treeTop->getPrevTreeTop();2320TR::TreeTop * pdstoreTT = TR::TreeTop::create(comp(), pdstore);23212322prevTT->join(treeTop);2323treeTop->setNode(bcdchkNode);2324treeTop->join(pdstoreTT);2325pdstoreTT->join(nextTT);23262327callNode->recursivelyDecReferenceCount();2328return printInliningStatus(true, callNode);2329}23302331bool TR_DataAccessAccelerator::generateUD2PD(TR::TreeTop* treeTop, TR::Node* callNode, bool isUD2PD)2332{2333TR::Node * decimalNode = callNode->getChild(0);2334TR::Node * decimalOffsetNode = callNode->getChild(1);2335TR::Node * pdNode = callNode->getChild(2);2336TR::Node * pdOffsetNode = callNode->getChild(3);2337TR::Node * precNode = callNode->getChild(4);2338TR::Node * typeNode = callNode->getChild(5);23392340//first, 
check decimalType2341int type = typeNode->getInt();2342int prec = precNode->getInt();2343char* failMsg = NULL;23442345if (!isChildConst(callNode, 4) || !isChildConst(callNode, 5))2346failMsg = "Child (4|5) is not constant";2347else if (isUD2PD && type != 5 && type != 6 && type != 7)2348failMsg = "Invalid decimal type. Supported types are (5|6|7)";2349else if (!isUD2PD && (type < 1 || type > 4))2350failMsg = "Invalid decimal type. Supported types are (1|2|3|4)";2351else if (prec < 1 || prec > 31)2352failMsg = "Invalid precision. Valid precision is in range [1, 31]";23532354if (failMsg)2355{2356TR::DebugCounter::incStaticDebugCounter(comp(),2357TR::DebugCounter::debugCounterName(comp(),2358"DAA/rejected/ud2pd"));23592360return printInliningStatus(false, callNode, failMsg);2361}23622363if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: generate UD2PD/ED2PD call: %p inlined.\n", callNode))2364{2365TR::DebugCounter::incStaticDebugCounter(comp(),2366TR::DebugCounter::debugCounterName(comp(),2367"DAA/inlined/ud2pd"));23682369TR::ILOpCodes loadOp;2370TR::DataType dt = TR::DataTypes::NoType; //unicode data type, as it could be unsigned decimal, sign trailing or sign leading.23712372switch (type)2373{2374case 1:2375loadOp = TR::zdloadi;2376dt = TR::ZonedDecimal;2377break;2378case 2:2379loadOp = TR::zdsleLoadi;2380dt = TR::ZonedDecimalSignLeadingEmbedded;2381break;2382case 3:2383loadOp = TR::zdstsLoadi;2384dt = TR::ZonedDecimalSignTrailingSeparate;2385break;2386case 4:2387loadOp = TR::zdslsLoadi;2388dt = TR::ZonedDecimalSignLeadingSeparate;2389break;2390case 5:2391loadOp = TR::udLoadi;2392dt = TR::UnicodeDecimal;2393break;2394case 6:2395loadOp = TR::udslLoadi;2396dt = TR::UnicodeDecimalSignLeading;2397break;2398case 7:2399loadOp = TR::udstLoadi;2400dt = TR::UnicodeDecimalSignTrailing;2401break;2402default:2403TR_ASSERT(false, "illegal decimalType.\n");2404}24052406//create decimalload2407TR::Node * decimalAddressNode;2408int offset = 
decimalOffsetNode->getInt();2409TR::Node * twoConstNode;2410TR::Node * multipliedOffsetNode;2411TR::Node * totalOffsetNode;2412TR::Node * headerConstNode;2413if (comp()->target().is64Bit())2414{2415headerConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);2416headerConstNode->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());2417twoConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);2418twoConstNode->setLongInt(isUD2PD ? 2 : 1);2419multipliedOffsetNode = TR::Node::create(TR::lmul, 2,2420TR::Node::create(TR::i2l, 1, decimalOffsetNode), twoConstNode);2421totalOffsetNode = TR::Node::create(TR::ladd, 2, headerConstNode, multipliedOffsetNode);2422decimalAddressNode = TR::Node::create(TR::aladd, 2, decimalNode, totalOffsetNode);2423}2424else2425{2426headerConstNode = TR::Node::create(callNode, TR::iconst, 0,2427TR::Compiler->om.contiguousArrayHeaderSizeInBytes());2428twoConstNode = TR::Node::create(callNode, TR::iconst, 0, isUD2PD ? 2 : 1);2429multipliedOffsetNode = TR::Node::create(TR::imul, 2, decimalOffsetNode, twoConstNode);2430totalOffsetNode = TR::Node::create(TR::iadd, 2, headerConstNode, multipliedOffsetNode);2431decimalAddressNode = TR::Node::create(TR::aiadd, 2, decimalNode, totalOffsetNode);2432}24332434decimalAddressNode->setIsInternalPointer(true);2435TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(dt, decimalAddressNode, 8, fe());2436symRef->setOffset(0);24372438TR::Node * decimalload = TR::Node::create(loadOp, 1, decimalAddressNode);2439decimalload->setSymbolReference(symRef);2440decimalload->setDecimalPrecision(prec);24412442//create PDaddress2443TR::Node * pdAddressNode = constructAddressNode(callNode, pdNode, pdOffsetNode);24442445int elementSize = isUD2PD ? 
TR::DataType::getUnicodeElementSize() : TR::DataType::getZonedElementSize();24462447//bound values2448int pdPrecSize = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec) - 1;2449int decimalPrecSize = (TR::DataType::getSizeFromBCDPrecision(dt, prec) / elementSize) - 1;2450TR::Node * pdBndvalue = TR::Node::create(TR::iadd, 2,2451pdOffsetNode,2452TR::Node::create(callNode, TR::iconst, 0, pdPrecSize));2453TR::Node * decimalBndvalue = TR::Node::create(TR::iadd, 2,2454decimalOffsetNode,2455TR::Node::create(callNode, TR::iconst, 0, decimalPrecSize)); //size of unicode is 2 bytes24562457//create ud2pd2458TR::ILOpCodes op = TR::BadILOp;2459TR::ILOpCodes interOp = TR::BadILOp;2460switch (type)2461{2462case 1:2463op = TR::zd2pd;2464break;2465case 2:2466interOp = TR::zdsle2zd;2467op = TR::zd2pd;2468break;2469case 3:2470interOp = TR::zdsts2zd;2471op = TR::zd2pd;2472break;2473case 4:2474interOp = TR::zdsls2zd;2475op = TR::zd2pd;2476break;2477case 5:2478op = TR::ud2pd;2479break;2480case 6:2481op = TR::udsl2pd;2482break;2483case 7:2484op = TR::udst2pd;2485break;2486default:2487TR_ASSERT(false, "illegal decimalType.\n");2488}24892490TR::Node * decimal2pdNode = NULL;2491if (isUD2PD || type == 1)2492{2493//for converting zd to pd (here type == 1), dont need the additional intermediate conversion2494decimal2pdNode = TR::Node::create(op, 1, decimalload);2495}2496else //ED2PD, needs intermediate conversion2497{2498TR::Node * decimal2zdNode = TR::Node::create(interOp, 1, decimalload);2499decimal2zdNode->setDecimalPrecision(prec);2500decimal2pdNode = TR::Node::create(op, 1, decimal2zdNode);2501}2502decimal2pdNode->setDecimalPrecision(prec);25032504//create pdstore2505TR::Node * pdstoreNode = TR::Node::create(TR::pdstorei, 2, pdAddressNode, decimal2pdNode);2506TR::SymbolReference * symRefStore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, pdAddressNode, 8, fe());2507TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), 
TR::PackedDecimal, TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec));2508symRefStore->setSymbol(symStore);25092510pdstoreNode->setSymbolReference(symRefStore);2511pdstoreNode->setDecimalPrecision(prec);25122513//set up bndchks, and null chks2514TR::Node * pdPassThroughNode = TR::Node::create(TR::PassThrough, 1, pdNode);2515TR::Node * decimalPassThroughNode = TR::Node::create(TR::PassThrough, 1, decimalNode);25162517TR::Node * pdArrayLengthNode = TR::Node::create(TR::arraylength, 1, pdNode);2518TR::Node * decimalArrayLengthNode = TR::Node::create(TR::arraylength, 1, decimalNode);25192520TR::Node * pdNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, pdPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));2521TR::Node * decimalNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, decimalPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));25222523TR::Node * pdBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2524TR::Node * pdBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2525TR::Node * decimalBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalOffsetNode,2526getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2527TR::Node * decimalBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalBndvalue,2528getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));25292530//gen tree tops2531TR::TreeTop * pdNullChktt = TR::TreeTop::create(comp(), pdNullChkNode);2532TR::TreeTop * decimalNullChktt = TR::TreeTop::create(comp(), decimalNullChkNode);25332534TR::TreeTop * pdBndChktt1 = TR::TreeTop::create(comp(), 
pdBndChk);2535TR::TreeTop * pdBndChktt2 = TR::TreeTop::create(comp(), pdBndChk2);2536TR::TreeTop * decimalBndChktt1 = TR::TreeTop::create(comp(), decimalBndChk );2537TR::TreeTop * decimalBndChktt2 = TR::TreeTop::create(comp(), decimalBndChk2);25382539TR::TreeTop * ttPdstore = TR::TreeTop::create(comp(), pdstoreNode);254025412542//link together2543TR::TreeTop * nextTT = treeTop->getNextTreeTop();2544TR::TreeTop * prevTT = treeTop->getPrevTreeTop();25452546prevTT->join(decimalNullChktt);2547decimalNullChktt->join(pdNullChktt);2548pdNullChktt->join(decimalBndChktt1);2549decimalBndChktt1->join(decimalBndChktt2);2550decimalBndChktt2->join(pdBndChktt1);2551pdBndChktt1->join(pdBndChktt2);2552pdBndChktt2->join(ttPdstore);2553ttPdstore->join(nextTT);25542555callNode->recursivelyDecReferenceCount();2556return true;2557}25582559return false;2560}25612562bool TR_DataAccessAccelerator::generatePD2UD(TR::TreeTop* treeTop, TR::Node* callNode, bool isPD2UD)2563{2564TR::Node * pdNode = callNode->getChild(0);2565TR::Node * pdOffsetNode = callNode->getChild(1);2566TR::Node * decimalNode = callNode->getChild(2);2567TR::Node * decimalOffsetNode = callNode->getChild(3);2568TR::Node * precNode = callNode->getChild(4);2569TR::Node * typeNode = callNode->getChild(5);25702571//first, check decimalType2572int type = typeNode->getInt();2573char* failMsg = NULL;2574int prec = precNode->getInt();25752576if (!isChildConst(callNode, 4) || !isChildConst(callNode, 5))2577failMsg = "Child (4|5) is not constant";2578else if (isPD2UD && type != 5 && type != 6 && type != 7)2579failMsg = "Invalid decimal type. Supported types are (5|6|7)";2580else if (!isPD2UD && (type < 1 || type > 4)) //PD2ED2581failMsg = "Invalid decimal type. Supported types are (1|2|3|4)";2582else if (prec < 1 || prec > 31)2583failMsg = "Invalid precision. 
Valid precision is in range [1, 31]";25842585if (failMsg)2586{2587TR::DebugCounter::incStaticDebugCounter(comp(),2588TR::DebugCounter::debugCounterName(comp(),2589"DAA/rejected/%s",2590isPD2UD ? "pd2ud" : "pd2ed"));25912592return printInliningStatus(false, callNode, failMsg);2593}25942595if (performTransformation(comp(), "O^O TR_DataAccessAccelerator: generate PD2UD/PD2ED call: %p inlined.\n", callNode))2596{2597TR::DebugCounter::incStaticDebugCounter(comp(),2598TR::DebugCounter::debugCounterName(comp(),2599"DAA/inlined/%s",2600isPD2UD ? "pd2ud" : "pd2ed"));26012602//set up pdload:2603TR::Node * arrayAddressNode = constructAddressNode(callNode, pdNode, pdOffsetNode);26042605int size = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec) - 1;2606TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(TR::PackedDecimal, arrayAddressNode, 8, fe());2607symRef->setOffset(0);26082609TR::Node * pdload = TR::Node::create(TR::pdloadi, 1, arrayAddressNode);2610pdload->setSymbolReference(symRef);2611pdload->setDecimalPrecision(prec);26122613//set up decimal arrayAddressNode2614TR::Node * decimalAddressNode;2615{2616TR::Node * twoConstNode;2617TR::Node * multipliedOffsetNode;2618TR::Node * totalOffsetNode;2619TR::Node * headerConstNode;2620if (comp()->target().is64Bit())2621{2622headerConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);2623headerConstNode->setLongInt(TR::Compiler->om.contiguousArrayHeaderSizeInBytes());2624twoConstNode = TR::Node::create(callNode, TR::lconst, 0, 0);2625twoConstNode->setLongInt(isPD2UD ? 
2 : 1);2626multipliedOffsetNode = TR::Node::create(TR::lmul, 2,2627TR::Node::create(TR::i2l, 1, decimalOffsetNode), twoConstNode);2628totalOffsetNode = TR::Node::create(TR::ladd, 2, headerConstNode, multipliedOffsetNode);2629decimalAddressNode = TR::Node::create(TR::aladd, 2, decimalNode, totalOffsetNode);2630}2631else2632{2633headerConstNode = TR::Node::create(callNode, TR::iconst, 0,2634TR::Compiler->om.contiguousArrayHeaderSizeInBytes());2635twoConstNode = TR::Node::create(callNode, TR::iconst, 0, isPD2UD ? 2 : 1);2636multipliedOffsetNode = TR::Node::create(TR::imul, 2, decimalOffsetNode, twoConstNode);2637totalOffsetNode = TR::Node::create(TR::iadd, 2, headerConstNode, multipliedOffsetNode);2638decimalAddressNode = TR::Node::create(TR::aiadd, 2, decimalNode, totalOffsetNode);2639}2640decimalAddressNode->setIsInternalPointer(true);2641}26422643//set up pd2decimal node2644TR::ILOpCodes op = TR::BadILOp;2645TR::ILOpCodes storeOp = TR::BadILOp;2646TR::ILOpCodes interOp = TR::BadILOp;2647TR::DataType dt = TR::NoType;2648switch (type)2649{2650case 1:2651op = TR::pd2zd;2652storeOp = TR::zdstorei;2653dt = TR::ZonedDecimal;2654break;2655case 2:2656op = TR::zd2zdsle;2657interOp = TR::pd2zd;2658storeOp = TR::zdsleStorei;2659dt = TR::ZonedDecimalSignLeadingEmbedded;2660break;2661case 3:2662op = TR::zd2zdsts;2663interOp = TR::pd2zd;2664storeOp = TR::zdstsStorei;2665dt = TR::ZonedDecimalSignTrailingSeparate;2666break;2667case 4:2668op = TR::zd2zdsls;2669interOp = TR::pd2zd;2670storeOp = TR::zdslsStorei;2671dt = TR::ZonedDecimalSignLeadingSeparate;2672break;2673case 5:2674op = TR::pd2ud;2675interOp = TR::pd2ud;2676storeOp = TR::udStorei;2677dt = TR::UnicodeDecimal;2678break;2679case 6:2680op = TR::pd2udsl;2681interOp = TR::pd2ud;2682storeOp = TR::udslStorei;2683dt = TR::UnicodeDecimalSignLeading;2684break;2685case 7:2686op = TR::pd2udst;2687interOp = TR::pd2ud;2688storeOp = TR::udstStorei;2689dt = TR::UnicodeDecimalSignTrailing;2690break;2691default:2692TR_ASSERT(false, 
"unsupported decimalType.\n");2693}26942695TR::Node * pd2decimalNode = NULL;2696if (isPD2UD || type == 1)2697{2698pd2decimalNode = TR::Node::create(op, 1, pdload);2699}2700else //ED2PD2701{2702TR::Node * toZDNode = TR::Node::create(interOp, 1, pdload);2703toZDNode->setDecimalPrecision(precNode->getInt());2704pd2decimalNode = TR::Node::create(op, 1, toZDNode);2705}2706pd2decimalNode->setDecimalPrecision(precNode->getInt());27072708//set up decimalStore node2709TR::SymbolReference * symRefDecimalStore = comp()->getSymRefTab()->findOrCreateArrayShadowSymbolRef(dt, decimalAddressNode, 8, fe());2710TR::Symbol * symStore = TR::Symbol::createShadow(comp()->trHeapMemory(), dt, TR::DataType::getSizeFromBCDPrecision(dt, prec));2711symStore->setArrayShadowSymbol();2712symRefDecimalStore->setSymbol(symStore);27132714TR::Node * decimalStore = TR::Node::create(storeOp, 2, decimalAddressNode, pd2decimalNode);2715decimalStore->setSymbolReference(symRefDecimalStore);2716decimalStore->setDecimalPrecision(precNode->getInt());27172718//set up bndchks, and null chks2719TR::Node * pdPassThroughNode = TR::Node::create(TR::PassThrough, 1, pdNode);2720TR::Node * decimalPassThroughNode = TR::Node::create(TR::PassThrough, 1, decimalNode);2721int elementSize = isPD2UD ? 
TR::DataType::getUnicodeElementSize() : TR::DataType::getZonedElementSize();2722int pdPrecSize = TR::DataType::getSizeFromBCDPrecision(TR::PackedDecimal, prec) - 1;2723int decimalPrecSize = (TR::DataType::getSizeFromBCDPrecision(dt, prec) / elementSize) - 1;2724TR::Node * pdBndvalue = TR::Node::create(TR::iadd, 2, pdOffsetNode, TR::Node::create(callNode, TR::iconst, 0, pdPrecSize));2725TR::Node * decimalBndvalue = TR::Node::create(TR::iadd, 2, decimalOffsetNode, TR::Node::create(callNode, TR::iconst, 0, decimalPrecSize)); //size of unicode is 2 bytes27262727TR::Node * pdArrayLengthNode = TR::Node::create(TR::arraylength, 1, pdNode);2728TR::Node * decimalArrayLengthNode = TR::Node::create(TR::arraylength, 1, decimalNode);27292730TR::Node * pdNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, pdPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));2731TR::Node * decimalNullChkNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, decimalPassThroughNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_nullCheck, false, true, true));27322733TR::Node * pdBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2734TR::Node * pdBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, pdArrayLengthNode, pdBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2735TR::Node * decimalBndChk = TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalOffsetNode, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));2736TR::Node * decimalBndChk2 =TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, decimalArrayLengthNode, decimalBndvalue, getSymRefTab()->findOrCreateRuntimeHelper(TR_arrayBoundsCheck, false, true, true));27372738//gen tree tops2739TR::TreeTop * nextTT = treeTop->getNextTreeTop();2740TR::TreeTop * prevTT = 
treeTop->getPrevTreeTop();27412742TR::TreeTop * pdNullChktt = TR::TreeTop::create(comp(), pdNullChkNode);2743TR::TreeTop * decimalNullChktt = TR::TreeTop::create(comp(), decimalNullChkNode);27442745TR::TreeTop * pdBndChktt1 = TR::TreeTop::create(comp(), pdBndChk);2746TR::TreeTop * pdBndChktt2 = TR::TreeTop::create(comp(), pdBndChk2);2747TR::TreeTop * decimalBndChktt1 = TR::TreeTop::create(comp(), decimalBndChk);2748TR::TreeTop * decimalBndChktt2 = TR::TreeTop::create(comp(), decimalBndChk2);27492750TR::TreeTop * decimalStoreTT = TR::TreeTop::create(comp(), decimalStore);27512752prevTT->join(pdNullChktt);2753pdNullChktt->join(decimalNullChktt);2754decimalNullChktt->join(pdBndChktt1);2755pdBndChktt1->join(pdBndChktt2);2756pdBndChktt2->join(decimalBndChktt1);2757decimalBndChktt1->join(decimalBndChktt2);2758decimalBndChktt2->join(decimalStoreTT);2759decimalStoreTT->join(nextTT);27602761callNode->recursivelyDecReferenceCount();27622763return true;2764}27652766return false;2767}27682769void TR_DataAccessAccelerator::insertByteArrayNULLCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode)2770{2771TR::Compilation* comp = OMR::Optimization::comp();27722773callTreeTop->insertBefore(TR::TreeTop::create(comp, TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, TR::Node::create(TR::PassThrough, 1, byteArrayNode), comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(callNode->getSymbol()->getResolvedMethodSymbol()))));2774}27752776void TR_DataAccessAccelerator::insertByteArrayBNDCHK(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode, int32_t index)2777{2778TR::Compilation* comp = OMR::Optimization::comp();27792780if (index != 0)2781{2782offsetNode = TR::Node::create(TR::iadd, 2, offsetNode, TR::Node::create(callNode, TR::iconst, 0, index));2783}27842785TR::Node* arraylengthNode = TR::Node::create(TR::arraylength, 1, byteArrayNode);27862787// byte[] is always of type TR::Int8 so set the appropriate 
stride2788arraylengthNode->setArrayStride(TR::Symbol::convertTypeToSize(TR::Int8));27892790callTreeTop->insertBefore(TR::TreeTop::create(comp, TR::Node::createWithSymRef(TR::BNDCHK, 2, 2, arraylengthNode, offsetNode, comp->getSymRefTab()->findOrCreateArrayBoundsCheckSymbolRef(callNode->getSymbol()->getResolvedMethodSymbol()))));2791}27922793TR::Node* TR_DataAccessAccelerator::createByteArrayElementAddress(TR::TreeTop* callTreeTop, TR::Node* callNode, TR::Node* byteArrayNode, TR::Node* offsetNode)2794{2795TR::CodeGenerator* cg = comp()->cg();27962797TR::Node* byteArrayElementAddressNode;27982799if (comp()->target().is64Bit())2800{2801byteArrayElementAddressNode = TR::Node::create(TR::aladd, 2, byteArrayNode, TR::Node::create(TR::ladd, 2, TR::Node::create(callNode, TR::lconst, 0, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()), TR::Node::create(TR::i2l, 1, offsetNode)));2802}2803else2804{2805byteArrayElementAddressNode = TR::Node::create(TR::aiadd, 2, byteArrayNode, TR::Node::create(TR::iadd, 2, TR::Node::create(callNode, TR::iconst, 0, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()), offsetNode));2806}28072808// This node is pointing to an array element so we must mark it as such2809byteArrayElementAddressNode->setIsInternalPointer(true);28102811return byteArrayElementAddressNode;2812}281328142815