Path: blob/master/runtime/compiler/optimizer/IdiomRecognition.cpp
6000 views
/*******************************************************************************1* Copyright (c) 2000, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include "optimizer/IdiomRecognition.hpp"2324#include <stdint.h>25#include <stdio.h>26#include <stdlib.h>27#include <string.h>28#include "codegen/CodeGenerator.hpp"29#include "env/FrontEnd.hpp"30#include "codegen/RecognizedMethods.hpp"31#include "compile/Compilation.hpp"32#include "compile/CompilationTypes.hpp"33#include "control/Options.hpp"34#include "control/Options_inlines.hpp"35#include "control/Recompilation.hpp"36#include "control/RecompilationInfo.hpp"37#include "cs2/bitvectr.h"38#include "env/CompilerEnv.hpp"39#include "env/StackMemoryRegion.hpp"40#include "env/TRMemory.hpp"41#include "il/Block.hpp"42#include "il/DataTypes.hpp"43#include "il/ILOpCodes.hpp"44#include "il/ILOps.hpp"45#include "il/MethodSymbol.hpp"46#include "il/Node.hpp"47#include "il/Node_inlines.hpp"48#include "il/Symbol.hpp"49#include "il/SymbolReference.hpp"50#include "il/TreeTop.hpp"51#include "il/TreeTop_inlines.hpp"52#include "infra/Assert.hpp"53#include "infra/BitVector.hpp"54#include "infra/Cfg.hpp"55#include "infra/Flags.hpp"56#include "infra/HashTab.hpp"57#include "infra/Link.hpp"58#include "infra/List.hpp"59#include "infra/SimpleRegex.hpp"60#include "infra/TRCfgEdge.hpp"61#include "infra/TRCfgNode.hpp"62#include "optimizer/IdiomRecognitionUtils.hpp"63#include "optimizer/LoopCanonicalizer.hpp"64#include "optimizer/Optimization_inlines.hpp"65#include "optimizer/OptimizationManager.hpp"66#include "optimizer/Optimizer.hpp"67#include "optimizer/Structure.hpp"68#include "optimizer/TransformUtil.hpp"69#include "optimizer/UseDefInfo.hpp"70#include "ras/Debug.hpp"71#include "omrformatconsts.h"72#if defined(J9VM_OPT_JITSERVER)73#include "env/JITServerAllocationRegion.hpp"74#endif7576#define OPT_DETAILS "O^O NEWLOOPREDUCER: "77#define VERBOSE 078#define STRESS_TEST 079#define ALLOW_FAST_VERSIONED_LOOP 180#define EXCLUDE_COLD_LOOP (!STRESS_TEST)81#define SHOW_CANDIDATES 08283#define IDIOM_SIZE_FACTOR 1584#define MAX_PREPARED_GRAPH (36+STRESS_TEST*5)85static TR_CISCGraph *preparedCISCGraphs[MAX_PREPARED_GRAPH];86static int32_t numPreparedCISCGraphs;87static TR_Hotness minimumHotnessPrepared;8889#define SHOW_BCINDICES 190#define SHOW_STATISTICS 19192/************************************/93/*********** ************/94/*********** TR_CISCNode ************/95/*********** ************/96/************************************/9798void99TR_CISCNode::initializeMembers(uint32_t opc, uint16_t id, int16_t dagId, uint16_t ncfgs, uint16_t nchildren)100{101initializeLists();102_flags.clear();103setOpcode(opc);104_id = id;105_dagId = dagId;106_numChildren = nchildren;107_numSuccs = ncfgs;108_latestDest = 0;109_otherInfo = 0;110if (_ilOpCode.isStoreDirect()) setIsStoreDirect();111switch(opc)112{113case TR_ahconst:114setIsLightScreening();115// fall through116case TR_allconst:117case TR_variable:118case TR_variableORconst:119case TR_quasiConst:120case TR_quasiConst2:121case TR_arrayindex:122case TR_arraybase:123setIsNecessaryScreening();124break;125}126}127128//*****************************************************************************************129// It analyzes pOpc and tOpc are equivalent.130// Note that it handles wildcard nodes as shown below.131//*****************************************************************************************132bool133TR_CISCNode::isEqualOpc(TR_CISCNode *t)134{135//TR_ASSERT((int)TR::NumIlOps == TR_variable, "assumption for reducing compilation time");136static_assert((int)TR::NumIlOps == TR_variable,137"assumption for reducing compilation time");138139int32_t pOpc = _opcode;140int32_t tOpc = t->_opcode;141142if (pOpc == tOpc) return true;143else if (pOpc > TR::NumIlOps) // Please see the above assumption144{145switch(pOpc)146{147case TR_booltable:148return (tOpc == TR::Case || t->_ilOpCode.isIf()) && !t->isOutsideOfLoop();149case TR_variableORconst:150return (tOpc == TR_variable || t->_ilOpCode.isLoadConst());151case TR_quasiConst2:152if (tOpc == TR::iloadi)153return !t->getHeadOfTrNode()->getSymbol()->isArrayShadowSymbol(); // true if non-array154// else fall through155case TR_quasiConst:156return (tOpc == TR_variable || t->_ilOpCode.isLoadConst() || tOpc == TR::arraylength);157case TR_iaddORisub:158return (tOpc == TR::iadd || tOpc == TR::isub);159case TR_conversion:160return (t->_ilOpCode.isConversion());161case TR_ifcmpall:162return (t->_ilOpCode.isIf());163case TR_ishrall:164return (tOpc == TR::ishr || tOpc == TR::iushr);165case TR_bitop1:166return (t->_ilOpCode.isAnd() || t->_ilOpCode.isOr() || t->_ilOpCode.isXor());167case TR_arrayindex:168return (tOpc == TR_variable || tOpc == TR::iadd);169case TR_arraybase:170return (tOpc == TR_variable || tOpc == TR::aloadi);171case TR_ahconst:172case TR_allconst:173return t->_ilOpCode.isLoadConst();174case TR_inbload:175return (t->_ilOpCode.isLoadIndirect() && !t->_ilOpCode.isByte());176case TR_inbstore:177return (t->_ilOpCode.isStoreIndirect() && !t->_ilOpCode.isByte());178case TR_indload:179return (t->_ilOpCode.isLoadIndirect());180case TR_indstore:181return (t->_ilOpCode.isStoreIndirect() || tOpc == TR::awrtbari);182case TR_ibcload:183return (t->_ilOpCode.isLoadIndirect() && (t->_ilOpCode.isByte() || (t->_ilOpCode.isShort() && t->_ilOpCode.isUnsigned())));184case TR_ibcstore:185return (t->_ilOpCode.isStoreIndirect() && (t->_ilOpCode.isByte() || (t->_ilOpCode.isShort() && t->_ilOpCode.isUnsigned())));186}187}188return false;189}190191//*****************************************************************************************192// convert opcode to its name for debug print193//*****************************************************************************************194const char *195TR_CISCNode::getName(TR_CISCOps op, TR::Compilation * comp)196{197if (op < (TR_CISCOps)TR::NumIlOps)198{199TR::ILOpCode opCode;200opCode.setOpCodeValue((enum TR::ILOpCodes)op);201return opCode.getName();202}203switch(op)204{205case TR_variable: return "Var";206case TR_booltable: return "booltable";207case TR_entrynode: return "entrynode";208case TR_exitnode: return "exitnode";209case TR_ahconst: return "ahconst";210case TR_allconst: return "constall";211case TR_inbload: return "inbload";212case TR_inbstore: return "inbstore";213case TR_indload: return "indload";214case TR_indstore: return "indstore";215case TR_ibcload: return "ibcload";216case TR_ibcstore: return "ibcstore";217case TR_variableORconst: return "variableORconst";218case TR_quasiConst: return "quasiConst";219case TR_quasiConst2: return "quasiConst2";220case TR_iaddORisub: return "iaddORisub";221case TR_arrayindex: return "arrayindex";222case TR_arraybase: return "arraybase";223case TR_conversion: return "conversion";224case TR_ifcmpall: return "ifcmpall";225case TR_ishrall: return "ishrall";226case TR_bitop1: return "bitop1";227}228return "Unknown";229}230231232//*****************************************************************************************233// Debug print to file234//*****************************************************************************************235void236TR_CISCNode::dump(TR::FILE *pOutFile, TR::Compilation * comp)237{238int32_t i;239char buf[256];240const char *name = getName((TR_CISCOps)_opcode, comp);241if (isValidOtherInfo())242{243sprintf(buf, "%s %d", name, _otherInfo);244}245else246{247sprintf(buf, "%s", name);248}249traceMsg(comp, "[%p] %3d %2d%c %-11s", this, _id, _dagId, isOutsideOfLoop() ? ' ' : 'L', buf);250traceMsg(comp, " [");251for (i = 0; i < _numSuccs; i++)252{253traceMsg(comp, "%d",_succs[i]->_id);254if (i < _numSuccs-1) traceMsg(comp, " ");255}256traceMsg(comp, "]");257traceMsg(comp, " [");258for (i = 0; i < _numChildren; i++)259{260traceMsg(comp, "%d",_children[i]->_id);261if (i < _numChildren-1) traceMsg(comp, " ");262}263traceMsg(comp, "]");264265ListIterator<TR_CISCNode> ci(&_chains);266TR_CISCNode *n;267if (!_chains.isEmpty())268{269traceMsg(comp, " chains[");270for (n = ci.getFirst(); n; n = ci.getNext())271{272traceMsg(comp, "%d ",n->_id);273}274traceMsg(comp, "]");275}276277if (!_dest.isEmpty())278{279traceMsg(comp, " dest=");280ci.set(&_dest);281for (n = ci.getFirst(); n; n = ci.getNext())282{283traceMsg(comp, "%d ",n->_id);284}285}286287if (!_hintChildren.isEmpty())288{289traceMsg(comp, " hint=");290ci.set(&_hintChildren);291for (n = ci.getFirst(); n; n = ci.getNext())292{293traceMsg(comp, "%d ",n->_id);294}295}296297#if 0298if (!_preds.isEmpty())299{300ci.set(&_preds);301traceMsg(comp, " preds[");302for (n = ci.getFirst(); n; n = ci.getNext())303{304traceMsg(comp, "%d ",n->_id);305}306traceMsg(comp, "]");307}308#endif309310if (isCISCNodeModified())311{312traceMsg(comp, "\t(Modified)");313}314315if (isOptionalNode())316{317traceMsg(comp, "\t(Optional)");318}319320// Print out the TR_Nodes that correspond with the given CISCNode.321if (getTrNodeInfo())322{323if (!getTrNodeInfo()->isEmpty())324{325traceMsg(comp, "\tTR::Node:[");326ListIterator<TrNodeInfo> ni(getTrNodeInfo());327for (TrNodeInfo *n = ni.getFirst(); n != NULL; n = ni.getNext())328{329traceMsg(comp, "%s,",comp->getDebug()->getName((TR::Node*)n->_node));330}331traceMsg(comp, "]");332}333}334traceMsg(comp, "\n");335}336337338//*****************************************************************************************339// Debug print to stdout340//*****************************************************************************************341void342TR_CISCNode::printStdout()343{344int32_t i;345char buf[256];346if (isValidOtherInfo())347{348sprintf(buf, "%d %d", _opcode, _otherInfo);349}350else351{352sprintf(buf, "%d", _opcode);353}354printf("[%p] %3d %2d%c %-11s", this, _id, _dagId, isOutsideOfLoop() ? ' ' : 'L', buf);355printf(" [");356for (i = 0; i < _numSuccs; i++)357{358printf("%d",_succs[i]->_id);359if (i < _numSuccs-1) printf(" ");360}361printf("]");362printf(" [");363for (i = 0; i < _numChildren; i++)364{365printf("%d",_children[i]->_id);366if (i < _numChildren-1) printf(" ");367}368printf("]");369370ListIterator<TR_CISCNode> ci(&_chains);371TR_CISCNode *n;372if (!_chains.isEmpty())373{374printf(" chains[");375for (n = ci.getFirst(); n; n = ci.getNext())376{377printf("%d ",n->_id);378}379printf("]");380}381382if (!_dest.isEmpty())383{384printf(" dest=");385ci.set(&_dest);386for (n = ci.getFirst(); n; n = ci.getNext())387{388printf("%d ",n->_id);389}390}391392if (!_hintChildren.isEmpty())393{394printf(" hint=");395ci.set(&_hintChildren);396for (n = ci.getFirst(); n; n = ci.getNext())397{398printf("%d ",n->_id);399}400}401402if (isCISCNodeModified())403{404printf("\t(Modified)");405}406407if (isOptionalNode())408{409printf("\t(Optional)");410}411412printf("\n");413}414415416//*****************************************************************************************417// It replaces the successor and maintains the predecessor of the original successor.418//*****************************************************************************************419void420TR_CISCNode::replaceSucc(uint32_t index, TR_CISCNode *to)421{422TR_ASSERT(index < _numSuccs, "TR_CISCNode::replaceSucc index out of range");423TR_CISCNode *old = _succs[index];424if (old)425{426old->_preds.remove(this);427}428setSucc(index, to);429}430431432//*****************************************************************************************433// It replaces the child and maintains the parent of the original child.434//*****************************************************************************************435void436TR_CISCNode::replaceChild(uint32_t index, TR_CISCNode *ch)437{438TR_ASSERT(index < _numChildren, "TR_CISCNode::replaceChild index out of range");439TR_CISCNode *org = _children[index];440if (org)441{442org->_parents.remove(this);443}444setChild(index, ch);445}446447448449450//*****************************************************************************************451// To sort out leaf nodes, these functions related to checkParent analyze whether ancestors452// of the pattern node and those of the target node are equivalent.453//*****************************************************************************************454455#define DEBUG_CHECKPARENTS 0456#define DEPTH_CHECKPARENTS 7457458#if 0 // recursive version, just for reference459bool460TR_CISCNode::checkParents(TR_CISCNode *const o, const int8_t level)461{462#if DEBUG_CHECKPARENTS463int i;464for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");465traceMsg(comp(), "checkParents %d:%d...\n", _id, o->_id);466#endif467if (level > 0)468{469ListElement<TR_CISCNode> *ple;470for (ple = _parents.getListHead(); ple; ple = ple->getNextElement())471{472TR_CISCNode *const pn = ple->getData();473uint16_t pIndex = 0;474const bool commutative = pn->isCommutative();475if (!commutative)476{477for (; pIndex < pn->getNumChildren(); pIndex++ )478{479if (pn->getChild(pIndex) == this) break;480}481}482TR_ASSERT(pIndex < pn->getNumChildren(), "error!");483ListElement<TR_CISCNode> *tle;484const bool isPnParentsEmpty = pn->_parents.isEmpty();485const bool isPnOptional = pn->isOptionalNode();486for (tle = o->_parents.getListHead(); tle; tle = tle->getNextElement())487{488TR_CISCNode *const tn = tle->getData();489if (pn->isEqualOpc(tn))490{491if ((commutative || tn->getChild(pIndex) == o) &&492pn->checkParents(tn, level-1)) goto find;493}494else495{496if (tn->getIlOpCode().isStoreDirect())497{498if (tn->getChild(0) == o && !pn->isChildDirectlyConnected())499{500if (this->checkParents(tn->getChild(1), level-1)) goto find;501}502}503else504{ /* search one more depth */505ListElement<TR_CISCNode> *tcle;506for (tcle = tn->_parents.getListHead(); tcle; tcle = tcle->getNextElement())507{508if (pn->isEqualOpc(tcle->getData()) &&509(commutative || tcle->getData()->getChild(pIndex) == tn) &&510pn->checkParents(tcle->getData(), level-1)) goto find;511}512}513}514}515TR_ASSERT(!tle, "error");516if (isPnOptional)517{518if (isPnParentsEmpty ||519pn->isSkipParentsCheck() ||520pn->checkParents(o, level-1)) goto find;521}522#if DEBUG_CHECKPARENTS523for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");524traceMsg(comp(), "checkParents %d:%d failed due to pid %d\n", _id, o->_id, pn->_id);525#endif526return false;527528find:;529}530}531#if DEBUG_CHECKPARENTS532for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");533traceMsg(comp(), "checkParents %d:%d succeed\n", _id, o->_id);534#endif535return true;536}537#endif538539540541//*****************************************************************************************542// It recursively marks dead (negligible).543//*****************************************************************************************544void545TR_CISCNode::deadAllChildren()546{547int32_t i;548if (!getParents()->isSingleton() || // this node is multi-referenced or549_ilOpCode.canRaiseException() || // any side effectable instructions550_ilOpCode.isCall() ||551_ilOpCode.isReturn() ||552_ilOpCode.isStore() ||553_ilOpCode.isBranch()) return;554555setIsNegligible();556for (i = _numChildren; --i >= 0; )557_children[i]->deadAllChildren();558}559560561562//*****************************************************************************************563// This class is for creating non-recursive version of checkParents.564//*****************************************************************************************565struct TR_StackForCheckParents566{567TR_StackForCheckParents() {}568TR_StackForCheckParents(TR_CISCNode *p, TR_CISCNode *t,569ListElement<TR_CISCNode> *ple, ListElement<TR_CISCNode> *tle, ListElement<TR_CISCNode> *tcle,570TR_CISCNode *pn, TR_CISCNode *tn,571uint16_t pIndex, int8_t level, int8_t label)572{ initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level, label); }573574void initialize(TR_CISCNode *p, TR_CISCNode *t,575ListElement<TR_CISCNode> *ple, ListElement<TR_CISCNode> *tle, ListElement<TR_CISCNode> *tcle,576TR_CISCNode *pn, TR_CISCNode *tn,577uint16_t pIndex, int8_t level, int8_t label)578{579_p = p;580_t = t;581_ple = ple;582_tle = tle;583_tcle = tcle;584_pn = pn;585_tn = tn;586_pIndex = pIndex;587_level = level;588_label = label;589}590591TR_CISCNode *_p;592TR_CISCNode *_t;593ListElement<TR_CISCNode> *_ple;594ListElement<TR_CISCNode> *_tle;595ListElement<TR_CISCNode> *_tcle;596TR_CISCNode *_pn;597TR_CISCNode *_tn;598uint16_t _pIndex;599int8_t _level;600int8_t _label;601};602603//*****************************************************************************************604// It analyzes ancestors of p and those of t are equivalent.605// The maximum search depth can be given by "level".606// Note: There is a recursive version of this function named "checkParents" above, which607// is easier to understand.608//*****************************************************************************************609bool610TR_CISCNode::checkParentsNonRec(TR_CISCNode *p, TR_CISCNode *t, int8_t level, TR::Compilation *comp)611{612ListElement<TR_CISCNode> *ple = NULL;613ListElement<TR_CISCNode> *tle = NULL;614ListElement<TR_CISCNode> *tcle = NULL;615TR_CISCNode *pn = NULL;616TR_CISCNode *tn = NULL;617uint16_t pIndex;618bool ret;619TR_StackForCheckParents stackParents[DEPTH_CHECKPARENTS];620TR_StackForCheckParents *st;621const int8_t initial_level = level;622623while(true)624{625first:626TR_CISCNode *parm1 = 0;627ret = true;628#if DEBUG_CHECKPARENTS629int32_t i;630for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");631traceMsg(comp, "checkParents %d:%d...\n", p->_id, t->_id);632#endif633if (level > 0)634{635for (ple = p->_parents.getListHead(); ple; ple = ple->getNextElement())636{637pn = ple->getData();638pIndex = 0;639if (!pn->isCommutative())640{641for (; pIndex < pn->getNumChildren(); pIndex++ )642{643if (pn->getChild(pIndex) == p) break;644}645}646TR_ASSERT(pIndex < pn->getNumChildren(), "error!");647for (tle = t->_parents.getListHead(); tle; tle = tle->getNextElement())648{649tn = tle->getData();650TR_CISCNode *parm2;651parm1 = 0;652if (pn->isEqualOpc(tn))653{654if (pn->isCommutative() || tn->getChild(pIndex) == t)655{656parm1 = pn;657parm2 = tn;658}659}660else661{662if (tn->getIlOpCode().isStoreDirect())663{664if (tn->getChild(0) == t && !pn->isChildDirectlyConnected())665{666parm1 = p;667parm2 = tn->getChild(1);668}669}670else671{ /* search one more depth */672for (tcle = tn->_parents.getListHead(); tcle; tcle = tcle->getNextElement())673{674if (pn->isEqualOpc(tcle->getData()) &&675(pn->isCommutative() || tcle->getData()->getChild(pIndex) == tn))676{677level--;678st = stackParents + level;679st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 0);680p = pn;681t = tcle->getData();682goto first;683684label0:;685}686}687}688}689if (parm1)690{691level--;692st = stackParents + level;693st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 1);694p = parm1;695t = parm2;696goto first;697698label1:;699}700}701TR_ASSERT(!tle, "error");702if (pn->isOptionalNode())703{704if (!pn->_parents.isEmpty() && !pn->isSkipParentsCheck())705{706level--;707st = stackParents + level;708st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 2);709p = pn;710//t = t;711goto first;712713label2:;714}715else716goto find;717}718#if DEBUG_CHECKPARENTS719for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");720traceMsg(comp, "checkParents %d:%d failed due to pid %d\n", p->_id, t->_id, pn->_id);721#endif722ret = false;723goto final;724725find:;726}727}728ret = true;729#if DEBUG_CHECKPARENTS730for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");731traceMsg(comp, "checkParents %d:%d succeed\n", p->_id, t->_id);732#endif733final:734if (level == initial_level) break; /* exit loop */735st = stackParents + level;736737p = st->_p;738t = st->_t;739ple = st->_ple;740tle = st->_tle;741tcle = st->_tcle;742pn = st->_pn;743tn = st->_tn;744pIndex = st->_pIndex;745level = st->_level;746parm1 = 0;747748if (ret) goto find;749else750{751if (st->_label == 0) goto label0;752else if (st->_label == 1) goto label1;753else goto label2;754}755}756return ret;757}758759760761//*****************************************************************************************762// Swap successors and reverse the opcode of the branch763//*****************************************************************************************764void765TR_CISCNode::reverseBranchOpCodes()766{767TR_ASSERT(_opcode < TR::NumIlOps && _ilOpCode.isIf(), "error: not isIf");768TR_ASSERT(_numSuccs == 2, "error: _numSuccs != 2");769TR_CISCNode *swap = _succs[0];770_succs[0] = _succs[1];771_succs[1] = swap;772setOpcode(_ilOpCode.getOpCodeForReverseBranch());773}774775776//*****************************************************************************************777// Return whether this node and all nodes in the "_chains" are in the same loop body.778//*****************************************************************************************779bool780TR_CISCNode::checkDagIdInChains()781{782uint16_t thisDagID = _dagId;783ListIterator<TR_CISCNode> ci(&_chains);784TR_CISCNode *c;785for (c = ci.getFirst(); c; c = ci.getNext())786if (c->_dagId != thisDagID) return false;787return true;788}789790//*****************************************************************************************791// Return TreeTop for getBranchDestination or fallthrough792//*****************************************************************************************793TR::TreeTop *794TR_CISCNode::getDestination(bool isFallThrough)795{796TR::TreeTop *ret;797TR::Node *nRepTrNode = getHeadOfTrNode();798if (getOpcode() != nRepTrNode->getOpCodeValue())799{800TR_ASSERT(getOpcode() == nRepTrNode->getOpCode().getOpCodeForReverseBranch(), "error");801isFallThrough = !isFallThrough;802}803804if (isFallThrough)805{806for (ret = getHeadOfTreeTop()->getNextTreeTop();807ret->getNode()->getOpCodeValue() != TR::BBStart;808ret = ret->getNextTreeTop());809}810else811{812ret = nRepTrNode->getBranchDestination();813}814return ret;815}816817818819820/************************************/821/*********** ************/822/*********** TR_CISCHash ************/823/*********** ************/824/************************************/825TR_CISCNode *TR_CISCHash::find(uint64_t key)826{827TR_ASSERT(_numBuckets > 0, "error: _numBuckets == 0!");828uint32_t index = key % _numBuckets;829struct HashTableEntry *p;830for (p = _buckets[index]; p != 0; p = p->_next)831{832if (p->_key == key) return p->_node;833}834return 0;835}836837bool838TR_CISCHash::add(uint64_t key, TR_CISCNode *node, bool checkExist)839{840TR_ASSERT(_numBuckets > 0, "error: _numBuckets == 0!");841uint32_t index = key % _numBuckets;842if (checkExist)843{844struct HashTableEntry *p;845for (p = _buckets[index]; p != 0; p = p->_next)846{847if (p->_key == key) return false;848}849}850struct HashTableEntry *newEntry = (struct HashTableEntry *)trMemory()->allocateMemory(sizeof(*newEntry), _allocationKind);851newEntry->_next = _buckets[index];852newEntry->_key = key;853newEntry->_node = node;854_buckets[index] = newEntry;855return true;856}857858859860861862863864/********************************************/865/*********** ************/866/*********** TR_CISCGraphAspects ************/867/*********** ************/868/********************************************/869//*****************************************************************************************870// We used this property to exclude those idioms which are unlikely to be matched against871// the target loop to limit the extra compilation time.872// We can consider the nodes of an idiom, and if a graph is missing any of those nodes,873// we already know no topological embedding exists.874// For each idiom, we prepared a bit-vector that represents the required IL nodes.875// We compare the bit-vector of every idiom graph with that of the target graph to exclude876// the unlikely matched idioms.877//*****************************************************************************************878879void880TR_CISCGraphAspects::setLoadAspects(uint32_t val, bool orExistAccess)881{882TR_ASSERT(val <= loadMasks, "error!");883if (orExistAccess && (val & 0xFF)) val |= existAccess;884set(val);885}886887void888TR_CISCGraphAspects::setStoreAspects(uint32_t val, bool orExistAccess)889{890TR_ASSERT(val <= loadMasks, "error!");891if (orExistAccess && (val & 0xFF)) val |= existAccess;892set(val << storeShiftCount);893}894895void896TR_CISCGraphAspects::modifyAspects()897{898uint32_t load = getLoadAspects();899uint32_t store = getStoreAspects();900uint32_t temp = load & store & 0xff;901if (temp) set(sameTypeLoadStore);902}903904void905TR_CISCGraphAspects::print(TR::Compilation *comp, bool noaspects)906{907traceMsg(comp, "CISCGraph%sAspects is %08x\n",noaspects ? "No" : "", getValue());908}909910void911TR_CISCGraphAspectsWithCounts::print(TR::Compilation *comp, bool noaspects)912{913traceMsg(comp, "CISCGraph%sAspects is %08x\n",noaspects ? "No" : "", getValue());914traceMsg(comp, "min counts: if=%d, indirectLoad=%d, indirectStore=%d\n",915_ifCount, _indirectLoadCount, _indirectStoreCount);916}917918void919TR_CISCGraphAspectsWithCounts::setAspectsByOpcode(int opc)920{921switch(opc)922{923case TR_inbload:924setLoadAspects(existAccess | nbValue);925incIndirectLoadCount();926break;927case TR_inbstore:928setStoreAspects(existAccess | nbValue);929incIndirectStoreCount();930break;931case TR_ibcload:932case TR_indload:933setLoadAspects(existAccess);934incIndirectLoadCount();935break;936case TR_ibcstore:937case TR_indstore:938setStoreAspects(existAccess);939incIndirectStoreCount();940break;941case TR_ifcmpall:942incIfCount();943break;944case TR::ishr:945case TR::iushr:946case TR::lshr:947case TR::lushr:948set(shr);949break;950case TR::lmulh:951case TR::imulh:952case TR::imul:953case TR::lmul:954set(mul);955break;956case TR::irem:957case TR::lrem:958set(reminder);959break;960case TR::idiv:961case TR::ldiv:962set(division);963break;964case TR::BNDCHK:965set(bndchk);966break;967case TR::isub:968set(isub);969break;970case TR::iadd:971set(iadd);972break;973default:974if (opc < TR::NumIlOps)975{976TR::ILOpCode opCode;977opCode.setOpCodeValue((enum TR::ILOpCodes)opc);978if (opCode.isLoadIndirect())979{980setLoadAspects(existAccess | opCode.getSize());981incIndirectLoadCount();982}983else if (opCode.isStoreIndirect())984{985setStoreAspects(existAccess | opCode.getSize());986incIndirectStoreCount();987}988else if (opCode.isCall())989{990set(call);991}992else if (opCode.isIf() || opCode.isSwitch())993{994incIfCount();995}996else if (opCode.isAnd() || opCode.isOr() || opCode.isXor())997{998set(bitop1);999}1000}1001break;1002}1003}100410051006/*************************************/1007/*********** ************/1008/*********** TR_CISCGraph ************/1009/*********** ************/1010/*************************************/1011void1012TR_CISCGraph::setEssentialNodes(TR_CISCGraph *tgt)1013{1014ListIterator<TR_CISCNode> ni(tgt->getNodes());1015TR_CISCNode *n;10161017for (n = ni.getFirst(); n; n = ni.getNext())1018{1019if (n->getIlOpCode().isStore() ||1020n->getIlOpCode().isCall())1021{1022n->setIsEssentialNode();1023}1024}1025}10261027static bool graphsInitialized = false;1028// Register all idioms at start up.1029//1030void1031TR_CISCGraph::makePreparedCISCGraphs(TR::Compilation *c)1032{1033if (graphsInitialized)1034return;1035else1036graphsInitialized = true;10371038#if defined(J9VM_OPT_JITSERVER)1039// Prepared CISC graphs are static, i.e. initialized only once.1040// Need to use the global allocator here.1041if (c->isOutOfProcessCompilation())1042{1043JITServer::GlobalAllocationRegion globalAllocationRegion(c->fej9()->_compInfoPT);1044initializeGraphs(c);1045}1046else1047#endif1048{1049initializeGraphs(c);1050}1051}10521053void1054TR_CISCGraph::initializeGraphs(TR::Compilation *c)1055{1056int32_t num = 0;1057bool genTRxx = c->cg()->getSupportsArrayTranslateTRxx();1058bool genSIMD = c->cg()->getSupportsVectorRegisters() && !c->getOption(TR_DisableSIMDArrayTranslate);1059bool genTRTO255 = c->cg()->getSupportsArrayTranslateTRTO255();1060bool genTRTO = c->cg()->getSupportsArrayTranslateTRTO();1061bool genTROTNoBreak = c->cg()->getSupportsArrayTranslateTROTNoBreak();1062bool genTROT = c->cg()->getSupportsArrayTranslateTROT();1063bool genTRT = c->cg()->getSupportsArrayTranslateAndTest();1064bool genMemcpy = c->cg()->getSupportsReferenceArrayCopy() || c->cg()->getSupportsPrimitiveArrayCopy();1065bool genMemset = c->cg()->getSupportsArraySet();1066bool genMemcmp = c->cg()->getSupportsArrayCmp();1067bool genIDiv2Mul = c->cg()->getSupportsLoweringConstIDiv();1068bool genLDiv2Mul = c->cg()->getSupportsLoweringConstLDiv();1069// FIXME: We need getSupportsCountDecimalDigit() like interface1070// this idiom is only enabled on 390 for the moment10711072#if defined(J9VM_OPT_JITSERVER)1073// Enabling genDecimal generates the TROT instruction on Z which is currently not1074// relocatable for remote compiles. Thus we disable this option for remote compiles for now.1075bool genDecimal = c->target().cpu.isZ() && !c->isOutOfProcessCompilation();1076#else1077bool genDecimal = c->target().cpu.isZ();1078#endif /* defined(J9VM_OPT_JITSERVER) */1079bool genBitOpMem = c->target().cpu.isZ();1080bool is64Bit = c->target().is64Bit();1081bool isBig = c->target().cpu.isBigEndian();1082int32_t ctrl = (is64Bit ? CISCUtilCtl_64Bit : 0) | (isBig ? CISCUtilCtl_BigEndian : 0);10831084// THESE ARE NOT GUARANTEED OR TESTED TO WORK ON WCODE.1085// Problems encountered include ahSize=0 on WCode leading to hash collision when adding node for make*CISCGraphs.1086if (genMemcmp)1087{1088preparedCISCGraphs[num] = makeMemCmpGraph(c, ctrl);1089setEssentialNodes(preparedCISCGraphs[num++]);1090preparedCISCGraphs[num] = makeMemCmpIndexOfGraph(c, ctrl);1091setEssentialNodes(preparedCISCGraphs[num++]);1092preparedCISCGraphs[num] = makeMemCmpSpecialGraph(c, ctrl);1093setEssentialNodes(preparedCISCGraphs[num++]);1094}1095if (genTRT)1096{1097preparedCISCGraphs[num] = makeTRTGraph(c, ctrl);1098setEssentialNodes(preparedCISCGraphs[num++]);1099preparedCISCGraphs[num] = makeTRTGraph2(c, ctrl);1100setEssentialNodes(preparedCISCGraphs[num++]);1101preparedCISCGraphs[num] = makeTRT4NestedArrayGraph(c, ctrl);1102setEssentialNodes(preparedCISCGraphs[num++]);1103//preparedCISCGraphs[num] = makeTRT4NestedArrayIfGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;1104}1105if (genMemset)1106{1107preparedCISCGraphs[num] = makeMemSetGraph(c, ctrl);1108setEssentialNodes(preparedCISCGraphs[num++]);1109#if STRESS_TEST1110preparedCISCGraphs[num] = makeMixedMemSetGraph(c, ctrl);1111setEssentialNodes(preparedCISCGraphs[num++]);1112#endif1113preparedCISCGraphs[num] = makePtrArraySetGraph(c, ctrl);1114setEssentialNodes(preparedCISCGraphs[num++]);1115// Causes perf degradations on Xalan strlen16 opportunities. SRSTU is only better on long strings.1116//preparedCISCGraphs[num] = makeStrlen16Graph(c, ctrl);1117//setEssentialNodes(preparedCISCGraphs[num++]);1118}1119if (genMemcpy)1120{1121preparedCISCGraphs[num] = makeMemCpyGraph(c, ctrl);1122setEssentialNodes(preparedCISCGraphs[num++]);1123preparedCISCGraphs[num] = makeMemCpyDecGraph(c, ctrl);1124setEssentialNodes(preparedCISCGraphs[num++]);1125preparedCISCGraphs[num] = makeMemCpySpecialGraph(c, ctrl);1126setEssentialNodes(preparedCISCGraphs[num++]);1127preparedCISCGraphs[num] = makeMemCpyByteToCharGraph(c, ctrl);1128setEssentialNodes(preparedCISCGraphs[num++]);1129preparedCISCGraphs[num] = makeMemCpyByteToCharBndchkGraph(c, ctrl);1130setEssentialNodes(preparedCISCGraphs[num++]);1131preparedCISCGraphs[num] = makeMemCpyCharToByteGraph(c, ctrl);1132setEssentialNodes(preparedCISCGraphs[num++]);1133preparedCISCGraphs[num] = makeMEMCPYChar2ByteGraph2(c, ctrl);1134setEssentialNodes(preparedCISCGraphs[num++]);1135preparedCISCGraphs[num] = makeMEMCPYChar2ByteMixedGraph(c, ctrl);1136setEssentialNodes(preparedCISCGraphs[num++]);1137// disabled for now1138#if STRESS_TEST1139preparedCISCGraphs[num] = makeMEMCPYByte2IntGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;1140preparedCISCGraphs[num] = makeMEMCPYInt2ByteGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;1141#endif1142}11431144if (genTRTO255 || genTRTO || genSIMD || genTRxx)1145{1146preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 0);1147setEssentialNodes(preparedCISCGraphs[num++]);1148preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 1);1149setEssentialNodes(preparedCISCGraphs[num++]);1150preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 2);1151setEssentialNodes(preparedCISCGraphs[num++]);1152preparedCISCGraphs[num] = makeCopyingTRTxThreeIfsGraph(c, ctrl);1153setEssentialNodes(preparedCISCGraphs[num++]);1154preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 0);1155setEssentialNodes(preparedCISCGraphs[num++]);1156preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 1);1157setEssentialNodes(preparedCISCGraphs[num++]);1158preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 2);1159setEssentialNodes(preparedCISCGraphs[num++]);11601161}11621163if (genTROTNoBreak || genTROT || genSIMD || genTRxx)1164{1165preparedCISCGraphs[num] = makeCopyingTROxGraph(c, ctrl, 0);1166setEssentialNodes(preparedCISCGraphs[num++]);1167preparedCISCGraphs[num] = makeCopyingTROxGraph(c, ctrl, 1);1168setEssentialNodes(preparedCISCGraphs[num++]);1169}11701171if (genTRxx)1172{1173if (c->getOption(TR_EnableCopyingTROTInduction1Idioms))1174{1175preparedCISCGraphs[num] = makeCopyingTROTInduction1Graph(c, ctrl, 0);1176setEssentialNodes(preparedCISCGraphs[num++]);1177preparedCISCGraphs[num] = makeCopyingTROTInduction1Graph(c, ctrl, 1);1178setEssentialNodes(preparedCISCGraphs[num++]);1179}1180preparedCISCGraphs[num] = makeCopyingTROOSpecialGraph(c, ctrl);1181setEssentialNodes(preparedCISCGraphs[num++]);1182#if STRESS_TEST1183preparedCISCGraphs[num] = makeCopyingTRTTSpecialGraph(c, ctrl);1184setEssentialNodes(preparedCISCGraphs[num++]);1185#endif1186if (is64Bit)1187{1188preparedCISCGraphs[num] = makeCopyingTRTOGraphSpecial(c, ctrl);1189setEssentialNodes(preparedCISCGraphs[num++]);1190}1191preparedCISCGraphs[num] = makeTROTArrayGraph(c, ctrl);1192setEssentialNodes(preparedCISCGraphs[num++]);1193preparedCISCGraphs[num] = makeTRTOArrayGraph(c, ctrl);1194setEssentialNodes(preparedCISCGraphs[num++]);1195preparedCISCGraphs[num] = makeTRTOArrayGraphSpecial(c, ctrl);1196setEssentialNodes(preparedCISCGraphs[num++]);1197}1198if (genDecimal)1199{1200// Needs to be modified1201preparedCISCGraphs[num] = makeCountDecimalDigitIntGraph(c, ctrl, genIDiv2Mul);1202setEssentialNodes(preparedCISCGraphs[num++]);1203preparedCISCGraphs[num] = makeIntToStringGraph(c, ctrl, genIDiv2Mul);1204setEssentialNodes(preparedCISCGraphs[num++]);1205preparedCISCGraphs[num] = makeCountDecimalDigitLongGraph(c, ctrl, genLDiv2Mul);1206setEssentialNodes(preparedCISCGraphs[num++]);1207#if STRESS_TEST1208preparedCISCGraphs[num] = makeLongToStringGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;1209#endif1210}1211if (genBitOpMem)1212{1213preparedCISCGraphs[num] = makeBitOpMemGraph(c, ctrl);1214setEssentialNodes(preparedCISCGraphs[num++]);1215}12161217TR_ASSERT(num <= MAX_PREPARED_GRAPH, "incorrect number of graphs!");1218numPreparedCISCGraphs = num;12191220// set minimumHotnessPrepared;1221minimumHotnessPrepared = scorching;1222for (;--num >= 0;)1223{1224TR_Hotness hotness = preparedCISCGraphs[num]->getHotness();1225if (minimumHotnessPrepared > hotness)1226minimumHotnessPrepared = hotness;1227}1228}12291230void1231TR_CISCGraph::dump(TR::FILE *pOutFile, TR::Compilation * comp)1232{1233traceMsg(comp, "CISCGraph of %s\n",_titleOfCISC);1234_aspects.print(comp, false);1235_noaspects.print(comp, true);1236ListIterator<TR_CISCNode> ni(getNodes());1237TR_CISCNode *n;12381239#if 11240traceMsg(comp, "!! Note !! Showing reverse order for convenience\n");1241TR_ScratchList<TR_CISCNode> reorder(comp->trMemory());1242for (n = ni.getFirst(); n; n = ni.getNext())1243{1244reorder.add(n);1245}1246ni.set(&reorder);1247#endif1248traceMsg(comp, " ptr id dagId(L=Loop) succ children (chains) (dest) (hintChildren) (flags) (TRNodeInfo)\n");1249for (n = ni.getFirst(); n; n = ni.getNext())1250{1251n->dump(pOutFile, comp);1252}12531254traceMsg(comp, "\nOrder by Data\n");1255ni.set(&_orderByData);1256for (n = ni.getFirst(); n; n = ni.getNext())1257{1258n->dump(pOutFile, comp);1259}1260}126112621263//*****************************************************************************************1264// It registers TR::Block, TR::TreeTop, and TR::Node into TR_CISCNode.1265// To find TR_CISCNode by TR_Node, it also adds TR_CISCNode into a hash table.1266//*****************************************************************************************1267void1268TR_CISCGraph::addTrNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)1269{1270n->addTrNode(block, top, trNode);1271bool ret = addTrNode2CISCNode(trNode, n);1272TR_ASSERT(ret, "addTrNode2CISCNode returns failure");1273}127412751276//*****************************************************************************************1277// To find TR_CISCNode by opcode, it adds TR_CISCNode into a hash table.1278//*****************************************************************************************1279void1280TR_CISCGraph::addOpc2CISCNode(TR_CISCNode *n)1281{1282if (_opc2CISCNode.getNumBuckets() > 0)1283{1284bool ret;1285bool registerNode = false;1286switch(n->getOpcode())1287{1288case TR::lconst:1289if (!n->isValidOtherInfo()) break;1290// else fall through1291case TR_variable:1292case TR::sconst:1293case TR::iconst:1294case TR::bconst:1295TR_ASSERT(n->isValidOtherInfo(), "error");1296// fall through1297case TR_booltable:1298case TR_entrynode:1299case TR_exitnode:1300case TR_ahconst:1301case TR_arrayindex:1302case TR_arraybase:1303registerNode = true;1304break;1305}1306if (registerNode)1307{1308ret = addOpc2CISCNode(n->getOpcode(), n->isValidOtherInfo(), n->getOtherInfo(), n);1309TR_ASSERT(ret, "addOpc2CISCNode returns failure");1310}1311}1312}131313141315//*****************************************************************************************1316// Add TR_CISCNode to the graph.1317// It also adds its aspects, TR::Block, TR::TreeTop, TR_Node, and hash table.1318//*****************************************************************************************1319void1320TR_CISCGraph::addNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)1321{1322TR_ASSERT((block == 0 && top == 0 && trNode == 0) ||1323(block != 0 && top != 0 && trNode != 0), "error"); // all 0 or all Non-01324_nodes.add(n);1325if (isRecordingAspectsByOpcode()) _aspects.setAspectsByOpcode(n);1326if (trNode != 0)1327{1328TR_ASSERT(n->getTrNodeInfo()->isEmpty(), "n->getTrNodeInfo() exists???");1329addTrNode(n, block, top, trNode);1330}1331addOpc2CISCNode(n);1332}133313341335//*****************************************************************************************1336// Search a store for the node "target" until the node "to"1337//*****************************************************************************************1338TR_CISCNode *1339TR_CISCGraph::searchStore(TR_CISCNode *target, TR_CISCNode *to)1340{1341TR_CISCNode *v = target;1342if (v->isLoadVarDirect()) v = v->getChild(0);1343if (v->getOpcode() != TR_variable) return 0;1344TR_BitVector visited(getNumNodes(), trMemory());13451346TR_CISCNode *t = target;1347while (true)1348{1349if (t->isStoreDirect() &&1350t->getChild(1) == v) return t;13511352if (t->getNumSuccs() == 0) break;1353visited.set(t->getID());13541355t = t->getSucc(0);1356if (t == target || t == to || t == getExitNode() || visited.isSet(t->getID())) return 0;1357}13581359return 0;1360}136113621363/**************************************************/1364/*********** ************/1365/*********** TR_PCISCGraph ************/1366/*********** (PersistentAlloc version) ************/1367/*********** ************/1368/**************************************************/1369void1370TR_PCISCGraph::addNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)1371{1372TR_ASSERT(block == 0 && top == 0 && trNode == 0, "error"); // all NULL1373TR_PersistentList<TR_CISCNode> *p = (TR_PersistentList<TR_CISCNode> *)&_nodes;1374p->add(n);1375addOpc2CISCNode(n);1376}137713781379void1380TR_CISCGraph::createDagId2NodesTable()1381{1382TR_ASSERT(_numDagIds > 0, "TR_CISCGraph::createDagId2NodesTable(), _numDagIds <= 0???");1383if (!isNoFragmentDagId()) defragDagId();1384uint32_t size = sizeof(*_dagId2Nodes) * _numDagIds;1385_dagId2Nodes = (List<TR_CISCNode> *)trMemory()->allocateMemory(size, heapAlloc);1386memset(_dagId2Nodes, 0, size);1387for (int i = 0; i < _numDagIds; i++) _dagId2Nodes[i].setRegion(trMemory()->heapMemoryRegion());1388ListIterator<TR_CISCNode> nodesLi(&_nodes);1389TR_CISCNode *n;1390for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1391{1392int32_t dagId = n->getDagID();1393TR_ASSERT(dagId < _numDagIds, "TR_CISCGraph::createDagId2NodesTable(), dagId is out of range");1394_dagId2Nodes[dagId].add(n);1395}1396}13971398// remove all the BBStart/BBEnd nodes1399void1400TR_CISCGraph::createOrderByData()1401{1402_orderByData.init();1403ListIterator<TR_CISCNode> nodesLi(&_nodes);1404TR_CISCNode *n;1405for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1406{1407if (n->getNumChildren() > 0 ||1408!n->getParents()->isEmpty())1409{1410_orderByData.add(n);1411}1412else1413{1414switch(n->getOpcode())1415{1416case TR_entrynode:1417case TR_exitnode:1418_orderByData.add(n);1419break;1420}1421}1422}1423}142414251426void1427TR_PCISCGraph::createDagId2NodesTable()1428{1429TR_ASSERT(_numDagIds > 0, "TR_PCISCGraph::createDagId2NodesTable(), _numDagIds <= 0???");1430if (!isNoFragmentDagId()) defragDagId();1431uint32_t size = sizeof(*_dagId2Nodes) * _numDagIds;1432_dagId2Nodes = (TR_PersistentList<TR_CISCNode> *)jitPersistentAlloc(size);1433memset(_dagId2Nodes, 0, size);1434ListIterator<TR_CISCNode> nodesLi(&_nodes);1435TR_CISCNode *n;1436TR_PersistentList<TR_CISCNode> *list;1437for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1438{1439int32_t dagId = n->getDagID();1440TR_ASSERT(dagId < _numDagIds, "TR_PCISCGraph::createDagId2NodesTable(), dagId is out of range");1441list = (TR_PersistentList<TR_CISCNode> *)_dagId2Nodes + dagId;1442list->add(n);1443}1444}144514461447void1448TR_PCISCGraph::createOrderByData()1449{1450TR_PersistentList<TR_CISCNode> *list = (TR_PersistentList<TR_CISCNode> *)&_orderByData;1451ListIterator<TR_CISCNode> nodesLi(&_nodes);1452TR_CISCNode *n;1453for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1454{1455if (n->getNumChildren() > 0 ||1456!n->getParents()->isEmpty())1457{1458list->add(n);1459}1460else1461{1462switch(n->getOpcode())1463{1464case TR_entrynode:1465case TR_exitnode:1466list->add(n);1467break;1468}1469}1470}1471}147214731474//*****************************************************************************************1475// Import UD/DU information of TR::Node to TR_CISCNode._chain1476//*****************************************************************************************1477void1478TR_CISCGraph::importUDchains(TR::Compilation *comp, TR_UseDefInfo *useDefInfo, bool reinitialize)1479{1480ListIterator<TR_CISCNode> nodesLi(&_nodes);1481TR_CISCNode *n;1482const int32_t firstUseIndex = useDefInfo->getFirstUseIndex();14831484if (isSetUDDUchains()) // already done before1485{1486if (!reinitialize) return;1487for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1488{1489n->initChains();1490}1491}14921493setIsSetUDDUchains();1494for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())1495{1496if (n->isLoadVarDirect())1497{1498if (n->getHeadOfTrNodeInfo()->_node->getSymbol()->isAutoOrParm())1499{1500/* set UD-chains to TR_CISCNode._chain */1501TR_ASSERT(n->getTrNodeInfo()->isSingleton(), "direct load must correspond to a single TR node");1502TR::Node *trNode = n->getTrNodeInfo()->getListHead()->getData()->_node;1503int32_t useDefIndex = trNode->getUseDefIndex();1504TR_ASSERT(useDefInfo->isUseIndex(useDefIndex), "error!");1505TR_UseDefInfo::BitVector info(comp->allocator());1506useDefInfo->getUseDef(info, useDefIndex);1507TR_ASSERT(!info.IsZero(), "no defs!");1508TR_UseDefInfo::BitVector::Cursor cursor(info);1509for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())1510{1511int32_t defIndex = cursor;1512TR_ASSERT(defIndex < useDefInfo->getNumDefsOnEntry() || useDefInfo->isDefIndex(defIndex), "error!");1513TR_CISCNode *defNode = getCISCNode(useDefInfo->getNode(defIndex));1514if (!defNode)1515{1516n->addChain(_entryNode, true);1517}1518else1519{1520n->addChain(defNode);1521}1522}1523}1524}1525else if (n->isStoreDirect())1526{1527/* set DU-chains to TR_CISCNode._chain */1528TR_ASSERT(n->getTrNodeInfo()->isSingleton(), "direct store must correspond to a single TR node");1529TR::Node *trNode = n->getTrNodeInfo()->getListHead()->getData()->_node;1530int32_t useDefIndex = trNode->getUseDefIndex();1531if (useDefIndex == 0) continue;1532TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");1533TR_UseDefInfo::BitVector info(comp->allocator());1534useDefInfo->getUsesFromDef(info, useDefIndex);1535if (!info.IsZero())1536{1537TR_UseDefInfo::BitVector::Cursor cursor(info);1538for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())1539{1540int32_t useIndex = (int32_t) cursor + firstUseIndex;1541TR_ASSERT(useDefInfo->isUseIndex(useIndex), "error!");1542TR_CISCNode *useNode = getCISCNode(useDefInfo->getNode(useIndex));1543if (!useNode)1544{1545n->addChain(_exitNode, true);1546}1547else1548{1549n->addChain(useNode);1550}1551}1552}1553else1554{1555n->setIsNegligible();1556n->getChild(0)->deadAllChildren();1557}1558}1559else1560{1561TR_CISCNode *c, *p, *v;1562ListIterator<TR_CISCNode> ci(n->getHintChildren());1563for (c = ci.getFirst(); c; c = ci.getNext())1564{1565ListIterator<TR_CISCNode> pi(c->getParents());1566for (p = pi.getFirst(); p; p = pi.getNext())1567{1568if (p->isStoreDirect())1569{1570v = p->getChild(1); // variable1571bool find = false;1572for (int i = n->getNumChildren(); --i >= 0; )1573if (n->getChild(i) == v)1574{1575find = true;1576break;1577}1578if (find)1579{1580// set UD/DU chains1581p->addChain(n);1582n->addChain(p);1583}1584}1585}1586}1587}1588}1589}1590159115921593//*****************************************************************************************1594// Defragment dagIds and set _noFragmentDagId1595//*****************************************************************************************1596int32_t1597TR_CISCGraph::defragDagId()1598{1599ListIterator<TR_CISCNode> nodesLi(&_nodes);1600TR_CISCNode *n;1601int32_t curId, newId;16021603n = nodesLi.getFirst();1604newId = 0;1605curId = n->getDagID();1606for (; n; n = nodesLi.getNext())1607{1608int32_t nDagId = n->getDagID();1609if (curId != nDagId)1610{1611TR_ASSERT(curId < nDagId, "Error!");1612curId = nDagId;1613newId++;1614}1615n->setDagID(newId);1616}1617newId++;1618_numDagIds = newId;1619setNoFragmentDagId();1620return newId;1621}1622162316241625//*****************************************************************************************1626// Set the flag _isOutsideOfLoop to all of the nodes in outside of the loop body1627//*****************************************************************************************1628void1629TR_CISCGraph::setOutsideOfLoopFlag(uint16_t loopBodyDagId)1630{1631ListIterator<TR_CISCNode> li(&_nodes);1632TR_CISCNode *n;1633for (n = li.getFirst(); n; n = li.getNext())1634{1635if (n->getDagID() != loopBodyDagId) n->setOutsideOfLoop();1636}1637}1638163916401641/***********************************************/1642/*********** ************/1643/*********** TR_CFGReversePostOrder ************/1644/*********** ************/1645/***********************************************/16461647// do a reverse post-order walk of the CFG1648// The result is stored in _revPost.1649//16501651List<TR::CFGNode> *1652TR_CFGReversePostOrder::compute(TR::CFG *cfg)1653{1654createReversePostOrder(cfg, cfg->getEnd());1655return &_revPost;1656}16571658void1659TR_CFGReversePostOrder::createReversePostOrder(TR::CFG *cfg, TR::CFGNode *n)1660{1661TR_StackForRevPost *stack;1662TR_BitVector *visited = new (cfg->comp()->trStackMemory()) TR_BitVector(cfg->getNextNodeNumber(), cfg->comp()->trMemory(), stackAlloc);1663TR_LinkHead<TR_StackForRevPost> stackRevPost;16641665visited->set(n->getNumber());1666auto edge = n->getPredecessors().begin();1667while(true)1668{1669bool alreadyVisited = true;1670while(edge != n->getPredecessors().end())1671{1672TR::CFGNode *predBlock = (*edge)->getFrom();1673if (!visited->isSet(predBlock->getNumber()))1674{1675// push the predBlock onto the stack1676//1677stack = new (cfg->comp()->trStackMemory()) TR_StackForRevPost();1678stack->n = n;1679stack->le = ++edge;1680stackRevPost.add(stack);16811682n = predBlock;1683visited->set(n->getNumber());1684edge = n->getPredecessors().begin();1685alreadyVisited = false;1686break;1687}1688++edge;1689}16901691if (alreadyVisited)1692{1693// add current block to the final list1694//1695_revPost.append(n);16961697// pick next block to be processed1698//1699if (!(stack = stackRevPost.pop()))1700break;1701n = stack->n;1702edge = stack->le;1703}1704}1705}17061707void1708TR_CFGReversePostOrder::dump(TR::Compilation * comp)1709{1710ListIterator<TR::CFGNode> ni(&_revPost);1711TR::CFGNode *n;1712traceMsg(comp, "Generated Reverse post order of CFG: ");1713for (n = ni.getFirst(); n; n = ni.getNext())1714traceMsg(comp, "%d->", n->getNumber());1715traceMsg(comp, "\n");1716}17171718#if 01719//*****************************************************************************************1720// These functions are recursive versions, and they are disabled for now.1721//*****************************************************************************************1722void1723TR_CFGReversePostOrder::initReversePostOrder()1724{1725_visited.init(_cfg->getNextNodeNumber());1726_revPost.init();1727}172817291730void1731TR_CFGReversePostOrder::initReversePostOrder(TR::CFG *cfg)1732{1733_cfg = cfg;1734initReversePostOrder();1735}17361737void1738TR_CFGReversePostOrder::createReversePostOrderRec(TR::CFGNode *n)1739{1740_visited.set(n->getNumber());1741for (auto le = n->getPredecessors().begin(); le != n->getPredecessors().end(); ++le)1742{1743TR::CFGNode *from = (*le)->getFrom();1744if (!_visited.isSet(from->getNumber()))1745{1746createReversePostOrderRec(from);1747}1748}1749_revPost.append(n);1750}1751#endif1752175317541755/*********** FOR OPTIMIZATION ************/17561757/******************************************/1758/*********** ************/1759/*********** TR_CISCNodeRegion ************/1760/*********** ************/1761/******************************************/1762TR_CISCNodeRegion *1763TR_CISCNodeRegion::clone()1764{1765TR_CISCNodeRegion *c = new (getRegion()) TR_CISCNodeRegion(_bvnum, getRegion());1766ListElement<TR_CISCNode> *le;1767c->_flags = _flags;1768for (le = _pHead; le; le = le->getNextElement())1769c->append(le->getData());1770return c;1771}177217731774/******************************************/1775/*********** ************/1776/*********** TR_NodeDuplicator ************/1777/*********** ************/1778/******************************************/1779TR::Node *1780TR_NodeDuplicator::restructureTree(TR::Node *oldTree, TR::Node *newTree)1781{1782TR_ASSERT(oldTree->getNumChildren() == newTree->getNumChildren(), "error");1783for (int i = 0; i < oldTree->getNumChildren(); i++)1784{1785TR::Node *oldChild = oldTree->getChild(i);1786TR_Pair<TR::Node,TR::Node> *pair;1787ListElement<TR_Pair<TR::Node,TR::Node> > *le;17881789// Search for oldChild1790for (le = _list.getListHead(); le; le = le->getNextElement())1791{1792pair = le->getData();1793if (pair->getKey() == oldChild) break;1794}17951796if (le)1797{ // found1798newTree->setAndIncChild(i, pair->getValue());1799}1800else1801{1802TR::Node *newChild = newTree->getChild(i);1803pair = new (trHeapMemory()) TR_Pair<TR::Node,TR::Node>(oldChild, newChild);1804_list.add(pair);1805restructureTree(oldChild, newChild);1806}1807}1808return newTree;1809}18101811TR::Node *1812TR_NodeDuplicator::duplicateTree(TR::Node *org)1813{1814TR::Node *newNode = org->duplicateTree();1815return restructureTree(org, newNode);1816}181718181819/******************************************/1820/*********** ************/1821/*********** TR_UseTreeTopMap ************/1822/*********** ************/1823/******************************************/18241825TR_UseTreeTopMap::TR_UseTreeTopMap(TR::Compilation * comp, TR::Optimizer * optimizer)1826: _useToParentMap(comp->trMemory(), stackAlloc)1827{1828_buildAllMap = false;1829_compilation = comp;1830_optimizer = optimizer;1831}18321833int32_t1834TR_UseTreeTopMap::buildAllMap()1835{1836if (_buildAllMap) return 0;1837_info = _optimizer->getUseDefInfo();1838if (0==_info) return 0;1839TR::TreeTop* currentTree = comp()->getStartTree();1840_useToParentMap.init(_info->getTotalNodes());1841comp()->incVisitCount();18421843for (; currentTree; currentTree = currentTree->getNextTreeTop())1844{1845buildUseTreeTopMap(currentTree,currentTree->getNode());1846}18471848_buildAllMap = true;1849return 1;1850}18511852/**1853* Build a map of use indices to their parent TreeTops (usedef datastructure doesn't have this)1854*/1855typedef TR_Pair<TR::Node,TR::TreeTop> UseInfo;1856void TR_UseTreeTopMap::buildUseTreeTopMap(TR::TreeTop* treeTop,TR::Node *node)1857{1858vcount_t currCount = comp()->getVisitCount();1859if (currCount == node->getVisitCount()) return;1860node->setVisitCount(currCount);18611862for (int32_t childIndex=0;childIndex < node->getNumChildren();++childIndex)1863{1864TR::Node *childNode = node->getChild(childIndex);1865int32_t useIndex = childNode->getUseDefIndex();1866if (_info->isUseIndex(useIndex)) // hash it.1867{1868TR_HashId hashIndex;1869TR_ScratchList<UseInfo> *tlist;1870if (_useToParentMap.locate(useIndex,hashIndex))1871{1872tlist = (TR_ScratchList<UseInfo> *)_useToParentMap.getData(hashIndex);1873}1874else1875{1876tlist = new (comp()->trStackMemory()) TR_ScratchList<UseInfo>(comp()->trMemory());1877_useToParentMap.add(useIndex,hashIndex,tlist);1878}18791880UseInfo * useInfo = new (comp()->trStackMemory()) UseInfo(childNode,treeTop);1881tlist->add(useInfo);1882}1883buildUseTreeTopMap(treeTop,childNode);1884}1885}18861887// findParentTreeTop returns the parent TreeTop that contains the given useNode1888TR::TreeTop * TR_UseTreeTopMap::findParentTreeTop(TR::Node *useNode)1889{1890TR_ASSERT(_buildAllMap, "Use Treetop map is not initialized.");1891TR_HashId hashId;1892int32_t useIndex = useNode->getUseDefIndex();1893bool found = _useToParentMap.locate(useIndex,hashId);1894TR_ASSERT(found,"No entry exists for %d %x\n",useIndex,hashId);1895TR_ScratchList<UseInfo> *list= (TR_ScratchList<UseInfo> *)_useToParentMap.getData(hashId);1896ListIterator<UseInfo> listCursor(list);1897for (listCursor.getFirst(); listCursor.getCurrent(); listCursor.getNext())1898{1899UseInfo *useInfoPtr = listCursor.getCurrent();1900if (useNode == useInfoPtr->getKey())1901return useInfoPtr->getValue();1902}1903// Parent treetop may not be found if an earlier transformation has removed the useNode.1904// In that case, we'll conservatively return NULL.1905return NULL;1906}190719081909/*******************************************/1910/*********** ************/1911/*********** TR_CISCTransformer ************/1912/*********** ************/1913/*******************************************/1914TR_CISCTransformer::TR_CISCTransformer(TR::OptimizationManager *manager)1915: TR_LoopTransformer(manager),1916_candidatesForShowing(manager->trMemory()),1917_candidateBBStartEnd(0),1918_backPatchList(manager->trMemory()),1919_beforeInsertions(manager->trMemory()),1920_afterInsertions(manager->trMemory()),1921_bblistPred(manager->trMemory()),1922_bblistBody(manager->trMemory()),1923_bblistSucc(manager->trMemory()),1924_candidatesForRegister(manager->trMemory()),1925_useTreeTopMap(manager->comp(), manager->optimizer()),1926_BitsKeepAliveList(manager->trMemory())1927{1928_afterInsertionsIdiom = (ListHeadAndTail<TR::Node> *) trMemory()->allocateHeapMemory(sizeof(ListHeadAndTail<TR::Node>)*2);1929memset(_afterInsertionsIdiom, 0, sizeof(ListHeadAndTail<TR::Node>)*2);1930for (int32_t i = 0; i < 2; ++i)1931_afterInsertionsIdiom[i].setRegion(trMemory()->heapMemoryRegion());19321933_lastCFGNode = 0;1934_backPatchList.init();1935_embeddedForData = _embeddedForCFG = 0;1936_flagsForTransformer.clear();1937_loopStructure = NULL;1938if (SHOW_CANDIDATES) setShowingCandidates();1939// construct the idiom graphs and register them1940//1941// TO_BE_ENABLED1942TR_CISCGraph::makePreparedCISCGraphs(manager->comp());1943}19441945//*****************************************************************************************1946// return whether the structure is a fast versioned loop1947//*****************************************************************************************1948bool1949TR_CISCTransformer::isInsideOfFastVersionedLoop(TR_RegionStructure *l)1950{1951TR_RegionStructure *parent = l;1952while(true)1953{1954if (////parent->getVersionedLoop() &&1955!parent->getEntryBlock()->isCold()1956#if 0 // We should optimize a fail path of value profiling1957&& !parent->getEntryBlock()->isRare()1958#endif1959)1960{1961return true; // It is inside of the fast versioned loop.1962}1963TR_Structure *p = parent->getParent();1964if (!p || !(parent = p->asRegion())) break;1965}1966return false;1967}196819691970// createLoopCandidates populates the given list with natural loop candidates1971// which contains structure information and is not cold. The return value of1972// this call dictates whether we found candidates or not.1973bool1974TR_CISCTransformer::createLoopCandidates(List<TR_RegionStructure> *loopCandidates)1975{1976bool enableTracing = trace();197719781979loopCandidates->init();1980TR_ScratchList<TR_Structure> whileLoops(trMemory());1981ListAppender<TR_Structure> whileLoopsInnerFirst(&whileLoops);1982TR_ScratchList<TR_Structure> doWhileLoops(trMemory());1983ListAppender<TR_Structure> doWhileLoopsInnerFirst(&doWhileLoops);1984TR_ScratchList<TR_Structure> *candidate;1985comp()->incVisitCount();19861987detectWhileLoops(whileLoopsInnerFirst, whileLoops, doWhileLoopsInnerFirst, doWhileLoops, _cfg->getStructure(), true);1988// join both lists so all loop1989// candidates are analyzed1990ListElement<TR_Structure> *last = whileLoops.getLastElement();1991if (last)1992{1993last->setNextElement(doWhileLoops.getListHead());1994candidate = &whileLoops;1995}1996else1997candidate = &doWhileLoops;19981999int32_t loopCount = 0;2000if (!candidate->isEmpty())2001{2002if (enableTracing)2003traceMsg(comp(), "createLoopCandidates: Evaluating list of loop candidates.\n");20042005ListIterator<TR_Structure> whileLoopsIt(candidate);2006TR_Structure *nextWhileLoop;20072008for (nextWhileLoop = whileLoopsIt.getFirst(); nextWhileLoop != 0; nextWhileLoop = whileLoopsIt.getNext())2009{2010TR_RegionStructure *naturalLoop = nextWhileLoop->asRegion();2011TR_ASSERT(naturalLoop && naturalLoop->isNaturalLoop(),"CISCGraph, expecting natural loop");2012if (!naturalLoop || !naturalLoop->isNaturalLoop())2013{2014if (trace() && naturalLoop)2015traceMsg(comp(), "\tRejected loop %d - not a natural loop?\n", naturalLoop->getNumber());2016continue;2017}2018TR_StructureSubGraphNode *entryGraphNode = naturalLoop->getEntry();2019TR_BlockStructure *loopBlockStructure = entryGraphNode->getStructure()->asBlock();2020if (!loopBlockStructure)2021{2022if (enableTracing)2023traceMsg(comp(), "\tRejected loop %d - no block structure.\n", naturalLoop->getNumber());2024continue;2025}2026if (!naturalLoop->containsOnlyAcyclicRegions())2027{2028if (enableTracing)2029traceMsg(comp(), "\tRejected loop %d - not inner most loop.\n", naturalLoop->getNumber());2030continue; // inner most loop2031}2032if (loopBlockStructure->getBlock()->isCold())2033{2034if (enableTracing)2035traceMsg(comp(), "\tRejected loop %d - cold loop.\n", naturalLoop->getNumber());2036continue; // cold loop2037}2038loopCount++;2039loopCandidates->add(naturalLoop);20402041if (enableTracing)2042traceMsg(comp(), "\tAccepted loop %d as candidate.\n", naturalLoop->getNumber());2043}2044#if SHOW_STATISTICS2045if (showMesssagesStdout() && loopCount)2046if (comp()->getMethodHotness() == warm || isAfterVersioning()) printf("!! #Loop=%d\n",loopCount);2047#endif2048}20492050if (enableTracing)2051traceMsg(comp(), "createLoopCandidates: %d loop candidates found.\n", loopCount);20522053return !loopCandidates->isEmpty();2054}20552056// prepare the loop for transformation2057//2058TR::Block *2059TR_CISCTransformer::addPreHeaderIfNeeded(TR_RegionStructure *region)2060{2061TR::Block *loopEntry = region->getEntry()->getStructure()->asBlock()->getBlock();2062TR::Block *preHeader = NULL;2063for (auto e = loopEntry->getPredecessors().begin(); e != loopEntry->getPredecessors().end(); ++e)2064{2065TR::Block *predBlock = toBlock((*e)->getFrom());2066// ignore backedges2067if (region->contains(predBlock->getStructureOf(), region->getParent()))2068continue;20692070if (predBlock->getStructureOf() &&2071predBlock->getStructureOf()->isLoopInvariantBlock())2072{2073preHeader = predBlock;2074break;2075}2076}20772078if (!preHeader)2079{2080preHeader = TR::Block::createEmptyBlock(loopEntry->getEntry()->getNode(), comp(), loopEntry->getFrequency(), loopEntry);2081_cfg->addNode(preHeader);2082TR::Block *prevBlock = loopEntry->getPrevBlock();2083if (prevBlock)2084prevBlock->getExit()->join(preHeader->getEntry());2085preHeader->getExit()->join(loopEntry->getEntry());2086_cfg->addEdge(preHeader, loopEntry);20872088// iterate again and fixup the preds to branch to the2089// new pre-header2090//2091TR_ScratchList<TR::CFGEdge> removedEdges(trMemory());2092TR::CFGEdge *e = NULL;2093for (auto e = loopEntry->getPredecessors().begin(); e != loopEntry->getPredecessors().end(); ++e)2094{2095TR::Block *predBlock = toBlock((*e)->getFrom());2096// ignore backedges2097if (region->contains(predBlock->getStructureOf(), region->getParent()))2098continue;20992100traceMsg(comp(), "fixing predecessor %d\n", predBlock->getNumber());2101removedEdges.add(*e);2102_cfg->addEdge(predBlock, preHeader);2103TR::Node *branchNode = predBlock->getExit()->getPrevRealTreeTop()->getNode();2104if (branchNode->getOpCode().isBranch() || branchNode->getOpCode().isBranch())2105{2106if (branchNode->getBranchDestination()->getNode()->getBlock() == loopEntry)2107{2108branchNode->setBranchDestination(preHeader->getEntry());2109}2110}2111else if (branchNode->getOpCode().isSwitch())2112{2113for (int32_t i = branchNode->getCaseIndexUpperBound() - 1; i > 0; --i)2114{2115if (branchNode->getChild(i)->getBranchDestination()->getNode()->getBlock() == loopEntry)2116branchNode->getChild(i)->setBranchDestination(preHeader->getEntry());2117}2118}2119}2120ListIterator<TR::CFGEdge> rIt(&removedEdges);2121for (e = rIt.getFirst(); e; e = rIt.getNext())2122{2123_cfg->removeEdge(e);2124}2125traceMsg(comp(), "added preheader block_%d\n", preHeader->getNumber());2126}21272128return preHeader;2129}213021312132// return the predecessor block of the loop entry2133//2134TR::Block *2135TR_CISCTransformer::findPredecessorBlockOfLoopEntry(TR_RegionStructure *loop)2136{2137TR::Block *loopEntry = loop->getEntry()->getStructure()->asBlock()->getBlock();2138if (true /*pred.isDoubleton()*/ )2139{2140for (auto edge = loopEntry->getPredecessors().begin(); edge != loopEntry->getPredecessors().end(); ++edge)2141{2142TR::Block *from = toBlock((*edge)->getFrom());2143if ((from->getSuccessors().size() == 1) &&2144from->getParentStructureIfExists(_cfg) != loop)2145return from;2146}2147}2148return 0;2149}215021512152//*****************************************************************************************2153// analyze whether the loop is frequently iterated.2154//*****************************************************************************************2155void2156TR_CISCTransformer::analyzeHighFrequencyLoop(TR_CISCGraph *graph,2157TR_RegionStructure *naturalLoop)2158{2159if (trace())2160traceMsg(comp(), "\tAnalyzing if loop is frequently iterated\n");2161bool isInsideOfFastVersioned = isInsideOfFastVersionedLoop(naturalLoop);2162bool highFrequency = true;2163#if !STRESS_TEST // If STRESS_TEST is true, BB frequency checks are ignored.2164TR::Block *loopEntry;2165int32_t loopEntryFrequency = -1;2166ListIterator<TR::Block> bi(&_bblistBody);2167for (loopEntry = bi.getFirst(); loopEntry; loopEntry = bi.getNext())2168{2169if (loopEntryFrequency < loopEntry->getFrequency())2170loopEntryFrequency = loopEntry->getFrequency();2171}2172if (trace()) traceMsg(comp(), "\t\tLoop Frequency=%d\n",loopEntryFrequency); // the freq of the loop entry2173if (loopEntryFrequency <= 0)2174{2175#if ALLOW_FAST_VERSIONED_LOOP2176highFrequency = isInsideOfFastVersioned;2177#else2178highFrequency = false;2179#endif2180}2181else2182{2183TR::Block *outer = findPredecessorBlockOfLoopEntry(naturalLoop);2184if (!outer || outer->getFrequency() < 0)2185{2186// If there is no freq information for the predecessor BB of the loop2187if (_bblistSucc.isSingleton())2188{2189// Try the successor block if it is single.2190outer = _bblistSucc.getListHead()->getData();2191if (outer->getFrequency() > loopEntryFrequency) outer = 0;2192}2193if (!outer || outer->getFrequency() < 0)2194{2195// Use the freq of the method entry block for the reference.2196outer = _cfg->getStart()->getSuccessors().front()->getTo()->asBlock();2197}2198}2199if (outer)2200{2201int32_t outerFrequency = outer->getFrequency();2202if (outerFrequency < 1) outerFrequency = 1;2203if (trace()) traceMsg(comp(), "\t\tOuter block %d: Frequency=%d Inner/Outer Ratio:(%f)\n",outer->getNumber(),outerFrequency, (double)loopEntryFrequency/(double)outerFrequency);2204if (loopEntryFrequency < outerFrequency * cg()->arrayTranslateAndTestMinimumNumberOfIterations()) highFrequency = false;2205}2206}2207#endif2208if (trace()) traceMsg(comp(), "\t\thighFrequency=%d\n",highFrequency);2209graph->setHotness(comp()->getMethodHotness(), highFrequency);2210graph->setInsideOfFastVersioned(isInsideOfFastVersioned);2211}22122213221422152216//*****************************************************************************************2217// Tree Simplification for the Idiom Recognition2218//*****************************************************************************************2219/*2220ificmpxx2221isub2222iconst A2223iload B2224iconst C2225|2226V2227ificmpyy (yy is the swapChildrenOpCodes of xx)2228iload B2229iconst A-C2230-------------2231ificmpxx2232iadd2233iconst A2234iload B2235iconst C2236|2237V2238ificmpxx2239iload B2240iconst C-A2241-------------2242ificmplt2243isub2244iload A2245iload B2246iconst 12247|2248V2249ificmple2250isub2251iload A2252iload B2253iconst 02254|2255V2256ificmpge2257iload B2258iload A2259*/2260void2261TR_CISCTransformer::easyTreeSimplification(TR::Node *const node)2262{2263bool modified = false;2264if (node->getOpCode().isIf())2265{2266TR::Node *iconstC = node->getChild(1);2267if (iconstC->getOpCodeValue() != TR::iconst || iconstC->getReferenceCount() > 1) return;2268if (node->getOpCodeValue() == TR::ificmplt &&2269iconstC->getInt() == 1)2270{2271traceMsg(comp(), "\t\teasyTreeSimplification: Node: %p converted from ificmplt with 1 to ifcmple with 0", node);2272TR::Node::recreate(node, TR::ificmple);2273iconstC->setInt(0);2274}22752276TR::Node *addOrSub = node->getChild(0);2277if (!addOrSub->getOpCode().isAdd() && !addOrSub->getOpCode().isSub()) return;2278if (addOrSub->getReferenceCount() > 1) return;22792280TR::Node *iloadB = addOrSub->getChild(1);2281if (iloadB->getOpCodeValue() != TR::iload) return;2282if (iloadB->getReferenceCount() > 1) return;22832284TR::Node *iconstA = addOrSub->getChild(0);2285if (iconstA->getOpCodeValue() == TR::iconst)2286{2287if (addOrSub->getOpCode().isSub())2288{2289TR::Node::recreate(node, node->getOpCode().getOpCodeForSwapChildren());2290node->setAndIncChild(0, iloadB);2291iconstC->setInt(iconstA->getInt() - iconstC->getInt());2292}2293else2294{2295TR_ASSERT(addOrSub->getOpCode().isSub(), "error");2296node->setAndIncChild(0, iloadB);2297iconstC->setInt(iconstC->getInt() - iconstA->getInt());2298}2299addOrSub->recursivelyDecReferenceCount();2300modified = true;2301}2302else if (iconstA->getOpCodeValue() == TR::iload)2303{2304TR::Node *iloadA = iconstA;2305if (iloadA->getReferenceCount() > 1 ||2306!addOrSub->getOpCode().isSub()) return;2307if (node->getOpCodeValue() == TR::ificmple &&2308iconstC->getInt() == 0)2309{2310TR::Node::recreate(node, TR::ificmpge);2311node->setChild(0, iloadB);2312node->setChild(1, iloadA);2313modified = true;2314}2315}2316}2317if (modified && trace())2318traceMsg(comp(), "\t\teasyTreeSimplification: The tree %p is simplified.\n", node);2319}23202321232223232324//*****************************************************************************************2325// analyze the tree "top" and add each node2326//*****************************************************************************************2327TR_CISCNode *2328TR_CISCTransformer::addAllSubNodes(TR_CISCGraph *const graph, TR::Block *const block, TR::TreeTop *const top,2329TR::Node *const parent, TR::Node *const node, const int32_t dagId)2330{2331//IdiomRecognition doesn't know how to handle rdbar/wrtbar for now2332if (comp()->incompleteOptimizerSupportForReadWriteBarriers() &&2333(node->getOpCode().isReadBar() || node->getOpCode().isWrtBar()))2334return 0;2335int32_t i;2336int32_t numChildren = node->getNumChildren();2337TR_ScratchList<TR_CISCNode> childList(trMemory());2338vcount_t curVisit = comp()->getVisitCount();23392340if (node->getVisitCount() == curVisit) // already visited2341{2342TR_CISCNode *findCisc = graph->getCISCNode(node);2343return findCisc;2344}2345else2346{2347if (isAfterVersioning() || comp()->getMethodHotness() == warm)2348easyTreeSimplification(node);2349node->setVisitCount(curVisit);2350TR_CISCNode *newCisc = NULL;2351const int32_t opcode = node->getOpCodeValue();2352TR::DataType nodeDataType = node->getDataType();23532354//bool ret;2355bool isReplaceChild = true;2356const bool isSwitch = (opcode == TR::lookup || opcode == TR::table);23572358int32_t numCreateChildren;2359if (isSwitch)2360{2361numCreateChildren = 1;2362numChildren = node->getCaseIndexUpperBound();2363}2364else2365{2366numCreateChildren = numChildren; // for ops other than switches, process the children first2367}23682369if (parent &&2370((parent->getOpCodeValue() == TR::aiadd && node->getOpCodeValue() == TR::isub && node->getChild(0)->getOpCodeValue() != TR::imul) ||2371(parent->getOpCodeValue() == TR::aladd && node->getOpCodeValue() == TR::lsub && node->getChild(0)->getOpCodeValue() != TR::lmul)))2372{2373// In the internal graph representation, I explicitly represent an index as "index * 1"2374// for a byte memory access in order to merge byte and non-byte idioms.2375// Example:2376// aiadd2377// aload2378// isub2379// iload2380// iconst xx2381// |2382// V2383// aiadd2384// aload2385// isub2386// imul2387// iload2388// iconst 12389// iconst xx23902391TR_ASSERT(numCreateChildren == 2, "error");2392TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(0), dagId);2393if (!childCisc) return 0;23942395bool is64bit = node->getOpCodeValue() == TR::lsub;2396int opcodeConst = is64bit ? TR::lconst : TR::iconst;2397TR::DataType opcodeDataType = is64bit ? TR::Int64 : TR::Int32;23982399TR_CISCNode *const1 = graph->getCISCNode(opcodeConst, true, 1);2400if (!const1)2401{2402const1 = new (trHeapMemory()) TR_CISCNode(trMemory(), opcodeConst, opcodeDataType, graph->incNumNodes(), 0, 0, 0, 1);2403const1->setNewCISCNode();2404graph->addNode(const1, 0, 0, 0);2405}24062407TR_CISCNode *mul = new (trHeapMemory()) TR_CISCNode(trMemory(),2408is64bit ? TR::lmul : TR::imul,2409is64bit ? TR::Int64 : TR::Int32,2410graph->incNumNodes(), dagId, 1, 2);2411mul->setNewCISCNode();2412mul->setChildren(childCisc, const1);2413graph->addNode(mul, 0, 0, 0);2414childCisc = mul;2415childList.add(childCisc);2416if (_lastCFGNode)2417{2418_lastCFGNode->setSucc(0, mul); // _lastCFGNode must be the predecessor of newCisc.2419_lastCFGNode = mul;2420}24212422childCisc = addAllSubNodes(graph, block, top, node, node->getChild(1), dagId);2423if (!childCisc) return 0;2424childList.add(childCisc);2425}2426else2427{2428for (i = 0; i < numCreateChildren; i++)2429{2430TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(i), dagId);2431if (!childCisc) return 0;2432childList.add(childCisc);2433}2434}24352436if (node->getOpCode().isLoadVarDirect()) // direct loads (e.g. iload)2437{2438TR_ASSERT(childList.isEmpty(), "Loads must have no child");2439int32_t refNum = node->getSymbolReference()->getReferenceNumber();2440TR_CISCNode *variable = graph->getCISCNode(TR_variable, true, refNum);2441if (!variable)2442{2443variable = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 0,24440, 0, refNum);2445variable->addTrNode(block, top, node);2446graph->addNode(variable);2447}24482449numChildren = 1;2450newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,24511, numChildren);2452newCisc->setIsNegligible();2453newCisc->setIsLoadVarDirect();2454childList.add(variable);24552456}2457else if (node->getOpCode().isLoadConst()) // constants (e.g. iconst)2458{2459int32_t val;2460bool isConst = true;2461switch(opcode)2462{2463case TR::iconst:2464val = node->getInt();2465break;2466case TR::sconst:2467val = node->getShortInt();2468break;2469case TR::bconst:2470val = node->getByte();2471break;2472case TR::lconst:2473val = node->getLongIntLow();2474if ((int64_t)val != node->getLongInt())2475isConst = false;2476break;2477default:2478isConst = false;2479break;2480}24812482if (isConst)2483{2484newCisc = graph->getCISCNode(opcode, true, val);2485if (newCisc) // if the same node is already added2486graph->addTrNode(newCisc, block, top, node);2487else2488{2489newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), 0,24900, 0, val);2491graph->addNode(newCisc, block, top, node);2492}2493return newCisc;2494}2495newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,24961, numChildren);2497}2498else if (node->getOpCode().isStoreDirect())2499{2500if (childList.isSingleton()) // for normal direct store (e.g. istore)2501{2502isReplaceChild = false;2503TR_CISCNode *child = childList.getListHead()->getData();2504int refNum = node->getSymbolReference()->getReferenceNumber();2505TR_CISCNode *variable = graph->getCISCNode(TR_variable, true, refNum);2506if (!variable)2507{2508variable = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 0,25090, 0, refNum);2510variable->addTrNode(block, top, node);2511graph->addNode(variable);2512}2513if (!child->isInterestingConstant()) child->setDest(variable);25142515numChildren = 2;2516newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,25171, numChildren);2518newCisc->setIsStoreDirect();2519childList.add(variable);2520}2521else2522{2523newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,25241, numChildren);2525}2526}2527else2528{2529if (node->getOpCode().isCall() &&2530graph->isRecordingAspectsByOpcode()) return 0;2531int32_t nSucc = node->getOpCode().isIf() ? 2 : (isSwitch ? node->getCaseIndexUpperBound()-1 : 1);2532if (opcode == TR::Case)2533{2534TR_ASSERT(numChildren == 0, "TR::Case: numChildren != 0 ???");2535numChildren = 1;2536newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,2537nSucc, numChildren);2538childList.add(graph->getCISCNode(top->getNode()->getChild(0)));2539}2540else2541{2542newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,2543nSucc, numChildren);2544}2545switch(newCisc->getOpcode())2546{2547case TR::BBStart:2548case TR::BBEnd:2549newCisc->setOtherInfo(block->getNumber());2550// fall through2551case TR::Goto:2552case TR::asynccheck:2553case TR::allocationFence:2554case TR::treetop:2555case TR::table:2556case TR::lookup:2557case TR::PassThrough:2558newCisc->setIsNegligible(); // These opcodes are negligible.2559break;2560case TR::compressedRefs:2561// compressed refs are really just another kind of treetop so treat them accordingly2562{2563static const bool anchorIsNegligible = feGetEnv("TR_disableIRNegligibleCompressedRefs") == NULL;2564if (anchorIsNegligible)2565newCisc->setIsNegligible();2566}2567break;2568}25692570if (node->getOpCode().isBranch())2571{2572_backPatchList.add(newCisc); // The branch destination will be set later.2573}2574}25752576if (_lastCFGNode)2577{2578_lastCFGNode->setSucc(0, newCisc); // _lastCFGNode must be the predecessor of newCisc.2579}25802581if (newCisc->getNumSuccs() > 0) _lastCFGNode = newCisc;2582graph->addNode(newCisc, block, top, node);// add newCisc25832584if (isSwitch) // When the node is a switch, children will be added later for analyzing easier.2585{2586for (i = 1; i < numChildren; i++)2587{2588TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(i), dagId);2589if (!childCisc) return 0;2590childList.add(childCisc);2591switch (opcode)2592{2593case TR::lookup:2594if (i >= 1)2595{2596newCisc->setSucc(i-1, childCisc);2597if (i >= 2)2598childCisc->setOtherInfo(node->getChild(i)->getCaseConstant());2599}2600break;2601case TR::table:2602if (i >= 1)2603{2604newCisc->setSucc(i-1, childCisc);2605if (i >= 2)2606childCisc->setOtherInfo(i-2);2607}2608break;2609}2610}2611}26122613ListIterator<TR_CISCNode> ciscLi(&childList);2614TR_CISCNode *child = ciscLi.getFirst();2615if (isReplaceChild)2616{2617for (i = numChildren; --i >= 0; child = ciscLi.getNext())2618{2619if (child->getFirstDest())2620{2621newCisc->addHint(child);2622child = child->getFirstDest();2623}2624newCisc->setChild(i, child);2625}2626}2627else2628{2629for (i = numChildren; --i >= 0; child = ciscLi.getNext()) newCisc->setChild(i, child);2630}2631return newCisc;2632}2633}2634263526362637//*****************************************************************************************2638// Convert TR::Block to TR_CISCGraph2639//*****************************************************************************************2640bool2641TR_CISCTransformer::makeCISCGraphForBlock(TR_CISCGraph *graph, TR::Block *const block, int32_t dagId)2642{2643if (trace())2644traceMsg(comp(), "\t\tmakeCISCGraphForBlock: Building CISCGraph for block %d.\n", block->getNumber());26452646TR::TreeTop *top = block->getEntry();2647TR::TreeTop *end = block->getExit();26482649if (!top) return true;2650while(true)2651{2652if (!addAllSubNodes(graph, block, top, NULL, top->getNode(), dagId))2653{2654if (trace())2655traceMsg(comp(), "\t\tFailed to create CISCNode for Node %p in block %d : %p\n", top->getNode(), block->getNumber(), block);2656return false;2657}26582659if (top == end) break;2660top = top->getNextTreeTop();2661}2662if (_lastCFGNode)2663{2664if (!_backPatchList.find(_lastCFGNode))2665_backPatchList.add(_lastCFGNode);2666_lastCFGNode = 0;2667}2668return true;2669}267026712672//*****************************************************************************************2673// Resolve unknown destinations of branches2674// If the destination is not included in the region, it will set "exitNode" to the destination.2675//*****************************************************************************************2676void2677TR_CISCTransformer::resolveBranchTargets(TR_CISCGraph *graph, TR_CISCNode *exitNode)2678{2679ListIterator<TR_CISCNode> ci(&_backPatchList);2680TR_CISCNode *p;2681for (p = ci.getFirst(); p != 0; p = ci.getNext()) // each element of _backPatchList2682{2683TR_ASSERT(p->getTrNodeInfo()->isSingleton(), "each branch node must correspond to a single TR node");2684TR::Node *trNode = p->getHeadOfTrNodeInfo()->_node;2685TR_CISCNode *destCisc;2686TR::Node *destNode;26872688if (trNode->getOpCode().isBranch())2689{2690destNode = trNode->getBranchDestination()->getNode();2691destCisc = graph->getCISCNode(destNode);2692if (!destCisc) destCisc = exitNode; // set exitNode if the destination is outside2693p->setSucc(p->getNumSuccs()-1, destCisc);2694}2695else2696{2697// set fallthrough block2698destCisc = exitNode; // set exitNode as default2699if (trNode->getOpCodeValue() == TR::BBEnd)2700{2701TR::TreeTop *nextTreeTop = trNode->getBlock()->getExit()->getNextTreeTop();2702if (nextTreeTop)2703{2704destNode = nextTreeTop->getNode();2705destCisc = graph->getCISCNode(destNode);2706if (!destCisc) destCisc = exitNode; // set exitNode if the destination is outside2707}2708}2709p->setSucc(0, destCisc);2710}2711}27122713for (p = ci.getFirst(); p != 0; p = ci.getNext())2714{2715uint32_t numSuccs = p->getNumSuccs();2716if (numSuccs >= 2) // To exclude 0 and 1 quickly (reduce compilation time)2717{2718if (numSuccs == 2) // Typical case in "numSuccs >= 2" (reduce compilation time)2719{2720TR_CISCNode *succ0 = p->getSucc(0);2721TR_CISCNode *succ1 = p->getSucc(1);2722if (succ0->getOpcode() == TR::BBEnd) p->setSucc(0, (succ0 = succ0->getSucc(0)));2723if (succ1->getOpcode() == TR::BBEnd) p->setSucc(1, (succ1 = succ1->getSucc(0)));2724if (p->getHeadOfTrNodeInfo()->_node->getOpCode().isIf())2725{2726if (succ0->getOpcode() == TR_exitnode || // if the fallthrough edge goes to exitNode, or2727(p->getDagID() == succ1->getDagID() && // if the fallthrough edge goes to another dagId2728p->getDagID() != succ0->getDagID())) // and the branch edge goes to the same dagid2729{2730p->reverseBranchOpCodes(); // swap the fallthrough and branch edges for canonicalization2731}2732}2733}2734else2735{2736uint32_t idx;2737for (idx = 0; idx < numSuccs; idx++)2738{2739TR_CISCNode *s = p->getSucc(idx);2740if (s->getOpcode() == TR::BBEnd) p->setSucc(idx, s->getSucc(0));2741}2742TR_ASSERT(!p->getHeadOfTrNodeInfo()->_node->getOpCode().isIf(), "error");2743}2744}2745}2746}27472748274927502751uint16_t2752TR_CISCTransformer::renumberDagId(TR_CISCGraph *graph, int32_t tempMaxDagId, int32_t bodyDagId)2753{2754int32_t newDagId = 0, newBodyDagId = -1;2755List<TR_CISCNode> *orgNodes = graph->getNodes();2756ListElement<TR_CISCNode> *cur = NULL, *next = NULL, *appendCursor = NULL, *newListTop = NULL;2757// renumber the dagIds of the nodes in the graph2758// initially, the exitNode, entryNode and treetop (eg. loads, stores, add/sub, ifcmp)2759// nodes in the loop are assigned ids of 3, 0, 2 resp.2760// for eg., the dagIds of nodes may look like:2761// 0L iconst -16 [] []2762// 2L isub [] [40 41]2763// 2L aiadd [] [39 42]2764// 0L bconst 1 [] []2765// 2L ibstore [] [43 44]2766// 2L iadd [] [40 22]2767// 2L istore [] [46 4]2768// 2L ificmpge [] [4 6]2769// 2L BBEnd [] []2770// 3L exitnode [] []2771//2772// the loop walks the list of nodes, reassigning ids so that exitNode gets 0, entryNode2773// gets 2 and all nodes in the loop (e.g. with 2L) get id 1. the children of these nodes2774// will get unique dagIds. note, we need 3 ptrs as we might need to hop over some nodes2775// during any iteration of the for loop2776#if 02777for (int32_t dId = tempMaxDagId; dId >= 0; dId--)2778{2779cur = orgNodes->getListHead();2780ListElement<TR_CISCNode> *prev = 0;2781while (cur)2782{2783next = cur->getNextElement();2784if (cur->getData()->getDagID() == dId)2785{2786cur->getData()->setDagID(newDagId);2787// if node belongs to outside of the loop,2788// give it a unique id, otherwise it belongs to the scc2789if (dId != bodyDagId)2790newDagId++;2791else2792newBodyDagId = newDagId;2793cur->setNextElement(0);2794if (!appendCursor)2795{2796newListTop = cur;2797appendCursor = cur;2798}2799else2800{2801appendCursor->setNextElement(cur);2802appendCursor = cur;2803}2804if (!prev)2805orgNodes->setListHead(next);2806else2807prev->setNextElement(next);2808}2809else2810prev = cur;2811cur = next;2812}2813if (dId == bodyDagId)2814{2815newBodyDagId = newDagId;2816newDagId++;2817}2818}2819#else2820for (int32_t dId = tempMaxDagId; dId >= 0; dId--)2821{2822while (true)2823{2824cur = orgNodes->getListHead();2825if (!cur || cur->getData()->getDagID() != dId) break;2826orgNodes->setListHead(cur->getNextElement());2827cur->getData()->setDagID(newDagId);2828if (dId != bodyDagId)2829newDagId++; // outside of the loop body2830else2831newBodyDagId = newDagId; // inside of the loop body2832cur->setNextElement(0);2833if (!appendCursor)2834{2835newListTop = cur;2836appendCursor = cur;2837}2838else2839{2840appendCursor->setNextElement(cur);2841appendCursor = cur;2842}2843}2844if (cur)2845{2846while (true)2847{2848next = cur->getNextElement();2849if (!next) break;2850if (next->getData()->getDagID() == dId)2851{2852cur->setNextElement(next->getNextElement());2853next->getData()->setDagID(newDagId);2854if (dId != bodyDagId)2855newDagId++; // outside of the loop body2856else2857newBodyDagId = newDagId; // inside of the loop body2858next->setNextElement(0);2859if (!appendCursor)2860{2861newListTop = next;2862appendCursor = next;2863}2864else2865{2866appendCursor->setNextElement(next);2867appendCursor = next;2868}2869}2870else2871{2872cur = next;2873}2874}2875}2876if (dId == bodyDagId)2877{2878newBodyDagId = newDagId;2879newDagId++;2880}2881}2882#endif2883TR_ASSERT(orgNodes->isEmpty(), "there are elements in orgNodes");2884orgNodes->setListHead(newListTop);2885graph->setNoFragmentDagId();2886graph->setNumDagIds(newDagId);2887return (uint16_t)newBodyDagId;2888}2889289028912892// make our internal graph representation from the input IL code.2893//2894TR_CISCGraph *2895TR_CISCTransformer::makeCISCGraph(List<TR::Block> *pred,2896List<TR::Block> *body,2897List<TR::Block> *succ)2898{2899int32_t dagId = 1, bodyDagId;2900TR_CISCGraph *graph = new (trHeapMemory()) TR_CISCGraph(trMemory(), comp()->signature());2901TR::Block *block;2902ListIterator<TR::Block> bi(pred);2903//ListElement<TR_CISCNode> *head;29042905graph->setRecordingAspectsByOpcode(false); // Stop recording aspects outside of the loop2906_backPatchList.init();2907comp()->incVisitCount();29082909// make entry node2910TR_CISCNode *newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), dagId, 1, 0);2911graph->setEntryNode(newCisc);2912graph->addNode(newCisc);2913_lastCFGNode = newCisc;29142915static const bool includePreds = feGetEnv("TR_idiomIncludePreds") != NULL;2916if (includePreds)2917{2918if (trace())2919traceMsg(comp(), "\tmakeCISCGraph: Building CISCGraph for Predecessor Blocks.\n");2920for (block = bi.getFirst(); block != 0; block = bi.getNext())2921{2922if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;2923}2924}29252926dagId++;2927bodyDagId = dagId;2928bi.set(body);2929if (trace())2930traceMsg(comp(), "\tmakeCISCGraph: Building CISCGraph for Loop Body Blocks.\n");2931graph->setRecordingAspectsByOpcode(true); // Start recording aspects inside of the loop2932for (block = bi.getFirst(); block != 0; block = bi.getNext())2933{2934if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;2935}2936graph->setRecordingAspectsByOpcode(false); // Stop recording aspects outside of the loop293729382939dagId++;2940#if 02941bi.set(succ);2942for (block = bi.getFirst(); block != 0; block = bi.getNext())2943{2944if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;2945}2946#endif29472948TR_CISCNode *exitNode = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), dagId, 0, 0);2949graph->addNode(exitNode);2950graph->setExitNode(exitNode);2951if (_lastCFGNode)2952{2953_lastCFGNode->setSucc(0, exitNode);2954_lastCFGNode = 0;2955}29562957// add "iconst -ahsize" if nonexistent2958int32_t ahsize = -(int32_t)TR::Compiler->om.contiguousArrayHeaderSizeInBytes();2959uint32_t opcode = comp()->target().is64Bit() ? TR::lconst : TR::iconst;2960TR::DataType nodeDataType = comp()->target().is64Bit() ? TR::Int64 : TR::Int32;29612962if (!graph->getCISCNode(opcode, true, ahsize))2963{2964graph->addNode(new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), 0, 0, 0, ahsize));2965}29662967bodyDagId = renumberDagId(graph, dagId, bodyDagId);2968resolveBranchTargets(graph, exitNode);2969// setup the dagIds2Nodes table & a list containing2970// all the nodes except BBStarts and BBEnds2971graph->createInternalData(bodyDagId);2972graph->modifyTargetGraphAspects();29732974return graph;2975}2976297729782979//***************************************************************************************2980// Main driver for TR_CISCTransformer2981//***************************************************************************************2982int32_t TR_CISCTransformer::perform()2983{29842985//TO_BE_ENABLED2986///return 0;29872988static int enable = -1;2989if (enable < 0)2990{2991char *p = feGetEnv("DISABLE_CISC");2992enable = p ? 0 : 1;2993}2994static int disableLoopNumber = -1;2995if (disableLoopNumber == -1)2996{2997char *p = feGetEnv("DISABLE_LOOP_NUMBER");2998disableLoopNumber = p ? atoi(p) : -2;2999}3000static int showStdout = -1;3001if (showStdout == -1)3002{3003char *p = feGetEnv("traceCISCVerbose");3004showStdout = p ? 1 : 0;3005}30063007//FIXME: add TR_EnableIdiomRecognitionWarm to options3008int32_t methodHotness = scorching;3009//_compilation->getOption(TR_EnableIdiomRecognitionWarm) ? scorching :3010//_compilation->getMethodHotness();3011//FIXME: remove this code if it works30123013TR::Recompilation *recompInfo = comp()->getRecompilationInfo();3014if (!comp()->mayHaveLoops() ||3015methodHotness < minimumHotnessPrepared ||3016comp()->getProfilingMode() == JitProfiling || // if this method is now profiled3017!enable)3018return 0;30193020_useDefInfo = optimizer()->getUseDefInfo();3021if (_useDefInfo == 0)3022return 0;30233024// Required beyond the scope of the stack memory region3025int32_t cost = 0;30263027{3028TR::StackMemoryRegion stackMemoryRegion(*trMemory());30293030_cfg = comp()->getFlowGraph();3031_rootStructure = _cfg->getStructure();3032_nodesInCycle = new (trStackMemory()) TR_BitVector(_cfg->getNextNodeNumber(), trMemory(), stackAlloc);3033_isGenerateI2L = comp()->target().is64Bit();3034_showMesssagesStdout = (VERBOSE || showStdout);30353036// make loop candidates3037List<TR_RegionStructure> loopCandidates(trMemory());30383039if (createLoopCandidates(&loopCandidates))3040{3041TR_CFGReversePostOrder revPost(trMemory());3042ListIterator<TR::CFGNode> revPostIterator(revPost.compute(_cfg));3043if (trace())3044revPost.dump(comp());30453046bool modified = false;3047if (showMesssagesStdout()) printf("\nStarting CISCTransformer %s, %s\n",3048comp()->getHotnessName(comp()->getMethodHotness()),3049comp()->signature());3050if (trace())3051{3052traceMsg(comp(), "Starting CISCTransformer\n");3053comp()->dumpMethodTrees("Trees before transforming CISC instructions");3054}30553056ListIterator<TR_RegionStructure> loopIt(&loopCandidates);30573058TR_RegionStructure *nextLoop;30593060List<TR::Block> bblistPred(comp()->trMemory());3061List<TR::Block> bblistBody(comp()->trMemory());3062List<TR::Block> bblistSucc(comp()->trMemory());3063List<TR_BitsKeepAliveInfo> BitsKeepAliveList(comp()->trMemory());3064_BitsKeepAliveList = BitsKeepAliveList;30653066int32_t numNodes = _cfg->getNextNodeNumber();3067TR_BitVector cfgBV(numNodes, trMemory(), stackAlloc);3068TR::CFGNode *cfgnode;3069int loopNumber = 0;30703071for (nextLoop = loopIt.getFirst(); nextLoop != 0; nextLoop = loopIt.getNext())3072{3073// make bb list for each loop3074TR::Block *block;3075bblistPred.init();3076bblistBody.init();3077bblistSucc.init();30783079// add predecessors of loop header as long as there is no merge point. (i.e. extended BB)3080TR::Block *predBlock = findPredecessorBlockOfLoopEntry(nextLoop);3081if (predBlock && predBlock->getEntry())3082{3083TR_RegionStructure *region = NULL;30843085// Get the parent of the region the outer region3086TR_Structure* blockStructure = predBlock->getStructureOf();3087if (blockStructure)3088region = (TR_RegionStructure*) blockStructure->getParent();30893090if (region != NULL)3091{3092TR::CFGNode* cfgStart = _cfg->getStart();3093while(predBlock != cfgStart)3094{3095if (predBlock->getNumberOfRealTreeTops() > 300)3096{3097if (trace())3098traceMsg(comp(), "Skip the predecessor %d, because it has too many TreeTops (%d).\n", predBlock->getNumber(), predBlock->getNumberOfRealTreeTops());3099break; // To reduce unnecessary compilation time3100}31013102bblistPred.add(predBlock);31033104TR::CFGEdgeList pred = predBlock->getPredecessors();31053106if (!(pred.size() == 1)) // Check if there exists more than one predecessor ==> merge point.3107break;31083109predBlock = toBlock(pred.front()->getFrom());31103111// Get the parent of the region the outer region3112TR_RegionStructure* predBlockRegion = NULL;3113blockStructure = predBlock->getStructureOf();3114if (blockStructure)3115{3116predBlockRegion = (TR_RegionStructure*) blockStructure->getParent();3117}31183119// Check to see if predBlock is the same.3120if (region != predBlockRegion)3121{3122if (trace())3123traceMsg(comp(), "Skip the predecessor block_%d, because it is within another region/loop.\n",predBlock->getNumber());3124break;3125}3126}3127}3128}312931303131// add BBs of loop body3132TR_RegionStructure::Cursor si(*nextLoop);3133TR_StructureSubGraphNode *node;3134cfgBV.empty();3135for (node = si.getFirst(); node != 0; node = si.getNext())3136{3137cfgBV.set(node->getNumber());3138}31393140ListAppender<TR::Block> appender(&bblistBody);3141block = nextLoop->getEntry()->getStructure()->asBlock()->getBlock();3142for (cfgnode = revPostIterator.getFirst(); cfgnode; cfgnode = revPostIterator.getNext())3143{3144if (block == cfgnode->asBlock())3145break;3146}31473148if (cfgnode == 0)3149continue;31503151for (; cfgnode; cfgnode = revPostIterator.getNext())3152{3153int32_t bbnum = cfgnode->getNumber();3154if (cfgBV.isSet(bbnum))3155{3156block = cfgnode->asBlock();3157TR_ASSERT(block->getEntry() != 0, "assuming block->getEntry() != 0");3158appender.add(block);3159cfgBV.reset(bbnum);3160if (cfgBV.isEmpty())3161break;3162}3163}31643165if (!cfgBV.isEmpty())3166{3167for (cfgnode = revPostIterator.getFirst(); cfgnode; cfgnode = revPostIterator.getNext())3168{3169int32_t bbnum = cfgnode->getNumber();3170if (cfgBV.isSet(bbnum))3171{3172block = cfgnode->asBlock();3173TR_ASSERT(block->getEntry() != 0, "assuming block->getEntry() != 0");3174appender.add(block);3175cfgBV.reset(bbnum);3176if (cfgBV.isEmpty())3177break;3178}3179}3180}31813182// add exit blocks of the loop3183ListIterator<TR::CFGEdge> ei(&nextLoop->getExitEdges());3184TR::CFGEdge *edge;3185for (edge = ei.getFirst(); edge != 0; edge = ei.getNext())3186{3187TR::CFGNode *from = edge->getFrom();3188TR::CFGNode *to = edge->getTo();3189int32_t toNum = to->getNumber();3190if (!toStructureSubGraphNode(from)->getStructure()->asBlock()) continue;3191from = toStructureSubGraphNode(from)->getStructure()->asBlock()->getBlock();3192// get the corresponding cfg edge3193//3194auto edgeFrom = from->getSuccessors().begin();3195for (; edgeFrom != from->getSuccessors().end(); ++edgeFrom)3196{3197block = toBlock((*edgeFrom)->getTo());3198if (block->getNumber() == toNum)3199break;3200}3201if (edgeFrom != from->getSuccessors().end())3202{3203TR_ASSERT(block->getNumber() == toNum, "error block->getNumber() != toNum");3204if (block->getEntry() &&3205!bblistSucc.find(block) &&3206!bblistPred.find(block))3207bblistSucc.add(block);3208}3209}32103211_bblistPred = bblistPred;3212_bblistBody = bblistBody;3213_bblistSucc = bblistSucc;32143215if (trace())3216{3217traceMsg(comp(), "Loop %d.\n\tAnalyzed predecessor, body and successor blocks.\n", nextLoop->getNumber());3218traceMsg(comp(), "\t\tPredecessors blocks:");3219ListIterator<TR::Block> bi(&bblistPred);3220for (block = bi.getFirst(); block != 0; block = bi.getNext())3221traceMsg(comp(), " %d:[%p]",block->getNumber(),block);3222traceMsg(comp(), "\n");32233224traceMsg(comp(), "\t\tBody blocks:");3225bi.set(&bblistBody);3226for (block = bi.getFirst(); block != 0; block = bi.getNext())3227traceMsg(comp(), " %d:[%p]",block->getNumber(),block);3228traceMsg(comp(), "\n");32293230traceMsg(comp(), "\t\tSuccessors blocks:");3231bi.set(&bblistSucc);3232for (block = bi.getFirst(); block != 0; block = bi.getNext())3233traceMsg(comp(), " %d:[%p]",block->getNumber(),block);3234traceMsg(comp(), "\n");3235}32363237// Iterate through the loop body blocks to remove Bits.keepAlive() or Reference.reachabilityFence() calls.3238// This is a NOP function inserted into NIO libraries to keep the NIO object and its native3239// ptr alive until after the native pointer accesses.3240removeBitsKeepAliveCalls(&bblistBody);324132423243// make our internal graph representation3244TR_CISCGraph *graph;3245graph = makeCISCGraph(&bblistPred, &bblistBody, &bblistSucc);3246if (!graph)3247{3248if (trace()) traceMsg(comp(), "Loop %d. Failed to make CISC Graph.\n", nextLoop->getNumber());3249restoreBitsKeepAliveCalls();3250continue;3251}3252if (loopNumber++ == disableLoopNumber)3253{3254restoreBitsKeepAliveCalls();3255continue; // for debug purpose3256}32573258analyzeHighFrequencyLoop(graph, nextLoop); // Analyze if frequently iterated loop.32593260setCurrentLoop(nextLoop);32613262if (trace())3263graph->dump(comp()->getOutFile(), comp());32643265bool modifiedThisLoop = false;3266_candidatesForShowing.init();3267for (int32_t i = 0; i < numPreparedCISCGraphs; i++) // for each idiom3268{3269TR_CISCGraph *prepared = preparedCISCGraphs[i];3270if (prepared)3271{3272if (computeTopologicalEmbedding(prepared, graph))// Analyze matching for the idiom3273{3274modified = true; // IL is modified3275modifiedThisLoop = true;3276if (trace())3277{3278traceMsg(comp(), "Transformed %s\n", prepared->getTitle());3279comp()->dumpMethodTrees("Trees after transforming CISC instruction");3280}3281break;3282}3283}3284}3285// Restore the keepAliveCalls3286restoreBitsKeepAliveCalls();32873288if (!modifiedThisLoop) showCandidates();3289}329032913292if (modified)3293{3294_cfg->setStructure(0);3295// Use/def info and value number info are now bad.3296//3297optimizer()->setUseDefInfo(NULL);3298optimizer()->setValueNumberInfo(0);32993300if (trace())3301{3302traceMsg(comp(), "Ending CISCTransformer\n");3303comp()->dumpFlowGraph();3304comp()->dumpMethodTrees("Trees after transforming CISC instructions");3305}3306}33073308if (showMesssagesStdout()) printf("Exiting CISCTransformer\n");3309cost = 1;3310}3311} // scope for stack memory region33123313manager()->incNumPassesCompleted();3314return cost;3315}33163317const char *3318TR_CISCTransformer::optDetailString() const throw()3319{3320return "O^O IDIOM RECOGNITION: ";3321}33223323// check if the block is really in the loop body3324//3325bool3326TR_CISCTransformer::isBlockInLoopBody(TR::Block *block)3327{3328ListIterator<TR::Block> bi(&_bblistBody);3329for (TR::Block *b = bi.getFirst(); b; b = bi.getNext())3330{3331if (block->getNumber() == b->getNumber())3332return true;3333}3334return false;3335}333633373338void3339TR_CISCTransformer::showEmbeddedData(char *title, uint8_t *data)3340{3341int32_t i, j;3342traceMsg(comp(), "%s\n ",title);3343for (j = 0; j < _numPNodes; j++)3344{3345traceMsg(comp(), "%3d",j);3346}3347traceMsg(comp(), "\n --");3348for (j = 0; j < _numPNodes; j++)3349{3350traceMsg(comp(), "---");3351}3352traceMsg(comp(), "\n");3353for (i = 0; i < _numTNodes; i++)3354{3355traceMsg(comp(), "%3d:",i);3356for (j = 0; j < _numPNodes; j++)3357{3358uint8_t this_result = data[idx(j, i)];3359if (this_result == _Unknown || this_result == _NotEmbed)3360traceMsg(comp(), "| ");3361else3362traceMsg(comp(), "| %X",data[idx(j, i)]);3363}3364traceMsg(comp(), "\n");3365}3366}336733683369//***************************************************************************************3370// It returns the result whether parents of p correspond to those of t.3371// It also checks whether the expression is commutative.3372//***************************************************************************************3373bool3374TR_CISCTransformer::checkParents(TR_CISCNode *p, TR_CISCNode *t, uint8_t *result, bool *inLoop, bool *optionalParents)3375{3376ListIterator<TR_CISCNode> pi(p->getParents());3377ListIterator<TR_CISCNode> ti(t->getParents());3378TR_CISCNode *pn, *tn;3379bool isTargetInsideOfLoop = false;3380bool allOptionalParents = true;3381for (pn = pi.getFirst(); pn; pn = pi.getNext()) // for each parent of p3382{3383uint32_t pnOpc = pn->getOpcode();3384uint32_t tmpIdx = idx(pn->getID(), 0);3385int32_t pIndex = 0;3386const bool commutative = pn->isCommutative();3387const bool isPnInsideOfLoop = !pn->isOutsideOfLoop();3388if (!commutative)3389{3390for (pIndex = pn->getNumChildren(); --pIndex >= 0; )3391{3392if (pn->getChild(pIndex) == p) break;3393}3394}3395TR_ASSERT(pIndex >= 0, "error!");3396for (tn = ti.getFirst(); tn; tn = ti.getNext()) // for each parent of t3397{3398if (isPnInsideOfLoop && tn->isOutsideOfLoop())3399continue;3400if (pn->isEqualOpc(tn))3401{3402if (result[tmpIdx + tn->getID()] == _Embed &&3403(commutative || tn->getChild(pIndex) == t)) break;3404}3405else3406{3407if (tn->getIlOpCode().isLoadVarDirect()) /* search one more depth */3408{3409ListIterator<TR_CISCNode> tci(tn->getParents());3410TR_CISCNode *tcn;3411for (tcn = tci.getFirst(); tcn; tcn = tci.getNext())3412{3413if (pn->isEqualOpc(tcn) &&3414result[tmpIdx + tcn->getID()] == _Embed &&3415(commutative || tcn->getChild(pIndex) == tn)) break;3416}3417if (tcn) break;3418}3419}3420}3421if (!tn)3422{3423if (!pn->isOptionalNode())3424{3425return false;3426}3427else3428{3429if (!pn->getParents()->isEmpty() && !pn->isSkipParentsCheck())3430{3431bool nextInLoop = false;3432bool nextOptionalParents = false;3433if (checkParents(pn, t, result, &nextInLoop, &nextOptionalParents))3434{3435if (!nextOptionalParents) allOptionalParents = false;3436if (nextInLoop) isTargetInsideOfLoop = true;3437}3438else3439{3440return false;3441}3442}3443}3444}3445else3446{3447if (!pn->isOptionalNode()) allOptionalParents = false;3448if (!tn->isOutsideOfLoop()) isTargetInsideOfLoop = true;3449}3450}3451*optionalParents = allOptionalParents;3452*inLoop = isTargetInsideOfLoop;3453return true;3454}345534563457//***************************************************************************************3458// It computes embedding information for an input data dependence graph.3459// Because this graph consists of a directed acyclic graph (DAG), it handles only a DAG.3460// It uses the topological embedding algorithm (dag_embed) by walking data dependence graph3461// from leaf to root and find the candidate nodes.3462// Note that we need to find candidates among the leaf nodes in the data dependence graph.3463// The function checkParentsNonRec() finds candidates of leaf nodes by analyzing their ancestors.3464// When there are multiple candidates of a leaf, we will exclude those candidates3465// which are unlikely to be matched to the leaf.3466// In this case, we will perform the topological embedding again.3467//***************************************************************************************3468bool3469TR_CISCTransformer::computeEmbeddedForData()3470{3471uint8_t *const result = _embeddedForData;3472bool ret = false;3473bool skipScreening = false;3474const bool enableWriteBarrierConversion = TR::Compiler->om.writeBarrierType() == gc_modron_wrtbar_none;34753476memset(result, 0, _sizeResult);3477TR_CISCNode *p, *t;3478ListElement<TR_CISCNode> *const plistHead = _P->getOrderByData()->getListHead();3479ListElement<TR_CISCNode> *const tlistHead = _T->getOrderByData()->getListHead();3480ListElement<TR_CISCNode> *ple, *tle;3481uint32_t i;34823483while(true) // This loop is for narrowing down candidates of a leaf.3484{3485// Perform topological embedding algorithm (dagEmbed)3486for (ple = plistHead; ple; ple = ple->getNextElement())3487{3488p = ple->getData();3489const uint32_t pOpc = p->getOpcode();3490const uint32_t tmpIdx = idx(p->getID(), 0);3491const int32_t pOtherInfo = p->getOtherInfo();3492const uint16_t numPChi = p->getNumChildren();3493const bool isVariable = (pOpc == TR_variable);3494const bool isBoolTable = (pOpc == TR_booltable);3495const bool isAllowWrtbar = enableWriteBarrierConversion && (pOpc == TR_inbstore || pOpc == TR_indstore);3496const bool isCheckOtherInfo = p->isInterestingConstant();3497const bool isChildDirectlyConnected = p->isChildDirectlyConnected();3498const bool isNecessaryScreening = p->isNecessaryScreening();3499const bool commutative = p->isCommutative();3500const bool checkLoopInvariant =3501pOpc == TR_variableORconst3502|| pOpc == TR_quasiConst3503|| pOpc == TR_quasiConst23504|| pOpc == TR_arraybase;3505const bool isOptionalNode = p->isOptionalNode();3506bool existEmbed = false;3507tle = tlistHead;3508while(true)3509{3510t = tle->getData();3511const uint16_t numTChi = t->getNumChildren();3512const uint32_t tOpc = t->getOpcode();3513const uint32_t index = tmpIdx + t->getID();3514tle = tle->getNextElement();3515bool isEmbed = true;35163517// check degree3518if (numPChi != numTChi && numPChi != 0)3519{3520if ((!isBoolTable || numTChi < 1) &&3521(!isAllowWrtbar || numTChi < 2))3522isEmbed = false;3523}35243525if (isEmbed)3526{3527if (skipScreening &&3528isNecessaryScreening)3529{3530existEmbed = true;3531isEmbed = (result[index] == _Embed);3532}3533else if ((isEmbed = p->isEqualOpc(t)) == true)3534{3535// if its a constant, check if values match3536if (isCheckOtherInfo &&3537pOtherInfo != t->getOtherInfo())3538{3539isEmbed = false;3540}3541else if (isNecessaryScreening)3542{3543if (checkLoopInvariant)3544{3545if (tOpc == TR_variable)3546{3547ListIterator<TR_CISCNode> parenti(t->getParents());3548TR_CISCNode *parent;3549for (parent = parenti.getFirst(); parent; parent = parenti.getNext())3550{3551if (!parent->isOutsideOfLoop() &&3552parent->getIlOpCode().isStoreDirect())3553{3554if (trace()) traceMsg(comp(), "pID%d: tID%d isn't loop invariant because of %d\n", p->getID(), t->getID(), parent->getID());3555isEmbed = false;3556break;3557}3558}3559}3560}35613562// finds candidates of leaf nodes by analyzing their ancestors.3563//isEmbed = p->checkParents(t, DEPTH_CHECKPARENTS);3564//FIXME: enable the recursive routine3565//3566if (isEmbed) isEmbed = TR_CISCNode::checkParentsNonRec(p, t, DEPTH_CHECKPARENTS, comp());3567}3568}35693570if (isEmbed)3571{3572if (p->isStoreDirect())3573{3574isEmbed = false;3575TR_ASSERT(numPChi == 2, "error");3576uint8_t chiData = result[idx(p->getChild(0)->getID(), t->getChild(0)->getID())];3577if (chiData == _Embed || (chiData == _Desc && !isChildDirectlyConnected))3578{3579chiData = result[idx(p->getChild(1)->getID(), t->getChild(1)->getID())];3580if (chiData == _Embed) // allow only _Embed3581isEmbed = true;3582}3583}3584else3585{3586// For commutative expressions, we try to swap operands for the comparison.3587if (commutative && numPChi == 2)3588{3589TR_CISCNode *pch0 = p->getChild(0);3590TR_CISCNode *tch0 = t->getChild(0);3591TR_CISCNode *tch1 = t->getChild(1);3592uint32_t pIdx0 = idx(pch0->getID(), 0);3593uint8_t chiData00 = result[pIdx0 + tch0->getID()];3594uint8_t chiData01 = result[pIdx0 + tch1->getID()];3595if ((chiData00 == _Embed || chiData01 == _Embed) ||3596((chiData00 == _Desc) &&3597(!isChildDirectlyConnected ||3598(tch0->isLoadVarDirect() &&3599_Embed == result[pIdx0 + tch0->getChild(0)->getID()]))) ||3600((chiData01 == _Desc) &&3601(!isChildDirectlyConnected ||3602(tch1->isLoadVarDirect() &&3603_Embed == result[pIdx0 + tch1->getChild(0)->getID()]))))3604{3605// OK!3606TR_CISCNode *pch1 = p->getChild(1);3607uint32_t pIdx1 = idx(pch1->getID(), 0);3608uint8_t chiData10 = result[pIdx1 + tch0->getID()];3609uint8_t chiData11 = result[pIdx1 + tch1->getID()];3610if ((chiData10 == _Embed || chiData11 == _Embed) ||3611((chiData10 == _Desc) &&3612(!isChildDirectlyConnected ||3613(tch0->isLoadVarDirect() &&3614_Embed == result[pIdx1 + tch0->getChild(0)->getID()]))) ||3615((chiData11 == _Desc) &&3616(!isChildDirectlyConnected ||3617(tch1->isLoadVarDirect() &&3618_Embed == result[pIdx1 + tch1->getChild(0)->getID()]))))3619{3620// OK!3621}3622else3623{3624isEmbed = false;3625}3626}3627else3628{3629isEmbed = false;3630}3631}3632else3633{3634for (i = 0; i < numPChi; i++)3635{3636TR_CISCNode *pch = p->getChild(i);3637TR_CISCNode *tch = t->getChild(i);3638uint8_t chiData = result[idx(pch->getID(), tch->getID())];3639if (chiData == _Embed ||3640((chiData == _Desc) &&3641(!isChildDirectlyConnected ||3642(tch->isLoadVarDirect() &&3643_Embed == result[idx(pch->getID(), tch->getChild(0)->getID())]))))3644{3645// OK!3646}3647else3648{3649isEmbed = false;3650break;3651}3652}3653}3654}3655}3656}36573658if (isEmbed)3659{3660TR_ASSERT(index == idx(p->getID(), t->getID()), "error");3661result[index] = _Embed;3662existEmbed = true;3663}3664else3665{3666if (numTChi == 1) // for reducing compilation time3667{3668if (tOpc != TR::arraylength) // The type of the child (ref) is different from the type of destination (int).3669{3670uint8_t chiData = result[tmpIdx + t->getChild(0)->getID()];3671isEmbed = isDescOrEmbed(chiData);3672}3673}3674else if (numTChi == 2) // for reducing compilation time3675{3676uint16_t idTChi0 = t->getChild(0)->getID();3677uint16_t idTChi1 = t->getChild(1)->getID();3678uint8_t chiData0 = result[tmpIdx + idTChi0];3679uint8_t chiData1 = result[tmpIdx + idTChi1];3680isEmbed = isDescOrEmbed(chiData0) | isDescOrEmbed(chiData1);3681}3682else // Natural code3683{3684for (i = 0; i < numTChi; i++)3685{3686uint8_t chiData = result[tmpIdx + t->getChild(i)->getID()];3687if (isDescOrEmbed(chiData))3688{3689isEmbed = true;3690break;3691}3692}3693}36943695if (isOptionalNode && !isEmbed)3696{3697for (i = 0; i < numPChi; i++)3698{3699uint8_t chiData = result[idx(p->getChild(i)->getID(), t->getID())];3700if (chiData == _Desc || chiData == _Embed)3701{3702isEmbed = true;3703break;3704}3705}3706}3707TR_ASSERT(index == idx(p->getID(), t->getID()), "error");3708if (isEmbed)3709{3710result[index] = _Desc;3711if (t->isStoreDirect())3712{3713int32_t childIdx = tmpIdx + t->getChild(1)->getID();3714result[childIdx] |= _Desc;3715}3716}3717else3718result[index] = _NotEmbed;3719}3720if (!tle) break;3721}3722if (!existEmbed && !isOptionalNode) // cannot find any nodes corresponding to p3723{3724if (trace())3725{3726traceMsg(comp(), "data dag embedding failed for node %d.\n", p->getID());3727showEmbeddedData("Result of _embeddedForData", result);3728}3729return false;3730}3731}3732// Finish topological embedding algorithm (dagEmbed)373337343735// From here, I'd like to exclude those candidates3736// which are unlikely to be matched to each leaf.37373738//showEmbeddedData("before screening1", result);3739skipScreening = true;3740bool modifyEmbeddedResult = false;3741TR_ScratchList<TR_CISCNode> singleList(comp()->trMemory()), multiList(comp()->trMemory());37423743// This loop tries to exclude candidates by analyzing parents3744// It also creates two lists singleList and multiList.3745// * singleList has a pattern leaf node corresponding to a single target node3746// * multiList has a pattern leaf node corresponding to multiple target nodes3747for (ple = plistHead; ple; ple = ple->getNextElement())3748{3749p = ple->getData();3750if (!p->isNecessaryScreening()) continue;3751const bool lightScreening = p->isLightScreening();3752const uint32_t tmpIdx = idx(p->getID(), 0);3753int32_t count = 0;3754for (tle = tlistHead; tle; tle = tle->getNextElement())3755{3756t = tle->getData();3757if (result[tmpIdx + t->getID()] == _Embed)3758{3759bool inLoop = false;3760bool allOptionalParents = false;3761if (!(!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening)))3762{3763count ++;3764}3765}3766}3767bool checkOptionalParents = (count == 0);3768if (trace() && count == 0) traceMsg(comp(), "screening1: count=%d for p:%d\n",count,p->getID());3769count = 0;3770for (tle = tlistHead; tle; tle = tle->getNextElement())3771{3772t = tle->getData();3773if (result[tmpIdx + t->getID()] == _Embed)3774{3775bool inLoop = false;3776bool allOptionalParents = false;3777if (!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening ||3778(checkOptionalParents && allOptionalParents)))3779{3780modifyEmbeddedResult = true;3781result[tmpIdx + t->getID()] = _NotEmbed;3782if (trace()) traceMsg(comp(), "screening1: set _NotEmbed to (%d, %d)\n",p->getID(),t->getID());3783}3784else3785{3786count ++;3787}3788}3789}3790if (count == 0)3791{3792if (!p->isOptionalNode())3793{3794if (trace())3795{3796traceMsg(comp(), "fail!! pID=%d.\n", p->getID());3797showEmbeddedData("Result of _embeddedForData", result);3798}3799return false;3800}3801}3802else if (count == 1)3803{3804singleList.add(p);3805}3806else if (count >= 2)3807{3808multiList.add(p);3809}3810}381138123813// This loop tries to exclude those candidates that already included in singleList.3814//showEmbeddedData("before screening2", result);3815if (!multiList.isEmpty())3816{3817ListIterator<TR_CISCNode> mi(&multiList);3818ListIterator<TR_CISCNode> si(&singleList);3819TR_CISCNode *s, *m;3820for (m = mi.getFirst(); m; m = mi.getNext())3821{3822TR_ASSERT(m->isNecessaryScreening(), "error!");3823if (m->isLightScreening()) continue;3824const uint32_t tmpMIdx = idx(m->getID(), 0);3825const int32_t mOpcode = m->getOpcode();3826bool thisScreening = false;3827// Try a set of the same opcode3828for (s = si.getFirst(); s; s = si.getNext())3829{3830if (mOpcode == s->getOpcode())3831{3832const uint32_t tmpSIdx = idx(s->getID(), 0);3833uint32_t tID;3834for (tID = 0; tID < _numTNodes; tID++ )3835{3836if (result[tmpSIdx + tID] == _Embed) break;3837}3838if (result[tmpMIdx + tID] == _Embed)3839{3840modifyEmbeddedResult = true;3841thisScreening = true;3842result[tmpMIdx + tID] = _NotEmbed;3843if (trace()) traceMsg(comp(), "screening2, sameOpcode: set _NotEmbed to (%d, %d)\n",m->getID(),tID);3844}3845}3846}3847bool changeToSingle = false;3848if (thisScreening)3849{3850uint32_t tID;3851int32_t count = 0;3852for (tID = 0; tID < _numTNodes; tID++ )3853{3854if (result[tmpMIdx + tID] == _Embed) count++;3855}3856if (count <= 1) changeToSingle = true;3857}3858// Try a set of different opcodes3859if (!changeToSingle)3860{3861for (s = si.getFirst(); s; s = si.getNext())3862{3863if (mOpcode != s->getOpcode())3864{3865const uint32_t tmpSIdx = idx(s->getID(), 0);3866uint32_t tID;3867for (tID = 0; tID < _numTNodes; tID++ )3868{3869if (result[tmpSIdx + tID] == _Embed) break;3870}3871if (result[tmpMIdx + tID] == _Embed)3872{3873modifyEmbeddedResult = true;3874thisScreening = true;3875result[tmpMIdx + tID] = _NotEmbed;3876if (trace()) traceMsg(comp(), "screening2, others: set _NotEmbed to (%d, %d)\n",m->getID(),tID);3877}3878}3879}3880}3881}3882}3883if (!modifyEmbeddedResult) break;3884}38853886for (ple = plistHead; ple; ple = ple->getNextElement())3887{3888p = ple->getData();3889if (!p->isNecessaryScreening()) continue;3890const bool lightScreening = p->isLightScreening();3891const uint32_t tmpIdx = idx(p->getID(), 0);3892for (tle = tlistHead; tle; tle = tle->getNextElement())3893{3894t = tle->getData();3895if (result[tmpIdx + t->getID()] == _Embed)3896{3897bool inLoop = false;3898bool allOptionalParents = false;3899if (!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening))3900{3901result[tmpIdx + t->getID()] = _NotEmbed;3902if (trace()) traceMsg(comp(), "screening3: set _NotEmbed to (%d, %d)\n",p->getID(),t->getID());3903}3904}3905}3906}39073908if (trace())3909showEmbeddedData("Result of _embeddedForData", result);3910return true;3911}391239133914391539163917//***************************************************************************************3918// It corresponds to the algorithm "dag_embed()" in Fu's paper. (p.385)3919// I relaxed degree checking for TR_booltable to find switch-case statements.3920//***************************************************************************************3921bool3922TR_CISCTransformer::dagEmbed(TR_CISCNode *np, TR_CISCNode *nt)3923{3924uint8_t *const result = _embeddedForCFG;3925const uint32_t numPSucc = np->getNumSuccs();3926const uint32_t numTSucc = nt->getNumSuccs();3927bool isEmbed = false;3928const uint32_t tmpIdx = idx(np->getID(), 0);3929const uint32_t index = tmpIdx + nt->getID();3930uint32_t i;39313932if (_embeddedForData[index] == _Embed &&3933((numPSucc == numTSucc) || (numPSucc == 0)))3934{3935const bool isSuccDirectlyConnected = np->isSuccDirectlyConnected();3936isEmbed = true;3937uint8_t *const result = _embeddedForCFG;3938if (np->getOpcode() == TR_booltable)3939{3940TR_ASSERT(numPSucc == 2, "error!!");3941uint16_t idPSucc0 = np->getSucc(0)->getID();3942uint16_t idTSucc0 = nt->getSucc(0)->getID();3943uint16_t idPSucc1 = np->getSucc(1)->getID();3944uint16_t idTSucc1 = nt->getSucc(1)->getID();3945uint8_t succData01 = result[idx(idPSucc0, idTSucc1)];3946uint8_t succData10 = result[idx(idPSucc1, idTSucc0)];3947if (isDescOrEmbed(succData01) & isDescOrEmbed(succData10))3948{3949nt->reverseBranchOpCodes();3950}3951}3952for (i = 0; i < numPSucc; i++)3953{3954uint16_t idPSucc = np->getSucc(i)->getID();3955uint16_t idTSucc = nt->getSucc(i)->getID();3956uint8_t succData = result[idx(idPSucc, idTSucc)];3957if ((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed)3958{3959isEmbed = false;3960break;3961}3962}3963}3964if (isEmbed)3965{3966result[index] = _Embed;3967return true;3968}3969else3970{3971TR_ASSERT(index == idx(np->getID(), nt->getID()), "error");3972if (numTSucc == 1) // for reducing compilation time3973{3974uint8_t succData = result[tmpIdx + nt->getSucc(0)->getID()];3975result[index] = isDescOrEmbed(succData) ? _Desc : _NotEmbed;3976}3977else if (numTSucc == 0) // for reducing compilation time3978{3979result[index] = _NotEmbed;3980}3981else // Natural code3982{3983for (i = 0; i < numTSucc; i++)3984{3985uint16_t idTSucc = nt->getSucc(i)->getID();3986uint8_t succData = result[tmpIdx + idTSucc];3987if (isDescOrEmbed(succData))3988{3989isEmbed = true;3990break;3991}3992}3993result[index] = isEmbed ? _Desc : _NotEmbed;3994}3995}3996return false;3997}399839994000//***************************************************************************************4001// It corresponds to the algorithm "cycle_embed()" in Fu's paper. (pp.387-388)4002// I relaxed degree checking for TR_booltable to find switch-case statements.4003//***************************************************************************************4004bool4005TR_CISCTransformer::cycleEmbed(uint16_t dagP, uint16_t dagT)4006{4007const List<TR_CISCNode> *dagId2NodesP = _P->getDagId2Nodes();4008const List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();4009List<TR_CISCNode> dagPList = dagId2NodesP[dagP];4010List<TR_CISCNode> dagTList = dagId2NodesT[dagT];4011ListIterator<TR_CISCNode> pi(&dagPList);4012ListIterator<TR_CISCNode> ti(&dagTList);4013uint8_t *result = _embeddedForCFG;4014uint8_t *const embeddedForData = _embeddedForData;4015uint32_t i;40164017memset(_EM, 0, _sizeResult);4018memset(_DE, 0, _sizeDE);4019TR_CISCNode *np, *nt;4020bool isEmbed;4021for (np = pi.getFirst(); np; np = pi.getNext())4022{4023const uint16_t pId = np->getID();4024const uint32_t tmpIdx = idx(pId, 0);4025const uint32_t numPSucc = np->getNumSuccs();4026const bool isSuccDirectlyConnected = np->isSuccDirectlyConnected();4027const bool isBoolTable = (np->getOpcode() == TR_booltable);4028for (nt = ti.getFirst(); nt; nt = ti.getNext())4029{4030const uint32_t numTSucc = nt->getNumSuccs();4031const uint32_t index = tmpIdx + nt->getID();4032isEmbed = false;4033bool isLabelSame = (embeddedForData[index] == _Embed);4034uint32_t tOpc = nt->getOpcode();4035if (isLabelSame)4036{4037if ((numPSucc == numTSucc) || (numPSucc == 0))4038{4039isEmbed = true;4040if (isBoolTable)4041{4042TR_ASSERT(numPSucc == 2, "error!!");4043uint16_t idPSucc0 = np->getSucc(0)->getID();4044uint16_t idTSucc0 = nt->getSucc(0)->getID();4045uint16_t idPSucc1 = np->getSucc(1)->getID();4046uint16_t idTSucc1 = nt->getSucc(1)->getID();4047uint8_t succData01 = result[idx(idPSucc0, idTSucc1)];4048uint8_t succData10 = result[idx(idPSucc1, idTSucc0)];4049if (isDescOrEmbed(succData01) & isDescOrEmbed(succData10))4050{4051nt->reverseBranchOpCodes();4052}4053}4054for (i = 0; i < numPSucc; i++)4055{4056uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(i)->getID())];4057if ((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed)4058{4059isEmbed = false;4060break;4061}4062}4063}4064else if (isBoolTable)4065{4066if (tOpc == TR::Case)4067{4068i = (nt->isValidOtherInfo() ? 1 : 0);4069uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(0)->getID())];4070isEmbed = !((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed);4071}4072}4073}4074uint8_t chkEmbed;4075if (isEmbed)4076chkEmbed = _Embed;4077else4078{4079if (!isLabelSame)4080chkEmbed = _NotEmbed;4081else if (numPSucc != numTSucc)4082{4083chkEmbed = _NotEmbed;4084if (isBoolTable)4085{4086chkEmbed = _Cond;4087if (tOpc == TR::Case)4088{4089i = (nt->isValidOtherInfo() ? 1 : 0);4090uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(0)->getID())];4091if (succData == _NotEmbed) chkEmbed = _NotEmbed;4092}4093}4094}4095else4096{4097chkEmbed = _Cond;4098for (i = 0; i < numPSucc; i++)4099{4100uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(i)->getID())];4101if (succData == _NotEmbed)4102{4103chkEmbed = _NotEmbed;4104break;4105}4106}4107}4108}4109_EM[index] = chkEmbed;4110if (chkEmbed == _Embed || chkEmbed == _Cond)4111_DE[pId] = 1;4112else4113{4114for (i = 0; i < numTSucc; i++)4115{4116uint8_t succData = result[tmpIdx + nt->getSucc(i)->getID()];4117if (isDescOrEmbed(succData))4118{4119_DE[pId] = 1;4120break;4121}4122}4123}4124}4125}41264127for (np = pi.getFirst(); np; np = pi.getNext())4128{4129if (_DE[np->getID()] == 0)4130{4131for (np = pi.getFirst(); np; np = pi.getNext())4132{4133const uint32_t tmpIdx = idx(np->getID(), 0);4134for (nt = ti.getFirst(); nt; nt = ti.getNext())4135{4136result[tmpIdx + nt->getID()] = _NotEmbed; // set NotEmbed to all elements4137}4138}4139}4140}41414142bool ret = true;4143for (np = pi.getFirst(); np; np = pi.getNext())4144{4145const uint32_t tmpIdx = idx(np->getID(), 0);4146bool existEmbed = false;4147for (nt = ti.getFirst(); nt; nt = ti.getNext())4148{4149const uint32_t index = tmpIdx + nt->getID();4150uint8_t chkEmbed = _EM[index];4151if (chkEmbed == _Embed || chkEmbed == _Cond)4152{4153result[index] = _Embed;4154existEmbed = true;4155}4156else4157{4158result[index] = _Desc;4159}4160}4161if (!existEmbed && !np->isOptionalNode())4162{4163ret = false;4164}4165}4166return ret;4167}4168416941704171//***************************************************************************************4172// It computes embedding information for an input CFG.4173// Because CFG consists of DAGs and a cycle, it will handle them.4174// It uses the result of computeEmbeddedForData() to find candidate nodes4175// whose label is the same as that of each node in the idiom graph.4176// It uses the topological embedding algorithm by walking the CFG edges from exit to entry.4177// At the time, we traverse nodes based on the order of the DagIDs,4178// which basically represent a post order of basic blocks.4179//***************************************************************************************4180bool4181TR_CISCTransformer::computeEmbeddedForCFG()4182{4183TR_ASSERT(_embeddedForData != 0, "error");4184uint8_t *const result = _embeddedForCFG;4185memset(result, 0, _sizeResult);4186uint16_t dagP, dagT;4187uint16_t numDagIdsP = _P->getNumDagIds();4188uint16_t numDagIdsT = _T->getNumDagIds();4189const List<TR_CISCNode> *dagId2NodesP = _P->getDagId2Nodes();4190const List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();4191TR_CISCNode *np, *nt;41924193for (dagP = 0; dagP < numDagIdsP; dagP++)4194{4195List<TR_CISCNode> dagPList = dagId2NodesP[dagP];4196ListIterator<TR_CISCNode> pi(&dagPList);4197bool existEmbed = false;4198for (dagT = 0; dagT < numDagIdsT; dagT++)4199{4200List<TR_CISCNode> dagTList = dagId2NodesT[dagT];4201TR_ASSERT(!dagTList.isEmpty(), "empty dagId");4202if (dagTList.isSingleton())4203{4204nt = dagTList.getListHead()->getData();4205for (np = pi.getFirst(); np; np = pi.getNext())4206if (dagEmbed(np, nt)) existEmbed = true;4207}4208else4209{4210if (cycleEmbed(dagP, dagT)) existEmbed = true;4211}4212}4213if (!existEmbed)4214{4215if (trace())4216{4217traceMsg(comp(), "computeEmbeddedForCFG: Cannot find embedded nodes for dagP:%d\n",dagP);4218showEmbeddedData("Result of _embeddedForCFG", result);4219}4220return false;4221}4222}4223if (trace())4224showEmbeddedData("Result of _embeddedForCFG", result);4225return true;4226}422742284229//***************************************************************************************4230// It creates P2T and T2P tables from embedding information.4231// (P and T denote Pattern and Target, respectively.)4232// We can use them to find target nodes from pattern nodes, and vice versa.4233//***************************************************************************************4234bool4235TR_CISCTransformer::makeLists()4236{4237TR_CISCNode *p, *t;4238ListIterator<TR_CISCNode> pi(_P->getNodes());4239ListIterator<TR_CISCNode> ti(_T->getOrderByData());4240uint8_t *const result = _embeddedForCFG;4241uint8_t *const embeddedForData = _embeddedForData;4242bool modify = false;42434244memset(_P2T, 0, _sizeP2T);4245memset(_T2P, 0, _sizeT2P);42464247int i;4248for (i = 0; i < _numPNodes; i++) _P2T[i].setRegion(trMemory()->heapMemoryRegion());4249for (i = 0; i < _numTNodes; i++) _T2P[i].setRegion(trMemory()->heapMemoryRegion());42504251for (p = pi.getFirst(); p; p = pi.getNext())4252{4253const bool isEssential = p->isEssentialNode();4254const bool isSuccDirectlyConnected = p->isSuccDirectlyConnected();4255const uint32_t numPSucc = p->getNumSuccs();4256uint32_t pID = p->getID();4257List<TR_CISCNode> *pList = _P2T + pID;4258const uint32_t tmpIdx = idx(pID, 0);4259for (t = ti.getFirst(); t; t = ti.getNext())4260{4261uint32_t tID = t->getID();4262if (result[tmpIdx+tID] == _Embed)4263{4264bool isEmbed = true;4265if (isSuccDirectlyConnected)4266{4267for (uint32_t i = 0; i < numPSucc; i++)4268{4269if (result[idx(p->getSucc(i)->getID(), t->getSucc(i)->getID())] != _Embed)4270{4271isEmbed = false;4272break;4273}4274}4275}4276if (isEmbed)4277{4278if (trace() &&4279!_T2P[tID].isEmpty())4280{4281traceMsg(comp(), "makeLists: tID:%d corresponds to multiple nodes\n",tID);4282}4283if (isEssential) t->setIsEssentialNode();4284pList->add(t);4285if (numPSucc == 0) t->setIsNegligible();4286_T2P[tID].add(p);4287}4288else4289{4290modify = true;4291result[tmpIdx+tID] = _Desc;4292embeddedForData[tmpIdx+tID] = _Desc;4293}4294}4295}4296if (pList->isMultipleEntry() &&4297p->getOpcode() == TR_variable)4298{4299if (!p->isOptionalNode())4300{4301if (trace()) traceMsg(comp(), "makeLists: pid:%d a variable corresponds to multiple nodes\n",p->getID());4302return false; /* a variable corresponds to multiple nodes */4303}4304}4305}4306if (modify)4307{4308if (trace())4309showEmbeddedData("Result of _embeddedForCFG after makeLists", result);4310}4311return true;4312}4313431443154316//*****************************************************************************4317// Analyze relationships for parents and children4318//*****************************************************************************4319int32_t4320TR_CISCTransformer::analyzeConnectionOnePairChild(TR_CISCNode *const p, TR_CISCNode *const t,4321TR_CISCNode *const pn, TR_CISCNode *tn)4322{4323uint8_t *result = _embeddedForData;4324const uint32_t tmpIdx = idx(pn->getID(), 0);4325int32_t successCount = 0;4326TR_CISCNode *tnBefore = t;4327while(true) // we may need to analyze descendant for tn because of negligible nodes (e.g. iload)4328{4329uint8_t chiData = result[tmpIdx + tn->getID()];4330if (chiData == _Embed)4331{4332// the connectivity of p and pn (child) is the same as that of t and tn (child)4333successCount++;4334tn->setIsParentSimplyConnected();4335break;4336}4337else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)4338{4339bool success = false;4340if (tnBefore->isLoadVarDirect())4341{4342ListIterator<TR_CISCNode> defI;4343ListIterator<TR_CISCNode> useI;4344defI.set(tnBefore->getChains());4345TR_ASSERT(defI.getFirst(), "error");4346TR_CISCNode *d;4347success = true;4348// Check if t and tn are connected via a variable4349for (d = defI.getFirst(); d; d = defI.getNext())4350{4351if (d->getOpcode() == TR_entrynode)4352{4353success = false;4354continue;4355}4356TR_ASSERT(d->isStoreDirect(), "error");4357TR_CISCNode *childOfStore = d->getChild(0);4358if (_Embed != result[tmpIdx + childOfStore->getID()])4359success = false; // Need to handle this store4360else4361{4362// Check if all uses are appropriate.4363bool validThisStore = true;4364bool includeExitNode = false;4365if (!d->isNegligible())4366{4367useI.set(d->getChains());4368TR_ASSERT(useI.getFirst(), "error");4369TR_CISCNode *u;4370List<TR_CISCNode> *pParents = p->getChild(0)->getParents();4371ListIterator<TR_CISCNode> pParentsI(pParents);4372for (u = useI.getFirst(); u; u = useI.getNext())4373{4374if (tnBefore == u) continue; // short-cut path4375if (u->getDagID() != d->getDagID())4376{4377includeExitNode = true;4378continue;4379}4380List<TR_CISCNode> *uParents = u->getParents();4381TR_CISCNode *uParent;4382TR_CISCNode *pParent;43834384// check all stored data are valid.4385ListIterator<TR_CISCNode> uParentsI(uParents);4386bool isEmbed = true;4387for (uParent = uParentsI.getFirst(); uParent; uParent = uParentsI.getNext())4388{4389isEmbed = false;4390for (pParent = pParentsI.getFirst(); pParent; pParent = pParentsI.getNext())4391{4392if (_Embed == result[idx(pParent->getID(), uParent->getID())])4393{4394isEmbed = true;4395break;4396}4397}4398if (!isEmbed) break;4399}4400if (!isEmbed)4401{4402success = validThisStore = false;4403break;4404}4405}4406}4407if (validThisStore)4408{4409if (!includeExitNode) d->setIsNegligible();4410childOfStore->setIsParentSimplyConnected();4411}4412}4413}4414}4415else if (tn->getOpcode() == TR_variable)4416{4417success = false;4418ListIterator<TR_CISCNode> hi(t->getHintChildren());4419TR_CISCNode *n;4420for (n = hi.getFirst(); n; n = hi.getNext()) // n is a right-hand side expression of a store4421{4422if (_Embed == result[tmpIdx + n->getID()])4423{4424n->setIsParentSimplyConnected();4425success = true;4426break;4427}4428}44294430// If we cannot use any hint, we'll look at a neighbor store instruction.4431if (!success)4432{4433List<TR_CISCNode> *preds = tnBefore->getPreds();4434while(preds->isSingleton())4435{4436n = preds->getListHead()->getData();4437if (n->isStoreDirect() &&4438n->getChild(1) == tnBefore &&4439_Embed == result[tmpIdx + n->getChild(0)->getID()])4440{4441n->getChild(0)->setIsParentSimplyConnected();4442success = true;4443break;4444}4445preds = n->getPreds();4446}4447}4448}4449if (success)4450{4451successCount++;4452}4453break;4454}4455if (tn->getNumChildren() == 0) break;4456tnBefore = tn;4457tn = tn->getChild(0);4458}4459return successCount;4460}4461446244634464//*****************************************************************************4465// Analyze relationships for parents, children, predecessors, successors.4466// They are represented to four flags.4467//*****************************************************************************4468void4469TR_CISCTransformer::analyzeConnectionOnePair(TR_CISCNode *const p, TR_CISCNode *const t)4470{4471int32_t i, /*j,*/ num;4472uint8_t *result;4473TR_CISCNode *tn, *pn;4474int32_t successCount;4475const bool isBoolTable = (p->getOpcode() == TR_booltable);4476const bool isCmpAll = (p->getOpcode() == TR_ifcmpall);44774478result = _embeddedForData;4479num = p->getNumChildren();4480TR_ASSERT(t->getNumChildren() == num ||4481p->getOpcode() == TR_arrayindex ||4482p->getOpcode() == TR_arraybase ||4483p->getOpcode() == TR_quasiConst ||4484p->getOpcode() == TR_quasiConst2 ||4485p->getOpcode() == TR_booltable ||4486p->getOpcode() == TR_inbstore ||4487p->getOpcode() == TR_indstore, "error");4488if (p->getParents()->isEmpty() ||4489t->getParents()->isEmpty() ||4490t->getOpcode() == TR::Case ||4491t->getOpcode() == TR::awrtbari) t->setIsParentSimplyConnected();44924493// Analyze connectivities for children and parents4494if (num == 0)4495{4496t->setIsChildSimplyConnected();4497}4498else4499{4500successCount = 0;4501for (i = 0; i < num; i++) // for each child4502{4503const bool commutative = p->isCommutative();4504pn = p->getChild(i);4505while (pn->isOptionalNode() &&4506_P2T[pn->getID()].isEmpty() &&4507pn->getNumChildren() > 0)4508{4509pn = pn->getChild(0);4510}4511const uint32_t tmpIdx = idx(pn->getID(), 0);45124513while(true)4514{4515int32_t thisCount;4516if (commutative && num == 2)4517{4518if ((thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(i))) ||4519(thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(1-i))))4520{4521successCount += thisCount;4522break;4523}4524}4525else4526{4527thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(i));4528if (thisCount)4529{4530successCount += thisCount;4531break;4532}4533}4534if (pn->isOptionalNode() &&4535pn->getNumChildren() > 0)4536{4537pn = pn->getChild(0); // Try a child4538}4539else4540{4541break; // fail4542}4543}4544}4545if (successCount == num)4546{4547t->setIsChildSimplyConnected();4548}4549}455045514552// Analyzing Predecessors and Successors4553result = _embeddedForCFG;4554num = t->getNumSuccs();4555if (t->getPreds()->isEmpty() || p->getPreds()->isEmpty()) t->setIsPredSimplyConnected();4556if (num == 0 || p->getNumSuccs() == 0)4557{4558t->setIsSuccSimplyConnected();4559}4560else if (p->getNumSuccs() == num) // typical case4561{4562successCount = 0;4563for (i = 0; i < num; i++) // for each successor4564{4565pn = p->getSucc(i);4566while (pn->isOptionalNode() &&4567_P2T[pn->getID()].isEmpty() &&4568pn->getNumSuccs() > 0)4569{4570pn = pn->getSucc(0); // skip optional nodes4571}45724573while(true)4574{4575tn = t->getSucc(i);4576const uint32_t tmpIdx = idx(pn->getID(), 0);45774578while(true)4579{4580uint8_t chiData = result[tmpIdx + tn->getID()];4581if (chiData == _Embed)4582{4583// the connectivity of p and pn (succ) is the same as that of t and tn (succ)4584successCount++;4585tn->setIsPredSimplyConnected();4586break;4587}4588else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)4589{4590if (isBoolTable || isCmpAll)4591{4592if (_Embed == result[idx(p->getID(), tn->getID())])4593{4594successCount++;4595tn->setIsPredSimplyConnected();4596}4597}4598break;4599}4600tn = tn->getSucc(0);4601}46024603if (!tn->isPredSimplyConnected() &&4604pn->isOptionalNode())4605{4606while (pn->isOptionalNode() &&4607pn->getNumSuccs() > 0)4608{4609pn = pn->getSucc(0); // skip optional nodes4610}4611continue; // retry!4612}4613break; // Usually, this loop doesn't iterate.4614}4615}4616if (successCount == num)4617{4618t->setIsSuccSimplyConnected();4619}4620}4621else if (isBoolTable)4622{4623if (t->getOpcode() == TR::Case)4624{4625int32_t i = (t->isValidOtherInfo() ? 1 : 0);4626pn = p->getSucc(i);4627tn = t->getSucc(0);4628while(true)4629{4630uint8_t chiData = result[idx(pn->getID(), tn->getID())];4631if (chiData == _Embed)4632{4633tn->setIsPredSimplyConnected();4634t->setIsSuccSimplyConnected();4635break;4636}4637else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)4638{4639if (_Embed == result[idx(p->getID(), tn->getID())])4640{4641tn->setIsPredSimplyConnected();4642t->setIsSuccSimplyConnected();4643}4644break;4645}4646tn = tn->getSucc(0);4647}4648}4649}4650}465146524653void4654TR_CISCTransformer::showT2P()4655{4656if (trace())4657{4658TR_CISCNode *p, *t;4659int32_t dagT, numDagIdsT = _T->getNumDagIds();4660List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();4661for (dagT = numDagIdsT; --dagT >= 0;)4662{4663ListIterator<TR_CISCNode> ti(dagId2NodesT + dagT);4664for (t = ti.getFirst(); t; t = ti.getNext())4665{4666uint32_t tID = t->getID();4667traceMsg(comp(), "%3d:",tID);4668if (!_T2P[tID].isEmpty())4669{4670//TR_ASSERT(_T2P[tID].isSingleton(), "it may be error (not sure).");4671ListIterator<TR_CISCNode> pi(_T2P + tID);4672for (p = pi.getFirst(); p; p = pi.getNext())4673{4674uint32_t pID = p->getID();4675traceMsg(comp(), " %2d",pID);4676}4677traceMsg(comp(), " %c%c%c%c", t->isSuccSimplyConnected() ? 'S' : 'x',4678t->isPredSimplyConnected() ? 'P' : 'x',4679t->isParentSimplyConnected() ? 'B' : 'x',4680t->isChildSimplyConnected() ? 'C' : 'x'4681);4682if (t->isNegligible()) traceMsg(comp(), "\t(negligible)");4683traceMsg(comp(), "\n");4684}4685else4686{4687if (t->isNegligible())4688{4689traceMsg(comp(), " negligible\n"); // negligible4690}4691else4692{4693t->dump(comp()->getOutFile(), comp());4694}4695}4696}4697}4698}4699}470047014702//*****************************************************************************4703// Analyze connectivities between pattern nodes and target nodes4704//*****************************************************************************4705void4706TR_CISCTransformer::analyzeConnection()4707{4708TR_CISCNode *p, *t;4709ListIterator<TR_CISCNode> pi(_P->getNodes());4710SpecialNodeTransformerPtr specialTransformer = _P->getSpecialNodeTransformer();4711int count = 0;47124713_T->setListsDuplicator();4714while(true)4715{4716for (p = pi.getFirst(); p; p = pi.getNext()) // for each pattern node4717{4718uint32_t pID = p->getID();4719ListIterator<TR_CISCNode> ti(_P2T + pID);4720for (t = ti.getFirst(); t; t = ti.getNext()) // for each target node corresponding to p4721{4722analyzeConnectionOnePair(p, t);4723}4724}47254726if (!specialTransformer ||4727!specialTransformer(this)) break;4728if (++count > 10) break;4729}47304731showT2P();4732}473347344735//*****************************************************************************4736// Analyze whether each candidate of array header constant is appropriate compared to the idiom.4737// Because the array header size is sometimes modified by constant folding4738// e.g. When AH is -24 for a[i], AH is modified to -25 for a[i+1]4739// If the analysis fails, it'll invalidate that node.4740//*****************************************************************************4741void4742TR_CISCTransformer::analyzeArrayHeaderConst()4743{4744int32_t i = 0;4745while(true) // check all TR_ahconst4746{4747TR_CISCNode *p = _P->getCISCNode(TR_ahconst, true, i);4748if (!p) break; // no more TR_ahconst47494750TR_ASSERT(p->getOpcode() == TR_ahconst, "error");4751int32_t pid = p->getID();4752ListIterator<TR_CISCNode> p2ti(_P2T + pid);4753TR_CISCNode *t;4754int32_t ahsize = -(int32_t)TR::Compiler->om.contiguousArrayHeaderSizeInBytes();4755uint8_t *const embeddedForCFG = _embeddedForCFG;4756uint8_t *const embeddedForData = _embeddedForData;4757const uint32_t tmpIdx = idx(pid, 0);4758bool modify = false;4759for (t = p2ti.getFirst(); t; t = p2ti.getNext()) // for each target node4760{4761TR_ASSERT(t->isValidOtherInfo(), "error");4762int32_t val = t->getOtherInfo();4763bool invalidate = false;4764if (val != ahsize)4765{4766ListIterator<TR_CISCNode> parentTi(t->getParents());4767TR_CISCNode *parent;4768for (parent = parentTi.getFirst(); parent; parent = parentTi.getNext())4769{4770// Condition 1:4771// sub - parent4772// load - loadNode4773// variable - variableNode4774// ahconst - t4775if (!parent->getIlOpCode().isSub())4776{4777invalidate = true;4778break;4779}4780else4781{4782invalidate = true;4783TR_CISCNode *loadNode;4784TR_CISCNode *variableNode;4785TR_CISCNode *subNode = 0;4786TR_CISCNode *constNode = 0;4787TR_CISCNode *storeNode = 0;4788TR_CISCNode *i2lNode = 0;47894790loadNode = parent->getChild(0);4791if (loadNode->getOpcode() == TR::i2l)4792{4793i2lNode = loadNode;4794loadNode = loadNode->getChild(0);4795}4796if (loadNode->getOpcode() == TR_variable)4797{4798// Fail. Not implemeted yet.4799}4800else4801{4802// OK, Condition 1 is satisfied48034804// Next, check Condition 24805// istore v4806// isub4807// iload v4808// iconst b4809variableNode = loadNode->getChild(0);4810ListIterator<TR_CISCNode> loadPi(loadNode->getParents());4811bool found = false;4812for (subNode = loadPi.getFirst(); subNode; subNode = loadPi.getNext())4813{4814if (parent != subNode && subNode->getIlOpCode().isSub())4815{4816constNode = subNode->getChild(1);4817if (constNode->isValidOtherInfo() &&4818constNode->getIlOpCode().isLoadConst() &&4819constNode->getOtherInfo() + ahsize == val)4820{4821ListIterator<TR_CISCNode> subPi(subNode->getParents());4822for (storeNode = subPi.getFirst(); storeNode; storeNode = subPi.getNext())4823{4824if (storeNode->getChild(1) == variableNode)4825{4826// Condition 2 is satisfied4827invalidate = false;4828found = true;4829break;4830////goto find;4831}4832}4833if (found)4834break;4835}4836}4837}4838}4839////find:;48404841if (invalidate) break;4842else4843{4844TR_CISCNode *newConstNode = _T->getCISCNode(t->getOpcode(), true, ahsize);4845if (newConstNode)4846{4847if (i2lNode)4848{4849parent->replaceChild(0, i2lNode);4850i2lNode->replaceChild(0, variableNode);4851i2lNode->setCISCNodeModified();4852}4853else4854{4855parent->replaceChild(0, variableNode);4856}4857parent->replaceChild(1, newConstNode);4858parent->setCISCNodeModified();4859const int32_t index = tmpIdx + newConstNode->getID();4860embeddedForCFG[index] = _Embed;4861embeddedForData[index] = _Embed;4862modify = true;4863}4864}4865}4866}4867}48684869if (invalidate)4870{4871const int32_t tid = t->getID();4872const int32_t index = tmpIdx + tid;4873if (trace())4874{4875traceMsg(comp(), "tid:%d (pid:%d) is invalidated because of failure of analyzeArrayHeaderConst\n",4876tid,pid);4877}4878embeddedForCFG[index] = _NotEmbed;4879embeddedForData[index] = _NotEmbed;4880}4881}4882if (modify && trace())4883{4884_T->dump(comp()->getOutFile(), comp());4885}4886i++;4887}4888}488948904891void4892TR_CISCTransformer::showCISCNodeRegion(TR_CISCNodeRegion *r, TR::Compilation * comp)4893{4894ListIterator<TR_CISCNode> ni;4895TR_CISCNode *n;48964897if (r->isIncludeEssentialNode()) traceMsg(comp, "(E) ");4898ni.set((ListHeadAndTail<TR_CISCNode>*)r);4899for (n = ni.getFirst(); n; n = ni.getNext())4900{4901traceMsg(comp, "%d->",n->getID());4902}4903traceMsg(comp, "\n");4904}490549064907void4908TR_CISCTransformer::showCISCNodeRegions(List<TR_CISCNodeRegion> *regions, TR::Compilation * comp)4909{4910ListIterator<TR_CISCNodeRegion> ri(regions);4911TR_CISCNodeRegion *r;49124913for (r = ri.getFirst(); r; r = ri.getNext())4914{4915showCISCNodeRegion(r, comp);4916}4917}4918491949204921//*****************************************************************************4922// It removes first several nodes from the region r to correct the alignment4923//*****************************************************************************4924bool4925TR_CISCTransformer::alignTopOfRegion(TR_CISCNodeRegion *r)4926{4927ListElement<TR_CISCNode> *le;4928ListElement<TR_CISCNode> *firstNegligible = 0;4929TR_CISCNode *pTop = _P->getEntryNode()->getSucc(0);4930TR_CISCNode *t;49314932// determine the top node pTop of the idiom (skip optional nodes)4933while (true)4934{4935t = getP2TRep(pTop);4936if (t == 0)4937{4938if (!pTop->isOptionalNode())4939{4940if (trace()) traceMsg(comp(), "alignTopOfRegion failed. There is no target node corresponding to %d. Check for nodes in broken region listings above and x in SPBC listing.\n",pTop->getID());4941return false;4942}4943}4944else4945{4946if (!pTop->isOptionalNode() || r->isIncluded(t)) break;4947ListIterator<TR_CISCNode> ci(_P2T + pTop->getID());4948for (t = ci.getFirst(); t; t = ci.getNext())4949{4950if (r->isIncluded(t)) break;4951}4952if (t) break;4953}4954pTop = pTop->getSucc(0);4955}49564957if (trace()) traceMsg(comp(), "alignTopOfRegion: (pTop, t) is (%d, %d)\n", pTop->getID(), t->getID());49584959// remove nodes (from start to pTop) from the region r4960for (le = r->getListHead(); le; le = le->getNextElement())4961{4962t = le->getData();4963ListIterator<TR_CISCNode> ci(_T2P + t->getID());4964TR_CISCNode *p;4965bool pHasCFG = false;4966for (p = ci.getFirst(); p; p = ci.getNext())4967{4968if (p == pTop)4969{4970r->setListHead(firstNegligible ? firstNegligible : le);4971return true;4972}4973if (p->getNumSuccs() > 0 || !p->getPreds()->isEmpty())4974pHasCFG = true;4975}4976if (t->isNegligible() || !pHasCFG)4977{4978if (!firstNegligible && t->getOpcode() != TR::BBEnd) firstNegligible = le;4979}4980else4981{4982firstNegligible = 0;4983}4984}4985if (trace()) traceMsg(comp(), "alignTopOfRegion failed. Cannot find pTop:%d in the region.\n",pTop->getID());4986return false;4987}498849894990//*****************************************************************************4991// Check whether all nodes in the idiom _P are included in the region r.4992// It uses a bit vector to analyze above.4993//*****************************************************************************4994bool4995TR_CISCTransformer::areAllNodesIncluded(TR_CISCNodeRegion *r)4996{4997ListIterator<TR_CISCNode> ni;4998TR_CISCNode *t;4999TR_BitVector bv(_P->getNumNodes(), trMemory(), stackAlloc);5000ni.set(_P->getNodes());5001// Set the IDs of required nodes in the idiom _P to the bit vector bv.5002for (t = ni.getFirst(); t; t = ni.getNext())5003{5004if ((t->getNumSuccs() > 0 || !t->getPreds()->isEmpty()) && !t->isOptionalNode())5005{5006switch(t->getOpcode())5007{5008case TR_entrynode:5009case TR_exitnode:5010break;5011default:5012bv.set(t->getID());5013break;5014}5015}5016}50175018// Reset the ID of the idiom nodes corresponding to each target node in the region r.5019ni.set((ListHeadAndTail<TR_CISCNode>*)r);5020for (t = ni.getFirst(); t; t = ni.getNext())5021{5022ListIterator<TR_CISCNode> ci(_T2P + t->getID());5023TR_CISCNode *p;5024for (p = ci.getFirst(); p; p = ci.getNext())5025bv.reset(p->getID());5026}50275028// If the bit vector bv is empty, alll nodes are included in the region r.5029if (trace())5030{5031if (!bv.isEmpty())5032{5033traceMsg(comp(), "Cannot find pNodes: ");5034bv.print(comp(), comp()->getOutFile());5035traceMsg(comp(), "\n");5036}5037}5038return bv.isEmpty();5039}504050415042//*****************************************************************************5043// If moveTo is 0, the region ("from" through "to") in the list l will be moved to the last.5044// Otherwise, the region will be moved to before moveTo.5045//*****************************************************************************5046void5047TR_CISCTransformer::moveCISCNodesInList(List<TR_CISCNode> *l, TR_CISCNode *from, TR_CISCNode *to, TR_CISCNode *moveTo)5048{5049#if 05050if (showMesssagesStdout())5051{5052printf("moveCISCNodesInList: %s\n",_T->getTitle());5053}5054#endif5055if (trace())5056{5057traceMsg(comp(), "moveCISCNodesInList: r_from:%p(%d) r_to:%p(%d) moveTo:%p(%d)\n",from,from->getID(),to,to->getID(),moveTo,moveTo->getID());5058}50595060ListElement<TR_CISCNode> *before = 0, *beforeFrom = 0, *beforeMoveTo = 0, *fromLe = 0, *toLe = 0, *moveToLe = 0, *le;5061for (le = l->getListHead(); le; le = le->getNextElement())5062{5063TR_CISCNode *n = le->getData();5064if (n == from)5065{5066beforeFrom = before;5067fromLe = le;5068}5069if (n == to)5070{5071TR_ASSERT(fromLe != 0, "error! fromLe must be found first.");5072toLe = le;5073}5074if (n == moveTo)5075{5076beforeMoveTo = before;5077moveToLe = le;5078}5079before = le;5080}5081if (moveTo == 0)5082{5083beforeMoveTo = before;5084}5085else5086{5087TR_ASSERT(moveToLe != 0, "error");5088if (moveToLe == 0) return; // the case if the assertion failed5089}5090TR_ASSERT(fromLe != 0 && toLe != 0, "error");5091if (fromLe == 0 || toLe == 0) return; // the case if the assertion failed5092if (toLe == beforeMoveTo) return; // Already moved50935094if (!beforeFrom)5095l->setListHead(toLe->getNextElement());5096else5097beforeFrom->setNextElement(toLe->getNextElement());50985099toLe->setNextElement(moveToLe);51005101if (!beforeMoveTo)5102l->setListHead(fromLe);5103else5104beforeMoveTo->setNextElement(fromLe);5105}510651075108//*****************************************************************************5109// If moveTo is 0, the region ("from" through "to") will be moved to the last5110// of the dagId2Nodes[from->getDagID()].5111// Otherwise, the region will be moved to before moveTo.5112// It assumes that all three nodes "from", "to", and "moveTo" (if non-null)5113// have the same dagId.5114// It maintains the following lists:5115// * _T->_dagId2Nodes[from->getDagID()]5116// * _T->_nodes5117// * _T->_orderByData5118//*****************************************************************************5119void5120TR_CISCTransformer::moveCISCNodes(TR_CISCNode *from, TR_CISCNode *to, TR_CISCNode *moveTo, char *debugStr)5121{5122if (showMesssagesStdout())5123{5124printf("moveCISCNodes: %s %s\n",_T->getTitle(), debugStr ? debugStr : "");5125}51265127int32_t dagId = from->getDagID();5128TR_ASSERT(dagId == to->getDagID(), "from->getDagID() and to->getDagID() must be same!");5129TR_ASSERT(!moveTo || dagId == moveTo->getDagID(), "from->getDagID() and moveTo->getDagID() must be same!");5130List<TR_CISCNode> *dagList = _T->getDagId2Nodes()+dagId;51315132TR_CISCNode *prevOrg, *nextOrg;5133TR_CISCNode *prevDst, *nextDst, *succDst;51345135TR_ASSERT(from->getPreds()->isSingleton(), "assumption error!");5136prevOrg = from->getHeadOfPredecessors();5137nextOrg = to->getSucc(0);51385139ListElement<TR_CISCNode> *beforeLastDagIdElement = 0;5140ListElement<TR_CISCNode> *lastDagIdElement = dagList->getListHead();5141if (!moveTo)5142{5143while(lastDagIdElement->getNextElement())5144{5145beforeLastDagIdElement = lastDagIdElement;5146lastDagIdElement = lastDagIdElement->getNextElement();5147}5148prevDst = lastDagIdElement->getData();5149if (prevDst->getOpcode() == TR::BBEnd)5150{5151moveTo = nextDst = prevDst;5152TR_ASSERT(beforeLastDagIdElement, "error!");5153prevDst = beforeLastDagIdElement->getData();5154}5155else5156{5157nextDst = prevDst->getSucc(0);5158}5159}5160else5161{5162while(lastDagIdElement)5163{5164if (lastDagIdElement->getData() == moveTo) break;5165beforeLastDagIdElement = lastDagIdElement;5166lastDagIdElement = lastDagIdElement->getNextElement();5167}5168nextDst = moveTo;5169TR_ASSERT(beforeLastDagIdElement, "error!");5170prevDst = beforeLastDagIdElement->getData();5171}5172succDst = prevDst->getSucc(0);51735174// Modify the successor of each node5175prevOrg->replaceSucc(0, nextOrg);5176prevDst->replaceSucc(0, from);5177to->replaceSucc(0, succDst);51785179// Modify three lists5180if (to->getNumChildren() != 0 || !to->getParents()->isEmpty()) // if "to" has a child or a parent.5181{5182TR_CISCNode *fromData = from, *nextDstData = nextDst;5183while(fromData->getNumChildren() == 0 && fromData->getParents()->isEmpty()) fromData = fromData->getSucc(0);5184while(nextDstData->getNumChildren() == 0 && nextDstData->getParents()->isEmpty() && nextDstData->getOpcode() != TR_exitnode) nextDstData = nextDstData->getSucc(0);5185moveCISCNodesInList(_T->getOrderByData(), fromData, to, nextDstData);5186}51875188moveCISCNodesInList(dagList, from, to, moveTo);5189moveCISCNodesInList(_T->getNodes(), to, from, prevDst); // Note: _nodes is the reverse post order.5190}519151925193//*****************************************************************************5194// Based on four relationships (parents, children, predecessors, successors),5195// we extract the region that matches the idiom graph.5196// Return the region in which all nodes in the idiom graph are included5197//*****************************************************************************5198TR_CISCNodeRegion *5199TR_CISCTransformer::extractMatchingRegion()5200{5201TR_CISCNodeRegion *lists = new (trHeapMemory()) TR_CISCNodeRegion(_numTNodes, comp()->trMemory()->heapMemoryRegion());5202TR_ScratchList<TR_CISCNodeRegion> regions(comp()->trMemory());5203TR_CISCNode *t;5204ListElement<TR_CISCNode> *firstNegligible = 0;5205int32_t dagT, numDagIdsT = _T->getEntryNode()->getDagID() + 1;5206List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();5207bool empty = true;52085209bool isSingleLoopBody = _bblistBody.isSingleton();52105211// Collect regions in which data dependence of nodes are equivalent to the idiom and5212// there is no additional node.5213for (dagT = numDagIdsT; --dagT >= 0;) // From entry to exit5214{5215List<TR_CISCNode> *TList = dagId2NodesT + dagT;5216ListElement<TR_CISCNode> *element;5217for (element = TList->getListHead(); element; element = element->getNextElement())5218{5219t = element->getData();5220uint32_t tID = t->getID();5221bool isEmbed = false;5222if (!_T2P[tID].isEmpty() && (t->isDataConnected() || t->isNegligible()))5223{5224// TODO: We may need more checks !!!5225isEmbed = true;5226if (t->getIlOpCode().isIf() && !t->isOutsideOfLoop())5227if (!t->isPredSimplyConnected() && !isSingleLoopBody)5228{5229isEmbed = false;5230if (showMesssagesStdout())5231{5232printf("!!!!!!!!!!!!!! Predecessor of tID %" OMR_PRIu32 " is different from that of idiom.\n", tID);5233}5234if (trace())5235{5236traceMsg(comp(), "Predecessor of tID %" OMR_PRIu32 " is different from that of idiom.\n", tID);5237}5238}5239}5240if (isEmbed)5241{5242// The node t is an appropriate node!5243if (empty && firstNegligible)5244{5245// Add all of the negligible nodes before the node corresponding to a pattern node.5246ListElement<TR_CISCNode> *tmp_ele;5247TR_CISCNode *neg;5248for (tmp_ele = firstNegligible; true; tmp_ele = tmp_ele->getNextElement())5249{5250neg = tmp_ele->getData();5251if (!neg->isNegligible() || !_T2P[neg->getID()].isEmpty()) break;5252lists->append(neg);5253}5254TR_ASSERT(neg == t, "error!");5255}5256empty = false;5257lists->append(t);5258}5259else5260{5261// The node t is an inappropriate node!5262if (!empty)5263{5264if (!t->isNegligible() || !_T2P[t->getID()].isEmpty())5265{5266// add "lists" to "regions" and clear it5267empty = true;5268firstNegligible = 0;5269regions.add(lists);5270lists = new (trHeapMemory()) TR_CISCNodeRegion(_numTNodes, comp()->trMemory()->heapMemoryRegion());5271}5272else5273{5274// It can be added5275lists->append(t);5276}5277}5278else5279{5280if (t->isNegligible() && _T2P[t->getID()].isEmpty())5281{5282// Register the first negligible node after the inappropriate node.5283if (!firstNegligible) firstNegligible = element;5284}5285else5286{5287// Clear the first negligible node5288firstNegligible = 0;5289}5290}5291}5292}5293firstNegligible = 0;5294}5295if (!empty)5296{5297regions.add(lists);5298}5299if (trace())5300{5301traceMsg(comp(), "Before alignTopOfRegion\n");5302showCISCNodeRegions(®ions, comp());5303}53045305ListIterator<TR_CISCNodeRegion> ri(®ions);5306ListIterator<TR_CISCNode> ni;5307TR_CISCNodeRegion *r, *ret = 0;5308const bool showingCandidates = isShowingCandidates();5309for (r = ri.getFirst(); r; r = ri.getNext())5310{5311if (r->isIncludeEssentialNode())5312{5313if (showingCandidates) _candidatesForRegister.add(r->clone());5314if (alignTopOfRegion(r)) // Remove nodes from the region r to correct the alignment5315{5316if (areAllNodesIncluded(r)) // If all idiom nodes are included in r5317{5318ret = r;5319break;5320}5321}5322}5323}5324if (trace())5325{5326traceMsg(comp(), "After alignTopOfRegion\n");5327showCISCNodeRegions(®ions, comp());5328traceMsg(comp(), "extractMatchingRegion ret=0x%x\n",ret);5329}53305331return ret;5332}533353345335//*****************************************************************************5336// It analyzes whether all blocks of the loop body are included in the _candidateRegion5337// It also creates _candidateBBStartEnd, which has all of TR::BBStart and TR::BBEnd nodes in the region.5338//*****************************************************************************5339bool5340TR_CISCTransformer::verifyCandidate()5341{5342ListIterator<TR_CISCNode> ci(_candidateRegion);5343TR_CISCNode *cn;5344ListHeadAndTail<TR_CISCNode> *listBB = new (trHeapMemory()) ListHeadAndTail<TR_CISCNode>(trMemory());5345ListElement <TR_CISCNode> *le;53465347// Create the list of TR::BBStart and TR::BBEnd nodes in the region.5348for (cn = ci.getFirst(); cn; cn = ci.getNext())5349{5350switch(cn->getOpcode())5351{5352case TR::BBStart:5353case TR::BBEnd:5354listBB->append(cn);5355break;5356}5357}53585359le = listBB->getListHead();5360ListIterator<TR::Block> bi(&_bblistBody);5361TR::Block *b;5362for (b = bi.getFirst(); b; b = bi.getNext())5363{5364while (true)5365{5366if (!le)5367{5368if (trace()) traceMsg(comp(), "Cannot find TR::BBStart of block_%d in the region\n",b->getNumber());5369return false; // Cannot find b in listBB5370}5371cn = le->getData();5372if (cn->getOpcode() == TR::BBStart && cn->getHeadOfTrNodeInfo()->_node->getBlock() == b)5373{5374le = le->getNextElement();5375if (!le) return false; // Cannot find TR::BBEnd5376cn = le->getData();5377if (cn->getOpcode() != TR::BBEnd || cn->getHeadOfTrNodeInfo()->_node->getBlock() != b)5378return false; // Cannot find TR::BBEnd5379le = le->getNextElement();5380break;5381}5382le = le->getNextElement();5383}5384}53855386_candidateBBStartEnd = listBB;5387return true;5388}5389539053915392//*****************************************************************************************5393// Return the number of target nodes corresponding to p5394// Count only within a loop if inLoop is true (default is false).5395//*****************************************************************************************5396int5397TR_CISCTransformer::countP2T(TR_CISCNode *p, bool inLoop)5398{5399uint32_t pID = p->getID();5400List<TR_CISCNode> *list = _P2T + pID;5401if (list->isEmpty())5402{5403return 0;5404}5405else5406{5407ListIterator<TR_CISCNode> ni(list);5408TR_CISCNode *t;5409int count = 0;5410if (inLoop)5411{5412for (t = ni.getFirst(); t; t = ni.getNext()) if (!t->isOutsideOfLoop()) count++;5413}5414else5415{5416for (t = ni.getFirst(); t; t = ni.getNext()) count++;5417}5418return count;5419}5420}5421542254235424//*****************************************************************************************5425// Return a representative target node corresponding to p5426// 0 for no-existence5427//*****************************************************************************************5428TR_CISCNode *5429TR_CISCTransformer::getP2TRep(TR_CISCNode *p)5430{5431uint32_t pID = p->getID();5432List<TR_CISCNode> *list = _P2T + pID;5433if (list->isEmpty())5434{5435return 0;5436}5437else5438{5439return list->getListHead()->getData();5440}5441}5442544354445445//*****************************************************************************************5446// Return a representative target node *in the cycle* corresponding to p5447// 0 for no-existence5448//*****************************************************************************************5449TR_CISCNode *5450TR_CISCTransformer::getP2TRepInLoop(TR_CISCNode *p, TR_CISCNode *exclude)5451{5452uint32_t pID = p->getID();5453List<TR_CISCNode> *list = _P2T + pID;5454if (list->isEmpty())5455{5456return 0;5457}5458else5459{5460ListIterator<TR_CISCNode> ni(list);5461TR_CISCNode *t;5462for (t = ni.getFirst(); t; t = ni.getNext())5463{5464if (!t->isOutsideOfLoop() && t != exclude) return t;5465}5466return 0;5467}5468}5469547054715472//*****************************************************************************************5473// Return a target node *in the cycle* corresponding to p if the target node is only one.5474// 0 for others5475//*****************************************************************************************5476TR_CISCNode *5477TR_CISCTransformer::getP2TInLoopIfSingle(TR_CISCNode *p)5478{5479uint32_t pID = p->getID();5480List<TR_CISCNode> *list = _P2T + pID;5481if (list->isEmpty())5482{5483return 0;5484}5485else5486{5487ListIterator<TR_CISCNode> ni(list);5488TR_CISCNode *t;5489TR_CISCNode *ret = 0;5490for (t = ni.getFirst(); t; t = ni.getNext())5491{5492if (!t->isOutsideOfLoop())5493{5494if (ret) return 0;5495ret = t;5496}5497}5498return ret;5499}5500}5501550255035504//*****************************************************************************************5505// It is similar to getP2TInLoopIfSingle.5506// If the given pattern node is "optional", we can skip it.5507//*****************************************************************************************5508TR_CISCNode *5509TR_CISCTransformer::getP2TInLoopAllowOptionalIfSingle(TR_CISCNode *p)5510{5511TR_CISCNode *t;5512while(true)5513{5514t = getP2TInLoopIfSingle(p);5515if (t) return t;5516if (!p->isOptionalNode()) return 0;5517p = p->getChild(0);5518if (!p) return 0;5519}5520}552155225523//*****************************************************************************************5524// Return TR::TreeTop, TR::Node, and TR::Block corresponding to the first node of the region _candidateRegion5525//*****************************************************************************************5526bool5527TR_CISCTransformer::findFirstNode(TR::TreeTop **retTree, TR::Node **retNode, TR::Block **retBlock)5528{5529ListIterator<TR_CISCNode> ci(_candidateRegion);5530TR_CISCNode *cn = NULL;5531TR::Node *trNode = NULL;5532TR::TreeTop *trTreeTop = NULL;5533TR::Block *block = NULL;55345535for (cn = ci.getFirst(); cn; cn = ci.getNext())5536{5537if (cn->getOpcode() == TR_entrynode) continue;5538if (cn->isNewCISCNode()) continue;5539if (trace() && !cn->getTrNodeInfo()->isSingleton())5540traceMsg(comp(), "!cn->getTrNodeInfo()->isSingleton(): %d\n",cn->getID());5541TR_ASSERT(cn->getTrNodeInfo()->isSingleton(), "it must correspond to a single TR node");5542struct TrNodeInfo *info = cn->getHeadOfTrNodeInfo();5543trNode = info->_node;5544if (trNode->getOpCodeValue() == TR::BBEnd) continue;5545if(cn->getOpcode() == TR::BBStart)5546{5547block = trNode->getBlock();55485549trTreeTop = info->_treeTop;5550trTreeTop = trTreeTop->getNextTreeTop();5551trNode = trTreeTop->getNode();5552if (trNode->getOpCodeValue() != TR::BBEnd)5553break;5554}5555else5556{5557trTreeTop = info->_treeTop;5558if (trTreeTop->getNode() == trNode)5559{5560if (!block)5561{5562cn = _candidateBBStartEnd->getHeadData();5563if(cn->getOpcode() == TR::BBEnd)5564{5565block = cn->getHeadOfTrNodeInfo()->_node->getBlock();5566}5567}5568break;5569}5570}5571}55725573TR_ASSERT(trNode->getOpCodeValue() != TR::BBEnd, "Assumption failed!");5574*retTree = trTreeTop;5575*retNode = trNode;5576*retBlock = block;5577if (trace()) traceMsg(comp(), "First node in candidate region - node: %p block_%d: %p\n",trNode, block->getNumber(), block);5578return true;5579}558055815582//*****************************************************************************************5583// It adds the edge from srcBlock to the destBlock, only if succList does not have it.5584//*****************************************************************************************5585void5586TR_CISCTransformer::addEdge(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock)5587{5588for (auto edge = succList->begin(); edge != succList->end(); ++edge)5589{5590TR::Block * dest = toBlock((*edge)->getTo());5591TR::Block * src = toBlock((*edge)->getFrom());5592if (src == srcBlock && dest == destBlock)5593{5594return; // already exists!5595}5596}5597_cfg->addEdge(TR::CFGEdge::createEdge(srcBlock, destBlock, trMemory()));5598return;5599}560056015602//*****************************************************************************************5603// It removes the edge (from srcBlock to the destBlock) from the CFG.5604//*****************************************************************************************5605void5606TR_CISCTransformer::removeEdge(List<TR::CFGEdge> *succList, TR::Block *srcBlock, TR::Block *destBlock)5607{5608ListIterator<TR::CFGEdge> succIt(succList);5609TR::CFGEdge * edge;56105611for (edge = succIt.getCurrent(); edge != 0; edge = succIt.getNext())5612{5613TR::Block * dest = toBlock(edge->getTo());5614TR::Block * src = toBlock(edge->getFrom());5615if (src == srcBlock && dest == destBlock)5616{5617_cfg->removeEdge(edge);5618}5619}5620return;5621}562256235624//*****************************************************************************************5625// It removes the edges (from srcBlock to all blocks except for exceptDestBlock) from the CFG.5626//*****************************************************************************************5627void5628TR_CISCTransformer::removeEdgesExceptFor(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *exceptDestBlock)5629{5630for (auto edge = succList->begin(); edge != succList->end();)5631{5632TR::Block * dest = toBlock((*edge)->getTo());5633TR::Block * src = toBlock((*edge)->getFrom());5634if (src == srcBlock && dest != exceptDestBlock)5635{5636_cfg->removeEdge(*(edge++));5637}5638else5639++edge;5640}5641return;5642}564356445645//*****************************************************************************************5646// Set the edge from srcBlock to destBlock into the CFG.5647//*****************************************************************************************5648void5649TR_CISCTransformer::setEdge(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock)5650{5651addEdge(succList, srcBlock, destBlock);5652removeEdgesExceptFor(succList, srcBlock, destBlock);5653}565456555656//*****************************************************************************************5657// Set two edges from srcBlock to destBlock0 and destBlock1 into the CFG.5658//*****************************************************************************************5659void5660TR_CISCTransformer::setEdges(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock0, TR::Block *destBlock1)5661{5662bool existEdge0, existEdge1;5663existEdge0 = existEdge1 = false;5664int32_t count0, count1;56655666for (auto edge = succList->begin(); edge != succList->end(); ++edge)5667{5668TR::Block * dest = toBlock((*edge)->getTo());5669TR::Block * src = toBlock((*edge)->getFrom());5670if (src == srcBlock)5671{5672if (dest == destBlock0) existEdge0 = true;5673else if (dest == destBlock1) existEdge1 = true;5674}5675}56765677if (!existEdge1) addEdge(succList, srcBlock, destBlock1);5678if (!existEdge0) addEdge(succList, srcBlock, destBlock0);56795680count0 = count1 = 0;5681for (auto edge = succList->begin(); edge != succList->end();)5682{5683TR::Block * dest = toBlock((*edge)->getTo());5684TR::Block * src = toBlock((*edge)->getFrom());5685if (src == srcBlock)5686{5687if (dest == destBlock0)5688{5689if (++count0 >= 2) _cfg->removeEdge(*(edge++));5690else ++edge;5691}5692else if (dest == destBlock1)5693{5694if (++count1 >= 2) _cfg->removeEdge(*(edge++));5695else ++edge;5696}5697else5698{5699_cfg->removeEdge(*(edge++));5700}5701}5702else5703++edge;5704}5705}570657075708//*****************************************************************************5709// It analyzes whether the successor of the target loop is single.5710// Even if there are multiple successors for the loop, it tries to analyze5711// whether they can be merged to a single successor.5712// It returns:5713// * non-null: the single successor block5714// * null: multiple successors5715//*****************************************************************************5716TR::Block *5717TR_CISCTransformer::analyzeSuccessorBlock(TR::Node *ignoreTree)5718{5719TR::Block *target = 0;5720if (_bblistSucc.isSingleton())5721{5722target = _bblistSucc.getListHead()->getData(); // obvious5723}5724else5725{5726ListIterator<TR::Block> bbi1(&_bblistSucc), bbi2(&_bblistSucc);5727TR::Block *b1, *b2;57285729// Analyze successors will be merged to a single block without any additional instruction5730for (b1 = bbi1.getFirst(); b1; b1 = bbi1.getNext())5731{5732target = 0;5733for (b2 = bbi2.getFirst(); b2; b2 = bbi2.getNext())5734{5735if (b1 != b2)5736{5737TR::Node *gotonode = b2->getFirstRealTreeTop()->getNode();5738if (gotonode->getOpCodeValue() == TR::Goto &&5739gotonode->getBranchDestination()->getNode()->getBlock() == b1)5740{5741target = b1;5742}5743else if (gotonode->getOpCodeValue() == TR::BBEnd &&5744b2->getNextBlock() == b1)5745{5746target = b2;5747}5748else5749{5750target = 0;5751break;5752}5753}5754}5755if (target) break;5756}5757if (!target)5758{5759for (b1 = bbi1.getFirst(); b1; b1 = bbi1.getNext())5760{5761TR::Block *gotoTarget = skipGoto(b1, ignoreTree);5762if (!target)5763{5764target = gotoTarget;5765}5766else5767{5768if (target != gotoTarget)5769{5770target = 0;5771break;5772}5773}5774}5775}57765777// Especially for two successors, I'll analyze more deeply.5778/*5779if (!target && _bblistSucc.isDoubleton())5780{5781b1 = bbi1.getFirst();5782b2 = bbi1.getNext();5783TR::Block *cur1 = b1, *cur2 = b2;5784while(true)5785{5786cur1 = skipGoto(cur1, ignoreTree);5787cur2 = skipGoto(cur2, ignoreTree);5788if (cur1 == cur2)5789{5790target = b1;5791break;5792}5793if (!compareBlockTrNodeTree(cur1, cur2)) break;5794if (!(cur1 = getSuccBlockIfSingle(cur1))) break;5795if (!(cur2 = getSuccBlockIfSingle(cur2))) break;5796}5797}5798*/5799}58005801if (trace())5802{5803if (target == 0)5804traceMsg(comp(), "!! TR_CISCTransformer::analyzeSuccessorBlock returns 0!\n");5805}58065807return target;5808}580958105811//*****************************************************************************************5812// It sets one successor "target" to the block.5813//*****************************************************************************************5814void5815TR_CISCTransformer::setSuccessorEdge(TR::Block *block, TR::Block *target)5816{5817if (target == 0)5818target = analyzeSuccessorBlock();58195820TR_ASSERT(target != 0, "target must be non-null!!");58215822TR::Node *gotonode = block->getLastRealTreeTop()->getNode();5823if (gotonode->getOpCodeValue() != TR::Goto)5824{5825TR::TreeTop * branchAroundTreeTop = TR::TreeTop::create(comp(), TR::Node::create(gotonode, TR::Goto, 0, target->getEntry()));5826block->getLastRealTreeTop()->join(branchAroundTreeTop);5827branchAroundTreeTop->join(block->getExit());5828}5829setEdge(&block->getSuccessors(), block, target);5830}583158325833//*****************************************************************************************5834// Search for the TR::Block in _bblistSucc except for target0 and target15835//*****************************************************************************************5836TR::Block *5837TR_CISCTransformer::searchOtherBlockInSuccBlocks(TR::Block *target0, TR::Block *target1)5838{5839ListIterator<TR::Block> bbi1(&_bblistSucc);5840TR::Block *b;5841TR::Block *ret = 0;5842for (b = bbi1.getFirst(); b; b = bbi1.getNext())5843{5844if (b == target0 || b == target1)5845continue;5846if (ret)5847return 0; // Failure - Found two non-target0/target1 blocks.5848ret = b;5849}5850return ret;5851}585258535854//*****************************************************************************************5855// Search for the TR::Block in _bblistSucc except for target05856//*****************************************************************************************5857TR::Block *5858TR_CISCTransformer::searchOtherBlockInSuccBlocks(TR::Block *target0)5859{5860if (_bblistSucc.isDoubleton())5861{5862ListIterator<TR::Block> bbi1(&_bblistSucc);5863TR::Block *first = bbi1.getFirst();5864TR::Block *second = bbi1.getNext();5865if (first == target0)5866return second;5867else if (second == target0)5868return first;5869}5870return 0;5871}587258735874//*****************************************************************************************5875// It sets two successors "target0" and "target1" to the block.5876//*****************************************************************************************5877TR::Block *5878TR_CISCTransformer::setSuccessorEdges(TR::Block *block, TR::Block *target0, TR::Block *target1)5879{5880TR::TreeTop * oldNext = block->getExit()->getNextTreeTop();5881if (target0 == 0 || target1 == 0) // automatic detection5882{5883if (target0 == 0)5884target0 = searchOtherBlockInSuccBlocks(target1);5885else5886target1 = searchOtherBlockInSuccBlocks(target0);5887TR_ASSERT(target0 && target1, "error");5888}5889if (trace())5890{5891traceMsg(comp(), "setSuccessorEdges for block_%d [%p]: tgt0=%d tgt1=%d\n", block->getNumber(), block, target0->getNumber(),target1->getNumber());5892}58935894if (!oldNext ||5895oldNext->getNode()->getBlock() != target0)5896{5897TR::Node *lastnode = block->getLastRealTreeTop()->getNode();5898TR::Block * gotoBlock = TR::Block::createEmptyBlock(lastnode, comp(), block->getFrequency(), block);5899_cfg->addNode(gotoBlock);5900TR::TreeTop * gotoEntry = gotoBlock->getEntry();5901TR::TreeTop * gotoExit = gotoBlock->getExit();5902TR::TreeTop * branchTreeTop = TR::TreeTop::create(comp(), TR::Node::create(lastnode, TR::Goto, 0, target0->getEntry()));5903gotoEntry->insertAfter(branchTreeTop);59045905block->getExit()->join(gotoEntry);5906gotoExit->join(oldNext);59075908_cfg->setStructure(0);5909_cfg->addEdge(TR::CFGEdge::createEdge(gotoBlock, target0, trMemory()));5910setEdges(&block->getSuccessors(), block, gotoBlock, target1);5911return gotoBlock;5912}5913else5914{5915setEdges(&block->getSuccessors(), block, target0, target1);5916return block;5917}5918}591959205921//*****************************************************************************************5922// It returns:5923// * 0: if successor is not single5924// * non-null: the successor block5925//*****************************************************************************************5926TR::Block *5927TR_CISCTransformer::getSuccBlockIfSingle(TR::Block *block)5928{5929if (!(block->getSuccessors().size() == 1)) return 0;5930TR::CFGEdge *edge = block->getSuccessors().front();5931return toBlock(edge->getTo());5932}5933593459355936//*****************************************************************************************5937// It searches for a combination of predecessors of "block" and _bblistPred.5938//*****************************************************************************************5939TR::Block *5940TR_CISCTransformer::searchPredecessorOfBlock(TR::Block *block)5941{5942for (auto edge = block->getPredecessors().begin(); edge != block->getPredecessors().end(); ++edge)5943{5944TR::Block *from = toBlock((*edge)->getFrom());5945if (_bblistPred.find(from))5946{5947return from; // find5948}5949}5950return NULL;5951}595259535954//*****************************************************************************************5955// It decides whether we generate versioning code and modifies the target blocks.5956// It returns the block that fast code will be appended.5957//*****************************************************************************************5958TR::Block *5959TR_CISCTransformer::modifyBlockByVersioningCheck(TR::Block *block, TR::TreeTop *startTop, TR::Node *lengthNode, List<TR::Node> *guardList)5960{5961uint16_t versionLength = _P->getVersionLength();5962List<TR::Node> guardListLocal(trMemory());5963// Create versioning if necessary5964if (versionLength >= 1) // Skip if versionLength is less than 1.5965{5966if (guardList == 0) guardList = &guardListLocal;5967ListAppender<TR::Node> appender(guardList);5968if (lengthNode->getOpCodeValue() == TR::i2l) lengthNode = lengthNode->getAndDecChild(0);5969if (lengthNode->getOpCode().isLong())5970{5971TR::Node *lconst = TR::Node::create(lengthNode, TR::lconst, 0, 0);5972lconst->setLongInt(versionLength);5973appender.add(TR::Node::createif(TR::iflcmple, lengthNode, lconst));5974}5975else5976{5977TR_ASSERT(lengthNode->getOpCode().isInt(), "Error");5978appender.add(TR::Node::createif(TR::ificmple, lengthNode,5979TR::Node::create(lengthNode, TR::iconst, 0, versionLength)));5980}5981}5982return modifyBlockByVersioningCheck(block, startTop, guardList);5983}5984598559865987//*****************************************************************************************5988// It decides whether we generate versioning code and modifies the target blocks.5989// It returns the block that fast code will be appended.5990//*****************************************************************************************5991TR::Block *5992TR_CISCTransformer::modifyBlockByVersioningCheck(TR::Block *block, TR::TreeTop *startTop, List<TR::Node> *guardList)5993{5994TR::CFG *cfg = comp()->getFlowGraph();5995TR::Block *fastpath;59965997// Create versioning if necessary5998if (guardList && !guardList->isEmpty())5999{6000cfg->setStructure(0);6001fastpath = TR::Block::createEmptyBlock(startTop->getNode(), comp(), block->getFrequency(), block);6002TR::Block *slowpad;6003TR::Node *cmp;6004TR::Block *orgPrevBlock = 0;6005ListIterator<TR::Node> guardI(guardList);6006TR::Block *firstBlock = 0;6007TR::Block *lastBlock = 0;60086009// Append versioning check6010// Result: orgPrevBlock->block->fastpath->slowpad60116012if (block->getFirstRealTreeTop() == startTop)6013{6014// search the entry pad6015orgPrevBlock = searchPredecessorOfBlock(block);6016}60176018// Insert the fastpath + versioning tress in between the previous block and current block, unless:6019// 1. We do not find a previous block.6020// 2. The previous block does not fall-through into current block (i.e. reached by taken branch).6021// In these two cases, we will just split the current block, and insert the fastpath + versioning trees6022// in between.6023if (!orgPrevBlock || orgPrevBlock->getNextBlock() != block)6024{6025orgPrevBlock = block;6026slowpad = block->split(startTop, cfg, true);6027}6028else6029{6030slowpad = block;6031}60326033TR::TreeTop * orgPrevTreeTop = orgPrevBlock->getExit();6034TR::Node *lastOrgPrevRealNode = orgPrevBlock->getLastRealTreeTop()->getNode();6035TR::TreeTop * orgNextTreeTop = orgPrevTreeTop->getNextTreeTop();6036if (orgNextTreeTop)6037{6038TR::Block * orgNextBlock = orgNextTreeTop->getNode()->getBlock();6039cfg->insertBefore(fastpath, orgNextBlock);6040}6041else6042{6043cfg->addNode(fastpath);6044}60456046firstBlock = fastpath;6047for (cmp = guardI.getFirst(); cmp; cmp = guardI.getNext())6048{6049block = TR::Block::createEmptyBlock(startTop->getNode(), comp(), block->getFrequency(), block);6050if (!lastBlock) lastBlock = block;6051TR_ASSERT(cmp->getOpCode().isIf(), "Not implemeted yet");6052cmp->setBranchDestination(slowpad->getEntry());6053block->append(TR::TreeTop::create(comp(), cmp));6054cfg->insertBefore(block, firstBlock);6055firstBlock = block;6056}60576058orgPrevTreeTop->join(firstBlock->getEntry());6059cfg->addEdge(orgPrevBlock, firstBlock);6060cfg->removeEdge(orgPrevBlock, slowpad);60616062if (trace()) traceMsg(comp(), "modifyBlockByVersioningCheck: orgPrevBlock=%d firstBlock=%d lastBlock=%d fastpath=%d slowpad=%d orgNextTreeTop=%x\n",6063orgPrevBlock->getNumber(), firstBlock->getNumber(), lastBlock->getNumber(), fastpath->getNumber(), slowpad->getNumber(), orgNextTreeTop);60646065if (lastOrgPrevRealNode->getOpCode().getOpCodeValue() == TR::Goto)6066{6067TR_ASSERT(lastOrgPrevRealNode->getBranchDestination() == slowpad->getEntry(), "Error");6068lastOrgPrevRealNode->setBranchDestination(firstBlock->getEntry());6069}6070}6071else6072{6073// Generate no versioning code6074TR::TreeTop *lastRealTT = block->getLastRealTreeTop();6075if (lastRealTT->getNode()->getOpCodeValue() == TR::Goto)6076{6077if (startTop != lastRealTT)6078{6079TR::TreeTop *last = removeAllNodes(startTop, lastRealTT);6080last->join(lastRealTT);6081}6082block->split(lastRealTT, cfg);6083}6084else6085{6086TR::TreeTop *last = removeAllNodes(startTop, block->getExit());6087last->join(block->getExit());6088}6089fastpath = block;6090}6091return fastpath;6092}609360946095//*****************************************************************************************6096// It clones the loop body.6097//*****************************************************************************************6098TR::Block *6099TR_CISCTransformer::cloneLoopBodyForPeel(TR::Block **firstBlock, TR::Block **lastBlock, TR_CISCNode *cmpifCISCNode)6100{6101TR::CFG *cfg = comp()->getFlowGraph();6102cfg->setStructure(0);6103TR_BlockCloner cloner(cfg);6104*firstBlock = cloner.cloneBlocks(_bblistBody.getListHead()->getData(),_bblistBody.getLastElement()->getData());6105*lastBlock = cloner.getLastClonedBlock();6106if (cmpifCISCNode)6107{6108struct TrNodeInfo *repNode = cmpifCISCNode->getHeadOfTrNodeInfo();6109TR::Block *modifyBlock = cloner.getToBlock(repNode->_block);6110TR_ASSERT(modifyBlock != repNode->_block, "error");6111TR::Node *modifyNode = modifyBlock->getLastRealTreeTop()->getNode();6112TR_ASSERT(modifyNode->getOpCode().isIf(), "error");6113TR::Node::recreate(modifyNode, (TR::ILOpCodes)cmpifCISCNode->getOpcode());6114modifyNode->setBranchDestination(cmpifCISCNode->getDestination());6115}6116return *firstBlock;6117}61186119//*****************************************************************************************6120// It appends blocks after "block".6121//*****************************************************************************************6122TR::Block *6123TR_CISCTransformer::appendBlocks(TR::Block *block, TR::Block *firstBlock, TR::Block *lastBlock)6124{6125TR::CFG *cfg = comp()->getFlowGraph();6126cfg->setStructure(0);6127TR::Block *ret;61286129TR::TreeTop *orgNextTreeTop = block->getExit()->getNextTreeTop();6130if (orgNextTreeTop)6131{6132TR::Block * orgNextBlock = orgNextTreeTop->getEnclosingBlock();6133ret = TR::Block::createEmptyBlock(block->getExit()->getNode(), comp(), block->getFrequency(), block);6134cfg->insertBefore(ret, orgNextBlock);6135}6136else6137{6138TR_ASSERT(block->getLastRealTreeTop()->getNode()->getOpCode().isBranch(), "error");6139ret = block->split(block->getLastRealTreeTop(), cfg);6140}6141cfg->join(block, firstBlock);6142cfg->join(lastBlock, ret);6143setSuccessorEdge(block, firstBlock);6144return ret;6145}614661476148//*****************************************************************************************6149// Check whether the node is a dead store by using useDefInfo6150//*****************************************************************************************6151bool6152TR_CISCTransformer::isDeadStore(TR::Node *node)6153{6154if (node->getOpCode().isStoreDirect())6155{6156if (!node->getSymbol()->isAutoOrParm())6157return false;61586159TR_UseDefInfo *useDefInfo = _useDefInfo;6160const int32_t firstUseIndex = useDefInfo->getFirstUseIndex();6161int32_t useDefIndex = node->getUseDefIndex();6162//TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");6163if (!useDefInfo->isDefIndex(useDefIndex)) return false;6164if (useDefInfo->getUsesFromDefIsZero(useDefIndex)) return true;6165}6166return false;6167}616861696170//*****************************************************************************************6171// It basically skips blocks containing only a goto statement.6172// It can additionally skip nodes for dead stores and the node specified by "ignoreTree"6173//*****************************************************************************************6174TR::Block *6175TR_CISCTransformer::skipGoto(TR::Block *block, TR::Node *ignoreTree)6176{6177while(true)6178{6179TR::TreeTop *treeTop = block->getFirstRealTreeTop();6180TR::Node *gotonode;6181while(true)6182{6183gotonode = treeTop->getNode();6184if (!isDeadStore(gotonode) &&6185(ignoreTree == 0 || !compareTrNodeTree(gotonode, ignoreTree))) break;6186treeTop = treeTop->getNextRealTreeTop();6187}6188TR::ILOpCodes opcode = gotonode->getOpCodeValue();6189if (opcode == TR::Goto)6190{6191block = gotonode->getBranchDestination()->getNode()->getBlock();6192}6193else if (opcode == TR::BBEnd)6194{6195treeTop = treeTop->getNextRealTreeTop();6196block = treeTop->getNode()->getBlock();6197}6198else6199{6200break;6201}6202}6203return block;6204}620562066207//*****************************************************************************************6208// It searches the "target" node in the tree from "top".6209// Return values are stored into *retParent and *retChildNum.6210//*****************************************************************************************6211bool6212TR_CISCTransformer::searchNodeInTrees(TR::Node *top, TR::Node *target, TR::Node **retParent, int *retChildNum)6213{6214int i;6215for (i = top->getNumChildren(); --i >= 0; )6216{6217if (compareTrNodeTree(top->getChild(i), target))6218{6219if (retParent) *retParent = top;6220if (retChildNum) *retChildNum = i;6221return true;6222}6223}6224for (i = top->getNumChildren(); --i >= 0; )6225{6226if (searchNodeInTrees(top->getChild(i), target, retParent, retChildNum)) return true;6227}6228return false;6229}623062316232//*****************************************************************************************6233// Analyze whether node trees a and b are equivalent.6234//*****************************************************************************************6235bool6236TR_CISCTransformer::compareTrNodeTree(TR::Node *a, TR::Node *b)6237{6238if (a == b) return true;6239if (a->getOpCodeValue() != b->getOpCodeValue()) return false;6240if (a->getOpCode().hasSymbolReference() &&6241(a->getSymbolReference()->getReferenceNumber() != b->getSymbolReference()->getReferenceNumber()))6242return false;62436244if (a->getOpCode().isLoadConst())6245{6246switch(a->getOpCodeValue())6247{6248case TR::iconst:6249if (a->getUnsignedInt() != b->getUnsignedInt()) return false;6250break;6251case TR::lconst:6252if (a->getUnsignedLongInt() != b->getUnsignedLongInt()) return false;6253break;6254case TR::aconst:6255if (a->getAddress() != b->getAddress()) return false;6256break;6257case TR::fconst:6258if (a->getFloat() != b->getFloat()) return false;6259break;6260case TR::dconst:6261if (a->getDouble() != b->getDouble()) return false;6262break;6263case TR::bconst:6264if (a->getUnsignedByte() != b->getUnsignedByte()) return false;6265break;6266case TR::sconst:6267if (a->getShortInt() != b->getShortInt()) return false;6268break;6269default:6270return false;6271}6272}6273int32_t numChild = a->getNumChildren();6274if (numChild != b->getNumChildren()) return false;6275if (numChild == 2 && a->getOpCode().isCommutative())6276{6277if ((!compareTrNodeTree(a->getChild(0), b->getChild(0)) ||6278!compareTrNodeTree(a->getChild(1), b->getChild(1))) &&6279(!compareTrNodeTree(a->getChild(0), b->getChild(1)) ||6280!compareTrNodeTree(a->getChild(1), b->getChild(0))))6281return false;6282}6283else6284{6285int32_t i;6286for (i = 0; i < numChild; i++)6287{6288if (!compareTrNodeTree(a->getChild(i), b->getChild(i))) return false;6289}6290}6291return true;6292}629362946295//*****************************************************************************************6296// Analyze whether node trees in blocks a and b are equivalent.6297//*****************************************************************************************6298bool6299TR_CISCTransformer::compareBlockTrNodeTree(TR::Block *a, TR::Block *b)6300{6301if (a == b) return true;6302TR::TreeTop *ttA = a->getFirstRealTreeTop();6303TR::TreeTop *ttB = b->getFirstRealTreeTop();6304TR::TreeTop *lastA = a->getLastRealTreeTop();6305while(true)6306{6307if (!compareTrNodeTree(ttA->getNode(), ttB->getNode())) return false;6308if (ttA == lastA) break;6309ttA = ttA->getNextRealTreeTop();6310if (ttA->getNode()->getOpCodeValue() == TR::Goto) break;6311ttB = ttB->getNextRealTreeTop();6312if (ttB->getNode()->getOpCodeValue() == TR::Goto) break;6313}6314return true;6315}631663176318//*****************************************************************************************6319// Append nodes in the list _beforeInsertions into the block.6320// Restriction: It can create a single new block automatically, but it doesn't create multiple ones.6321//*****************************************************************************************6322TR::Block *6323TR_CISCTransformer::insertBeforeNodes(TR::Block *block)6324{6325ListIterator<TR::Node> ni(&_beforeInsertions);6326TR::Node *n, *last = 0;6327int32_t count = 0;6328for (n = ni.getFirst(); n; n = ni.getNext())6329{6330// Insert nodes into given block6331TR::TreeTop *top = TR::TreeTop::create(comp(), n);6332block->getLastRealTreeTop()->join(top);6333top->join(block->getExit());6334last = n;6335count++;6336}6337if (trace()) traceMsg(comp(), "insertBeforeNodes added %d node(s) to block_%d [%p]\n", count, block->getNumber(), block);6338if (last && last->getOpCode().isBranch())6339{6340TR::CFG *cfg = comp()->getFlowGraph();6341TR::TreeTop *orgNext = block->getExit()->getNextTreeTop();6342TR::Block *newBlock = TR::Block::createEmptyBlock(last, comp(), block->getFrequency(), block);6343cfg->setStructure(0);6344cfg->addNode(newBlock);6345newBlock->getExit()->join(orgNext);6346block->getExit()->join(newBlock->getEntry());63476348cfg->addSuccessorEdges(newBlock);6349bool isRemove = true;63506351TR::Block * orgNextBlock = orgNext->getNode()->getBlock();6352TR::Block * branchDestinationBlock = 0;6353if (last->getOpCode().isIf())6354branchDestinationBlock = last->getBranchDestination()->getEnclosingBlock();6355// Copy edges to avoid removing necessary blocks6356for (auto edge = block->getSuccessors().begin(); edge != block->getSuccessors().end(); ++edge)6357{6358TR::Block *to = toBlock((*edge)->getTo());6359if (to != branchDestinationBlock &&6360to != orgNextBlock)6361{6362if (trace()) traceMsg(comp(), "insertBeforeNodes added the edge (%d, %d).\n",newBlock->getNumber(),to->getNumber());6363addEdge(&newBlock->getSuccessors(), newBlock, to);6364}6365}63666367if (last->getOpCode().isIf())6368{6369setSuccessorEdges(block, newBlock, branchDestinationBlock);6370if (orgNext->getNode()->getBlock() == branchDestinationBlock)6371isRemove = false;6372}6373else6374{6375setSuccessorEdge(block, newBlock);6376}63776378if (isRemove)6379cfg->removeEdge(block, orgNext->getNode()->getBlock());6380if (trace()) traceMsg(comp(), "insertBeforeNodes created block_%d [%p]\n", newBlock->getNumber(), newBlock);6381block = newBlock;6382}6383return block;6384}63856386//*****************************************************************************************6387// Search for store to a given symref in the insert before list6388// @param symRefNumberToBeMatched The symbol reference number to be matched.6389// @return The TR::Node for the store with same symbol reference number. NULL if not found.6390//*****************************************************************************************6391TR::Node *6392TR_CISCTransformer::findStoreToSymRefInInsertBeforeNodes(int32_t symRefNumberToBeMatched)6393{6394ListIterator<TR::Node> ni(&_beforeInsertions);6395TR::Node *n = NULL;63966397for (n = ni.getFirst(); n; n = ni.getNext())6398{6399if (n->getOpCode().isStore() && n->getOpCode().hasSymbolReference() && n->getSymbolReference()->getReferenceNumber() == symRefNumberToBeMatched)6400return n;6401}64026403return NULL;6404}64056406//*****************************************************************************************6407// Prepend/Append nodes in the list l into the block.6408// Restriction: It creates no new block automatically.6409//*****************************************************************************************6410TR::Block *6411TR_CISCTransformer::insertAfterNodes(TR::Block *block, List<TR::Node> *l, bool prepend)6412{6413ListIterator<TR::Node> ni(l);6414TR::Node *n;6415int32_t count = 0;6416if (prepend)6417{6418TR::TreeTop *last, *orgNext;6419last = block->getEntry();6420orgNext = last->getNextTreeTop();6421for (n = ni.getFirst(); n; n = ni.getNext())6422{6423TR::TreeTop *top = TR::TreeTop::create(comp(), n);6424last->join(top);6425last = top;6426count++;6427}6428last->join(orgNext);6429}6430else6431{6432for (n = ni.getFirst(); n; n = ni.getNext())6433{6434TR::TreeTop *top = TR::TreeTop::create(comp(), n);6435block->append(top);6436count++;6437}6438}6439if (trace()) traceMsg(comp(), "insertAfterNodes adds %d node(s)\n", count);6440return block;6441}64426443//*****************************************************************************************6444// Add nodes for idiom independent transformations6445//*****************************************************************************************6446TR::Block *6447TR_CISCTransformer::insertAfterNodes(TR::Block *block, bool prepend)6448{6449return insertAfterNodes(block, &_afterInsertions, prepend);6450}64516452// Add nodes for idiom specific transformations6453TR::Block *6454TR_CISCTransformer::insertAfterNodesIdiom(TR::Block *block, int32_t pos, bool prepend)6455{6456return insertAfterNodes(block, _afterInsertionsIdiom + pos, prepend);6457}645864596460// Remove all nodes from TreeTops from "start" to "end"6461TR::TreeTop *6462TR_CISCTransformer::removeAllNodes(TR::TreeTop *start, TR::TreeTop *end)6463{6464TR::TreeTop *ret = start->getPrevTreeTop();6465#if 06466for (; start != end; start = start->getNextTreeTop())6467{6468start->getNode()->removeAllChildren();6469start->setNode(0);6470}6471#endif6472TR::TreeTop *next;6473for (; start != end; start = next)6474{6475next = start->getNextTreeTop();6476TR::TransformUtil::removeTree(comp(), start);6477if (next == end)6478break;6479}6480return ret;6481}648264836484//*****************************************************************************6485// These functions create function tables for TRT or TRxx instructions.6486//*****************************************************************************6487bool6488TR_CISCTransformer::analyzeBoolTable(TR_BitVector **bv, TR::TreeTop **retSameExit, TR_CISCNode *boolTable, TR_BitVector *defBV, TR_CISCNode *defNode, TR_CISCNode *ignoreNode, int32_t bvoffset, int32_t allocBVSize)6489{6490List<TR_CISCNode> *P2T = _P2T;6491List<TR_CISCNode> *T2P = _T2P;6492TR_CISCGraph *P = _P;6493TR_CISCGraph *T = _T;6494ListIterator<TR_CISCNode> ni(_candidateRegion);6495TR::TreeTop *exitTreeTop;6496bool initExitTreeTop;6497TR_CISCNode * exitnode;6498TR_CISCNode *n;6499int32_t i;6500TR_BitVector takenBV(allocBVSize, trMemory(), stackAlloc), ntakenBV(allocBVSize, trMemory(), stackAlloc), tmpBV(allocBVSize, trMemory(), stackAlloc);6501TR_BitVector orgBV(allocBVSize, trMemory(), stackAlloc);65026503//6504// Perform a forward dataflow analysis to compute exit conditions of the loop6505//6506for (i = T->getNumNodes(); --i >= 0; )6507bv[i] = new (trStackMemory()) TR_BitVector(allocBVSize, trMemory(), stackAlloc);6508exitnode = T->getExitNode();6509exitTreeTop = 0;6510initExitTreeTop = false;6511int loopCount = 0;6512bool doAgain = true;6513while(doAgain)6514{6515if (loopCount++ > 10)6516{6517TR_ASSERT(false, "analyzeBoolTable: infinite loop!\n");6518return false;6519}6520doAgain = false;6521for (n = ni.getFirst(); n; n = ni.getNext())6522{6523int32_t pos;6524uint32_t tID = n->getID();6525TR_CISCNode *p = getT2PheadRep(tID);6526bool doPropagateSuccs = true;6527if (analyzeT2P(n, defNode) & _T2P_MatchMask)6528{6529p = defNode;6530if (bv[tID]->isEmpty()) *bv[tID] = *defBV;6531}6532else if (p == boolTable)6533{6534if (n->getOpcode() == TR::Case)6535{6536if (!n->isValidOtherInfo())6537{6538// default case6539TR_ASSERT(n->getParents()->isSingleton(), "error!!!");6540TR_CISCNode *swbody = n->getHeadOfParents();6541takenBV = *bv[tID];6542for (i = swbody->getNumChildren(); --i >= 2; )6543{6544pos = swbody->getChild(i)->getOtherInfo()+bvoffset;6545takenBV.reset(pos);6546}6547}6548else6549{6550pos = n->getOtherInfo()+bvoffset;6551takenBV.empty();6552takenBV.set(pos);6553if (!n->isOutsideOfLoop())6554{6555TR::TreeTop *thisDestination = n->getDestination();6556if (!initExitTreeTop)6557{6558initExitTreeTop = true;6559exitTreeTop = thisDestination;6560}6561if (exitTreeTop != thisDestination)6562{6563if (trace() && exitTreeTop)6564{6565traceMsg(comp(), "Succ(0) is not exit node. ID:%d (TR::Case)\n", n->getID());6566}6567exitTreeTop = 0;6568}6569}6570}65716572if (doAgain)6573{6574*bv[n->getSucc(0)->getID()] |= takenBV;6575}6576else6577{6578int id;6579id = n->getSucc(0)->getID();6580orgBV = *bv[id];6581*bv[id] |= takenBV;6582if (!(orgBV == *bv[id])) doAgain = true;6583}65846585doPropagateSuccs = false; // already done6586}6587else6588{6589TR_CISCNode *child1 = n->getChild(1);6590while (!child1->isInterestingConstant())6591{6592child1 = child1->getNodeIfSingleChain();6593if (!child1)6594{6595if (trace()) traceMsg(comp(), "analyzeBoolTable failed for %p. (no single chain)\n", n->getChild(1));6596return false;6597}6598if (!child1->isStoreDirect())6599{6600if (trace()) traceMsg(comp(), "analyzeBoolTable failed for %p. (%p is not store)\n", n->getChild(1), child1);6601return false;6602}6603child1 = child1->getChild(0);6604}6605pos = child1->getOtherInfo()+bvoffset;66066607switch(n->getOpcode())6608{6609case TR::ifbcmpeq:6610case TR::ificmpeq:6611takenBV.empty();6612ntakenBV = *bv[tID];6613if (bv[tID]->isSet(pos))6614{6615takenBV.set(pos);6616ntakenBV.reset(pos);6617}6618break;6619case TR::ifbcmpne:6620case TR::ificmpne:6621takenBV = *bv[tID];6622ntakenBV.empty();6623if (bv[tID]->isSet(pos))6624{6625takenBV.reset(pos);6626ntakenBV.set(pos);6627}6628break;6629case TR::ifbcmplt:6630case TR::ifsucmplt:6631case TR::ificmplt:6632takenBV = *bv[tID];6633ntakenBV = takenBV;6634tmpBV.empty();6635tmpBV.setAll(0, pos-1);6636takenBV &= tmpBV;6637ntakenBV -= tmpBV;6638break;6639case TR::ifbcmpge:6640case TR::ifsucmpge:6641case TR::ificmpge:6642takenBV = *bv[tID];6643ntakenBV = takenBV;6644tmpBV.empty();6645tmpBV.setAll(0, pos-1);6646ntakenBV &= tmpBV;6647takenBV -= tmpBV;6648break;6649case TR::ifbcmpgt:6650case TR::ifsucmpgt:6651case TR::ificmpgt:6652takenBV = *bv[tID];6653ntakenBV = takenBV;6654tmpBV.empty();6655tmpBV.setAll(0, pos);6656ntakenBV &= tmpBV;6657takenBV -= tmpBV;6658break;6659case TR::ifbcmple:6660case TR::ifsucmple:6661case TR::ificmple:6662takenBV = *bv[tID];6663ntakenBV = takenBV;6664tmpBV.empty();6665tmpBV.setAll(0, pos);6666takenBV &= tmpBV;6667ntakenBV -= tmpBV;6668break;6669default:6670TR_ASSERT(false, "not implemented yet");6671// not implemented yet6672return false;6673}66746675if (doAgain)6676{6677*bv[n->getSucc(0)->getID()] |= ntakenBV;6678*bv[n->getSucc(1)->getID()] |= takenBV;6679}6680else6681{6682int id;6683id = n->getSucc(0)->getID();6684orgBV = *bv[id];6685*bv[id] |= ntakenBV;6686if (!(orgBV == *bv[id])) doAgain = true;66876688id = n->getSucc(1)->getID();6689orgBV = *bv[id];6690*bv[id] |= takenBV;6691if (!(orgBV == *bv[id])) doAgain = true;6692}66936694doPropagateSuccs = false; // already done66956696if (!n->isOutsideOfLoop())6697{6698TR::TreeTop *thisDestination = n->getDestination();6699if (!initExitTreeTop)6700{6701if (trace())6702traceMsg(comp(), "analyzeBoolTable - Delimiter checking node %d targets treetop: %p block_%d: %p\n",6703n->getID(), thisDestination, thisDestination->getEnclosingBlock()->getNumber(), thisDestination->getEnclosingBlock());6704initExitTreeTop = true;6705exitTreeTop = thisDestination;6706}6707if (exitTreeTop != thisDestination)6708{6709if (trace() && exitTreeTop)6710traceMsg(comp(), "analyzeBoolTable - found conflicting successors. Delimiter checking node %d targets treetop: %p (!= %p) block_%d: %p\n",6711n->getID(), thisDestination, exitTreeTop, thisDestination->getEnclosingBlock()->getNumber(), thisDestination->getEnclosingBlock());67126713exitTreeTop = NULL;6714}6715}6716}6717}6718else if (p == ignoreNode)6719{6720doPropagateSuccs = false; // ignore6721}6722else6723{6724if (p &&6725p->getNumSuccs() >= 2)6726{6727for (i = p->getNumSuccs(); --i >= 0; )6728{6729if (p->getSucc(i)->getOpcode() == TR_exitnode)6730{6731doPropagateSuccs = false; // ignore6732break;6733}6734}6735}6736}67376738if (doPropagateSuccs)6739{6740if (doAgain)6741{6742for (i = n->getNumSuccs(); --i >= 0; )6743*bv[n->getSucc(i)->getID()] |= *bv[tID];6744}6745else6746{6747for (i = n->getNumSuccs(); --i >= 0; )6748{6749int id = n->getSucc(i)->getID();6750orgBV = *bv[id];6751*bv[id] |= *bv[tID];6752if (!(orgBV == *bv[id])) doAgain = true;6753}6754}6755}6756}6757}67586759if (retSameExit)6760{6761*retSameExit = exitTreeTop;6762}6763return true;6764}676567666767#define BYTEBVOFFSET (128)6768#define ALLOCBYTEBVSIZE (128+256)6769int32_t6770TR_CISCTransformer::analyzeByteBoolTable(TR_CISCNode *boolTable, uint8_t *table256, TR_CISCNode *ignoreNode, TR::TreeTop **retSameExit)6771{6772TR::StackMemoryRegion stackMemoryRegion(*trMemory());67736774List<TR_CISCNode> *P2T = _P2T;6775List<TR_CISCNode> *T2P = _T2P;6776TR_CISCGraph *P = _P;6777TR_CISCGraph *T = _T;6778//int32_t i;6779//TR::TreeTop *exitTreeTop;67806781//6782// initialize6783//6784memset(table256, 0, 256);6785if (!boolTable || !getP2TRepInLoop(boolTable)) return 0; // # of delimiter is zero67866787TR_BitVector **bv, defBV(ALLOCBYTEBVSIZE, trMemory(), stackAlloc);6788uint32_t size = sizeof(*bv) * T->getNumNodes();6789TR_ASSERT(boolTable->getOpcode() == TR_booltable, "error!");6790TR_CISCNode *defNode = boolTable->getChild(0);6791TR_CISCNode *defTargetNode = getP2TRepInLoop(defNode);6792bv = (TR_BitVector **)trMemory()->allocateMemory(size, stackAlloc);6793memset(bv, 0, size);67946795switch((defTargetNode ? defTargetNode : defNode)->getOpcode())6796{6797case TR::b2i:6798if (defNode->isOptionalNode()) defNode = defNode->getChild(0);6799// fall through6800case TR::bloadi:6801defBV.setAll(-128+BYTEBVOFFSET, 127+BYTEBVOFFSET);6802break;6803case TR::bu2i:6804defBV.setAll( 0+BYTEBVOFFSET, 255+BYTEBVOFFSET);6805break;6806default:6807TR_ASSERT(false, "not implemented yet");6808// not implemented yet6809return -1; // error6810}68116812if (!analyzeBoolTable(bv, retSameExit, boolTable, &defBV, defNode, ignoreNode, BYTEBVOFFSET, ALLOCBYTEBVSIZE)) return -1; // error68136814TR_BitVectorIterator bvi(*bv[T->getExitNode()->getID()]);6815int32_t count = 0;68166817// Create the function table from the BitVector of the exit node6818while (bvi.hasMoreElements())6819{6820int32_t nextElement = bvi.getNextElement() - BYTEBVOFFSET;6821if (nextElement < 0) nextElement += 256;6822TR_ASSERT(0 <= nextElement && nextElement < 256, "error!!!");6823table256[nextElement] = nextElement ? nextElement : 1;6824count ++;6825}6826if (trace())6827{6828static int traceByteBoolTable = -1;6829if (traceByteBoolTable < 0)6830{6831char *p = feGetEnv("traceBoolTable");6832traceByteBoolTable = p ? 1 : 0;6833}6834if (1 > count || count > 255 || traceByteBoolTable)6835{6836traceMsg(comp(), "analyzeByteBoolTable: count is %d\n",count);6837ListIterator<TR_CISCNode> pi(_candidateRegion);6838TR_CISCNode *pn;6839traceMsg(comp(), "Predecessors of the exit node:\n ID:count\n");6840for (pn = pi.getFirst(); pn; pn = pi.getNext())6841{6842int32_t id = pn->getID();6843if (getT2PheadRep(id) == boolTable)6844{6845traceMsg(comp(), "%3d:%3d:",id,bv[id]->elementCount());6846bv[id]->print(comp());6847traceMsg(comp(), "\n");6848}6849}6850}6851}6852//TR_ASSERT(1 <= count && count <= 255, "maybe error!!");6853return count;6854}685568566857#define CHARBVOFFSET (0)6858#define ALLOCCHARBVSIZE (65536)6859int32_t6860TR_CISCTransformer::analyzeCharBoolTable(TR_CISCNode *boolTable, uint8_t *table65536, TR_CISCNode *ignoreNode, TR::TreeTop **retSameExit)6861{6862TR::StackMemoryRegion stackMemoryRegion(*trMemory());68636864List<TR_CISCNode> *P2T = _P2T;6865List<TR_CISCNode> *T2P = _T2P;6866TR_CISCGraph *P = _P;6867TR_CISCGraph *T = _T;6868//int32_t i;68696870//6871// initialize6872//6873memset(table65536, 0, 65536);6874if (!boolTable || !getP2TRepInLoop(boolTable)) return 0; // # of delimiter is zero68756876TR_BitVector **bv, defBV(ALLOCCHARBVSIZE, trMemory(), stackAlloc);6877uint32_t size = sizeof(*bv) * T->getNumNodes();6878TR_ASSERT(boolTable->getOpcode() == TR_booltable, "error!");6879TR_CISCNode *defNode = boolTable->getChild(0);6880TR_CISCNode *defTargetNode = getP2TRepInLoop(defNode);6881bv = (TR_BitVector **)trMemory()->allocateMemory(size, stackAlloc);6882memset(bv, 0, size);68836884switch((defTargetNode ? defTargetNode : defNode)->getOpcode())6885{6886case TR::su2i:6887if (defNode->isOptionalNode()) defNode = defNode->getChild(0);6888// fall through6889case TR::sloadi:6890defBV.setAll(0, 65535);6891break;6892default:6893TR_ASSERT(false, "not implemented yet");6894// not implemented yet6895return -1; // error6896}68976898if (!analyzeBoolTable(bv, retSameExit, boolTable, &defBV, defNode, ignoreNode, CHARBVOFFSET, ALLOCCHARBVSIZE)) return -1; // error68996900TR_BitVectorIterator bvi(*bv[T->getExitNode()->getID()]);6901int32_t count = 0;69026903// Create the function table from the BitVector of the exit node6904while (bvi.hasMoreElements())6905{6906int32_t nextElement = bvi.getNextElement() - CHARBVOFFSET;6907TR_ASSERT(0 <= nextElement && nextElement < 65536, "error!!!");6908table65536[nextElement] = 1;6909count ++;6910}6911if (trace())6912{6913static char *traceCharBoolTable = feGetEnv("traceBoolTable");69146915if (count < 1 || count > 65535 || traceCharBoolTable)6916{6917traceMsg(comp(), "analyzeByteBoolTable: count is %d\n",count);6918ListIterator<TR_CISCNode> pi(_candidateRegion);6919TR_CISCNode *pn;6920traceMsg(comp(), "Predecessors of the exit node:\n ID:count\n");6921for (pn = pi.getFirst(); pn; pn = pi.getNext())6922{6923int32_t id = pn->getID();6924if (getT2PheadRep(id) == boolTable)6925{6926traceMsg(comp(), "%3d:%3d:", id, bv[id]->elementCount());6927bv[id]->print(comp());6928traceMsg(comp(), "\n");6929}6930}6931}6932}69336934return count;6935}693669376938//*****************************************************************************6939// It sets the cold flags to the all blocks in _bblistBody.6940//*****************************************************************************6941void6942TR_CISCTransformer::setColdLoopBody()6943{6944ListIterator<TR::Block> bi(&_bblistBody);6945TR::Block *b;6946for (b = bi.getFirst(); b; b = bi.getNext())6947{6948b->setIsCold();6949b->setFrequency(-1);6950}6951}695269536954//*****************************************************************************6955// It get minimum and maximum of byte code index within the list l.6956// Note: Please initialize minIndex and maxIndex in caller!!6957// The return value means whether the result includes inlined region.6958//*****************************************************************************6959bool6960TR_CISCTransformer::getBCIndexMinMax(List<TR_CISCNode> *l, int32_t *_minIndex, int32_t *_maxIndex, int32_t *_minLN, int32_t *_maxLN, bool allowInlined)6961{6962int32_t minIndex = *_minIndex;6963int32_t maxIndex = *_maxIndex;6964int32_t minLN = *_minLN;6965int32_t maxLN = *_maxLN;6966ListIterator<TR_CISCNode> ni(l);6967TR_CISCNode *n;6968TR::Node *tn;6969bool includeInline = false;6970for (n = ni.getFirst(); n; n = ni.getNext())6971{6972ListElement<TrNodeInfo> *le = n->getTrNodeInfo()->getListHead();6973if (le)6974{6975tn = le->getData()->_node;6976bool go = true;6977if (tn->getInlinedSiteIndex() != -1)6978{6979if (allowInlined)6980includeInline = true;6981else6982go = false;6983}6984if (go)6985{6986int bcIndex = tn->getByteCodeIndex();6987if (minIndex > bcIndex) minIndex = bcIndex;6988if (maxIndex < bcIndex) maxIndex = bcIndex;6989int LN = comp()->getLineNumber(tn);6990if (minLN > LN) minLN = LN;6991if (maxLN < LN) maxLN = LN;6992}6993}6994}6995*_minIndex = minIndex;6996*_maxIndex = maxIndex;6997*_minLN = minLN;6998*_maxLN = maxLN;6999return includeInline;7000}700170027003//*****************************************************************************7004// It shows candidates of input code that cannot be transformed to idioms.7005//*****************************************************************************7006void7007TR_CISCTransformer::showCandidates()7008{7009if (!isShowingCandidates()) return;7010FILE *fp = stderr;7011int32_t minIndex = _candidatesForShowing.getMinBCIndex();7012int32_t maxIndex = _candidatesForShowing.getMaxBCIndex();7013int32_t minLN = _candidatesForShowing.getMinLineNumber();7014int32_t maxLN = _candidatesForShowing.getMaxLineNumber();7015if (minIndex <= maxIndex)7016{7017ListIterator<TR_CISCGraph> idiomI(_candidatesForShowing.getListIdioms());7018TR_CISCGraph *p;7019fprintf(fp, "!!!!!!!!!!!!!!!!!!!!!!!!!!\n");7020fprintf(fp, "Candidate is found for ");7021int count = 0;7022for (p = idiomI.getFirst(); p; p = idiomI.getNext())7023{7024if (count != 0) fprintf(fp, ",");7025fprintf(fp, "%s", p->getTitle());7026count ++;7027}7028fprintf(fp, " (%s) in %s",7029comp()->getHotnessName(comp()->getMethodHotness()),7030_T->getTitle());7031#if SHOW_BCINDICES7032fprintf(fp, "\t bcindex is %d - %d, linenumber is %d - %d.", minIndex, maxIndex, minLN, maxLN);7033#endif7034fprintf(fp, "\n");7035}7036}7037703870397040//*****************************************************************************7041// It registers candidates of input code that cannot be transformed to idioms.7042//*****************************************************************************7043void7044TR_CISCTransformer::registerCandidates()7045{7046if (!isShowingCandidates()) return;7047ListIterator<TR_CISCNodeRegion> ri(&_candidatesForRegister);7048TR_CISCNodeRegion *r;7049int32_t minIndex, maxIndex;7050int32_t minLN, maxLN;7051minIndex = 0x7fffffff;7052maxIndex = -minIndex;7053minLN = 0x7fffffff;7054maxLN = -minIndex;7055for (r = ri.getFirst(); r; r = ri.getNext())7056{7057getBCIndexMinMax(r, &minIndex, &maxIndex, &minLN, &maxLN, false);7058}7059if (minIndex <= maxIndex) _candidatesForShowing.add(_P, minIndex, maxIndex, minLN, maxLN);7060}706170627063//*****************************************************************************7064// It returns the following conditions:7065// * _T2P_NULL: There is no pattern nodes corresponding to the target node t.7066// (regardless whether p is null or non-null)7067//7068// If p is non-null,7069// * _T2P_NotMatch: Cannot find any relationships between t and p7070// * _T2P_MatchAndSingle: The node t corresponds to ONLY the node p. (= _T2P_MatchMask | _T2P_Single)7071// * _T2P_MatchAndMultiple: The node t corresponds to the node p, (= _T2P_MatchMask | _T2P_Multiple)7072// but there is another candidate.7073// If p is null,7074// * _T2P_Single: There is a single pattern node corresponding to the node t.7075// * _T2P_Multiple: There are multiple pattern nodes corresponding to the node t.7076//*****************************************************************************7077CISCT2PCond7078TR_CISCTransformer::analyzeT2P(TR_CISCNode *t, TR_CISCNode *p)7079{7080//CISCT2PCond ret;7081int32_t tid = t->getID();7082List<TR_CISCNode> *l = _T2P + tid;7083TR_CISCNode *t2p;7084if (l->isEmpty())7085{7086return _T2P_NULL;7087}7088else if (l->isSingleton())7089{7090t2p = l->getListHead()->getData();7091if (!p) return _T2P_Single;7092return (p == t2p) ? _T2P_MatchAndSingle : _T2P_NotMatch;7093}7094else7095{7096if (!p) return _T2P_Multiple;7097ListIterator<TR_CISCNode> t2pi(l);7098for (t2p = t2pi.getFirst(); t2p; t2p = t2pi.getNext())7099{7100if (p == t2p) return _T2P_MatchAndMultiple;7101}7102return _T2P_NotMatch;7103}7104}710571067107//*****************************************************************************7108// It analyzes whether the pattern node TR_arrayindex corresponds to7109// an induction variable V or V + something in an input graph.7110//*****************************************************************************7111bool7112TR_CISCTransformer::analyzeOneArrayIndex(TR_CISCNode *arrayindex, TR::SymbolReference *inductionVariableSymRef)7113{7114List<TR_CISCNode> *l = getP2T() + arrayindex->getID();7115if (l->isEmpty())7116{7117if (arrayindex->isOptionalNode()) return true;7118else return false;7119}7120else if (!l->isSingleton()) return false;7121TR_CISCNode *t = l->getListHead()->getData();7122if (t->getOpcode() == TR::iadd) // check induction variable + something7123{7124bool ret = false;7125TR_CISCNode *c;7126c = t->getChild(0);7127if (c->getOpcode() == TR::iload)7128{7129TR::SymbolReference *symref = c->getHeadOfTrNodeInfo()->_node->getSymbolReference();7130if (symref == inductionVariableSymRef) ret = true;7131}7132if (!ret)7133{7134c = t->getChild(1);7135if (c->getOpcode() == TR::iload)7136{7137TR::SymbolReference *symref = c->getHeadOfTrNodeInfo()->_node->getSymbolReference();7138if (symref == inductionVariableSymRef) ret = true;7139}7140}7141if (!ret) return false;7142}7143else if (t->getOpcode() != TR_variable)7144{7145return false;7146}7147return true;7148}714971507151// Check if all of the node TR_arrayindex correspond to V or V+something7152bool7153TR_CISCTransformer::analyzeArrayIndex(TR::SymbolReference *inductionVariableSymRef)7154{7155// check each array index7156for (int32_t i = 0; ; i++)7157{7158TR_CISCNode *arrayindex = _P->getCISCNode(TR_arrayindex, true, i);7159if (!arrayindex) break; // end71607161if (!analyzeOneArrayIndex(arrayindex, inductionVariableSymRef)) return false;7162}7163return true;7164}716571667167// Count valid nodes where the node TR_arrayindex corresponds to V or V+something7168int32_t7169TR_CISCTransformer::countGoodArrayIndex(TR::SymbolReference *inductionVariableSymRef)7170{7171int32_t ret = 0;7172// check each array index7173for (int32_t i = 0; ; i++)7174{7175TR_CISCNode *arrayindex = _P->getCISCNode(TR_arrayindex, true, i);7176if (!arrayindex)7177{7178if (i == 0) ret = -1; // no TR_arrayindex in the pattern7179break; // end7180}71817182if (analyzeOneArrayIndex(arrayindex, inductionVariableSymRef)) ret ++;7183}7184return ret;7185}718671877188//*****************************************************************************7189// It performs very simple optimizations using UD/DU chains.7190// Currently, it performs:7191// (1) redundant BNDCHK elimination. necessary for very early phase, such as in the earlyGlobalOpts phase7192//*****************************************************************************7193bool7194TR_CISCTransformer::simpleOptimization()7195{7196TR_ASSERT(_T->isSetUDDUchains(), "please call simpleOptimization() after executing importUDchains()!");7197ListIterator<TR_CISCNode> ni(_T->getOrderByData());7198TR_CISCNode *n, *ch, *def;7199List<TR_CISCNode> *l;7200TR_CISCNode quasiConst2(trMemory(), TR_quasiConst2, TR::NoType, 0, 0, 0, 0);72017202for (n = ni.getFirst(); n; n = ni.getNext())7203{7204if (!n->isNegligible())7205{7206switch(n->getOpcode())7207{7208case TR::BNDCHK:7209// check whether the size is greater or equal to 2567210// and the array index comes from TR::bu2i.7211ch = n->getChild(0);7212if (ch->getOpcode() == TR::iconst)7213{7214if (ch->getOtherInfo() >= 256)7215{7216ch = n->getChild(1);7217switch(ch->getOpcode())7218{7219case TR::iload:7220def = ch->getNodeIfSingleChain();7221if (def)7222{7223if ((def->getNumChildren() > 0) && def->getChild(0)) // There is a bug where def is a TR_entrynode, so getChild(0) is null.7224{7225switch(def->getChild(0)->getOpcode())7226{7227case TR::bu2i:7228n->setIsNegligible(); // because the range is 0 - 2557229break;7230}7231}7232}7233break;7234case TR::bu2i:7235n->setIsNegligible(); // because the range is 0 - 2557236break;7237}7238}7239}7240break;72417242default:7243if (!n->isOutsideOfLoop())7244{7245if (n->isStoreDirect())7246{7247ListIterator<TR_CISCNode> useI(n->getChains());7248bool includeExitNode = false;7249for (ch = useI.getFirst(); ch; ch = useI.getNext())7250{7251if (ch->getDagID() != n->getDagID())7252{7253includeExitNode = true;7254break;7255}7256}7257if (!includeExitNode) n->setIsNegligible();7258}7259}7260if (!n->isNegligible()) // Still need to analysis?7261{7262if (quasiConst2.isEqualOpc(n) &&7263n->getParents()->isSingleton())7264{7265TR_CISCNode *parent = n->getHeadOfParents();7266if (parent->getOpcode() == TR::iadd)7267{7268l = _T2P + parent->getID();7269if (l->isSingleton() &&7270l->getListHead()->getData()->getOpcode() == TR_arrayindex)7271{7272n->setIsNegligible();7273}7274}7275}7276}7277break;7278}7279}7280}7281return true;7282}728372847285//*****************************************************************************7286// get the hash value of TR_CISCNodeRegion7287//*****************************************************************************7288uint64_t7289TR_CISCTransformer::getHashValue(TR_CISCNodeRegion *r)7290{7291uint64_t ret = 0;7292int count = 0;7293ListIterator<TR_CISCNode> ri(r);7294TR_CISCNode *n;7295for (n = ri.getFirst(); n; n = ri.getNext())7296{7297int i = count % 74;7298int bigshift = i % 5;7299int smallshift = i / 5;7300int shiftcount = bigshift * 10 + smallshift;7301ret += (uint64_t)n->getOpcode() << (uint64_t)shiftcount;7302count++;7303}7304return ret;7305}730673077308//*****************************************************************************7309// It analyzes whether we can convert the loop ArrayCmpLen in String.compareTo7310// to either ArrayCmpSign or ArrayCmp.7311//*****************************************************************************7312bool7313TR_CISCTransformer::canConvertArrayCmpSign(TR::Node *storeNode, List<TR::TreeTop> *compareIfs, bool *canConvertToArrayCmp)7314{7315static int disable = -1;7316if (disable < 0)7317{7318char *p = feGetEnv("DISABLE_CONVERTCMPSIGN");7319disable = p ? 1 : 0;7320}7321if (disable) return false;7322static int disableCMP = -1;7323if (disableCMP < 0)7324{7325char *p = feGetEnv("DISABLE_CONVERTCMP");7326disableCMP = p ? 1 : 0;7327}73287329TR_ASSERT(storeNode->getOpCode().isStoreDirect(), "error");7330TR_UseDefInfo *useDefInfo = getUseDefInfo();7331int32_t useDefIndex = storeNode->getUseDefIndex();7332if (useDefIndex == 0) return true;7333TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");7334TR_UseDefInfo::BitVector info(comp()->allocator());7335useDefInfo->getUsesFromDef(info, useDefIndex);7336if (!info.IsZero())7337{7338TR_UseDefInfo::BitVector::Cursor cursor(info);7339bool convertToArrayCmp = true;7340for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())7341{7342int32_t useIndex = (int32_t) cursor + useDefInfo->getFirstUseIndex();7343TR_ASSERT(useDefInfo->isUseIndex(useIndex), "error!");7344TR::Node *useNode = useDefInfo->getNode(useIndex);7345if (useNode->getReferenceCount() > 1)7346{7347if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because ReferenceCount > 1. %p\n",useNode);7348return false;7349}7350TR::Node *parentNode = NULL;7351int32_t retChildNum = -1;7352TR_CISCNode *useCISCNode = _T->getCISCNode(useNode);7353TR::TreeTop *foundTT = NULL;7354if (useCISCNode)7355{7356if (useCISCNode->getParents()->isSingleton())7357{7358TR_CISCNode *parentCISCNode = useCISCNode->getHeadOfParents();7359if (parentCISCNode->getTrNodeInfo()->isSingleton())7360{7361parentNode = parentCISCNode->getHeadOfTrNode();7362foundTT = parentCISCNode->getHeadOfTreeTop();7363if (parentNode->getChild(0) == useNode)7364retChildNum = 0;7365else if (parentNode->getChild(1) == useNode)7366retChildNum = 1;7367else7368parentNode = 0;7369}7370}7371}7372else7373{7374_useTreeTopMap.buildAllMap();7375foundTT = _useTreeTopMap.findParentTreeTop(useNode);7376if (NULL == foundTT || !searchNodeInTrees(foundTT->getNode(), useNode, &parentNode, &retChildNum))7377{7378if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because searchNodeInTrees failed. UseNode: %p with corresponding TreeTop: %p\n",useNode, foundTT);7379return false;7380}7381}7382if (!parentNode)7383{7384if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because parentNode is NULL. %p\n",useNode);7385return false;7386}7387TR_ASSERT(foundTT, "error!");7388if (parentNode->getOpCode().isStoreDirect())7389{7390if (!canConvertArrayCmpSign(parentNode, compareIfs, &convertToArrayCmp))7391{7392if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because canConvertArrayCmpSign(p) failed. %p\n",useNode);7393return false;7394}7395}7396else if (parentNode->getOpCode().isBooleanCompare())7397{7398TR_ASSERT(retChildNum == 0 || retChildNum == 1, "error");7399TR::Node *theOtherChild = parentNode->getChild(1-retChildNum);7400if (theOtherChild->getInt() != 0 ||7401theOtherChild->getOpCodeValue() != TR::iconst)7402{7403if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because theOtherChild is not iconst 0. %p\n",useNode);7404return false;7405}7406if (compareIfs) compareIfs->add(foundTT);7407switch(parentNode->getOpCodeValue())7408{7409case TR::icmpeq:7410case TR::icmpne:7411case TR::ificmpeq:7412case TR::ificmpne:7413break; // OK!7414default:7415if (trace())7416{7417traceMsg(comp(), "convertArrayCmp failed because parentNode is %s. %x\n",7418parentNode->getOpCode().getName(),7419useNode,parentNode);7420}7421convertToArrayCmp = false;7422break;7423}7424}7425else7426{7427if (trace())7428{7429traceMsg(comp(), "canConvertArrayCmpSign failed because unhandled opcode %s. %x %x\n",7430parentNode->getOpCode().getName(),useNode,parentNode);7431}7432return false;7433}7434}7435if (canConvertToArrayCmp) *canConvertToArrayCmp = convertToArrayCmp;7436}7437if (disableCMP) *canConvertToArrayCmp = false;7438#if VERBOSE7439if (*canConvertToArrayCmp)7440printf("!!! canConvertToArrayCmp %s\n",_T->getTitle());7441else7442printf("!!! canConvertToArrayCmpSign %s\n",_T->getTitle());7443#endif7444return true;7445}7446744774487449bool7450TR_CISCTransformer::computeTopologicalEmbedding(TR_CISCGraph *P, TR_CISCGraph *T)7451{7452TR::SimpleRegex *disabledPatterns = comp()->getOptions()->getDisabledIdiomPatterns();7453if (disabledPatterns && TR::SimpleRegex::match(disabledPatterns, P->getTitle()))7454{7455if (trace())7456traceMsg(comp(), "%s is disabled by disabledIdiomPatterns={}\n", P->getTitle());7457return false;7458}74597460//FIXME: improve this7461if (!T->testAllAspects(P))7462{7463if (trace())7464traceMsg(comp(), "%s is skipped since graph properties do not match (%08x)\n", P->getTitle(),P->getAspectsValue());7465return false; // No need to analyze7466}7467if (T->testAnyNoAspects(P))7468{7469if (trace())7470traceMsg(comp(), "%s is skipped due to existence of testAnyNoAspects (%08x)\n",P->getTitle(),P->getNoAspectsValue());7471return false; // No need to analyze7472}7473if (!T->meetMinCounts(P))7474{7475if (trace())7476traceMsg(comp(), "%s is skipped due to failure of meetMinCounts (%d %d %d)\n",P->getTitle(),7477P->getAspects()->getIfCount(), P->getAspects()->getIndirectLoadCount(), P->getAspects()->getIndirectStoreCount());7478return false; // No need to analyze7479}74807481// avoid analyzing very large graphs7482if (T->getNumNodes() >= IDIOM_SIZE_FACTOR*P->getNumNodes())7483{7484if (trace())7485traceMsg(comp(), "%s is skipped due to loop being very large\n", P->getTitle());7486return false; // No need to analyze7487}74887489#if !STRESS_TEST7490//FIXME: add TR_EnableIdiomRecognitionWarm to options7491if (1)//!_compilation->getOption(TR_EnableIdiomRecognitionWarm))7492{7493if (T->getHotness() < P->getHotness())7494{7495if (trace())7496traceMsg(comp(), "%s is skipped due to hotness\n",P->getTitle());7497return false; // No need to analyze7498}7499}7500bool incorrectBBFreqLevel = (T->getHotness() == veryHot);7501if (!incorrectBBFreqLevel)7502{7503if (P->isHighFrequency() && !T->isHighFrequency())7504{7505if (trace())7506traceMsg(comp(), "%s is skipped due to the rarely iterated loop (!isHighFrequency)\n",P->getTitle());7507return false; // No need to analyze7508}7509}7510if (T->getHotness() > warm) // Because IR is called only one time at the warm level, skip this check.7511{7512if (isAfterVersioning() ? P->isInhibitAfterVersioning() :7513P->isInhibitBeforeVersioning())7514{7515if (trace())7516traceMsg(comp(), "%s is skipped due to loop versioning check\n",P->getTitle());7517return false; // No need to analyze7518}7519}7520#endif7521752275237524if (trace())7525{7526traceMsg(comp(), "loopid %d: ", _bblistBody.getListHead()->getData()->getNumber());7527P->dump(comp()->getOutFile(), comp());7528}7529_P = P;7530_T = T;7531_numPNodes = P->getNumNodes();7532_numTNodes = T->getNumNodes();7533initTopologicalEmbedding();75347535// Step 1 computes embedding information for an input data dependence graph.7536//7537if (trace())7538traceMsg(comp(), "Computing embedding info for idiom %s in loop %d\n", P->getTitle(), _bblistBody.getListHead()->getData()->getNumber());7539if (showMesssagesStdout()) printf("Idiom: loop %d, %s\n",_bblistBody.getListHead()->getData()->getNumber(),7540P->getTitle());7541_sizeResult = _numPNodes * _numTNodes * sizeof(*_embeddedForData);7542_embeddedForData = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);7543if (!computeEmbeddedForData()) return false; // It cannot find all of the idiom nodes.7544if (showMesssagesStdout()) printf("find1 %s\n", P->getTitle());7545if (trace())7546traceMsg(comp(), "Detected IL nodes in loop for idiom %s\n", P->getTitle());75477548// Step 2 computes embedding information for an input control flow graph.7549//7550_embeddedForCFG = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);7551_sizeDE = _numPNodes * sizeof(*_DE);7552_EM = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);7553_DE = (uint8_t*)trMemory()->allocateMemory(_sizeDE, stackAlloc);7554if (!computeEmbeddedForCFG()) return false; // It cannot find all of the idiom nodes.7555if (showMesssagesStdout()) printf("find2 %s\n", P->getTitle());7556if (trace())7557traceMsg(comp(), "finished topological embedding for idiom %s\n", P->getTitle());75587559// Step 3 creates P2T and T2P tables from embedding information.7560// P and T denote Pattern and Target, respectively.7561// We can use them to find target nodes from pattern nodes, and vice versa.7562//7563_sizeP2T = _numPNodes * sizeof(*_P2T);7564_P2T = (List<TR_CISCNode> *)trMemory()->allocateMemory(_sizeP2T, stackAlloc);7565_sizeT2P = _numTNodes * sizeof(*_T2P);7566_T2P = (List<TR_CISCNode> *)trMemory()->allocateMemory(_sizeT2P, stackAlloc);7567if (!makeLists()) return false; // a variable corresponds to multiple nodes7568if (showMesssagesStdout()) printf("find3 %s\n", P->getTitle());75697570// Step 4 transforms the target graph if necessary and7571// checks that both graphs are exactly matched.7572//7573_candidatesForShowing.init();75747575// Import UD/DU information of TR::Node to TR_CISCNode._chain7576//7577T->importUDchains(comp(), _useDefInfo);75787579// It performs very simple optimizations using UD/DU chains.7580// Currently, it performs:7581// (1) redundant BNDCHK elimination.7582simpleOptimization();7583if (trace())7584{7585T->dump(comp()->getOutFile(), comp());7586}75877588// Analyze whether each candidate of array header constant is appropriate compared to the idiom.7589// Because the array header size is sometimes modified by constant folding7590// e.g. When AH is -24 for a[i], AH is modified to -25 for a[i+1]7591// If the analysis fails, it'll invalidate that node.7592//7593if (P->isRequireAHconst()) analyzeArrayHeaderConst();75947595// Analyze relationships for parents, children, predecessors, successors.7596// They are represented to four flags.7597//7598analyzeConnection();75997600// Based on above four relationships, we extract the region that matches the idiom graph.7601// If all nodes in the idiom graph are not included in the region, it returns 0.7602//7603_candidateRegion = extractMatchingRegion();7604if (!_candidateRegion ||7605!verifyCandidate() || // all blocks of the loop body are not included in the _candidateRegion7606embeddingHasConflictingBranches())7607{7608if (trace()) traceMsg(comp(), "computeTopologicalEmbedding: Graph transformations failed. (step 3)\n\n");7609registerCandidates();7610_T->restoreListsDuplicator();7611return false;7612}7613if (showMesssagesStdout()) printf("find4 %s\n", P->getTitle());76147615//***************************************************************************************7616// Start to transform actual code (TR::Block, TR::TreeTop, TR::Node, and so on...)7617//***************************************************************************************7618resetFlags();7619TransformerPtr transformer = P->getTransformer();7620if (performTransformation(comp(), "%sReducing loop %d to %s\n", OPT_DETAILS, _bblistBody.getListHead()->getData()->getNumber(),7621P->getTitle()) && !transformer(this))7622{7623if (trace()) traceMsg(comp(), "computeTopologicalEmbedding: IL Transformer failed. (step 4)\n\n");7624registerCandidates();7625_T->restoreListsDuplicator();7626return false; // The transformation fails7627}76287629if (trace() || showMesssagesStdout())7630{7631char *bcinfo = "";7632#if SHOW_BCINDICES7633char tmpbuf[256];7634int32_t minIndex, maxIndex;7635int32_t minLN, maxLN;7636minIndex = 0x7fffffff;7637maxIndex = -minIndex;7638minLN = 0x7fffffff;7639maxLN = -minIndex;7640bool inlined = getBCIndexMinMax(_candidateRegion, &minIndex, &maxIndex, &minLN, &maxLN, true);7641if (minIndex <= maxIndex)7642{7643sprintf(tmpbuf, ", bcindex %" OMR_PRIu32 " - %" OMR_PRIu32 " linenumber %" OMR_PRIu32 " - %" OMR_PRIu32 "%s.", minIndex, maxIndex, minLN, maxLN, inlined ? " (inlined)" : "");7644bcinfo = tmpbuf;7645}7646#endif7647#if SHOW_STATISTICS7648if (showMesssagesStdout())7649printf("!! Hash=0x%" OMR_PRIx64 " %s %s\n", getHashValue(_candidateRegion), P->getTitle(), T->getTitle());7650#endif76517652if (trace()) traceMsg(comp(), "***** Transformed *****, %s, %s, %s, loop:%d%s\n",7653comp()->getHotnessName(comp()->getMethodHotness()),7654P->getTitle(), T->getTitle(),7655_bblistBody.getListHead()->getData()->getNumber(),7656bcinfo);7657if (showMesssagesStdout()) printf("== Transformed == %s, %s, %s, loop:%d%s\n",7658comp()->getHotnessName(comp()->getMethodHotness()),7659P->getTitle(), T->getTitle(),7660_bblistBody.getListHead()->getData()->getNumber(),7661bcinfo);7662}76637664TR::DebugCounter::incStaticDebugCounter(comp(),7665TR::DebugCounter::debugCounterName(comp(),7666"idiomRecognition.matched/%s/(%s)/%s/loop=%d",7667P->getTitle(),7668comp()->signature(),7669comp()->getHotnessName(comp()->getMethodHotness()),7670_bblistBody.getListHead()->getData()->getNumber()));76717672return true;7673}76747675static void7676traceConflictingBranches(7677TR_CISCTransformer *opt,7678TR_CISCNode *pn,7679List<TR_CISCNode> *matches)7680{7681if (!opt->trace())7682return;76837684TR::Compilation *comp = opt->comp();7685traceMsg(comp, "Pattern node %d (%s) has conflicting branches:",7686pn->getID(),7687TR_CISCNode::getName((TR_CISCOps)pn->getOpcode(), comp));76887689bool first = true;7690ListIterator<TR_CISCNode> ti(matches);7691for (TR_CISCNode *tn = ti.getFirst(); tn != NULL; tn = ti.getNext())7692{7693traceMsg(comp, "%s %d (%s)",7694first ? "" : ",",7695tn->getID(),7696TR_CISCNode::getName((TR_CISCOps)tn->getOpcode(), comp));7697first = false;7698}76997700traceMsg(comp, "\n");7701}77027703/**7704* Determine whether there is a branch in the pattern with multiple matches.7705*7706* Such branches obscure the control flow of the target loop, which otherwise7707* should have to closely match the control flow seen in the pattern.7708*7709* Booltable nodes are exempt because the condition may be expressed via a7710* series of conditionals.7711*7712* Additionally, a branch may have matches ahead of the loop, but since these7713* have no bearing on control flow within the loop, they are allowed. An7714* in-loop match is moved to the front of _P2T, ahead of any outside matches.7715*7716* \return true if there is a troublesome branch7717*/7718bool7719TR_CISCTransformer::embeddingHasConflictingBranches()7720{7721static const char * const disableEnv =7722feGetEnv("TR_disableIdiomRecognitionConflictingBranchTest");7723static bool disable = disableEnv != NULL && disableEnv[0] != '\0';7724if (disable)7725return false;77267727List<TR_CISCNode> * const dagNodes = _P->getDagId2Nodes();7728const int32_t dagCount = _P->getNumDagIds();7729for (int32_t dag = 0; dag < dagCount; dag++)7730{7731ListIterator<TR_CISCNode> pi(&dagNodes[dag]);7732for (TR_CISCNode *pn = pi.getFirst(); pn != NULL; pn = pi.getNext())7733{7734uint32_t op = pn->getOpcode();7735bool isIf =7736op == (uint32_t)TR_ifcmpall7737|| (op < (uint32_t)TR::NumIlOps && pn->getIlOpCode().isIf());77387739if (!isIf)7740continue;77417742TR_CISCNode *inLoopMatch = NULL;7743List<TR_CISCNode> *matches = &_P2T[pn->getID()];7744ListIterator<TR_CISCNode> ti(matches);7745for (TR_CISCNode *tn = ti.getFirst(); tn != NULL; tn = ti.getNext())7746{7747if (getCandidateRegion()->isIncluded(tn))7748{7749if (inLoopMatch != NULL)7750{7751traceConflictingBranches(this, pn, matches);7752TR::DebugCounter::incStaticDebugCounter(comp(),7753TR::DebugCounter::debugCounterName(comp(),7754"idiomRecognition.rejected/branchConflict/%s/(%s)/%s/loop=%d",7755_P->getTitle(),7756comp()->signature(),7757comp()->getHotnessName(comp()->getMethodHotness()),7758_bblistBody.getListHead()->getData()->getNumber()));7759return true;7760}7761inLoopMatch = tn;7762}7763}7764if (inLoopMatch != NULL && matches->getHeadData() != inLoopMatch)7765{7766// move it to the front7767matches->remove(inLoopMatch);7768matches->addAfter(inLoopMatch, NULL);7769}7770}7771}77727773return false;7774}77757776// Iterate through the loop body blocks to remove Bits.keepAlive() and Reference.reachabilityFence() calls.7777// The keepAlive() and Reference.reachabilityFence() calls are NOP functions inserted into NIO libraries to keep7778// the NIO object and its native ptr alive until after the native pointer accesses.7779// Reference.reachabilityFence is a newly introduced Java 9 public API, we will remove this call only if the caller comes7780// from the java.nio package, i.e. only for Reference.reachabilityFence calls replacing existing Bits.keepAlive calls7781// in the java.nio package. For all other non-nio callers, we will conservatively not remove the call to not break existing7782// Idiom Recognition code.77837784bool7785TR_CISCTransformer::removeBitsKeepAliveCalls(List<TR::Block> *body)7786{7787if (trace())7788traceMsg(comp(), "\tScanning for java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls.\n");7789ListIterator<TR::Block> bi(body);7790TR::Block *block = NULL;7791bool foundCall = false;77927793_BitsKeepAliveList.init();77947795// Iterate through loop body blcoks7796for (block = bi.getFirst(); block != 0; block = bi.getNext())7797{7798for (TR::TreeTop *tt = block->getEntry(); tt != block->getExit(); tt = tt->getNextTreeTop())7799{7800TR::Node* node = tt->getNode();78017802// Look for the following tree:7803// treetop7804// vcall #481[0x74fd1dc0] static Method[java/nio/Bits.keepAlive(Ljava/lang/Object;)V]7805// aload #423[0x00694234] Parm[<parm 1 Ljava/nio/ByteBuffer;>] <flags:"0x4" (X!=0 )/>7806if (node->getOpCodeValue() == TR::treetop)7807{7808node = node->getChild(0);7809if (node->getOpCode().isCall())7810{7811TR::MethodSymbol *methodSym = node->getSymbol()->getMethodSymbol();7812// If the call is Bits.keepAlive() which is package private hence can only be called from the nio package,7813// or the call is Reference.reachabilityFence() which is public _and_ provided the caller is from the nio package,7814// only then we will remove the call node.7815if (methodSym->getRecognizedMethod() == TR::java_nio_Bits_keepAlive7816|| ((methodSym->getRecognizedMethod() == TR::java_lang_ref_Reference_reachabilityFence)7817&& (!strncmp(comp()->fe()->sampleSignature(node->getOwningMethod(), 0, 0, comp()->trMemory()), "java/nio/", 9))))7818{7819if (trace())7820traceMsg(comp(), "\t\tRemoving KeepAlive call found in block %d [%p] @ Node: %p\n",block->getNumber(), block, node);7821foundCall = true;78227823TR_BitsKeepAliveInfo *info = new (comp()->trStackMemory()) TR_CISCTransformer::TR_BitsKeepAliveInfo(block, tt, tt->getPrevTreeTop());7824_BitsKeepAliveList.add(info);78257826// Disconnect treetop from list.7827tt->getPrevTreeTop()->setNextTreeTop(tt->getNextTreeTop());7828tt->getNextTreeTop()->setPrevTreeTop(tt->getPrevTreeTop());7829}7830}7831}7832}7833}7834return foundCall;7835}78367837// Insert cloned copies of Bits.keepAlive() call into the fast path code of the reduced loop.7838void7839TR_CISCTransformer::insertBitsKeepAliveCalls(TR::Block * block)7840{7841if (trace())7842traceMsg(comp(), "\tInserting java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls into reduced loop.\n");78437844ListIterator<TR_BitsKeepAliveInfo> bi(&_BitsKeepAliveList);78457846for (TR_BitsKeepAliveInfo *info = bi.getFirst(); info != NULL; info = bi.getNext())7847{7848TR::TreeTop * tt = info->_treeTop;78497850// Clone the call node7851TR::Node *callNode = TR::Node::copy(tt->getNode()->getChild(0));7852callNode->decReferenceCount();7853callNode->getChild(0)->incReferenceCount();78547855// Uncommon the child7856callNode->setChild(0,callNode->getChild(0)->uncommon());7857TR::Node *treetopNode = TR::Node::create(TR::treetop, 1, callNode);7858block->append(TR::TreeTop::create(comp(), treetopNode));78597860if (trace())7861{7862TR::TreeTop * prev = info->_prevTreeTop;7863TR::Block * keepAliveBlock = info->_block;7864traceMsg(comp(), "\t\tInserting KeepAlive call clone node: %p from block %d [%p] node: %p into block: %d %p\n", callNode, keepAliveBlock->getNumber(), keepAliveBlock, tt->getNode(), block->getNumber(), block);7865}7866}7867}78687869// Restore any Bits.keepAlive() calls to their original locations7870void7871TR_CISCTransformer::restoreBitsKeepAliveCalls()7872{7873if (trace())7874traceMsg(comp(), "\tRestoring for java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls.\n");7875ListIterator<TR_BitsKeepAliveInfo> bi(&_BitsKeepAliveList);78767877for (TR_BitsKeepAliveInfo *info = bi.getFirst(); info != NULL; info = bi.getNext())7878{7879TR::TreeTop * tt = info->_treeTop;7880TR::TreeTop * prev = info->_prevTreeTop;7881TR::Block * block = info->_block;78827883if (trace())7884traceMsg(comp(), "\t\tInserting KeepAlive call found in block %d [%p] @ Node: %p\n",block->getNumber(), block, tt->getNode());7885prev->insertAfter(tt);7886}7887}788878897890