// Path: blob/master/runtime/compiler/optimizer/IdiomTransformations.cpp
// 6000 views
/*******************************************************************************1* Copyright (c) 2000, 2022 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include <stdint.h>23#include <stdio.h>24#include <stdlib.h>25#include <string.h>26#include "codegen/CodeGenerator.hpp"27#include "env/FrontEnd.hpp"28#include "compile/Compilation.hpp"29#include "compile/SymbolReferenceTable.hpp"30#include "control/Options.hpp"31#include "control/Options_inlines.hpp"32#include "cs2/bitvectr.h"33#include "env/CompilerEnv.hpp"34#include "env/TRMemory.hpp"35#include "env/jittypes.h"36#include "il/AutomaticSymbol.hpp"37#include "il/Block.hpp"38#include "il/DataTypes.hpp"39#include "il/ILOpCodes.hpp"40#include "il/ILOps.hpp"41#include "il/ILProps.hpp"42#include "il/Node.hpp"43#include "il/Node_inlines.hpp"44#include "il/Symbol.hpp"45#include "il/SymbolReference.hpp"46#include "il/TreeTop.hpp"47#include "il/TreeTop_inlines.hpp"48#include 
"infra/Assert.hpp"49#include "infra/BitVector.hpp"50#include "infra/Cfg.hpp"51#include "infra/List.hpp"52#include "optimizer/IdiomRecognition.hpp"53#include "optimizer/IdiomRecognitionUtils.hpp"54#include "optimizer/Optimization_inlines.hpp"55#include "optimizer/Optimizer.hpp"56#include "optimizer/UseDefInfo.hpp"57#include "ras/Debug.hpp"5859#define OPT_DETAILS "O^O NEWLOOPREDUCER: "60#define DISPTRACE(OBJ) ((OBJ)->trace())61#define VERBOSE(OBJ) ((OBJ)->showMesssagesStdout())62#define PNEW new (PERSISTENT_NEW)6364/** \brief65* Determines whether we should avoid transforming loops in java/lang/String due to functional issues when String66* compression is enabled.67*68* \param comp69* The compilation object.70*71* \return72* <c>true</c> if the transformation should be avoided, <c>false</c> otherwise.73*/74static bool avoidTransformingStringLoops(TR::Compilation* comp)75{76static bool cacheInitialized = false;77static bool cacheValue = false;7879if (!cacheInitialized)80{81// TODO: This is a workaround for Java 829 functionality as we switched to using a byte[] backing array in String*.82// Remove this workaround once obsolete. Idiom recognition currently does not handle idioms involving char[] in a83// compressed string. String compression is technically a Java 9 feature, but for the sake of evaluating performance84// we need to be able to run our standard set of benchmarks, most of which do not work under Java 9 at the moment.85// This leaves us with the only option to run the respective benchmarks on Java 8 SR5, however in Java 8 SR5 the86// java/lang/String.value is of type char[] which will cause functional problems. 
To avoid these issues we will87// disable idiom recognition on Java 8 SR5 if String compression is enabled.88TR_OpaqueClassBlock* stringClass = comp->cg()->fej9()->getSystemClassFromClassName("java/lang/String", strlen("java/lang/String"), true);8990if (stringClass != NULL)91{92// Only initialize the cache after we are certain java/lang/String has been resolved93cacheInitialized = true;9495if (comp->cg()->fej9()->getInstanceFieldOffset(stringClass, "value", "[C") != ~0)96{97cacheValue = IS_STRING_COMPRESSION_ENABLED_VM(static_cast<TR_J9VMBase*>(comp->fe())->getJ9JITConfig()->javaVM);98}99}100}101102return cacheValue;103}104105//*****************************************************************************************106// It partially peels the loop body to align the top of the region107//*****************************************************************************************108bool109ChangeAlignmentOfRegion(TR_CISCTransformer *trans)110{111const bool disptrace = DISPTRACE(trans);112TR_CISCGraph *P = trans->getP();113TR_CISCGraph *T = trans->getT();114TR_CISCNode *pTop = P->getEntryNode()->getSucc(0);115TR_CISCNode *t;116TR_CISCNode *beforeLoop = NULL;117bool changed = false;118119TR::Compilation * comp = trans->comp();120121// Find actual pTop. 
Skip an optional node if there is no corresponding target node.122while (trans->getP2TRep(pTop) == NULL)123{124if (!pTop->isOptionalNode()) return changed;125pTop = pTop->getSucc(0);126}127128// Try to find pTop in the predecessors of the loop body129for (t = T->getEntryNode(); t->isOutsideOfLoop();)130{131if (trans->analyzeT2P(t, pTop) & _T2P_MatchMask)132{133TR_CISCNode *chk;134for (chk = t->getSucc(0); chk->isOutsideOfLoop(); chk=chk->getSucc(0))135{136if (!chk->isNegligible() && trans->analyzeT2P(chk) == _T2P_NULL) break; // t is still invalid.137}138if (!chk->isOutsideOfLoop())139{140if (disptrace) traceMsg(comp, "ChangeAlignmentOfRegion : (t:%d p:%d) no need to change alignment\n",t->getID(),pTop->getID());141return changed; // Find pTop! Already aligned correctly142}143}144if (t->getNumSuccs() < 1)145{146if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : #succs of tID:%d is 0\n", t->getID());147return changed; // cannot find either a loop body or pTop in the fallthrough path148}149beforeLoop = t;150switch(t->getOpcode())151{152case TR::lookup:153case TR::table:154int i;155for (i = t->getNumSuccs(); --i >= 0; )156{157TR_CISCNode *next_t = t->getSucc(i);158if (next_t->getOpcode() == TR::Case &&159next_t->getSucc(0) != T->getExitNode())160{161t = next_t->getSucc(0);162goto exit_switch;163}164}165// fall through166default:167t = t->getSucc(0);168break;169}170exit_switch:;171}172TR_ASSERT(beforeLoop, "error");173if (t->getOpcode() != TR::BBStart) return changed; // already aligned by this transformation before174t = t->getSucc(0); // Skip BBStart175176int condT2P = trans->analyzeT2P(t, pTop);177if (condT2P & _T2P_MatchMask) return changed; // no need to change alignment178179if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : tTop %d, pTop %d\n",t->getID(),pTop->getID());180TR_CISCNodeRegion r(T->getNumNodes(), trans->trMemory()->heapMemoryRegion());181TR_CISCNode *firstNode = t;182TR_CISCNode *lastNode = NULL;183// Find the target node corresponding to 
pTop184int branchCount = 0;185for (;;)186{187if (condT2P != _T2P_NULL || !t->isNegligible()) lastNode = t;188t = t->getSucc(0);189if (t->getOpcode() == TR::BBEnd || t->getOpcode() == TR_exitnode || t == firstNode) return changed; // current limitation. peeling can be performed within the first BB of the body190if (t->getIlOpCode().isBranch())191if (++branchCount >= 2) return changed; // allow a single branch192condT2P = trans->analyzeT2P(t, pTop);193if (condT2P & _T2P_MatchMask)194break; // the target node corresponding to pTop is found195}196if (!lastNode) return changed; // the last node of the peeling region197TR_CISCNode *foundNode = lastNode->getSucc(0);198199// Find the last non-negligible node200if (lastNode->isNegligible())201{202TR_CISCNode *lastNonNegligble = NULL;203for (t = firstNode; ;t = t->getSucc(0))204{205if (!t->isNegligible()) lastNonNegligble = t;206if (t == lastNode) break;207}208if (!lastNonNegligble) return changed;209lastNode = lastNonNegligble;210}211212// Add nodes from firstNode to lastNode into the region r213if (disptrace) traceMsg(comp, "ChangeAlignmentOfRegion : foundNode %d, lastNode %d\n",foundNode->getID(),lastNode->getID());214if (branchCount > 0 &&215!lastNode->getIlOpCode().isBranch())216{217if (disptrace) traceMsg(comp, "Fail: there is a branch in the region. 
lastNode must be a branch node.\n");218return changed;219}220for (t = firstNode; ;t = t->getSucc(0))221{222r.append(t);223if (t == lastNode) break;224}225226// analyze that all parents of every node in the region r are included in the region r.227ListIterator<TR_CISCNode> ri(&r);228for (t = ri.getFirst(); t; t = ri.getNext()) // each node in the region r229{230if (t->getOpcode() == TR::aload || t->getOpcode() == TR::iload)231{232bool noDefInR = true;233ListIterator<TR_CISCNode> chain(t->getChains());234TR_CISCNode *def;235for (def = chain.getFirst(); def; def = chain.getNext())236{237if (r.isIncluded(def))238{239noDefInR = false;240break;241}242}243if (noDefInR) continue; // If there is no def in r, it ignores this load node.244}245ListIterator<TR_CISCNode> pi(t->getParents());246TR_CISCNode *pn;247for (pn = pi.getFirst(); pn; pn = pi.getNext()) // each parent of t248{249if (!r.isIncluded(pn))250{251if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : There is a parent(%d) of %d in the outside of the region\n", pn->getID(), t->getID());252return changed; // fail253}254}255}256257/////////////////////////////258// From here, success path //259/////////////////////////////260T->duplicateListsDuplicator();261changed = true;262TR_CISCNode *from = r.getListHead()->getData();263TR_CISCNode *to = r.getListTail()->getData();264if (disptrace)265{266traceMsg(comp,"ChangeAlignmentOfRegion: Succ[0] of %d will be changed from %d to %d.\n",267beforeLoop->getID(),268beforeLoop->getSucc(0)->getID(),269foundNode->getID());270traceMsg(comp,"\tNodes from %d to %d will be added to BeforeInsertionList.\n",271from->getID(),to->getID());272}273TR_ASSERT(r.getListTail()->getData()->getIlOpCode().isTreeTop(), "error");274beforeLoop->replaceSucc(0, foundNode); // replace the loop entry with foundNode275TR_NodeDuplicator duplicator(comp);276for (t = ri.getFirst(); t; t = ri.getNext())277{278if (t->getIlOpCode().isTreeTop())279{280TR::Node *rep = t->getHeadOfTrNodeInfo()->_node;281if 
(disptrace)282{283traceMsg(comp,"add TR::Node 0x%p (tid:%d) to BeforeInsertionList.\n", rep, t->getID());284}285rep = duplicator.duplicateTree(rep);286if (t->getIlOpCode().isIf())287{288if (t->getOpcode() != rep->getOpCodeValue())289{290TR::TreeTop *ret;291for (ret = t->getHeadOfTreeTop()->getNextTreeTop();292ret->getNode()->getOpCodeValue() != TR::BBStart;293ret = ret->getNextTreeTop());294TR::Node::recreate(rep, (TR::ILOpCodes)t->getOpcode());295rep->setBranchDestination(ret);296}297}298trans->getBeforeInsertionList()->append(rep);299}300}301// Move the region ("from" - "to") to the last302trans->moveCISCNodes(from, to, NULL);303304if (disptrace && changed)305{306traceMsg(comp,"After ChangeAlignmentOfRegion\n");307T->dump(comp->getOutFile(), comp);308}309return changed;310}311312313//*****************************************************************************************314// Analyze whether we can move the node n to immediately before the nodes in tgt.315// Both the node n and a node in tgt must be included in the list l.316// If the analysis fails, it will return NULL.317// Otherwise, it will return the target node, which must be included in the list tgt.318//*****************************************************************************************319TR_CISCNode *320analyzeMoveNodeForward(TR_CISCTransformer *trans, List<TR_CISCNode> *l, TR_CISCNode *n, List<TR_CISCNode> *tgt)321{322const bool disptrace = DISPTRACE(trans);323ListIterator<TR_CISCNode> ti(l);324TR_CISCNode *t;325TR_CISCNode *ret = NULL;326327TR::Compilation * comp = trans->comp();328329for (t = ti.getFirst(); t; t = ti.getNext())330{331if (t == n) break;332}333TR_ASSERT(t != NULL, "cannot find the node n in the list l!");334335t = ti.getNext();336TR_ASSERT(t != NULL, "cannot find any node in tgt in the list l!");337if (tgt->find(t)) return NULL; // already moved338339bool go = false;340if (n->isStoreDirect())341{342go = true;343}344else if (n->getNumChildren() == 2)345{346if 
(n->getIlOpCode().isAdd() ||347n->getIlOpCode().isSub() ||348n->getIlOpCode().isMul() ||349n->getIlOpCode().isLeftShift() ||350n->getIlOpCode().isRightShift() ||351n->getIlOpCode().isShiftLogical() ||352n->getIlOpCode().isAnd() ||353n->getIlOpCode().isOr() ||354n->getIlOpCode().isXor()) // Safe expressions355{356go = true;357if (n->getChild(0)->getOpcode() == TR_variable ||358n->getChild(1)->getOpcode() == TR_variable)359go = false; // not implemented yet.360}361}362else if (n->getNumChildren() == 1)363{364if (n->getIlOpCode().isConversion() ||365n->getIlOpCode().isNeg()) // Safe expressions366{367go = true;368if (n->getChild(0)->getOpcode() == TR_variable)369go = false; // not implemented yet.370}371}372else373{374if (n->getIlOpCode().isLoadConst())375{376go = true;377}378}379380if (go)381{382List<TR_CISCNode> *chains = n->getChains();383List<TR_CISCNode> *parents = n->getParents();384TR_CISCNode *specialCareIf = trans->getP()->getSpecialCareNode(0);385bool generateCompensation0 = false;386while(true)387{388if (chains->find(t)) break; // it cannot be moved beyond its use/def.389if (parents->find(t)) break; // it cannot be moved beyond its parent.390391if (t->getOpcode() == TR::BBStart)392{393TR::Block *block = t->getHeadOfTrNode()->getBlock();394if (block->getPredecessors().size() > 1) return NULL; // It currently analyzes within this BB.395}396if (t->getNumSuccs() >= 2 && specialCareIf)397{398bool fail = true;399TR_CISCNode *p = trans->getT2Phead(t);400if (p &&401p == specialCareIf &&402t->getSucc(1) == trans->getT()->getExitNode())403{404// add compensation code into AfterInsertionIdiomList and go ahead405TR::Node *trNode = n->getHeadOfTrNode();406if (trNode->getOpCode().isTreeTop())407{408if (trNode->getOpCode().isStoreDirect())409{410if (!generateCompensation0)411{412trans->getT()->duplicateListsDuplicator();413if (disptrace) traceMsg(comp,"analyzeMoveNodeForward: append the tree of 0x%p into AfterInsertionIdiomList\n", 
trNode);414trans->getAfterInsertionIdiomList(0)->append(trNode->duplicateTree());415}416fail = false;417generateCompensation0 = true;418}419// else, fail to move420}421else422{423fail = false;424}425}426if (fail) break; // It currently analyzes within this BB.427}428t = ti.getNext();429if (t == NULL) break; // cannot find any node in tgt in the list l.430ret = t;431if (tgt->find(t)) break; // find goal!432}433}434return ret;435}436437438//*****************************************************************************************439// It tries to reorder target nodes to match idiom nodes within each BB.440//*****************************************************************************************441bool442reorderTargetNodesInBB(TR_CISCTransformer *trans)443{444TR_CISCGraph *P = trans->getP();445TR_CISCGraph *T = trans->getT();446List<TR_CISCNode> *T2P = trans->getT2P(), *P2T = trans->getP2T(), *l;447TR_CISCNode *t, *p;448bool changed = false;449const bool disptrace = DISPTRACE(trans);450451TR::Compilation * comp = trans->comp();452453static int enable = -1;454if (enable < 0)455{456char *p = feGetEnv("DISABLE_REORDER");457enable = p ? 
0 : 1;458}459if (!enable) return false;460461TR_BitVector visited(T->getNumNodes(), comp->trMemory());462while(true)463{464ListIterator<TR_CISCNode> ti(T->getNodes());465int currentPID = 0x10000;466bool anyChanged = false;467468for (t = ti.getFirst(); t; t = ti.getNext())469{470int tID = t->getID();471if (visited.isSet(tID)) continue;472visited.set(tID);473l = T2P + tID;474if (l->isEmpty()) // There is no idiom nodes corresponding to the node t475{476if (t->isNegligible())477{478continue; // skip the node t479}480else481{482break; // finish this analysis483}484}485int maxPid = -1;486ListIterator<TR_CISCNode> pi(l);487for (p = pi.getFirst(); p; p = pi.getNext())488{489if (p->getID() > maxPid) maxPid = p->getID();490}491if (maxPid >= 0)492{493if (maxPid <= currentPID)494{495currentPID = maxPid; // no problem496}497else498{499if (t->isOutsideOfLoop()) break; // reordering is currently supported only inside of the loop500501// Try moving the node t forward502List<TR_CISCNode> *nextPlist = P2T+maxPid+1;503if (disptrace)504{505ListIterator<TR_CISCNode> nextTi(nextPlist);506TR_CISCNode *nextT;507traceMsg(comp,"reorderTargetNodesInBB: Try moving the tgt node %d forward until",tID);508for (nextT = nextTi.getFirst(); nextT; nextT = nextTi.getNext())509{510traceMsg(comp," %p(%d)",nextT,nextT->getID());511}512traceMsg(comp,"\n");513}514515// Analyze whether we can move the node t to immediately before the nodes in nextPlist516List<TR_CISCNode> *dagList = T->getDagId2Nodes()+t->getDagID();517TR_CISCNode *tgt = analyzeMoveNodeForward(trans, dagList, t, nextPlist);518if (tgt)519{520T->duplicateListsDuplicator();521// OK, we can move the node t!522if (disptrace) traceMsg(comp,"We can move the node %d to %p(%d)\n",tID,tgt,tgt->getID());523anyChanged = changed = true;524525trans->moveCISCNodes(t, t, tgt, "reorderTargetNodesInBB");526break;527}528}529}530}531if (!anyChanged) break;532}533if (disptrace && changed)534{535traceMsg(comp,"After 
reorderTargetNodesInBB\n");536T->dump(comp->getOutFile(), comp);537}538return changed;539}540541542//*****************************************************************************************543// It replicates a store instruction outside of the loop.544// It is specialized to those idioms that include TR_booltable545// Input: SpecialCareNode(0) - the TR_booltable in the idiom546// ImportantNode(1) - ificmpge for exiting the loop (optional)547//*****************************************************************************************548bool549moveStoreOutOfLoopForward(TR_CISCTransformer *trans)550{551TR_CISCGraph *P = trans->getP();552List<TR_CISCNode> *P2T = trans->getP2T();553TR_CISCNode *ixload, *aload, *iload;554TR::Compilation *comp = trans->comp();555556TR_CISCNode *boolTable = P->getSpecialCareNode(0); // Note: The opcode isn't always TR_booltable.557TR_CISCNode *p = boolTable->getChild(0); // just before TR_booltable, such as b2i558559TR_BitVector findBV(P->getNumNodes(), trans->trMemory(), stackAlloc);560findBV.set(boolTable->getID());561562TR_CISCNode *optionalCmp = P->getImportantNode(1); // ificmpge563if (optionalCmp && (optionalCmp->getOpcode() == TR::ificmpge || optionalCmp->getOpcode() == TR_ifcmpall))564findBV.set(optionalCmp->getID());565566ListIterator<TR_CISCNode> ti(P2T + p->getID());567TR_CISCNode *t;568TR_CISCNode *storedVariable = NULL;569bool success0 = false;570TR_ScratchList<TR_CISCNode> targetList(comp->trMemory());571for (t = ti.getFirst(); t; t = ti.getNext()) // for each target node corresponding to p572{573// t is a target node corresponding to p (just before TR_booltable)574ListIterator<TR_CISCNode> tParentIter(t->getParents());575TR_CISCNode *tParent;576for (tParent = tParentIter.getFirst(); tParent; tParent = tParentIter.getNext())577{578// checking whether tParent is a store instruction579if (tParent->isStoreDirect() &&580!tParent->isNegligible())581{582// checking whether all variables of stores are same.583if (!storedVariable) 
storedVariable = tParent->getChild(1);584else if (storedVariable != tParent->getChild(1))585{586if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because all variables of stores are not same.\n");587success0 = false;588goto endSpecial0; // FAIL!589}590591// checking whether tParent will reach either boolTable or optionalCmp592if (checkSuccsSet(trans, tParent, &findBV))593{594success0 = true; // success for this t595break;596}597else598{599if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because tParent will not reach either boolTable or optionalCmp.\n");600success0 = false;601goto endSpecial0; // FAIL!602}603}604}605if (tParent) targetList.add(tParent); // add a store instruction606}607endSpecial0:608609if (targetList.isEmpty())610{611if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because targetList is empty.\n");612success0 = false;613}614// check if descendants of p include an array load615if (!getThreeNodesForArray(p, &ixload, &aload, &iload, true))616{617if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because decendents of pid:%d don't include an array load.\n", p->getID());618success0 = false;619}620621if (success0)622{623ixload = trans->getP2TRep(ixload);624aload = trans->getP2TRep(aload);625iload = trans->getP2TRep(iload);626if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward: Target nodes ixload=%d, aload=%d, iload=%d\n",627ixload ? ixload->getID() : -1, aload ? aload->getID() : -1, iload ? 
iload->getID() : -1);628trans->getT()->duplicateListsDuplicator();629if (ixload && aload && iload && (iload->isLoadVarDirect() || iload->getOpcode() == TR_variable))630{631TR::Node *store;632TR::Node *conv;633TR::Node *storeDup0;634TR::Node *storeDup1;635TR::Node *convDup;636TR::Node *ixloadNode = ixload->getHeadOfTrNodeInfo()->_node;637TR::Node *iloadNode = iload->getHeadOfTrNodeInfo()->_node; // index638TR::Node *iloadm1Node = createOP2(comp, TR::isub,639TR::Node::createLoad(iloadNode, iloadNode->getSymbolReference()),640TR::Node::create(iloadNode, TR::iconst, 0, 1));641642// prepare base[index]643TR::Node *arrayLoad0 = createArrayLoad(comp, trans->isGenerateI2L(),644ixloadNode,645aload->getHeadOfTrNodeInfo()->_node,646iloadNode,647ixloadNode->getSize());648649// prepare base[index-1] (it may not be used.)650TR::Node *arrayLoad1 = createArrayLoad(comp, trans->isGenerateI2L(),651ixloadNode,652aload->getHeadOfTrNodeInfo()->_node,653iloadm1Node,654ixloadNode->getSize());655ti.set(&targetList);656t = ti.getFirst();657store = t->getHeadOfTrNodeInfo()->_node;658conv = store->getChild(0);659if (conv->getOpCode().isConversion())660{661convDup = TR::Node::create(conv->getOpCodeValue(), 1, arrayLoad0);662storeDup0 = TR::Node::createStore(store->getSymbolReference(), convDup);663convDup = TR::Node::create(conv->getOpCodeValue(), 1, arrayLoad1);664storeDup1 = TR::Node::createStore(store->getSymbolReference(), convDup);665}666else667{668storeDup0 = TR::Node::createStore(store->getSymbolReference(), arrayLoad0);669storeDup1 = TR::Node::createStore(store->getSymbolReference(), arrayLoad1);670}671trans->getAfterInsertionIdiomList(0)->append(storeDup0); // base[index]672trans->getAfterInsertionIdiomList(1)->append(storeDup1); // base[index-1] (it may not be used.)673if (VERBOSE(trans)) printf("%s moveStoreOutOfLoopForward\n", trans->getT()->getTitle());674if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward adds %d into compensation code [0] and [1]\n", 
t->getID());675for (; t; t = ti.getNext()) t->setIsNegligible(); // set negligible to all stores676}677else678success0 = false;679}680681return success0;682}683684685//*****************************************************************************************686// It analyzes redundant IAND. It is specialized to MEMCPYxxx2Byte, such as MEMCPYChar2Byte.687// Input: SpecialCareNode(*) - a set of conversions, such as i2b688//*****************************************************************************************689bool690IANDSpecialNodeTransformer(TR_CISCTransformer *trans)691{692TR_CISCGraph *P = trans->getP();693List<TR_CISCNode> *P2T = trans->getP2T();694TR::Compilation *comp = trans->comp();695int idx;696bool ret = false;697698for (idx = 0; idx < MAX_SPECIALCARE_NODES; idx++)699{700TR_CISCNode *p = P->getSpecialCareNode(idx);701if (!p) break;702ListIterator<TR_CISCNode> ti(P2T + p->getID());703TR_CISCNode *t;704for (t = ti.getFirst(); t; t = ti.getNext())705{706if (t->getOpcode() != TR::i2b) continue; // not implemented yet for other OPs707TR_CISCNode *ch = t->getChild(0);708if (ch->isNegligible()) continue;709710// example: the following two IANDs are redundant.711// dst = (byte)(((ch & 0xFF00) >> 8) & 0xFF)712// ^^^^^^^^ ^^^^^^713switch(ch->getOpcode())714{715case TR::iand:716if (!ch->getParents()->isSingleton() ||717!testIConst(ch, 1, 0xFF)) return false; // child(1) is "iconst 0xff"718ch->setIsNegligible(); // this IAND can be negligible!719ret = true;720721ch = ch->getChild(0);722if (ch->getOpcode() != TR::ishr && ch->getOpcode() != TR::iushr) break;723// fall through if TR::ishr724case TR::ishr:725case TR::iushr:726if (!testIConst(ch, 1, 0x8)) break; // child(1) is "iconst 0x8"727728ch = ch->getChild(0);729if (ch->getOpcode() != TR::iand) break;730if (!ch->getParents()->isSingleton() ||731!testIConst(ch, 1, 0xFF00)) return false; // child(1) is "iconst 0xFF00"732ch->setIsNegligible(); // this SHR can be negligible!733ret = true;734break;735}736}737}738return 
ret;739}740741//////////////////////////////////////////////////////////////////////////742// utility routines743744static void745findIndexLoad(TR::Node *aiaddNode, TR::Node *&index1, TR::Node *&index2, TR::Node *&topLevelIndex)746{747// iiload748// aiadd <-- aiaddNode749// aload750// isub751// imul752// iload <-- looking for the index753// iconst 4754// iconst -16755//756// -or-757// iiload758// aiadd759// aload760// isub761// iload762// iconst763//764// -or-765// iiload766// aiadd <-- aiaddNode767// aload768// isub769// imul770// iadd771// iload <-- looking for the index772// iload <-- looking for the index773// iconst 4774// iconst -16775//776// -or-777// iiload778// aiadd779// aload780// isub781// iadd782// iload <-- looking for the index783// iload <-- looking for the index784// iconst785//786index1 = NULL;787index2 = NULL;788topLevelIndex = NULL;789TR::Node *addOrSubNode = aiaddNode->getSecondChild();790if (addOrSubNode->getOpCode().isAdd() || addOrSubNode->getOpCode().isSub())791{792TR::Node *grandChild = NULL;793if (addOrSubNode->getFirstChild()->getOpCode().isMul())794grandChild = addOrSubNode->getFirstChild()->getFirstChild();795else796grandChild = addOrSubNode->getFirstChild();797798if (grandChild->getOpCodeValue() == TR::i2l)799grandChild = grandChild->getFirstChild();800801topLevelIndex = grandChild;802803if (grandChild->getOpCode().hasSymbolReference())804{805index1 = grandChild;806}807else if (grandChild->getOpCode().isAdd() || grandChild->getOpCode().isSub())808{809TR::Node *grandGrandChild1 = grandChild->getFirstChild();810TR::Node *grandGrandChild2 = grandChild->getSecondChild();811while(grandGrandChild1->getOpCode().isAdd() || grandGrandChild1->getOpCode().isSub())812{813grandGrandChild2 = grandGrandChild1->getSecondChild();814grandGrandChild1 = grandGrandChild1->getFirstChild();815}816if (grandGrandChild1->getOpCode().hasSymbolReference())817{818index1 = grandGrandChild1;819}820if (grandGrandChild2->getOpCode().hasSymbolReference())821{822index2 
= grandGrandChild2;823}824}825}826}827828829// get the iv thats involved in the looptest830//831static bool832usedInLoopTest(TR::Compilation *comp, TR::Node *loopTestNode, TR::SymbolReference *srcSymRef)833{834TR::Node *ivNode = loopTestNode->getFirstChild();835if (ivNode->getOpCode().isAdd() || ivNode->getOpCode().isSub())836ivNode = ivNode->getFirstChild();837838if (ivNode->getOpCode().hasSymbolReference())839{840if (ivNode->getSymbolReference()->getReferenceNumber() == srcSymRef->getReferenceNumber())841return true;842}843else dumpOptDetails(comp, "iv %p in the loop test %p has no symRef?\n", ivNode, loopTestNode);844return false;845}846847static bool848indexContainsArray(TR::Compilation *comp, TR::Node *index, vcount_t visitCount)849{850if (index->getVisitCount() == visitCount)851return false;852853index->setVisitCount(visitCount);854855if (comp->trace(OMR::idiomRecognition))856traceMsg(comp, "analyzing node %p\n", index);857858if (index->getOpCode().hasSymbolReference() &&859index->getSymbolReference()->getSymbol()->isArrayShadowSymbol())860{861if (comp->trace(OMR::idiomRecognition))862traceMsg(comp, "found array node %p\n", index);863return true;864}865866for (int32_t i = 0; i < index->getNumChildren(); i++)867if (indexContainsArray(comp, index->getChild(i), visitCount))868return true;869870return false;871}872873874static bool875indexContainsArrayAccess(TR::Compilation *comp, TR::Node *aXaddNode)876{877if (comp->trace(OMR::idiomRecognition))878traceMsg(comp, "axaddnode %p\n", aXaddNode);879880TR::Node *loadNode1, *loadNode2, *topLevelIndex;881findIndexLoad(aXaddNode, loadNode1, loadNode2, topLevelIndex);882// topLevelIndex now contains the actual expression q in a[q]883// if q contains another array access, then we cannot reduce884// this loop into an arraycopy885// ie. 
a[b[i]] do not represent linear array accesses886//887if (comp->trace(OMR::idiomRecognition))888traceMsg(comp, "aXaddNode %p topLevelIndex %p\n", aXaddNode, topLevelIndex);889vcount_t visitCount = comp->incOrResetVisitCount();890if (topLevelIndex)891return indexContainsArray(comp, topLevelIndex, visitCount);892return false;893}894895// isIndexVariableInList checks whether the induction (index) variable symbol(s)896// from the given 'node' subtree is found inside 'nodeList'.897//898// Returns true if899// 1. one induction variable symbol is found in the list.900// Returns false if901// 1. no induction variables are found.902// 2. two induction variables found in 'node' tree are both in the list.903// i.e. a[i+j]904// i++;905// j++;906// In this case, the access pattern of the array would skip every907// other element.908static bool909isIndexVariableInList(TR::Node *node, List<TR::Node> *nodeList)910{911TR::Symbol *indexSymbol1 = NULL, *indexSymbol2 = NULL;912TR::Node *loadNode1, *loadNode2, *topLevelIndex;913914findIndexLoad(node->getOpCode().isAdd() ? 
node : node->getFirstChild(),915loadNode1, loadNode2, topLevelIndex);916if (loadNode1)917indexSymbol1 = loadNode1->getSymbolReference()->getSymbol();918if (loadNode2)919indexSymbol2 = loadNode2->getSymbolReference()->getSymbol();920921bool foundSymbol1 = false, foundSymbol2 = false;922923if (indexSymbol1 || indexSymbol2)924{925// Search the node list for the index symbol(s).926ListIterator<TR::Node> li(nodeList);927TR::Node *store;928for (store = li.getFirst(); store; store = li.getNext())929{930TR::Symbol *storeSymbol = store->getSymbolReference()->getSymbol();931if (indexSymbol1 == storeSymbol)932foundSymbol1 = true;933if (indexSymbol2 && indexSymbol2 == storeSymbol)934foundSymbol2 = true;935}936}937938// Return true only if either one symbol is found, but not both.939return foundSymbol1 ^ foundSymbol2;940}941942943// for the memCmp transformer944//945static bool946indicesAndStoresAreConsistent(TR::Compilation *comp, TR::Node *lhsSrcNode, TR::Node *rhsSrcNode, TR_CISCNode *lhsNode, TR_CISCNode *rhsNode)947{948// lhs and rhs indicate the two arrays involved in the comparison test949//950//951TR_ScratchList<TR::Node> variableList(comp->trMemory());952if (lhsNode)953variableList.add(lhsNode->getHeadOfTrNode());954if (rhsNode && rhsNode != lhsNode)955variableList.add(rhsNode->getHeadOfTrNode());956return (isIndexVariableInList(lhsSrcNode, &variableList) &&957isIndexVariableInList(rhsSrcNode, &variableList));958}959960static TR::Node* getArrayBase(TR::Node *node)961{962if (node->getOpCode().hasSymbolReference() &&963node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())964{965node = node->getFirstChild();966if (node->getOpCode().isArrayRef()) node = node->getFirstChild();967if (node->getOpCode().isIndirect()) node = node->getFirstChild();968return node;969}970return NULL;971}972973static bool974areArraysInvariant(TR::Compilation *comp, TR::Node *inputNode, TR::Node *outputNode, TR_CISCGraph *T)975{976if (T)977{978TR::Node *aNode = 
getArrayBase(inputNode);979TR::Node *bNode = getArrayBase(outputNode);980981if (comp->trace(OMR::idiomRecognition))982traceMsg(comp, "aNode = %p bNode = %p\n", aNode, bNode);983if (aNode && aNode->getOpCode().isLoadDirect() &&984bNode && bNode->getOpCode().isLoadDirect())985{986TR_CISCNode *aCNode = T->getCISCNode(aNode);987TR_CISCNode *bCNode = T->getCISCNode(bNode);988989if (comp->trace(OMR::idiomRecognition))990traceMsg(comp, "aC = %p %d bC = %p %d\n", aCNode, aCNode->getID(), bCNode, bCNode->getID());991if (aCNode && bCNode)992{993ListIterator<TR_CISCNode> aDefI(aCNode->getChains());994ListIterator<TR_CISCNode> bDefI(bCNode->getChains());995TR_CISCNode *ch;996for (ch = aDefI.getFirst(); ch; ch = aDefI.getNext())997{998if (ch->getDagID() == aCNode->getDagID())999{1000traceMsg(comp, "def %d found inside loop for %d\n", ch->getID(), aCNode->getID());1001return false;1002}1003}1004for (ch = bDefI.getFirst(); ch; ch = bDefI.getNext())1005{1006if (ch->getDagID() == bCNode->getDagID())1007{1008traceMsg(comp, "def %d found inside loop for %d\n", ch->getID(), bCNode->getID());1009return false;1010}1011}1012}1013}1014}1015return true;1016}101710181019// used for a TRTO reduction in java/io/DataOutputStream.writeUTF(String)1020//1021static TR::Node *1022areDefsOnlyInsideLoop(TR::Compilation *comp, TR_CISCTransformer *trans, TR::Node *outputNode)1023{1024bool extraTrace = DISPTRACE(trans);10251026if (extraTrace)1027traceMsg(trans->comp(), "finding defs for index used in tree %p\n", outputNode);10281029TR_UseDefInfo *info = trans->optimizer()->getUseDefInfo();1030if (info)1031{1032TR::Node *loadNode = NULL, *loadNode1, *loadNode2, *topLevelIndex;1033findIndexLoad(outputNode, loadNode1, loadNode2, topLevelIndex);10341035if (loadNode1 && loadNode2) return NULL; // Try to keep the original semantics, but it may be too strict.1036loadNode = loadNode1 ? 
loadNode1 : loadNode2;10371038if (loadNode)1039{1040uint16_t useDefIndex = loadNode->getUseDefIndex();1041TR_UseDefInfo::BitVector defs(comp->allocator());1042info->getUseDef(defs, useDefIndex);1043if (!defs.IsZero())1044{1045TR_UseDefInfo::BitVector::Cursor cursor(defs);1046int32_t numDefs = 0;1047TR::TreeTop *defTT = NULL;1048for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())1049{1050int32_t defIndex = cursor;1051if (defIndex < info->getFirstRealDefIndex())1052continue; // method entry is def1053defTT = info->getTreeTop(defIndex);1054numDefs++;1055}1056// if the only def is one inside the loop, then1057// insert the def before the translation node1058//1059if (numDefs == 1)1060{1061TR::Block *defBlock = defTT->getEnclosingBlock();1062if (extraTrace)1063traceMsg(trans->comp(), "found single def %p for load %p\n", defTT->getNode(), loadNode);1064if (trans->isBlockInLoopBody(defBlock))1065return (defTT->getNode()->duplicateTree(trans->comp()));1066}1067}1068}1069}1070return NULL;1071}107210731074static void1075findIndVarLoads(TR::Node *node, TR::Node *indVarStoreNode, bool &storeFound,1076List<TR::Node> *ivLoads, TR::Symbol *ivSym, vcount_t visitCount)1077{1078if (node->getVisitCount() == visitCount)1079return;1080node->setVisitCount(visitCount);10811082if (node == indVarStoreNode)1083storeFound = true;10841085if (node->getOpCodeValue() == TR::iload &&1086node->getSymbolReference()->getSymbol() == ivSym)1087{1088if (!ivLoads->find(node))1089ivLoads->add(node);1090}10911092for (int32_t i = 0; i < node->getNumChildren(); i++)1093findIndVarLoads(node->getChild(i), indVarStoreNode, storeFound, ivLoads, ivSym, visitCount);1094}10951096static int32_t1097checkForPostIncrement(TR::Compilation *comp, TR::Block *loopHeader, TR::Node *loopCmpNode, TR::Symbol *ivSym)1098{1099TR::TreeTop *startTree = loopHeader->getFirstRealTreeTop();1100TR::Node *indVarStoreNode = NULL;1101TR::TreeTop *tt;1102for (tt = startTree; tt != loopHeader->getExit(); tt = 
tt->getNextTreeTop())1103{1104TR::Node *n = tt->getNode();1105if (n->getOpCode().isStoreDirect() &&1106(n->getSymbolReference()->getSymbol() == ivSym) /*&&1107n->getFirstChild()->getSecondChild()->getOpCode().isLoadConst()*/)1108{1109indVarStoreNode = n;1110break;1111}1112}1113if (!indVarStoreNode)1114return 0;11151116bool storeFound = false;1117vcount_t visitCount = comp->incOrResetVisitCount();1118TR_ScratchList<TR::Node> ivLoads(comp->trMemory());1119for (tt = startTree; !storeFound && tt != loopHeader->getExit(); tt = tt->getNextTreeTop())1120findIndVarLoads(tt->getNode(), indVarStoreNode, storeFound, &ivLoads, ivSym, visitCount);11211122TR::Node *cmpFirstChild = loopCmpNode->getFirstChild();11231124TR::Node *storeIvLoad = indVarStoreNode->getFirstChild();1125if (storeIvLoad->getOpCode().isAdd() || storeIvLoad->getOpCode().isSub())1126storeIvLoad = storeIvLoad->getFirstChild();11271128if(comp->trace(OMR::idiomRecognition))1129traceMsg(comp, "found storeIvload %p cmpFirstChild %p\n", storeIvLoad, cmpFirstChild);1130// simple case1131// the loopCmp uses the un-incremented value1132// of the iv1133//1134if (storeIvLoad == cmpFirstChild)1135return 1;11361137// the loopCmp uses some load of the iv that1138// was commoned1139//1140if (ivLoads.find(cmpFirstChild))1141return 1;11421143// uses a brand new load of the iv1144return 0;1145}11461147static bool1148checkByteToChar(TR::Compilation *comp, TR::Node *iorNode, TR::Node *&inputNode, bool bigEndian)1149{1150// this is the pattern thats being reduced1151//1152// ior1153// imul1154// bu2i1155// ibload #261 Shadow[<array-shadow>]1156// aiadd <flags:"0x8000" (internalPtr )/>1157// aload #523 Auto[<temp slot 10>]1158// isub1159// ==>iload i1160// iconst -171161// iconst 2561162// bu2i1163// ibload #261 Shadow[<array-shadow>]1164// aiadd <flags:"0x8000" (internalPtr )/>1165// ==>aload at #5231166// isub1167// ==>iload i1168// iconst -161169//1170// for little-endian platforms,1171// char = byte[i+1] << 8 | byte[i] (ie. 
lower index is in the lsb)1172//1173// for big-endian platforms,1174// char = byte[i] << 8 | byte[i+1] (ie. lower index is in the msb)1175//1176// in either case, if the incoming user code is swapped, then the transformation1177// is illegal.1178//1179if (!iorNode) return false;11801181TR::Node *imulNode = iorNode->getFirstChild();1182if ((imulNode->getOpCodeValue() != TR::imul) &&1183(imulNode->getOpCodeValue() != TR::ishl))1184imulNode = iorNode->getSecondChild();11851186if ((imulNode->getOpCodeValue() == TR::imul) ||1187(imulNode->getOpCodeValue() == TR::ishl))1188{1189// find the index to be either i, i+11190// if (le)1191// if index is i+1 then inputNode = other ibload of the ior1192// else fail1193// if (be)1194// if index is i then inputNode = ibload child of imul1195// else fail1196//1197TR::Node *ibloadNode = imulNode->getFirstChild()->skipConversions();1198bool plusOne = false;1199bool matchPattern = false;1200if (ibloadNode->getOpCodeValue() == TR::bloadi)1201{1202TR::Node *subNode = ibloadNode->getFirstChild()->getSecondChild();1203int32_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 1;1204if (subNode->getOpCode().isSub() &&1205subNode->getSecondChild()->getOpCode().isLoadConst())1206{1207int32_t constVal;1208if (subNode->getSecondChild()->getOpCodeValue() == TR::lconst)1209constVal = (int32_t) subNode->getSecondChild()->getLongInt();1210else1211constVal = subNode->getSecondChild()->getInt();12121213if (constVal < 0) constVal = -constVal;12141215if (constVal == hdrSize)1216{1217matchPattern = true;1218plusOne = true;1219}1220else if (constVal == hdrSize-1)1221{1222matchPattern = true;1223plusOne = false;1224}12251226if (matchPattern)1227{1228if (bigEndian)1229{1230if (!plusOne)1231{1232inputNode = ibloadNode->getFirstChild();1233return true;1234}1235else1236return false;1237}1238else1239{1240if (plusOne)1241{1242inputNode = iorNode->getSecondChild()->skipConversions();1243if (inputNode->getOpCodeValue() == 
TR::bloadi)1244{1245inputNode = inputNode->getFirstChild();1246return true;1247}1248else1249return false;1250}1251else1252return false;1253}1254}1255}1256}1257}12581259return false;1260}12611262static bool1263ivIncrementedBeforeBoolTableExit(TR::Compilation *comp, TR::Node *boolTableExit,1264TR::Block *entryBlock,1265TR::SymbolReference *ivSymRef)1266{1267TR::TreeTop *startTree = entryBlock->getFirstRealTreeTop();1268TR::Node *ivStore = NULL;1269bool foundBoolTable = false;1270for (TR::TreeTop *tt = startTree; tt != entryBlock->getExit(); tt = tt->getNextTreeTop())1271{1272TR::Node *n = tt->getNode();1273if (n == boolTableExit)1274{1275foundBoolTable = true;1276break;1277}1278if (n->getOpCode().isStoreDirect() &&1279(n->getSymbolReference()->getSymbol() == ivSymRef->getSymbol()))1280ivStore = n;1281}12821283if (foundBoolTable && ivStore)1284return true;1285return false;1286}12871288128912901291//*****************************************************************************************1292// default graph transformer1293// currently, it has:1294// (1) partial peeling of the loop body1295//*****************************************************************************************1296bool1297defaultSpecialNodeTransformer(TR_CISCTransformer *trans)1298{1299bool success = ChangeAlignmentOfRegion(trans);1300success |= reorderTargetNodesInBB(trans);1301return success;1302}130313041305//*****************************************************************************************1306// graph transformer for MEMCPY1307// default + IANDSpecialNodeTransformer1308//*****************************************************************************************1309bool1310MEMCPYSpecialNodeTransformer(TR_CISCTransformer *trans)1311{1312bool success = defaultSpecialNodeTransformer(trans);1313success |= IANDSpecialNodeTransformer(trans);1314return success;1315}131613171318//*****************************************************************************************1319// graph transformer for 
TRT1320// default + moveStoreOutOfLoopForward1321//*****************************************************************************************1322bool1323TRTSpecialNodeTransformer(TR_CISCTransformer *trans)1324{1325bool success = moveStoreOutOfLoopForward(trans);1326success |= defaultSpecialNodeTransformer(trans);1327return success;1328}132913301331//*****************************************************************************************1332// IL code generation for exploiting the TRT (or SRST) instruction1333// Input: ImportantNode(0) - booltable1334// ImportantNode(1) - ificmpge1335// ImportantNode(2) - NULLCHK1336// ImportantNode(3) - array load1337//*****************************************************************************************1338// Possible parameters of TR::arraytranslateAndTest1339// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, uint8_t *table, int arrayLen)1340// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, uint8_t *table, int arrayLen, int endLen)1341// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, int findByte, int arrayLen)1342// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, int findByte, int arrayLen, int endLen)13431344// If the flag charArrayTRT is set, the type of the array is "char".13451346bool1347CISCTransform2FindBytes(TR_CISCTransformer *trans)1348{1349TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");1350// the arraytranslateAndTest opcode is overloaded1351// with a flag1352const bool disptrace = DISPTRACE(trans);1353TR::Node *trNode;1354TR::TreeTop *trTreeTop;1355TR::Block *block;1356TR_CISCGraph *P = trans->getP();1357TR_CISCGraph *T = trans->getT();1358List<TR_CISCNode> *P2T = trans->getP2T();1359TR::Compilation * comp = trans->comp();1360bool isTRT2Char = false;1361TR::CFG *cfg = comp->getFlowGraph();13621363TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");13641365// find the first node of the 
region _candidateRegion1366trans->findFirstNode(&trTreeTop, &trNode, &block);1367if (!block) return false; // cannot find13681369if (isLoopPreheaderLastBlockInMethod(comp, block))1370{1371traceMsg(comp, "Bailing CISCTransform2FindBytes due to null TT - might be a preheader in last block of method\n");1372return false;1373}13741375List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID(); // ificmpge1376TR_CISCNode *exitIfRep = trans->getP2TRepInLoop(P->getImportantNode(1));1377int32_t modLength = 0;1378if (exitIfRep)1379{1380if (exitIfRep != trans->getP2TInLoopIfSingle(P->getImportantNode(1)))1381{1382if (disptrace) traceMsg(comp, "Give up because of multiple candidates of ificmpge.\n");1383return false;1384}1385bool isDecrement;1386if (!testExitIF(exitIfRep->getOpcode(), &isDecrement, &modLength)) return false;1387if (isDecrement) return false;1388}13891390TR::Block *target = trans->analyzeSuccessorBlock();1391if (!target) // multiple successors1392{1393// current restrictions. allow only the case where there is an ificmpge node and successor is 2.1394if (listT->isEmpty() ||1395trans->getNumOfBBlistSucc() != 2)1396{1397if (disptrace) traceMsg(comp, "Currently, CISCTransform2FindBytes allows only the case where there is an ificmpge node and successor is 2.\n");1398return false;1399}1400}14011402// Check if there is idiom specific node insertion.1403// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()1404bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);14051406// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)1407bool isNeedGenIcmpge = !listT->isEmpty() && (!target || isCompensateCode);14081409TR::Node *baseRepNode, *indexRepNode, *ahConstNode = NULL;1410// get each target node corresponding to p0 and p11411getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode);1412// get the node corresponding to1413// 
aiadd1414// aload1415// isub <---1416// index1417// headerConst1418//1419TR_CISCNode *ahConstCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0)->getChild(1));14201421if (ahConstCISCNode)1422{1423ahConstNode = ahConstCISCNode->getHeadOfTrNodeInfo()->_node;1424if (ahConstNode->getOpCode().isAdd() || ahConstNode->getOpCode().isSub())1425ahConstNode = ahConstNode->getSecondChild();1426else1427ahConstNode = NULL;1428}1429TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();14301431// Prepare the function table1432TR::Node *tableNode;1433uint8_t *tmpTable = (uint8_t *)comp->trMemory()->allocateStackMemory(65536 * sizeof(uint8_t));1434int32_t count;1435TR::TreeTop *retSameExit = NULL;1436TR_CISCNode *pBoolTable = P->getImportantNode(0);1437TR_CISCNode *tBoolTable = NULL;14381439TR_ASSERT(trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isByte() ||1440trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned(), "Error");1441isTRT2Char = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();14421443if (!isTRT2Char)1444{1445if ((count = trans->analyzeByteBoolTable(pBoolTable, tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)1446{1447bool go = false;1448if ((tBoolTable = trans->getP2TInLoopIfSingle(pBoolTable)) != 0 &&1449(tBoolTable->getOpcode() == TR::ificmpeq))1450{1451retSameExit = tBoolTable->getDestination();1452go = true;1453}1454if (!go)1455{1456if (disptrace) traceMsg(comp, "analyzeByteBoolTable failed.\n");1457return false; // fail to analyze1458}1459}1460}1461else1462{1463bool supportsSRSTU = comp->cg()->getSupportsSearchCharString();1464if ((count = trans->analyzeCharBoolTable(pBoolTable, tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)1465{1466// Case where we have a single, non-constant delimiter. 
With SRSTU, we can handle this situation.1467if (supportsSRSTU && // Confirm that the processor has the SRSTU instruction.1468(tBoolTable = trans->getP2TInLoopIfSingle(pBoolTable)) != NULL &&1469(tBoolTable->getOpcode() == TR::ificmpeq))1470{1471retSameExit = tBoolTable->getDestination();1472}1473else1474{1475if (disptrace) traceMsg(comp, "analyzeCharBoolTable failed.\n");1476return false; // fail to analyze1477}1478}1479else1480{1481if (!supportsSRSTU || // If we don't have SRSTU support, we can use SRST/TRT for single byte searches if delimiters are within 1-255.1482count != 1) // If we only have 1 constant delimiter and have SRSTU support, we can search for any 2-byte delimiter.1483{1484if (disptrace && count > 1)1485traceMsg(comp, "Multiple exit conditions for a char array. We can implement this case using the TRTE instruction on z6.\n");14861487if (tmpTable[0])1488{1489traceMsg(comp, "Char array has '0' as an exit condition, loop will not be reduced TRT/SRST (single-byte) instruction.\n");1490return false; // if zero is a delimiter, give up.1491}1492for (int32_t i = 256; i < 65536; i++)1493{1494if (tmpTable[i])1495{1496traceMsg(comp, "Char array has one of 256 through 65535 (%d) as an exit condition, loop cannot be reduced to TRT/SRST (single-byte) instruction.\n", i);1497return false; // if any value between 256 and 65535 is a delimiter, give up.1498}1499}1500}1501}1502}15031504if (count != 0 && !retSameExit) // there is a booltable check and all destinations of booltable are not same1505{1506traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");1507return false;1508}15091510// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.1511if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)1512{1513traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. 
Abandoning reduction.\n",1514retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),1515target->getNumber(), target);1516return false;1517}15181519// FIXME: this test is needed because in the TRT2Byte1520// and TRT2 idioms, the aHeader const is not the 4th node1521//1522bool indexRequiresAdjustment = false;1523int32_t ahValue = 0;1524if (ahConstNode && ahConstNode->getOpCode().isLoadConst())1525{1526ahValue = (ahConstNode->getType().isInt64() ?1527(int32_t)ahConstNode->getLongInt() : ahConstNode->getInt());1528if (ahValue < 0)1529ahValue = -ahValue;15301531if (ahValue != TR::Compiler->om.contiguousArrayHeaderSizeInBytes())1532indexRequiresAdjustment = true;1533}1534// We currently don't distinguish between case when starting index is in form of index = index + offset1535// aiadd1536// aload1537// lsub <--- ahConstCISCNode->getHeadOfTrNode()1538// lmul <--- indexLoadNode1539// iload1540// lconst 21541// lconst -10 <--- headerConst1542//1543// vs index' = index; index++; (See: PR: 82148)1544//1545// istore <--- index++;1546// isub1547// iload1548// iconst -11549//..1550// aiadd1551// aload1552// lsub <--- ahConstCISCNode->getHeadOfTrNode()1553// lmul <--- indexLoadNode1554// ==>iload <--- index'1555// lconst 21556// lconst -10 <--- headerConst1557//1558// for now disable cases when ahConstNode doesn't equal contiguousArrayHeaderSizeInBytes1559if (indexRequiresAdjustment)1560{1561traceMsg(comp, "headerConst node value doesn't equal contiguous array header size %p. 
Abandoning reduction.\n", ahConstNode);1562return false;1563}15641565if (avoidTransformingStringLoops(comp))1566{1567traceMsg(comp, "Abandoning reduction because of functional problems when String compression is enabled in Java 8 SR5\n");1568return false;1569}15701571if (count == -1) // single delimiter which is not constant value1572{1573TR_CISCNode *tableCISCNode = tBoolTable->getChild(1);1574tableNode = createLoad(tableCISCNode->getHeadOfTrNodeInfo()->_node);1575if (disptrace) traceMsg(comp, "Single non-constant delimiter found. Setting %p as tableNode.\n", comp->getDebug()->getName(tableCISCNode->getHeadOfTrNodeInfo()->_node));1576}1577else if (count == 1) // single delimiter1578{1579tableNode = NULL;1580int32_t i = 0;1581for (i = 0; i < 65536; i++)1582{1583if (tmpTable[i])1584{1585tableNode = TR::Node::create( baseRepNode, TR::iconst, 0, i); // prepare for SRST / SRSTU1586break;1587}1588}1589TR_ASSERT(tableNode, "error!!!");1590if (disptrace) traceMsg(comp, "Single delimiter found. 
Setting 'iconst %d' [%p] as tableNode.\n", i, comp->getDebug()->getName(tableNode));1591}1592else1593{1594// the static table currently cannot be relocated1595if (comp->compileRelocatableCode())1596{1597if (disptrace) traceMsg(comp, "Abandoning reduction since we can't relocate the static table\n");1598return false;1599}1600tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace); // function table for TRT1601}16021603// prepare the TR::arraytranslateAndTest node1604TR::Node *findBytesNode = TR::Node::create(trNode, TR::arraytranslateAndTest, 5);1605findBytesNode->setArrayTRT(true);1606TR::Node *baseNode = createLoad(baseRepNode);16071608TR::Node *indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);1609if (indexRequiresAdjustment)1610{1611// if refCount > 1, then this means that an old1612// value of the iv is being used in the array index1613//1614if (ahConstCISCNode)1615{1616// aiadd1617// aload1618// isub <--- ahConstCISCNode->getHeadOfTrNode()1619// index <--- indexLoadNode1620// headerConst1621//1622TR::Node *indexParentNode=0;1623int32_t childNum=0;1624if (trans->searchNodeInTrees(ahConstCISCNode->getHeadOfTrNode(), indexNode, &indexParentNode, &childNum))1625{1626TR::Node *indexLoadNode = indexParentNode->getChild(childNum);1627if (indexLoadNode->getOpCode().isLoadVar() &&1628indexLoadNode->getReferenceCount() > 1)1629indexNode = indexLoadNode;1630}1631}16321633int32_t width = isTRT2Char ? 
2 : 1;1634indexNode = TR::Node::create(TR::isub, 2,1635indexNode,1636TR::Node::create(indexRepNode, TR::iconst, 0,1637((TR::Compiler->om.contiguousArrayHeaderSizeInBytes() - ahValue)/width))1638);1639}16401641TR::Node *alenNode = TR::Node::create( baseRepNode, TR::arraylength, 1);1642alenNode->setAndIncChild(0, baseNode);1643////findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateFindBytesSymbol());1644findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateAndTestSymbol());1645findBytesNode->setAndIncChild(0, baseNode);1646findBytesNode->setAndIncChild(1, createI2LIfNecessary(comp, trans->isGenerateI2L(), indexNode));1647findBytesNode->setAndIncChild(2, tableNode);1648findBytesNode->setAndIncChild(3, createI2LIfNecessary(comp, trans->isGenerateI2L(), alenNode));1649////findBytesNode->setElementChar(isTRT2Byte);1650findBytesNode->setCharArrayTRT(isTRT2Char);16511652TR_CISCNode *icmpgeCISCnode = NULL;1653TrNodeInfo *icmpgeRepInfo = NULL;1654TR::Node *lenRepNode = NULL;16551656// There is no ificmpge node.1657if (listT->isEmpty())1658{1659findBytesNode->setNumChildren(4); // we don't need to prepare the fifth parameter "endLen"1660}1661else1662{1663if (disptrace) traceMsg(comp,"Loop has TR::ificmpge for comparing the index.\n");1664TR_CISCNode *lenNode;1665if (listT->isSingleton())1666{1667icmpgeCISCnode = listT->getListHead()->getData();1668lenNode = icmpgeCISCnode->getChild(1);1669}1670else1671{1672ListIterator<TR_CISCNode> li(listT);1673TR_CISCNode *n;1674lenNode = NULL;1675// find icmpge in the candidate region1676for (n = li.getFirst(); n; n = li.getNext())1677{1678if (trans->getCandidateRegion()->isIncluded(n))1679{1680icmpgeCISCnode = n;1681lenNode = n->getChild(1);1682break;1683}1684}1685TR_ASSERT(lenNode != NULL, "error!");1686}1687// set the fifth parameter "endLen"1688icmpgeRepInfo = icmpgeCISCnode->getHeadOfTrNodeInfo();1689lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);1690if (modLength) 
lenRepNode = createOP2(comp, TR::isub, lenRepNode,1691TR::Node::create( baseRepNode, TR::iconst, 0, -modLength));1692findBytesNode->setAndIncChild(4, createI2LIfNecessary(comp, trans->isGenerateI2L(), lenRepNode));1693}1694TR::Node * top = TR::Node::create(TR::treetop, 1, findBytesNode);1695TR::Node * storeToIndVar = TR::Node::createStore(indexVarSymRef, findBytesNode);16961697// create Nodes if there are multiple exit points.1698TR::Node *icmpgeNode = NULL;1699TR::TreeTop *failDest = NULL;1700TR::TreeTop *okDest = NULL;1701TR::Block *compensateBlock0 = NULL;1702TR::Block *compensateBlock1 = NULL;1703if (isNeedGenIcmpge)1704{1705if (disptrace) traceMsg(comp,"Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");1706TR_ASSERT(icmpgeRepInfo, "Not implemented yet"); // current restriction1707okDest = retSameExit;1708failDest = icmpgeCISCnode->getDestination();1709// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()1710if (isCompensateCode)1711{1712compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);1713compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);1714compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));1715compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));1716okDest = compensateBlock0->getEntry();1717failDest = compensateBlock1->getEntry();1718}1719TR_ASSERT(okDest != NULL, "error! okDest == NULL");1720TR_ASSERT(failDest != NULL, "error! failDest == NULL");1721if (disptrace) traceMsg(comp, "Block: okDest=%d failDest=%d\n", okDest->getEnclosingBlock()->getNumber(),1722failDest->getEnclosingBlock()->getNumber());1723TR_ASSERT(okDest != failDest, "error! okDest == failDest");17241725// It actually generates "ificmplt" (NOT ificmpge!) 
in order to suppress a redundant goto block.1726icmpgeNode = TR::Node::createif(TR::ificmplt,1727TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),1728lenRepNode,1729okDest);1730}17311732// Check existence of nullchk1733// Insert (nullchk), findbytes, and result store instructions1734listT = P2T + P->getImportantNode(2)->getID();1735TR::TreeTop *last;1736TR::TreeTop *nextTreeTop1 = trTreeTop->getNextTreeTop();1737if (nextTreeTop1 == block->getExit())1738{1739nextTreeTop1 = TR::TreeTop::create(comp); // need to create1740}1741if (listT->isEmpty()) // no NULLCHK1742{1743last = trans->removeAllNodes(trTreeTop, block->getExit());1744last->join(block->getExit());1745block = trans->insertBeforeNodes(block);1746last = block->getLastRealTreeTop();1747last->join(trTreeTop);1748trTreeTop->setNode(top);1749trTreeTop->join(nextTreeTop1);1750nextTreeTop1->setNode(storeToIndVar);1751nextTreeTop1->join(block->getExit());1752}1753else1754{1755if (disptrace) traceMsg(comp,"NULLCHK is found!\n");1756// a NULLCHK was found, so just create a NULLCHK on1757// the arraybase1758// NULLCHK1759// PassThrough1760// baseNode1761//1762///TR_CISCNode *nullNode = listT->getListHead()->getData();1763///TR::Node *nullRepNode = nullNode->getHeadOfTrNodeInfo()->_node;1764TR::Node *dupNullRepNode = baseNode->duplicateTree();1765dupNullRepNode = TR::Node::create(TR::PassThrough, 1, dupNullRepNode);1766dupNullRepNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, dupNullRepNode, comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol()));1767TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);1768last = trans->removeAllNodes(trTreeTop, block->getExit());1769last->join(block->getExit());1770block = trans->insertBeforeNodes(block);1771last = 
block->getLastRealTreeTop();1772last->join(trTreeTop);1773trTreeTop->setNode(dupNullRepNode);1774trTreeTop->join(nextTreeTop1);1775nextTreeTop1->setNode(top);1776nextTreeTop1->join(nextTreeTop2);1777nextTreeTop2->setNode(storeToIndVar);1778nextTreeTop2->join(block->getExit());1779}17801781// insert compensation code generated by non-idiom-specific transformation1782block = trans->insertAfterNodes(block);17831784if (isNeedGenIcmpge)1785{1786block->append(TR::TreeTop::create(comp, icmpgeNode));1787if (isCompensateCode)1788{1789cfg->setStructure(NULL);1790TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();1791TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();1792compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]1793compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]1794cfg->insertBefore(compensateBlock0, orgNextBlock);1795cfg->insertBefore(compensateBlock1, compensateBlock0);1796cfg->join(block, compensateBlock1);1797}1798}1799else if (isCompensateCode)1800{1801block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]1802}18031804// set successor edge(s) to the original block1805if (!isNeedGenIcmpge)1806{1807trans->setSuccessorEdge(block, target);1808}1809else1810{1811trans->setSuccessorEdges(block,1812failDest->getEnclosingBlock(),1813okDest->getEnclosingBlock());1814}18151816return true;1817}181818191820/*************************************************************************************1821Corresponding Java-like pseudocode1822int i, end;1823byte byteArray[ ];1824while(true){1825if (booltable(byteArray[i])) break;1826i++;1827if (i >= end) break; // optional1828}18291830Note 1: The wildcard node "booltable" matches if-statements or switch-case statements1831whose operands consist of the argument of booltable and any constants.1832Note 2: "optional" can be excluded in an input 
program.1833*************************************************************************************/1834TR_PCISCGraph *1835makeTRTGraph(TR::Compilation *c, int32_t ctrl)1836{1837TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT", 0, 16);1838/************************************ opc id dagId #cfg #child other/pred/children */1839TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(byteArray); // array base1840TR_PCISCNode *iv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(iv); // array index1841TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(end); // length (optional)1842TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(aHeader); // array header1843TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(increment);1844TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor1845TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);1846TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, entry, byteArray);1847tgt->addNode(nullChk); // optional1848TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::arraylength, TR::NoType, tgt->incNumNodes(), 1, 1, 1, nullChk, byteArray); tgt->addNode(arrayLen);1849TR_PCISCNode *bndChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, 
tgt->incNumNodes(), 1, 1, 2, arrayLen, arrayLen, iv); tgt->addNode(bndChk);1850TR_PCISCNode *arrayLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bndChk, TR_ibcload, TR::NoType, byteArray, iv, aHeader, mulFactor);1851TR_PCISCNode *b2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, arrayLoad, arrayLoad); tgt->addNode(b2iNode);1852TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, b2iNode, b2iNode); tgt->addNode(boolTable);1853TR_PCISCNode *ivStore = createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, iv, increment);1854TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ivStore, iv, end);1855tgt->addNode(loopTest); // optional1856TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);18571858boolTable->setSucc(1, exit);1859loopTest->setSuccs(entry->getSucc(0), exit);18601861end->setIsOptionalNode();1862loopTest->setIsOptionalNode();1863nullChk->setIsOptionalNode();18641865b2iNode->setIsChildDirectlyConnected();1866loopTest->setIsChildDirectlyConnected();18671868tgt->setSpecialCareNode(0, boolTable); // TR_booltable1869tgt->setEntryNode(entry);1870tgt->setExitNode(exit);1871tgt->setImportantNodes(boolTable, loopTest, nullChk, arrayLoad);1872tgt->setNumDagIds(9);1873tgt->createInternalData(1);18741875tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);1876tgt->setTransformer(CISCTransform2FindBytes);1877tgt->setInhibitAfterVersioning();1878tgt->setAspects(isub|bndchk, existAccess, 0);1879tgt->setNoAspects(call|bitop1, 0, existAccess);1880tgt->setMinCounts(1, 1, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount1881tgt->setHotness(warm, true);1882return tgt;1883}188418851886TR_PCISCGraph *1887makeTRTGraph2(TR::Compilation *c, int32_t 
ctrl)1888{1889TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT2", 0, 16);1890/******************************************************************* opc id dagId #cfg #child other/pred/children */1891TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(v0); // array base1892TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(v1); // array index1893TR_PCISCNode *corv= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 7, 0, 0); tgt->addNode(corv); // length (optional)1894TR_PCISCNode *alen= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(alen); // arraylength (optional)1895TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah); // array header1896TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(cm1);1897TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor1898TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);1899TR_PCISCNode *nchk= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, ent, v0); tgt->addNode(nchk); // optional1900TR_PCISCNode *bck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nchk, alen, v1); tgt->addNode(bck); // optional1901TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bck, TR_ibcload, TR::NoType, v0, v1, cmah, 
mulFactor);1902TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);1903TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3); tgt->addNode(n4);1904TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, n4, v1, cm1);1905TR_PCISCNode *n7 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, corv); tgt->addNode(n7); // optional1906TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);19071908n4->setSucc(1, n8);1909n7->setSuccs(ent->getSucc(0), n8);19101911corv->setIsOptionalNode();1912n7->setIsOptionalNode();1913alen->setIsOptionalNode();1914nchk->setIsOptionalNode();1915bck->setIsOptionalNode();19161917n3->setIsChildDirectlyConnected();1918n7->setIsChildDirectlyConnected();19191920tgt->setSpecialCareNode(0, n4); // TR_booltable1921tgt->setEntryNode(ent);1922tgt->setExitNode(n8);1923tgt->setImportantNodes(n4, n7, nchk, n2);1924tgt->setNumDagIds(10);1925tgt->createInternalData(1);19261927tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);1928tgt->setTransformer(CISCTransform2FindBytes);1929tgt->setInhibitBeforeVersioning();1930tgt->setAspects(isub, existAccess, 0);1931tgt->setNoAspects(call|bitop1, 0, existAccess);1932tgt->setMinCounts(1, 1, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount1933tgt->setHotness(warm, true);1934return tgt;1935}193619371938/****************************************************************************************1939Corresponding Java-like pseudocode1940int i, end;1941char charArray[ ]; // char array1942while(true){1943if (booltable(charArray[i])) break;1944i++;1945if (i >= end) break; // optional1946}19471948Note 1: There is one limitation. 
Only when the booltable matches if-statements comparing1949to the constants 1 through 255, the transformation will succeed.1950Note 2: Currently, the generated code checks whether the character found by TRT (or SRST)1951is a delimiter.1952Note 3: New instructions that directly support a 2-byte array will improve current1953drawbacks described in Notes 1 and 2.1954****************************************************************************************/1955TR_PCISCGraph *1956makeTRT2ByteGraph(TR::Compilation *c, int32_t ctrl)1957{1958TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT2Byte", 0, 16);1959/**************************************************************************** opc id dagId #cfg #child other/pred/children */1960TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(charArray); // array base1961TR_PCISCNode *iv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 1); tgt->addNode(iv); // array index1962TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 7, 0, 0); tgt->addNode(end); // length (optional)1963TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0);1964tgt->addNode(arrayLen); // arraylength (optional)1965TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(aHeader); // array header1966TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(increment);1967TR_PCISCNode *mulFactor = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size1968TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, 
tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);1969TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, entry, charArray); tgt->addNode(nullChk); // optional1970TR_PCISCNode *bndChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nullChk, arrayLen, iv);1971tgt->addNode(bndChk); // optional1972TR_PCISCNode *arrayLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bndChk, TR::sloadi, TR::Int16, charArray, iv, aHeader, mulFactor);1973TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, arrayLoad, arrayLoad); tgt->addNode(c2iNode);1974TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, c2iNode, c2iNode); tgt->addNode(boolTable);1975TR_PCISCNode *ivStore = createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, iv, increment);1976TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ivStore, iv, end);1977tgt->addNode(loopTest); // optional1978TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);19791980boolTable->setSucc(1, exit);1981loopTest->setSuccs(entry->getSucc(0), exit);19821983end->setIsOptionalNode();1984loopTest->setIsOptionalNode();1985arrayLen->setIsOptionalNode();1986nullChk->setIsOptionalNode();1987bndChk->setIsOptionalNode();19881989c2iNode->setIsChildDirectlyConnected();1990loopTest->setIsChildDirectlyConnected();19911992tgt->setSpecialCareNode(0, boolTable); // TR_booltable1993tgt->setEntryNode(entry);1994tgt->setExitNode(exit);1995tgt->setImportantNodes(boolTable, loopTest, nullChk, 
arrayLoad);1996tgt->setNumDagIds(10);1997tgt->createInternalData(1);19981999tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);2000tgt->setTransformer(CISCTransform2FindBytes);2001tgt->setInhibitBeforeVersioning();2002tgt->setAspects(isub|mul, ILTypeProp::Size_2, 0);2003tgt->setNoAspects(call|bitop1, 0, existAccess);2004tgt->setMinCounts(1, 1, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount2005tgt->setHotness(warm, true);2006return tgt;2007}200820092010//////////////////////////////////////////////////////////////////////////2011//////////////////////////////////////////////////////////////////////////2012//////////////////////////////////////////////////////////////////////////20132014//*****************************************************************************************2015// IL code generation for exploiting the TRT (or SRST) instruction2016// This is the case where the function table is prepared by the user program.2017// Input: ImportantNodes(0) - booltable2018// ImportantNodes(1) - ificmpge2019// ImportantNodes(2) - NULLCHK2020//*****************************************************************************************2021bool2022CISCTransform2NestedArrayFindBytes(TR_CISCTransformer *trans)2023{2024TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");2025// arraytranslateAndTest is overloaded with a flag2026//2027const bool disptrace = DISPTRACE(trans);2028TR::Node *trNode;2029TR::TreeTop *trTreeTop;2030TR::Block *block;2031TR_CISCGraph *P = trans->getP();2032List<TR_CISCNode> *P2T = trans->getP2T();2033TR::Compilation *comp = trans->comp();2034int lenForDynamic = trans->isInitializeNegative128By1() ? 
128 : 256;20352036TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");20372038TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");2039if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;20402041trans->findFirstNode(&trTreeTop, &trNode, &block);2042if (!block) return false; // cannot find20432044if (isLoopPreheaderLastBlockInMethod(comp, block))2045{2046traceMsg(comp, "Bailing CISCTransform2NestedArrayFindBytes due to null TT - might be a preheader in last block of method\n");2047return false;2048}20492050TR::Block *target = trans->analyzeSuccessorBlock();2051// Currently, it allows only a single successor.2052if (!target) return false;20532054uint8_t tmpTable[256];2055int count;2056if ((count = trans->analyzeByteBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1))) <= 0)2057return false;2058if (disptrace) dump256Bytes(tmpTable, comp);20592060bool isMapDirectlyUsed = isFitTRTFunctionTable(tmpTable);2061bool isGenerateTROO = !isMapDirectlyUsed;20622063// Currently, we support only if the map table can be directly used as the function table.2064// Thus, the following code is tentative.2065//2066if (!isMapDirectlyUsed) return false;2067//20682069if (avoidTransformingStringLoops(comp))2070{2071traceMsg(comp, "Abandoning reduction because of functional problems when String compression is enabled in Java 8 SR5\n");2072return false;2073}20742075TR::Node *baseRepNode, *indexRepNode, *outerBaseRepNode;2076getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &outerBaseRepNode);2077TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();2078TR::SymbolReference * outerBaseVarSymRef = outerBaseRepNode->getSymbolReference();20792080uint8_t *tableOuterResult = NULL;2081if (!isMapDirectlyUsed)2082{2083// TODO: To make this work on non-Java environments, the table should be in the code cache, 
not persistent memory2084tableOuterResult= (uint8_t *)comp->jitPersistentAlloc(256);2085if (trans->isInitializeNegative128By1())2086memset(tableOuterResult+128, 1, 128);2087}20882089// Currently, TROO is never generated here. In this case, it returned with the failure above.2090TR::Node * tableNode;2091TR::Node * topOfTranslateNode = NULL;2092if (isGenerateTROO)2093{2094tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace);2095//2096// Prepare TR::arraytranslate2097//2098TR::Node * inputNode = createArrayTopAddressTree(comp, trans->isGenerateI2L(), outerBaseRepNode);2099TR::Node * outputNode = TR::Node::aconst(baseRepNode, (uintptr_t)tableOuterResult);2100TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xff);2101TR::Node * lengthNode = TR::Node::create( baseRepNode, TR::iconst, 0, lenForDynamic);2102TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);21032104TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);2105translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());2106translateNode->setAndIncChild(0, inputNode);2107translateNode->setAndIncChild(1, outputNode);2108translateNode->setAndIncChild(2, tableNode);2109translateNode->setAndIncChild(3, termCharNode);2110translateNode->setAndIncChild(4, lengthNode);2111translateNode->setAndIncChild(5, stoppingNode);21122113translateNode->setSourceIsByteArrayTranslate(true);2114translateNode->setTargetIsByteArrayTranslate(true);2115translateNode->setTermCharNodeIsHint(false);2116translateNode->setSourceCellIsTermChar(false);2117translateNode->setTableBackedByRawStorage(true);2118topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);2119}21202121//2122// Prepare TR::arraytranslateAndTest2123//2124TR::Node *findBytesNode = TR::Node::create(trNode, TR::arraytranslateAndTest, 5);2125findBytesNode->setArrayTRT(true);2126TR::Node *baseNode = createLoad(baseRepNode);2127TR::Node 
*indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);2128TR::Node *alenNode = TR::Node::create( baseRepNode, TR::arraylength, 1);2129alenNode->setAndIncChild(0, baseNode);2130// Currently, it always uses "isMapDirectlyUsed" version.2131if (isMapDirectlyUsed)2132{2133tableNode = createArrayTopAddressTree(comp, trans->isGenerateI2L(), outerBaseRepNode);2134}2135else2136{2137tableNode = TR::Node::create( baseRepNode, TR::aconst, (uintptr_t)tableOuterResult);2138}2139////findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateFindBytesSymbol());2140findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateAndTestSymbol());2141findBytesNode->setAndIncChild(0, baseNode);2142findBytesNode->setAndIncChild(1, createI2LIfNecessary(comp, trans->isGenerateI2L(), indexNode));2143findBytesNode->setAndIncChild(2, tableNode);2144findBytesNode->setAndIncChild(3, createI2LIfNecessary(comp, trans->isGenerateI2L(), alenNode));2145findBytesNode->setCharArrayTRT(false);21462147List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID();2148if (listT->isEmpty())2149{2150findBytesNode->setNumChildren(4);2151}2152else2153{2154if (disptrace) traceMsg(comp,"TR::ificmpge for comaring the index is found!\n");2155TR_CISCNode *lenNode;2156TR::Node *lenRepNode;2157if (listT->isSingleton())2158{2159lenNode = listT->getListHead()->getData()->getChild(1);2160}2161else2162{2163ListIterator<TR_CISCNode> li(listT);2164TR_CISCNode *n;2165lenNode = NULL;2166for (n = li.getFirst(); n; n = li.getNext())2167{2168if (trans->getCandidateRegion()->isIncluded(n))2169{2170if (!lenNode)2171{2172lenNode = n->getChild(1);2173}2174}2175}2176TR_ASSERT(lenNode != NULL, "error!");2177}2178lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);2179findBytesNode->setAndIncChild(4, createI2LIfNecessary(comp, trans->isGenerateI2L(), lenRepNode));2180}2181TR::Node * top = TR::Node::create(TR::treetop, 1, findBytesNode);2182TR::Node * 
storeToIndVar = TR::Node::createStore(indexVarSymRef, findBytesNode);21832184// Check existence of nullchk2185// Insert (nullchk), findbytes, and result store instructions2186listT = P2T + P->getImportantNode(2)->getID();2187TR::TreeTop *last;21882189if (listT->isEmpty()) // no NULLCHK2190{2191TR::TreeTop *nextTreeTop1 = TR::TreeTop::create(comp);2192last = trans->removeAllNodes(trTreeTop, block->getExit());2193last->join(block->getExit());2194block = trans->insertBeforeNodes(block);2195last = block->getLastRealTreeTop();2196last->join(trTreeTop);2197if (topOfTranslateNode)2198{2199TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);2200trTreeTop->setNode(topOfTranslateNode);2201trTreeTop->join(nextTreeTop1);2202nextTreeTop1->setNode(top);2203nextTreeTop1->join(nextTreeTop2);2204nextTreeTop2->setNode(storeToIndVar);2205nextTreeTop2->join(block->getExit());2206}2207else2208{2209trTreeTop->setNode(top);2210trTreeTop->join(nextTreeTop1);2211nextTreeTop1->setNode(storeToIndVar);2212nextTreeTop1->join(block->getExit());2213}2214}2215else2216{2217if (disptrace) traceMsg(comp,"NULLCHK is found!\n");2218TR::TreeTop *nextTreeTop1 = TR::TreeTop::create(comp);2219TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);2220// a NULLCHK was found, so just create a NULLCHK on2221// the arraybase2222// NULLCHK2223// PassThrough2224// baseNode2225//2226///TR_CISCNode *nullNode = listT->getListHead()->getData();2227///TR::Node *nullRepNode = nullNode->getHeadOfTrNodeInfo()->_node;2228TR::Node *dupNullRepNode = baseNode->duplicateTree();2229dupNullRepNode = TR::Node::create(TR::PassThrough, 1, dupNullRepNode);2230dupNullRepNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, dupNullRepNode, comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol()));22312232last = trans->removeAllNodes(trTreeTop, block->getExit());2233last->join(block->getExit());2234block = trans->insertBeforeNodes(block);2235last = 
block->getLastRealTreeTop();2236last->join(trTreeTop);2237trTreeTop->setNode(dupNullRepNode);2238trTreeTop->join(nextTreeTop1);2239if (topOfTranslateNode)2240{2241TR::TreeTop *nextTreeTop3 = TR::TreeTop::create(comp);2242nextTreeTop1->setNode(topOfTranslateNode);2243nextTreeTop1->join(nextTreeTop2);2244nextTreeTop2->setNode(top);2245nextTreeTop2->join(nextTreeTop3);2246nextTreeTop3->setNode(storeToIndVar);2247nextTreeTop3->join(block->getExit());2248}2249else2250{2251nextTreeTop1->setNode(top);2252nextTreeTop1->join(nextTreeTop2);2253nextTreeTop2->setNode(storeToIndVar);2254nextTreeTop2->join(block->getExit());2255}2256}2257block = trans->insertAfterNodes(block); // insert compensation code generated by non-idiom-specific transformation2258block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]22592260trans->setSuccessorEdge(block, target);2261return true;2262}2263226422652266/****************************************************************************************2267Corresponding Java-like pseudocode2268int i, end;2269byte byteArray[ ], map[ ];2270while(true){2271if (map[byteArray[i] & 0xff] != 0)) break;2272i++;2273if (i >= end) break; // optional2274}2275****************************************************************************************/2276TR_PCISCGraph *2277makeTRT4NestedArrayGraph(TR::Compilation *c, int32_t ctrl)2278{2279TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT4NestedArray", 0, 16);2280/**************************************************************************** opc id dagId #cfg #child other/pred/children */2281TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(byteArray); // array base2282TR_PCISCNode *iv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(iv); // array index2283TR_PCISCNode *mapArray = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(mapArray); // outer array base2284TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(end); // length (optional)2285TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(aHeader); // array header2286TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(increment);2287TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_allconst, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor2288TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);2289TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, entry, byteArray);2290tgt->addNode(nullChk); // optional2291TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::arraylength, TR::NoType, tgt->incNumNodes(),1, 1, 1, nullChk, byteArray); tgt->addNode(arrayLen);2292TR_PCISCNode *bndChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, arrayLen, arrayLen, iv); tgt->addNode(bndChk);2293TR_PCISCNode *bALoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bndChk, TR_ibcload, TR::NoType, byteArray, iv, aHeader, mulFactor);2294TR_PCISCNode *bu2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, bALoad, bALoad); tgt->addNode(bu2iNode);2295TR_PCISCNode *mapAload = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bu2iNode, TR_ibcload, TR::NoType, mapArray, bu2iNode, aHeader, mulFactor);2296TR_PCISCNode *b2iNode = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, mapAload, mapAload); tgt->addNode(b2iNode);2297TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, b2iNode, b2iNode); tgt->addNode(boolTable);2298TR_PCISCNode *ivStore = createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, iv, increment);2299TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ivStore, iv, end);2300tgt->addNode(loopTest); // optional2301TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);23022303boolTable->setSucc(1, exit);2304loopTest->setSuccs(entry->getSucc(0), exit);23052306end->setIsOptionalNode();2307loopTest->setIsOptionalNode();2308nullChk->setIsOptionalNode();2309b2iNode->setIsOptionalNode();23102311bu2iNode->setIsChildDirectlyConnected();2312loopTest->setIsChildDirectlyConnected();23132314tgt->setSpecialCareNode(0, boolTable); // TR_booltable2315tgt->setEntryNode(entry);2316tgt->setExitNode(exit);2317tgt->setImportantNodes(boolTable, loopTest, nullChk);2318tgt->setNumDagIds(10);2319tgt->createInternalData(1);23202321tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);2322tgt->setTransformer(CISCTransform2NestedArrayFindBytes);2323tgt->setInhibitAfterVersioning();2324tgt->setAspects(isub|bndchk, ILTypeProp::Size_1, 0);2325tgt->setNoAspects(call|bitop1, 0, existAccess);2326tgt->setMinCounts(1, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount2327tgt->setHotness(veryHot, true);2328return 
tgt;2329}233023312332//////////////////////////////////////////////////////////////////////////2333//////////////////////////////////////////////////////////////////////////2334//////////////////////////////////////////////////////////////////////////23352336bool2337CISCTransform2NestedArrayIfFindBytes(TR_CISCTransformer *trans)2338{2339trans->setIsInitializeNegative128By1();2340return CISCTransform2NestedArrayFindBytes(trans);2341}2342234323442345/****************************************************************************************2346Corresponding Java-like Pseudo Program2347int v1, end;2348byte v0[ ], map[ ];2349while(true){2350T = v0[v1];2351if (T < 0 || map[T] != 0)) break;2352v1++;2353if (v1 >= end) break; // optional2354}2355****************************************************************************************/2356TR_PCISCGraph *2357makeTRT4NestedArrayIfGraph(TR::Compilation *c, int32_t ctrl)2358{2359TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT4NestedArrayIf", 0, 16);2360/********************************************************************* opc id dagId #cfg #child other/pred/children */2361TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 0); tgt->addNode(v0); // array base2362TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(v1); // array index2363TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 1); tgt->addNode(v2); // outer array base2364TR_PCISCNode *corv= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),7, 0, 0); tgt->addNode(corv); // length (optional)2365TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah); // array header2366TR_PCISCNode *cm1 = 
new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(cm1);2367TR_PCISCNode *cm0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 0); tgt->addNode(cm0);2368TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_allconst, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor2369TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);2370TR_PCISCNode *nchk= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, ent , v0); tgt->addNode(nchk); // optional2371TR_PCISCNode *alen= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::arraylength, TR::NoType, tgt->incNumNodes(),1, 1, 1, nchk, v0); tgt->addNode(alen);2372TR_PCISCNode *bck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, alen, alen, v1); tgt->addNode(bck);2373TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, bck, TR_ibcload, TR::NoType, v0, v1, cmah, mulFactor);2374TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);2375TR_PCISCNode *nif0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmplt, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n3, n3, cm0); tgt->addNode(nif0);2376TR_PCISCNode *nn2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, nif0, TR_ibcload, TR::NoType, v2, n3, cmah, mulFactor);2377TR_PCISCNode *nn3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nn2, nn2); tgt->addNode(nn3);2378TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, nn3, nn3); tgt->addNode(n4);2379TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, n4, 
v1, cm1);2380TR_PCISCNode *n7 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, corv); tgt->addNode(n7); // optional2381TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);23822383nif0->setSucc(1, n8);2384n4->setSucc(1, n8);2385n7->setSuccs(ent->getSucc(0), n8);23862387corv->setIsOptionalNode();2388n7->setIsOptionalNode();2389nchk->setIsOptionalNode();23902391n3->setIsChildDirectlyConnected();2392n7->setIsChildDirectlyConnected();23932394tgt->setSpecialCareNode(0, n4); // TR_booltable2395tgt->setEntryNode(ent);2396tgt->setExitNode(n8);2397tgt->setImportantNodes(n4, n7, nchk);2398tgt->setNumDagIds(11);2399tgt->createInternalData(1);24002401tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);2402tgt->setTransformer(CISCTransform2NestedArrayIfFindBytes);2403tgt->setInhibitAfterVersioning();2404tgt->setAspects(isub|bndchk, ILTypeProp::Size_1, 0);2405tgt->setNoAspects(call|bitop1, 0, existAccess);2406tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount2407tgt->setHotness(veryHot, true);2408return tgt;2409}241024112412//////////////////////////////////////////////////////////////////////////2413//////////////////////////////////////////////////////////////////////////2414//////////////////////////////////////////////////////////////////////////24152416//*****************************************************************************************2417// IL code generation for exploiting the TROT or TROO instruction2418// This is the case where the compiler will create the function table by analyzing booltable.2419// Input: ImportantNode(0) - booltable2420// ImportantNode(1) - ificmpge2421// ImportantNode(2) - load of the source array2422// ImportantNode(3) - store of the destination array2423// ImportantNode(4) - optional node for optimizing java/lang/String.<init>([BIII)V2424// We will 
version the loop by "if (high == 0)".2425//*****************************************************************************************2426static TR_YesNoMaybe isSignExtendingCopyingTROx(TR_CISCTransformer *trans);24272428#define TERMCHAR (0xF0FF) // not the sign- or zero-extension of any byte2429bool2430CISCTransform2CopyingTROx(TR_CISCTransformer *trans)2431{2432const bool disptrace = DISPTRACE(trans);2433TR::Node *trNode;2434TR::TreeTop *trTreeTop;2435TR::Block *block;2436TR_CISCGraph *P = trans->getP();2437List<TR_CISCNode> *P2T = trans->getP2T();2438TR::Compilation *comp = trans->comp();2439bool isOutputChar = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();2440const char *title = P->getTitle();2441int32_t pattern = P->getPatternType();24422443bool genTRxx = comp->cg()->getSupportsArrayTranslateTRxx();2444bool genSIMD = comp->cg()->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDArrayTranslate);24452446if (!isOutputChar && genSIMD && !genTRxx){2447traceMsg(comp, "Bailing CISCTransform2CopyingTROx : b2b - no proper evaluator available\n");2448return false;2449}24502451bool isSignExtending = false;2452if (isOutputChar)2453{2454TR_YesNoMaybe sx = isSignExtendingCopyingTROx(trans);2455if (sx == TR_maybe)2456{2457traceMsg(comp,2458"Bailing CISCTransform2CopyingTROx : unknown integer conversion\n");2459return false;2460}2461isSignExtending = sx == TR_yes;2462}24632464TR_CISCNode *additionHigh = NULL;2465if (P->getImportantNode(4))2466additionHigh = trans->getP2TRepInLoop(P->getImportantNode(4));24672468if (additionHigh)2469{2470TR_CISCNode *loadResult = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0));2471// Below we need to be able to tell which of the children of iadd is the2472// loaded value, and which is the loop-invariant offset. We do that by2473// requiring that one is obviously the loaded value. 
If not, give up now.2474if (additionHigh->getChild(0) != loadResult && additionHigh->getChild(1) != loadResult)2475{2476traceMsg(comp,2477"Bailing CISCTransform2CopyingTROx : inscrutable iadd\n");2478return false;2479}2480}24812482/*2483while (*title != '\0')2484{2485if (*title == '(')2486{2487pattern = *(++title) - '0';2488break;2489}2490++title;2491}2492*/2493if (disptrace)2494traceMsg(comp, "Found graph pattern as %d\n", pattern);24952496trans->findFirstNode(&trTreeTop, &trNode, &block);2497if (!block) return false; // cannot find24982499if (isLoopPreheaderLastBlockInMethod(comp, block))2500{2501traceMsg(comp, "Bailing CISCTransform2CopyingTROx due to null TT - might be a preheader in last block of method\n");2502return false;2503}25042505TR_CISCNode * inputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));2506TR_CISCNode * outputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0));2507TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();2508TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();25092510TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *indexDiffRepNode;2511getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &indexDiffRepNode);2512TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();2513TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? 
dstIndexRepNode->getSymbolReference() : NULL;2514if (trans->countGoodArrayIndex(indexVarSymRef) == 0)2515{2516if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p\n",indexRepNode);2517return false;2518}2519if (indexVarSymRef == dstIndexVarSymRef)2520{2521dstIndexRepNode = NULL;2522dstIndexVarSymRef = NULL;2523}2524if (dstIndexVarSymRef)2525{2526if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0)2527{2528if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p\n",dstIndexRepNode);2529return false;2530}2531}2532TR_ScratchList<TR::Node> variableList(comp->trMemory());2533variableList.add(indexRepNode);2534if (dstIndexRepNode) variableList.add(dstIndexRepNode);2535if (!isIndexVariableInList(inputNode, &variableList) ||2536!isIndexVariableInList(outputNode, &variableList))2537{2538dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);2539return false;2540}2541TR::SymbolReference * indexDiffVarSymRef = (indexDiffRepNode->getOpCode().isLoadVarOrStore() &&2542!indexDiffRepNode->getOpCode().isIndirect()) ?2543indexDiffRepNode->getSymbolReference() : NULL;2544TR::Node *ignoreTree = dstIndexVarSymRef && indexDiffVarSymRef && indexVarSymRef ?2545createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, indexVarSymRef, indexDiffVarSymRef, trNode) : NULL;2546TR::Block *target = trans->analyzeSuccessorBlock(ignoreTree);2547if (!target) // multiple successors2548{2549// current restrictions. allow only the case where the number of successors is 2.2550if (trans->getNumOfBBlistSucc() != 2)2551{2552if (disptrace) traceMsg(comp, "current restrictions. 
The number of successors is %d\n", trans->getNumOfBBlistSucc());2553return false;2554}2555}25562557// Check if there is idiom specific node insertion.2558// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()2559bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);25602561// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)2562bool isNeedGenIcmpge = (!target || isCompensateCode);25632564// Prepare the function table2565TR::Node *tableNode;2566uint8_t tmpTable[256];25672568TR::TreeTop *retSameExit = NULL;25692570// Number of Bool Table Test characters:2571// -1 -> analyzeByteBoolTable error.2572// 0 -> no bool table tests.2573// >0 -> # of constant test characters.2574int32_t numBoolTableTestChars = trans->analyzeByteBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1), &retSameExit);2575if (numBoolTableTestChars < 0)2576{2577if (disptrace) traceMsg(comp, "analyzeByteBoolTable failed.\n");2578return false;2579}25802581if (numBoolTableTestChars != 0 && !retSameExit) // Destinations of booltable checks are not same2582{2583traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");2584return false;2585}25862587// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.2588if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)2589{2590traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. 
Abandoning reduction.\n",2591retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),2592target->getNumber(), target);2593return false;2594}25952596// check if the induction variable needs to be updated by 12597// this depends on whether the induction variable is incremented2598// before the boolTable exit or after (ie. before the loop driving test)2599//2600TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;2601bool ivNeedsUpdate = false;2602bool dstIvNeedsUpdate = false;2603if (0 && boolTableExit)2604{2605TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;2606traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());2607ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);2608if (dstIndexVarSymRef)2609dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);2610}26112612int termchar;2613int stopchar = -1;2614if (comp->cg()->getSupportsArrayTranslateTROTNoBreak()||comp->cg()->getSupportsArrayTranslateTROT())2615{2616//for b2s on X (ISO, ASCII) and P(ISO)2617bool foundLoopToReduce = false;2618termchar = 0; //value of 0, needed by arraytranslateEvaluator to decide between TROT and TROTNoBreak versions.2619if (!isOutputChar)2620{2621traceMsg(comp, "failed because of reason 1 %\n");2622return false;2623}2624if (comp->cg()->getSupportsArrayTranslateTROTNoBreak())2625{2626foundLoopToReduce = true;2627for (int i = 0; i < 256; i++)2628{2629if (tmpTable[i] != 0)2630foundLoopToReduce = false;2631}26322633if (foundLoopToReduce)2634termchar = TERMCHAR; //It needs to be greater than zero, dummy termination char otherwise, i.e., it's not gonna be used,2635}2636if (!foundLoopToReduce && comp->cg()->getSupportsArrayTranslateTROT()) //try ascii2637{2638foundLoopToReduce = true;2639for (int i = 0; i < 256; i++)2640{2641bool excluded = tmpTable[i] != 0;2642bool 
nonASCII = i >= 128;2643if (excluded != nonASCII)2644foundLoopToReduce = false;2645}26462647if (foundLoopToReduce)2648termchar = 0; //to distinguish between ISO and ASCII when evaluating the node.2649}2650//2651if (!foundLoopToReduce)2652{2653traceMsg(comp, "failed because of reason 2\n");2654return false;2655}2656tableNode = TR::Node::create(baseRepNode, TR::iconst, 0, 0); //dummy table node, it's not gonna be used26572658if (termchar != 0)2659{2660// This is ISO 8859-1. The decode helpers accept all input bytes, and2661// they zero-extend each byte into a char. While that's the right way2662// to decode ISO 8859-1, it may not be what the loop asks us to do.2663if (isSignExtending)2664{2665traceMsg(comp,2666"Bailing CISCTransform2CopyingTROx due to sign-extension\n");2667return false;2668}2669}2670}2671else2672{2673//SIMD or TRxx2674if (isOutputChar)2675{2676//b2c2677termchar = TERMCHAR;2678uint16_t table[256];26792680bool isSIMDPossible = genSIMD && !isSignExtending;2681if (isSIMDPossible) {2682//SIMD possible only if we have consecutive chars, and no ranges2683for (int i = 0; i < 256; i++) {2684if (tmpTable[i] == 0) {2685if (stopchar != (i-1)) {2686isSIMDPossible = false;2687break;2688}2689stopchar++;2690}2691}26922693//case all are non-valid chars2694if (stopchar == -1 )2695isSIMDPossible = false;2696}26972698if (isSIMDPossible) {2699tableNode = TR::Node::create(baseRepNode, TR::aconst, 0, 0); //dummy table node, it's not gonna be used2700} else if (!genTRxx){2701traceMsg(comp, "Bailing CISCTransform2CopyingTROx: b2c - no proper evaluator available\n");2702return false;2703} else {2704for (int i = 0; i < 256; i++)2705{2706uint8_t excluded = tmpTable[i];2707uint16_t *entry = &table[i];2708if (excluded)2709*entry = TERMCHAR;2710else if (isSignExtending)2711*entry = (int8_t)i; // sign-extends up from 8-bit2712else2713*entry = i;2714}2715tableNode = createTableLoad(comp, baseRepNode, 8, 16, table, disptrace);2716}2717}2718else2719{2720//b2b2721termchar = -1;2722for 
(int i = 0; i < 256; i++)2723{2724uint8_t u8 = tmpTable[i];2725if (u8)2726{2727if (termchar < 0) termchar = i;2728tmpTable[i] = termchar;2729}2730else2731{2732tmpTable[i] = i;2733}2734}2735if (termchar < 0)2736{2737traceMsg(comp, "No terminating character found. Abandoning reduction.\n");2738return false;2739}2740tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace);2741}2742}27432744// find the target node of icmpge2745TR_CISCNode *icmpgeCISCnode = NULL;2746TrNodeInfo *icmpgeRepInfo = NULL;2747TR::Node *lenRepNode = NULL;2748List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID(); // ificmpge2749TR_CISCNode *lenNode;2750if (listT->isSingleton())2751{2752icmpgeCISCnode = listT->getListHead()->getData();2753lenNode = icmpgeCISCnode->getChild(1);2754}2755else2756{2757ListIterator<TR_CISCNode> li(listT);2758TR_CISCNode *n;2759lenNode = NULL;2760// find icmpge in the candidate region2761for (n = li.getFirst(); n; n = li.getNext())2762{2763if (trans->getCandidateRegion()->isIncluded(n))2764{2765if (icmpgeCISCnode != NULL)2766{2767if (disptrace)2768traceMsg(comp, "Bailing CISCTransform2CopyingTROx: multiple loop tests: %d and %d\n", icmpgeCISCnode->getID(), n->getID());2769return false;2770}2771icmpgeCISCnode = n;2772lenNode = n->getChild(1);2773}2774}2775TR_ASSERT(lenNode != NULL, "error!");2776}2777bool isDecrement;2778int32_t modLength;2779if (!testExitIF(icmpgeCISCnode->getOpcode(), &isDecrement, &modLength)) return false;2780if (isDecrement) return false;2781TR_ASSERT(modLength == 0 || modLength == 1, "error");2782icmpgeRepInfo = icmpgeCISCnode->getHeadOfTrNodeInfo();2783lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);27842785// Modify array header constant if necessary2786TR::Node *constLoad;2787if (trans->getOffsetOperand1())2788{2789constLoad = modifyArrayHeaderConst(comp, inputNode, trans->getOffsetOperand1());2790TR_ASSERT(constLoad, "Not implemented yet");2791if (disptrace) traceMsg(comp,"The array header const of 
inputNode %p is modified. (offset=%d)\n", inputNode, trans->getOffsetOperand1());2792}2793if (trans->getOffsetOperand2())2794{2795int32_t offset = trans->getOffsetOperand2() * (isOutputChar ? 2 : 1);2796constLoad = modifyArrayHeaderConst(comp, outputNode, offset);2797TR_ASSERT(constLoad, "Not implemented yet");2798if (disptrace) traceMsg(comp,"The array header const of outputNode %p is modified. (offset=%d)\n", outputNode, offset);2799}28002801// Prepare the arraytranslate node2802TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);2803TR::Node * lenTmpNode = createOP2(comp, TR::isub, lenRepNode, indexNode);2804if (modLength) lenTmpNode = createOP2(comp, TR::isub, lenTmpNode, TR::Node::create(indexRepNode, TR::iconst, 0, -modLength));2805TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lenTmpNode);2806TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, termchar);2807TR::Node * stopCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, stopchar);28082809TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);2810translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());2811translateNode->setAndIncChild(0, inputNode);2812translateNode->setAndIncChild(1, outputNode);2813translateNode->setAndIncChild(2, tableNode);2814translateNode->setAndIncChild(3, termCharNode);2815translateNode->setAndIncChild(4, lengthNode);2816translateNode->setAndIncChild(5, stopCharNode);28172818translateNode->setSourceIsByteArrayTranslate(true);2819translateNode->setTargetIsByteArrayTranslate(!isOutputChar);2820translateNode->setTermCharNodeIsHint(false);2821translateNode->setSourceCellIsTermChar(false);2822translateNode->setTableBackedByRawStorage(true);2823TR::SymbolReference * translateTemp = comp->getSymRefTab()->2824createTemporary(comp->getMethodSymbol(), TR::Int32);2825TR::Node * topOfTranslateNode = TR::Node::createStore(translateTemp, 
translateNode);28262827// prepare nodes that add the number of elements (which was translated) into the induction variables28282829TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), translateNode);2830if (ivNeedsUpdate)2831addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexNode, 1));28322833TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);2834TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);28352836TR::TreeTop * dstIndVarUpdateTreeTop = NULL;2837// update the derived induction variable accordingly as well2838//2839if (dstIndexRepNode)2840{2841// find the store corresponding to the derived induction variable2842//2843TR_CISCNode *loopTest = P->getImportantNode(1);2844ListIterator<TR_CISCNode> ni(P->getNodes());2845TR_CISCNode *jstore = NULL;2846TR::Node *dstIVStore = NULL;2847for (TR_CISCNode *n = ni.getFirst(); n; n = ni.getNext())2848{2849if (n->getNumSuccs() >= 1 &&2850n->getSucc(0) &&2851(n->getSucc(0)->getID() == loopTest->getID()))2852{2853jstore = n;2854break;2855}28562857}2858if (jstore)2859{2860///traceMsg(comp, "found jstore %p to be %d\n", jstore, jstore->getID());2861TR_CISCNode *matchJ = trans->getP2TRepInLoop(jstore);2862if (matchJ)2863{2864///traceMsg(comp, "found matching jstore %p to be %d\n", matchJ, matchJ->getID());2865///traceMsg(comp, "actual store node is %p\n", matchJ->getHeadOfTrNodeInfo()->_node);2866dstIVStore = matchJ->getHeadOfTrNodeInfo()->_node;2867}2868}28692870if (dstIVStore &&2871dstIVStore->getOpCode().hasSymbolReference() &&2872dstIVStore->getSymbolReference() == dstIndexVarSymRef)2873{2874// j = j + 1 (pattern=1)2875// final value j_final = j_start + arraytranslate + needsUpdate ? 
1 : 02876// or2877// j = i + offset (pattern=0)2878// final value j_final = i_final + offset (i_final has already been emitted in the previous TT)2879//2880dstIVStore = dstIVStore->duplicateTree();2881TR::Node * dstIndVarUpdateNode = NULL;2882if (pattern == 1)2883{2884TR::Node *dstAddCountNode = createOP2(comp, TR::iadd,2885TR::Node::createLoad(dstIndexRepNode, dstIndexVarSymRef),2886translateNode);2887if (dstIvNeedsUpdate)2888dstAddCountNode = TR::Node::create(TR::iadd, 2,2889dstAddCountNode,2890TR::Node::iconst(dstAddCountNode, 1));289128922893dstIndVarUpdateNode = TR::Node::createStore(dstIndexVarSymRef, dstAddCountNode);2894}2895else if (pattern == 0)2896{2897TR::Node *firstChild = dstIVStore->getFirstChild();2898if (firstChild->getOpCode().isAdd() || firstChild->getOpCode().isSub())2899{2900TR::Node *ivLoad = firstChild->getFirstChild();2901if (!ivLoad->getOpCode().hasSymbolReference() ||2902(ivLoad->getSymbolReference() != indexVarSymRef))2903{2904ivLoad->recursivelyDecReferenceCount();2905firstChild->setAndIncChild(0, TR::Node::createLoad(indexRepNode, indexVarSymRef));2906}2907}2908dstIndVarUpdateNode = dstIVStore;2909}2910if (dstIndVarUpdateNode)2911dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstIndVarUpdateNode);2912}2913}29142915// create Nodes if there are multiple exit points.2916TR::Node *icmpgeNode = NULL;2917TR::TreeTop *failDest = NULL;2918TR::TreeTop *okDest = NULL;2919TR::Block *compensateBlock0 = NULL;2920TR::Block *compensateBlock1 = NULL;2921if (isNeedGenIcmpge)2922{2923if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");29242925TR_ASSERT(icmpgeRepInfo, "Not implemented yet"); // current restriction2926okDest = retSameExit;2927failDest = icmpgeCISCnode->getDestination();2928// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()2929if (isCompensateCode)2930{2931compensateBlock1 = 
TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);2932compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);2933compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));2934compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));2935okDest = compensateBlock0->getEntry();2936failDest = compensateBlock1->getEntry();2937}2938TR_ASSERT(okDest != NULL && failDest != NULL && okDest != failDest, "error!");29392940// It actually generates "ificmplt" (NOT ificmpge!) in order to suppress a redundant goto block.2941icmpgeNode = TR::Node::createif(TR::ificmplt,2942TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),2943lenRepNode,2944okDest);2945}29462947// Insert nodes and maintain the CFG2948if (additionHigh)2949{2950TR_CISCNode *highCISCNode;2951TR_CISCNode *loadResult = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0));2952// Guaranteed above2953TR_ASSERT(additionHigh->getChild(0) == loadResult || additionHigh->getChild(1) == loadResult, "error!");2954highCISCNode = (additionHigh->getChild(0) == loadResult) ? 
additionHigh->getChild(1) :2955additionHigh->getChild(0);2956List<TR::Node> guardList(comp->trMemory());2957guardList.add(TR::Node::createif(TR::ificmpne, convertStoreToLoad(comp, highCISCNode->getHeadOfTrNodeInfo()->_node),2958TR::Node::create(lengthNode, TR::iconst, 0, 0)));2959block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree(), &guardList);2960}2961else2962{2963block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());2964}29652966// Create the fast path code2967block = trans->insertBeforeNodes(block);2968block->append(TR::TreeTop::create(comp, topOfTranslateNode));29692970block->append(indVarUpdateTreeTop);2971//block->append(indVarIncTreeTop);2972if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);2973block = trans->insertAfterNodes(block);29742975if (isNeedGenIcmpge)2976{2977block->append(TR::TreeTop::create(comp, icmpgeNode));2978if (isCompensateCode)2979{2980TR::CFG *cfg = comp->getFlowGraph();2981cfg->setStructure(NULL);2982TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();2983TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();2984compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]2985compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]2986cfg->insertBefore(compensateBlock0, orgNextBlock);2987cfg->insertBefore(compensateBlock1, compensateBlock0);2988cfg->join(block, compensateBlock1);2989}2990}2991else if (isCompensateCode)2992{2993block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]2994}29952996// set successor edge(s) to the original block2997if (!isNeedGenIcmpge)2998{2999trans->setSuccessorEdge(block, target);3000}3001else3002{3003trans->setSuccessorEdges(block,3004failDest->getEnclosingBlock(),3005okDest->getEnclosingBlock());3006}30073008return true;3009}30103011/**3012* Determine whether the 16-bit output values are sign- or 
zero-extended.3013*3014* Loops transformed by CISCTransform2CopyingTROx(TR_CISCTransformer*) are3015* loops that copy values from an input <tt>byte[]</tt> to an output3016* <tt>byte[]</tt> or <tt>char[]</tt>, When the output goes into a3017* <tt>char[]</tt>, the output values are never identical to the input values,3018* because they are wider. So each is the result of some integer conversion,3019* effectively like this:3020*3021\verbatim3022dest[j] = convert(src[i])3023\endverbatim3024*3025* In order to correctly transform the loop, it's important to know the3026* conversion operation. This function analyzes the loop to determine whether3027* the conversion is known to be a sign-extension (\c TR_yes), known to be a3028* zero-extension (\c TR_no), or neither (\c TR_maybe).3029*3030* Note that a result of \c TR_maybe necessarily prevents the transformation3031* from succeeding. A result of \c TR_no allows the transformation to proceed,3032* since zero-extension was previously the tacit assumption. 
For contrast, an3033* effort is made to transform sign-extending loops (\c TR_yes), but doing so3034* is not always possible, even in cases where the corresponding zero-extending3035* loop can be transformed.3036*3037* \param[in] trans The optimization pass object.3038* \return \c TR_yes for sign-extension, \c TR_no for zero-extension, or \c3039* TR_maybe for unknown/neither.3040*/3041static TR_YesNoMaybe3042isSignExtendingCopyingTROx(TR_CISCTransformer *trans)3043{3044TR_CISCGraph *P = trans->getP();3045TR::Compilation *comp = trans->comp();30463047TR_CISCNode *patArrStore = P->getImportantNode(3);3048TR_CISCNode *patStoreConv = patArrStore->getChild(1);3049TR_ASSERT(3050patStoreConv->getOpcode() == TR_conversion3051|| patStoreConv->getIlOpCode().isConversion(),3052"isSignExtendingCopyingTROx: pattern store conversion not found\n");30533054TR_CISCNode *patLoadConv = patStoreConv->getChild(0);3055// In CopyingTROx(*), the child is an optional iadd, but not in3056// CopyingTROTInduction1 or CopyingTROOSpecial.3057if (patLoadConv->getOpcode() == TR::iadd)3058patLoadConv = patLoadConv->getChild(0);30593060TR_ASSERT(3061patLoadConv->getOpcode() == TR_conversion3062|| patLoadConv->getIlOpCode().isConversion(),3063"isSignExtendingCopyingTROx: pattern load conversion not found\n");30643065TR_CISCNode *tgtStoreConv = trans->getP2TRepInLoop(patStoreConv);3066TR_CISCNode *tgtLoadConv = trans->getP2TRepInLoop(patLoadConv);3067TR_ASSERT(3068tgtStoreConv != NULL || tgtLoadConv != NULL,3069"isSignExtendingCopyingTROx: converted from byte to char without "3070"any conversions\n");30713072TR::Node *storeConv = NULL;3073if (tgtStoreConv != NULL)3074storeConv = tgtStoreConv->getHeadOfTrNodeInfo()->_node;30753076TR::Node *loadConv = NULL;3077if (tgtLoadConv != NULL)3078loadConv = tgtLoadConv->getHeadOfTrNodeInfo()->_node;30793080if (storeConv == NULL || loadConv == NULL) // only one conversion3081{3082TR::Node *loneConv = loadConv != NULL ? 
loadConv : storeConv;3083TR::ILOpCode op = loneConv->getOpCode();3084TR_ASSERT(3085op.isZeroExtension() || op.isSignExtension(),3086"isSignExtendingCopyingTROx: lone conversion not an extension\n");3087return op.isSignExtension() ? TR_yes : TR_no;3088}30893090// Two conversions.3091TR::ILOpCode firstOp = loadConv->getOpCode();3092if (!firstOp.isInteger() && !firstOp.isUnsigned())3093{3094traceMsg(comp,3095"isSignExtendingCopyingTROx: conversion through non-integer type\n");3096return TR_maybe;3097}30983099// The first conversion has to be a (zero- or sign-) extension, because Int83100// is the smallest available integer type.3101TR_ASSERT(3102firstOp.isZeroExtension() || firstOp.isSignExtension(),3103"isSignExtendingCopyingTROx: first conversion not an extension\n");31043105// If it produces a 16-bit integer directly, the second would have to be a3106// "conversion" from short to short.3107TR_ASSERT(3108!firstOp.isShort(),3109"isSignExtendingCopyingTROx: first conversion directly to short\n");31103111// So the intermediate type is an integer type longer than 16-bit, and the3112// second conversion has to be a truncation to 16 bits. The net effect is3113// either a zero- or sign-extension depending only on the first conversion.3114return firstOp.isSignExtension() ? 
TR_yes : TR_no;3115}31163117bool3118CISCTransform2CopyingTROxAddDest1(TR_CISCTransformer *trans)3119{3120trans->setOffsetOperand2(1); // add offset of destination with 13121return CISCTransform2CopyingTROx(trans);3122}31233124/****************************************************************************************3125Corresponding Java-like Pseudo Program3126int v1, v3, end;3127byte v0[ ];3128byte v2[ ];3129while(true){3130if (booltable(v0[v1])) break;3131v2[v3] = v0[v1];3132v1++;3133v3++;3134if (v1 >= end) break;3135}31363137Note 1: It allows that variables v1 and v3 are identical.3138****************************************************************************************/3139TR_PCISCGraph *3140makeCopyingTROOSpecialGraph(TR::Compilation *c, int32_t ctrl)3141{3142TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTROOSpecial", 0, 16);3143/********************************************************************** opc id dagId #cfg #child other/pred/children */3144TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v0); // src array base3145TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v1); // src array index3146TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1); tgt->addNode(v2); // dst array base3147TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v3); // dst array index3148TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(idx0);3149TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 1); tgt->addNode(idx1);3150TR_PCISCNode *vorc= new 
(PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),7, 0, 0); tgt->addNode(vorc); // length3151TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah); // array header3152TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(cm1);3153TR_PCISCNode *lc1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 1); // element size for input3154TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_allconst, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor3155TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);3156TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR::bloadi, TR::Int8, v0, idx0, cmah, lc1);3157TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);3158TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3); tgt->addNode(n4); // optional3159TR_PCISCNode *n5 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n4, TR::bloadi, TR::Int8, v0, idx0, cmah, mulFactor);3160TR_PCISCNode *nn0 = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_NoConversion, 1, n5, TR::bstorei, TR::Int8, v2, idx1, cmah, mulFactor, n5);3161TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, nn0, v1, cm1);3162TR_PCISCNode *nn1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n6, v3, cm1); tgt->addNode(nn1);3163TR_PCISCNode *nn2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, tgt->incNumNodes(), 1, 1, 1, nn1, nn1); tgt->addNode(nn2); // optional3164TR_PCISCNode *nn3 = new 
(PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nn2, nn2); tgt->addNode(nn3); // optional3165TR_PCISCNode *nn6 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nn3, nn3, v3); tgt->addNode(nn6);3166TR_PCISCNode *n7 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nn6, v1, vorc); tgt->addNode(n7);3167TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);31683169n4->setSucc(1, n8);3170n7->setSuccs(ent->getSucc(0), n8);31713172n4->setIsOptionalNode();3173nn2->setIsOptionalNode();3174nn3->setIsOptionalNode();31753176n3->setIsChildDirectlyConnected();3177n7->setIsChildDirectlyConnected();31783179tgt->setSpecialCareNode(0, n4); // TR_booltable3180tgt->setEntryNode(ent);3181tgt->setExitNode(n8);3182tgt->setImportantNodes(n4, n7, n2, nn0, NULL);3183tgt->setNumDagIds(14);3184tgt->createInternalData(1);31853186tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);3187tgt->setTransformer(CISCTransform2CopyingTROx);3188tgt->setInhibitBeforeVersioning();3189tgt->setAspects(isub|sameTypeLoadStore, ILTypeProp::Size_1, ILTypeProp::Size_1);3190tgt->setNoAspects(call|bndchk|bitop1, 0, 0);3191tgt->setMinCounts(1, 2, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount3192tgt->setHotness(warm, false);3193static char *versionLengthStr = feGetEnv("TR_CopyingTROOSpecialGraph_versionLength");3194static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 19);3195tgt->setVersionLength(versionLength); // depending on each architecture3196tgt->setPatternType(1); // dest. 
induction variable is updated by incrementing3197return tgt;3198}319932003201/****************************************************************************************3202Corresponding Java-like pseudocode32033204int i, j, end;3205byte byteArray[ ];3206char charArray[ ];3207while(true){3208char T = (char)byteArray[i];3209if (booltable(T)) break;3210(T = T + high;) // optional3211charArray[j] = T;3212i++;3213j++;3214if (i >= end) break;3215}32163217Note 1: Idiom allows variables i and j to be identical.3218Note 2: The optional addition "T = T + high" is to optimize java/lang/String.<init>([BIII)V.3219We will version the loop by "if (high == 0)".3220****************************************************************************************/3221TR_PCISCGraph *3222makeCopyingTROxGraph(TR::Compilation *c, int32_t ctrl, int pattern)3223{3224TR_ASSERT(pattern == 0 || pattern == 1, "not implemented");3225char *name = (char *)TR_MemoryBase::jitPersistentAlloc(16);3226sprintf(name, "CopyingTROx(%d)",pattern);3227TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);3228/**************************************************************************** opc id dagId #cfg #child other/pred/children */3229TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(),16, 0, 0, 0);3230tgt->addNode(byteArray); // src array base3231TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(i); // src array index3232TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(),14, 0, 0, 1); tgt->addNode(charArray); // dst array base3233TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(j); // dst array index3234TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), 
TR_arrayindex, TR::NoType, tgt->incNumNodes(),12, 0, 0, 0); tgt->addNode(idx0);3235TR_PCISCNode *idx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 1); tgt->addNode(idx1);3236TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),10, 0, 0); tgt->addNode(end); // length3237TR_PCISCNode *high = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),9, 0, 0); tgt->addNode(high); // optional3238TR_PCISCNode *aHeader0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(aHeader0); // array header3239TR_PCISCNode *aHeader1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(aHeader1); // array header3240TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1); tgt->addNode(increment);3241TR_PCISCNode *lc1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 1); // element size for input3242TR_PCISCNode *elemSize = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(elemSize); // Multiply Factor3243TR_PCISCNode *offset = NULL;3244if (pattern == 0)3245{3246offset = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(offset); // optional3247}3248TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);3249TR_PCISCNode *byteAddr = createIdiomArrayLoadInLoop(tgt, ctrl, 1, entry, TR::bloadi, TR::Int8, byteArray, idx0, aHeader0, lc1);3250TR_PCISCNode *b2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, byteAddr, 
byteAddr);3251tgt->addNode(b2iNode);3252TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, b2iNode, b2iNode);3253tgt->addNode(exitTest); // optional3254TR_PCISCNode *add = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, exitTest, b2iNode, high); tgt->addNode(add); // optional3255TR_PCISCNode *charAddr = createIdiomArrayStoreInLoop(tgt, ctrl, 1, add, TR_ibcstore, TR::NoType, charArray, idx1, aHeader1, elemSize, add);3256TR_PCISCNode *iStore = createIdiomDecVarInLoop(tgt, ctrl, 1, charAddr, i, increment);3257TR_PCISCNode *jStore = NULL;3258switch(pattern)3259{3260case 0:3261jStore = createIdiomIncVarInLoop(tgt, ctrl, 1, iStore, j, i, offset); // j = i + offset; (optional)3262break;3263case 1:3264jStore = createIdiomDecVarInLoop(tgt, ctrl, 1, iStore, j, increment); // j = j + 1; (optional)3265break;3266default:3267TR_ASSERT(0, "not implemented!");3268return NULL;3269}3270TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end); tgt->addNode(loopTest);3271TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);32723273exitTest->setSucc(1, exit);3274loopTest->setSuccs(entry->getSucc(0), exit);32753276jStore->getChild(0)->setIsOptionalNode();3277jStore->setIsOptionalNode();3278j->setIsOptionalNode();32793280exitTest->setIsOptionalNode();3281add->setIsOptionalNode();3282high->setIsOptionalNode();3283if (offset) offset->setIsOptionalNode();32843285b2iNode->setIsChildDirectlyConnected();3286loopTest->setIsChildDirectlyConnected();32873288tgt->setSpecialCareNode(0, exitTest); // TR_booltable3289tgt->setEntryNode(entry);3290tgt->setExitNode(exit);3291tgt->setImportantNodes(exitTest, loopTest, byteAddr, charAddr, 
add);3292tgt->setNumDagIds(17);3293tgt->createInternalData(1);32943295tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);3296tgt->setTransformer(CISCTransform2CopyingTROx);3297tgt->setInhibitBeforeVersioning();3298tgt->setAspects(isub|mul, ILTypeProp::Size_1, existAccess);3299tgt->setNoAspects(call|bndchk|bitop1, 0, 0);3300tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount3301tgt->setHotness(warm, false);3302static char *versionLengthStr = feGetEnv("TR_CopyingTROxGraph_versionLength");3303static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);3304tgt->setVersionLength(versionLength); // depending on each architecture33053306tgt->setPatternType(pattern);33073308return tgt;3309}331033113312/****************************************************************************************3313Corresponding Java-like Pseudo Program3314int v1, end;3315int v3; // optional3316int v4; // v4 usually has the value of "v3 - v1".3317byte v0[ ];3318char v2[ ];3319while(true){3320char T = (char)v0[v1];3321if (booltable(T)) break;3322v2[v1+v4] = T;3323v1++;3324v3 = v1+v4; // optional3325if (v1 >= end) break;3326}3327****************************************************************************************/3328TR_PCISCGraph *3329makeCopyingTROTInduction1Graph(TR::Compilation *c, int32_t ctrl, int32_t pattern)3330{3331TR_ASSERT(pattern == 0 || pattern == 1, "not implemented");3332char *name = (char *)TR_MemoryBase::jitPersistentAlloc(26);3333sprintf(name, "CopyingTROTInduction1(%d)",pattern);3334TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);3335/********************************************************************* opc id dagId #cfg #child other/pred/children */3336TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v0); // src array base3337TR_PCISCNode *v1 = new 
(PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v1); // src array index3338TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1); tgt->addNode(v2); // dst array base3339TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v3); // actual dst array index (optional)3340TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 2); tgt->addNode(v4); // difference of dst array index from src array index3341TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(vorc); // length3342TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(cmah0); // array header3343TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(cmah1); // array header3344TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(cm1);3345TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 1); // element size3346TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size3347TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);3348TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR::bloadi, TR::Int8, v0, v1, cmah0, c1);3349TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);3350TR_PCISCNode *n4 = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3); tgt->addNode(n4); // optional3351TR_PCISCNode *n45 = (pattern == 1) ? createIdiomDecVarInLoop(tgt, ctrl, 1, n4, v1, cm1) : n4;3352TR_PCISCNode *n5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n45, v1, v4); tgt->addNode(n5);3353TR_PCISCNode *nn0 = createIdiomCharArrayStoreInLoop(tgt, ctrl, 1, n5, v2, n5, cmah1, c2, n3);3354TR_PCISCNode *n6 = (pattern == 0) ? createIdiomDecVarInLoop(tgt, ctrl, 1, nn0, v1, cm1) : nn0;3355TR_PCISCNode *op0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n6, n5, cm1); tgt->addNode(op0); // (optional)3356TR_PCISCNode *op1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, op0,op0, v3); tgt->addNode(op1); // (optional)3357TR_PCISCNode *n7 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, op1, v1, vorc); tgt->addNode(n7);3358TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);33593360n4->setSucc(1, n8);3361n7->setSuccs(ent->getSucc(0), n8);33623363n4->setIsOptionalNode();3364v3->setIsOptionalNode();3365op0->setIsOptionalNode();3366op1->setIsOptionalNode();33673368op1->setIsChildDirectlyConnected();3369n3->setIsChildDirectlyConnected();3370n7->setIsChildDirectlyConnected();33713372tgt->setSpecialCareNode(0, n4); // TR_booltable3373tgt->setEntryNode(ent);3374tgt->setExitNode(n8);3375tgt->setImportantNodes(n4, n7, n2, nn0, NULL);3376tgt->setNumDagIds(14);3377tgt->createInternalData(1);33783379tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);3380tgt->setTransformer(pattern == 0 ? 
CISCTransform2CopyingTROx : CISCTransform2CopyingTROxAddDest1);3381tgt->setInhibitBeforeVersioning();3382tgt->setAspects(isub|mul, ILTypeProp::Size_1, ILTypeProp::Size_2);3383tgt->setNoAspects(call|bndchk|bitop1, 0, 0);3384tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount3385tgt->setHotness(warm, false);3386static char *versionLengthStr = feGetEnv("TR_CopyingTROTInduction1Graph_versionLength");3387static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);3388tgt->setVersionLength(versionLength); // depending on each architecture3389return tgt;3390}339133923393//////////////////////////////////////////////////////////////////////////3394//////////////////////////////////////////////////////////////////////////3395//////////////////////////////////////////////////////////////////////////33963397//*****************************************************************************************3398// IL code generation for exploiting the TROT instruction3399// This is the case where the function table is prepared by the user program.3400// Input: ImportantNodes(0) - booltable3401// ImportantNodes(1) - ificmpge3402// ImportantNodes(2) - address of the source array3403// ImportantNodes(3) - address of the destination array3404//*****************************************************************************************3405#define TERMBYTE (0x0B) // Vertical Tab is rarely used, I guess...3406bool3407CISCTransform2TROTArray(TR_CISCTransformer *trans)3408{3409TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");3410const bool disptrace = DISPTRACE(trans);3411TR::Node *trNode;3412TR::TreeTop *trTreeTop;3413TR::Block *block;3414TR_CISCGraph *P = trans->getP();3415List<TR_CISCNode> *P2T = trans->getP2T();3416TR::Compilation *comp = trans->comp();34173418TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not 
implemented yet!");3419if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;34203421trans->findFirstNode(&trTreeTop, &trNode, &block);3422if (!block) return false; // cannot find34233424if (isLoopPreheaderLastBlockInMethod(comp, block))3425{3426traceMsg(comp, "Bailing CISCTransform2TROTArray due to null TT - might be a preheader in last block of method\n");3427return false;3428}34293430TR_CISCNode * inputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(2));3431TR_CISCNode * outputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));3432if (!inputCISCNode || !outputCISCNode) return false;3433TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();3434TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();34353436TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *mapBaseRepNode;3437getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &mapBaseRepNode);3438TR::Node *cmpRepNode = trans->getP2TRep(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;3439TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();3440TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? 
dstIndexRepNode->getSymbolReference() : NULL;3441if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;3442if (dstIndexVarSymRef == indexVarSymRef)3443{3444dstIndexRepNode = NULL;3445dstIndexVarSymRef = NULL;3446}3447if (dstIndexVarSymRef)3448{3449if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0) return false;3450}3451TR_ScratchList<TR::Node> variableList(comp->trMemory());3452variableList.add(indexRepNode);3453if (dstIndexRepNode) variableList.add(dstIndexRepNode);3454if (!isIndexVariableInList(inputNode, &variableList) ||3455!isIndexVariableInList(outputNode, &variableList))3456{3457dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);3458return false;3459}3460TR::Block *target = trans->analyzeSuccessorBlock();34613462// Prepare arraytranslate node3463TR::Node * tableNode = createLoad(mapBaseRepNode);3464TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);3465TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(),3466createOP2(comp, TR::isub, cmpRepNode->getChild(1)->duplicateTree(),3467indexNode));3468TR_CISCNode *ifeqCiscNode = trans->getP2TRep(P->getImportantNode(0));3469TR::Node * termCharNode;3470if (ifeqCiscNode)3471termCharNode = createLoad(ifeqCiscNode->getHeadOfTrNode()->getChild(1));3472else3473termCharNode = TR::Node::create(inputNode, TR::iconst, 0, TERMBYTE);3474TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);347534763477TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);3478translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());3479translateNode->setAndIncChild(0, inputNode);3480translateNode->setAndIncChild(1, outputNode);3481translateNode->setAndIncChild(2, tableNode);3482translateNode->setAndIncChild(3, termCharNode);3483translateNode->setAndIncChild(4, 
lengthNode);3484translateNode->setAndIncChild(5, stoppingNode);34853486translateNode->setSourceIsByteArrayTranslate(true);3487translateNode->setTargetIsByteArrayTranslate(false);3488translateNode->setTermCharNodeIsHint(ifeqCiscNode ? false : true);3489translateNode->setSourceCellIsTermChar(false);3490translateNode->setTableBackedByRawStorage(false);3491TR::Node * topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);3492TR::Node * lengthTRxx = translateNode;34933494if (target)3495{3496// prepare nodes that add the number of elements (which was translated) into the induction variables34973498/*lengthTRxx = createOP2(comp, TR::isub,3499translateNode,3500TR::Node::create(translateNode, TR::iconst, 0, -1)); */3501}3502else3503{3504// For Multiple Successor Blocks, we have a test character condition in the3505// loop, which may lead to a different successor block than the fallthrough.3506// We need to be able to distinguish the following two scenarios, which both3507// would load the last character in the source array:3508// 1. no test character found (translateNode == lengthNode).3509// 2. test character found in the last element(translateNode < lengthNode).3510// The final IV value is always (IV + translateNode).3511// However, under case 1, the element loaded is at index (IV + translateNode - 1).3512// Under case 2, the element loaded is at index (IV + translateNode).3513// As such, we will subtract 1 in the existing final IV calculation for case 1,3514// so that any array accesses will be correctly indexed. The final IV value will3515// be increased by 1 again before we hit the exit test.3516lengthTRxx = TR::Node::create(TR::isub, 2, translateNode,3517TR::Node::create(TR::icmpeq, 2, translateNode,3518lengthNode->getOpCodeValue() == TR::i2l ? 
lengthNode->getChild(0)3519: lengthNode));3520}35213522TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), lengthTRxx);3523TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);3524TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);35253526TR::TreeTop * dstIndVarUpdateTreeTop = NULL;3527if (dstIndexRepNode)3528{3529dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, createStoreOP2(comp, dstIndexVarSymRef, TR::iadd,3530dstIndexVarSymRef, lengthTRxx, dstIndexRepNode));3531}35323533// Insert nodes and maintain the CFG3534block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());35353536// Create the fast path code3537block = trans->insertBeforeNodes(block);3538block->append(TR::TreeTop::create(comp, topOfTranslateNode));3539block->append(indVarUpdateTreeTop);3540if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);3541block = trans->insertAfterNodes(block);35423543if (target)3544{3545// A single successor3546trans->setSuccessorEdge(block, target);3547}3548else3549{3550// Multiple successors3551TR::SymbolReference * translateTemp = comp->getSymRefTab()->3552createTemporary(comp->getMethodSymbol(), TR::Int32);3553TR_ASSERT(ifeqCiscNode, "Expecting equal CISC node.");3554TR::Node *ifeqNode = ifeqCiscNode->getHeadOfTrNode()->duplicateTree();3555if (ifeqCiscNode->getOpcode() != ifeqNode->getOpCodeValue())3556{3557TR::Node::recreate(ifeqNode, (TR::ILOpCodes)ifeqCiscNode->getOpcode());3558ifeqNode->setBranchDestination(ifeqCiscNode->getDestination());3559}3560TR::Node *tempStore = TR::Node::createStore(translateTemp, ifeqNode->getAndDecChild(0));3561ifeqNode->setAndIncChild(0, TR::Node::createLoad(ifeqNode, translateTemp));3562TR::TreeTop *tempStoreTTop = TR::TreeTop::create(comp, tempStore);3563TR::TreeTop *ifeqTTop = TR::TreeTop::create(comp, ifeqNode);3564// Fix up the IV value by adding 1 if translateNode == lengthNode (where no test char was 
found). See comment above.3565TR::Node *incIndex = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthTRxx->getChild(1), indexRepNode);3566TR::TreeTop *incIndexTTop = TR::TreeTop::create(comp, incIndex);35673568TR::TreeTop *last = block->getLastRealTreeTop();3569last->join(tempStoreTTop);3570tempStoreTTop->join(incIndexTTop);3571if (dstIndVarUpdateTreeTop)3572{3573TR::Node * incDstIndex = createStoreOP2(comp, dstIndexVarSymRef, TR::isub, dstIndexVarSymRef, -1, dstIndexRepNode);3574TR::TreeTop *incDstIndexTTop = TR::TreeTop::create(comp, incDstIndex);3575incIndexTTop->join(incDstIndexTTop);3576last = incDstIndexTTop;3577}3578else3579{3580last = incIndexTTop;3581}3582last->join(ifeqTTop);3583ifeqTTop->join(block->getExit());3584trans->setSuccessorEdges(block,3585NULL, // rely on automatic detection3586ifeqNode->getBranchDestination()->getEnclosingBlock());3587}35883589return true;3590}359135923593/****************************************************************************************3594Corresponding Java-like pseudocode3595int i, j, end, exitValue;3596byte byteArray[ ];3597char charArray[ ], map[ ];3598while(true){3599char c = map[byteArray[i]];3600if (c == exitValue) break;3601charArray[j] = c;3602i++;3603j;3604if (i >= end) break;3605}360636073608Note 1: Idiom allows that variables i and j are identical.3609****************************************************************************************/3610TR_PCISCGraph *3611makeTROTArrayGraph(TR::Compilation *c, int32_t ctrl)3612{3613TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TROTArray", 0, 16);3614/************************************************************************** opc id dagId #cfg #child other/pred/children */3615TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0);3616tgt->addNode(byteArray); // src array base3617TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), 
TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);3618tgt->addNode(i); // src array index3619TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);3620tgt->addNode(charArray); // dst array base3621TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(j); // dst array index3622TR_PCISCNode *map = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2); tgt->addNode(map); // map array base3623TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(idx0);3624TR_PCISCNode *idx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 1); tgt->addNode(idx1);3625TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),9, 0, 0); tgt->addNode(end); // length3626TR_PCISCNode *exitValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(exitValue);// exitvalue (optional)3627TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);3628tgt->addNode(aHeader); // array header constant3629TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1); tgt->addNode(increment);3630TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 1); // element size3631TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 2); // element size3632TR_PCISCNode *offset = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(offset); // 
optional3633TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);3634TR_PCISCNode *byteAddr = createIdiomArrayLoadInLoop(tgt, ctrl, 1, entry, TR::bloadi, TR::Int8, byteArray, idx0, aHeader, c1);3635TR_PCISCNode *convNode, *mapAddr;3636if (ctrl & CISCUtilCtl_64Bit)3637{3638convNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2l, TR::Int64, tgt->incNumNodes(), 1, 1, 1, byteAddr, byteAddr); tgt->addNode(convNode);3639mapAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, convNode, map, convNode, aHeader, elemSize);3640}3641else3642{3643convNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, byteAddr, byteAddr); tgt->addNode(convNode);3644mapAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, convNode, map, convNode, aHeader, elemSize);3645}3646TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, mapAddr, mapAddr); tgt->addNode(c2iNode); // optional3647TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, c2iNode, c2iNode, exitValue); // optional3648tgt->addNode(exitTest);3649TR_PCISCNode *charAddr = createIdiomCharArrayStoreInLoop(tgt, ctrl, 1, exitTest, charArray, idx1, aHeader, elemSize, c2iNode);3650TR_PCISCNode *iStore = createIdiomDecVarInLoop(tgt, ctrl, 1, charAddr, i, increment);3651TR_PCISCNode *jStore = createIdiomIncVarInLoop(tgt, ctrl, 1, iStore, j, i, offset); // optional3652TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end); tgt->addNode(loopTest);3653TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);36543655exitTest->setSucc(1, 
exit);3656loopTest->setSuccs(entry->getSucc(0), exit);36573658jStore->getChild(0)->setIsOptionalNode();3659jStore->setIsOptionalNode();3660j->setIsOptionalNode();3661offset->setIsOptionalNode();36623663convNode->setIsChildDirectlyConnected();3664loopTest->setIsChildDirectlyConnected();3665charAddr->setIsChildDirectlyConnected(false);36663667exitTest->setIsOptionalNode();3668exitValue->setIsOptionalNode();3669c2iNode->setIsOptionalNode();3670c2iNode->getHeadOfParents()->setIsOptionalNode();36713672tgt->setSpecialCareNode(0, convNode); // TR_booltable3673tgt->setEntryNode(entry);3674tgt->setExitNode(exit);3675tgt->setImportantNodes(exitTest, loopTest, byteAddr->getChild(0), charAddr->getChild(0));3676tgt->setNumDagIds(17);3677tgt->createInternalData(1);36783679tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);3680tgt->setTransformer(CISCTransform2TROTArray);3681tgt->setInhibitBeforeVersioning();3682tgt->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_2);3683tgt->setNoAspects(call|bndchk|bitop1, 0, 0);3684tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount3685tgt->setHotness(warm, false);3686static char *versionLengthStr = feGetEnv("TR_CopyingTRTOInduction1Graph_versionLength");3687static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 
0 : 8);3688tgt->setVersionLength(versionLength); // depending on each architecture3689return tgt;3690}369136923693//////////////////////////////////////////////////////////////////////////3694//////////////////////////////////////////////////////////////////////////3695//////////////////////////////////////////////////////////////////////////3696//*****************************************************************************************3697// IL code generation for exploiting the TRTx instruction3698// This is the case where the compiler will create the function table by analyzing booltable.3699// Input: ImportantNode(0) - booltable3700// ImportantNode(1) - ificmpge3701// ImportantNode(2) - load of the source array3702// ImportantNode(3) - store of the destination array3703// ImportantNode(4) - another ificmpxx if exists (optional)3704//*****************************************************************************************3705bool3706CISCTransform2CopyingTRTx(TR_CISCTransformer *trans)3707{3708const bool disptrace = DISPTRACE(trans);3709TR::Node *trNode;3710TR::TreeTop *trTreeTop;3711TR::Block *block;3712TR_CISCGraph *P = trans->getP();3713List<TR_CISCNode> *P2T = trans->getP2T();3714TR::Compilation *comp = trans->comp();3715bool isOutputChar = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();3716bool genTRxx = comp->cg()->getSupportsArrayTranslateTRxx();3717bool genSIMD = comp->cg()->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDArrayTranslate);37183719if (isOutputChar && genSIMD && !genTRxx){3720traceMsg(comp, "Bailing CISCTransform2CopyingTRTx : c2c - no proper evaluator available\n");3721return false;3722}372337243725trans->findFirstNode(&trTreeTop, &trNode, &block);3726if (!block)3727return false; // cannot find37283729if (isLoopPreheaderLastBlockInMethod(comp, block))3730{3731traceMsg(comp, "Bailing CISCTransform2CopyingTRTx due to null TT - 
might be a preheader in last block of method\n");3732return false;3733}37343735TR_CISCNode * inputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));3736TR_CISCNode * outputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0));3737TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();3738TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();37393740TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode;3741getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode);3742if (indexRepNode == 0) indexRepNode = dstIndexRepNode;3743TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();3744TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;3745if (indexVarSymRef == dstIndexVarSymRef)3746{3747dstIndexRepNode = NULL;3748dstIndexVarSymRef = NULL;3749}3750if (trans->countGoodArrayIndex(indexVarSymRef) == 0 &&3751(!dstIndexVarSymRef || trans->countGoodArrayIndex(dstIndexVarSymRef) == 0))3752{3753if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p, %p\n",indexRepNode,dstIndexRepNode);3754return false;3755}3756TR_ScratchList<TR::Node> variableList(comp->trMemory());3757variableList.add(indexRepNode);3758if (dstIndexRepNode) variableList.add(dstIndexRepNode);3759if (!isIndexVariableInList(inputNode, &variableList) ||3760!isIndexVariableInList(outputNode, &variableList))3761{3762dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction variable updates\n", inputNode, outputNode);3763return false;3764}37653766TR::Block *target = trans->analyzeSuccessorBlock();3767if (!target) // multiple successors3768{3769// current restrictions. 
allow only the case where the number of successors is greater than 3.3770if (trans->getNumOfBBlistSucc() > 3)3771{3772if (disptrace) traceMsg(comp, "trans->getNumOfBBlistSucc() is %d.",trans->getNumOfBBlistSucc());3773return false;3774}3775}37763777// Check if there is idiom specific node insertion.3778// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()3779bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);37803781// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)3782bool isNeedGenIcmpge = (!target || isCompensateCode);37833784TR::Node *tableNode;3785uint8_t *tmpTable = (uint8_t*)comp->trMemory()->allocateMemory(65536, stackAlloc);3786bool isAllowSourceCellTermChar = false;37873788int count;3789TR::TreeTop *retSameExit = NULL;3790if ((count = trans->analyzeCharBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)3791{3792if (disptrace) traceMsg(comp, "trans->analyzeCharBoolTable failed\n");3793return false;3794}37953796if (!retSameExit) // all destinations of booltable are not same3797{3798traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");3799return false;3800}38013802// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.3803if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)3804{3805traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. Abandoning reduction.\n",3806retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),3807target->getNumber(), target);3808return false;3809}38103811// check if the induction variable needs to be updated by 13812// this depends on whether the induction variable is incremented3813// before the boolTable exit or after (ie. 
before the loop driving test)3814//3815TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;3816bool ivNeedsUpdate = false;3817bool dstIvNeedsUpdate = false;3818if (0 && boolTableExit)3819{3820TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;3821traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());3822ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);3823if (dstIndexVarSymRef)3824dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);3825}38263827// Try to find a terminal byte (but we might not find it in many cases...)3828int termchar = -1;3829int stopchar = -1;3830if (comp->cg()->getSupportsArrayTranslateTRTO255() || comp->cg()->getSupportsArrayTranslateTRTO() )3831{3832if (isOutputChar)3833return false;38343835for (int i = 256; i < 65536; i++)3836if (tmpTable[i] != 1)3837return false;3838if (comp->cg()->getSupportsArrayTranslateTRTO255())3839{3840for (int i = 0; i < 256; i++)3841if (tmpTable[i] != 0)3842return false;3843termchar = 0x0ff00ff00;3844}3845else3846{3847bool allOnes = true;3848bool allZeros = true;38493850for (int i = 0; i < 128; i++)3851if (tmpTable[i] != 0)3852return false;38533854for (int i = 128; i < 256; i++)3855{3856uint8_t u8 = tmpTable[i];3857if (u8 == 0)3858allOnes = false;3859else if (u8 == 1)3860allZeros = false;3861else3862{3863allOnes = false;3864allZeros = false;3865}3866}38673868if (allZeros && !allOnes) //this is 255 (ISO_8859_1)3869termchar = 0x0ff00ff00;3870else if (allOnes && !allZeros) //this is 127 (ASCII)3871termchar = 0x0ff80ff80;3872else3873return false;3874}3875//termchar = TERMBYTE; //It needs to be greater than zero, dummy termination char otherwise, i.e., it's not gonna be used,3876tableNode = TR::Node::create(baseRepNode, TR::iconst, 0, 0); //dummy table node, it's not gonna be used3877}3878else //Z3879{3880if 
(!isOutputChar)3881{3882uint8_t termByteTable[256];3883memset(termByteTable, 0, 256);3884int i;3885for (i = 0; i < 65536; i++)3886{3887if (tmpTable[i] == 0) {3888if ( i >= 256)3889return false;3890termByteTable[i] = 1;3891}3892}389338943895bool isSIMDPossible = genSIMD;3896if (isSIMDPossible) {3897//SIMD possible only if we have consecutive chars, and no ranges3898for (int i = 0; i < 256; i++) {3899if (tmpTable[i] == 0) {3900if (stopchar != (i-1)) {3901isSIMDPossible = false;3902break;3903}3904stopchar++;3905}3906}39073908//case all non valid chars3909if (stopchar == -1 )3910isSIMDPossible = false;3911}39123913if (isSIMDPossible) {3914tableNode = TR::Node::create(baseRepNode, TR::aconst, 0, 0); //dummy table node, it's not gonna be used3915} else if(!genTRxx){3916traceMsg(comp, "Bailing CISCTransform2CopyingTRTx : c2b - no proper evaluator available\n");3917return false;3918} else {3919//TRxx3920for (i = 256; --i >= 0; )3921{3922if (termByteTable[i] == 0)3923{3924termchar = i; // find termchar;3925break;3926}3927}39283929// Create the function table for TRTO3930if (termchar < 0) // no room of termchar3931{3932isAllowSourceCellTermChar = true; // Generated code will check whether the character is a delimiter.3933termchar = TERMBYTE;3934if (disptrace)3935traceMsg(comp, "setAllowSourceCellIsTermChar: ");3936}3937if (disptrace)3938traceMsg(comp, "termchar is 0x%02x\n", termchar);393939403941uint8_t *table = (uint8_t*)comp->trMemory()->allocateMemory(65536, stackAlloc);3942//Only check up to 256 because we already3943for (i = 0; i < 65536; i++)3944{3945uint8_t u8 = tmpTable[i];3946//Not sure I understand the reasning behind discarding those: chars larger than 256 which map to byte ... possible3947//we have the table to hold all chars. Value needs to represent i & ff3948//for now I moved the check up - so bail out earlier.3949//Reach here only if chars that need mapping are <256.3950//if (!u8 && i >= 256)3951// return false;3952table[i] = (uint8_t)(u8 ? 
termchar : i);3953}3954tableNode = createTableLoad(comp, baseRepNode, 16, 8, table, disptrace);3955}39563957}3958else3959{3960//c2c case - currently no SIMD support3961uint16_t *table = (uint16_t*)comp->trMemory()->allocateMemory(65536*2, stackAlloc);3962int i;3963for (i = 0; i < 65536; i++)3964{3965uint8_t u8 = tmpTable[i];3966if (u8)3967{3968if (termchar < 0)3969termchar = i;3970table[i] = termchar;3971}3972else3973{3974table[i] = i;3975}3976}3977tableNode = createTableLoad(comp, baseRepNode, 16, 16, table, disptrace);3978}3979}3980398139823983// find the target node of icmpge3984TR_ScratchList<TR_CISCNode> necessaryCmp(comp->trMemory());39853986// find icmpge in the candidate region3987sortList(P2T + P->getImportantNode(1)->getID(),3988&necessaryCmp, trans->getCandidateRegion());39893990bool isDecrement;3991int32_t modLength;3992TR::Node * cmpIndexNode;3993TR::Node * lenTmpNode;3994TR::Node * lengthNode;39953996TR_CISCNode *icmpgeCISCnode1 = NULL;3997TR::Node *lenRepNode1 = NULL;3998TR_CISCNode *icmpgeCISCnode2 = NULL;3999TR::Node *lenRepNode2 = NULL;4000TR::SymbolReference * icmpgeSymRef2 = NULL;40014002// We cannot handle too many loop exit tests.4003if (necessaryCmp.getSize() >= 3)4004{4005if (disptrace) traceMsg(comp, "Too many (%d) loop exit tests to transform correctly. Transformation only supports up to 2. Abandoning reduction.\n", necessaryCmp.getSize());4006return false;4007}40084009icmpgeCISCnode1 = necessaryCmp.getListHead()->getData();40104011if (!testExitIF(icmpgeCISCnode1->getOpcode(), &isDecrement, &modLength))4012{4013if (disptrace) traceMsg(comp, "testExitIF for icmpgeCISCnode1 failed\n");4014return false;4015}4016if (isDecrement)4017{4018if (disptrace) traceMsg(comp, "Not support a decrement loop. 
(icmpgeCISCnode1)\n");4019return false;4020}4021TR_ASSERT(modLength == 0 || modLength == 1, "error");40224023// The length calculation requires the initial value of the induction variable4024// used in the loop iteration comparison.4025TR::Node *cmpChild = icmpgeCISCnode1->getHeadOfTrNode()->getChild(0);40264027TR::SymbolReference * cmpVarSymRef = NULL;4028while (cmpChild && (cmpChild->getOpCode().isAdd() || cmpChild->getOpCode().isSub()))4029{4030cmpChild = cmpChild->getChild(0);4031}4032if (cmpChild && cmpChild->getOpCode().isLoadVar())4033cmpVarSymRef = cmpChild->getSymbolReference();4034if (cmpVarSymRef == NULL)4035{4036if (disptrace) traceMsg(comp, "Unable to determine the sym ref of induction variable in loop termination node.\n");4037return false;4038}40394040lenRepNode1 = createLoad(icmpgeCISCnode1->getChild(1)->getHeadOfTrNode());4041if (modLength) lenRepNode1 = createOP2(comp, TR::isub, lenRepNode1, TR::Node::create(baseRepNode, TR::iconst, 0, -modLength));4042cmpIndexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, cmpVarSymRef);4043lenTmpNode = createOP2(comp, TR::isub, lenRepNode1, cmpIndexNode);4044if (necessaryCmp.isDoubleton())4045{4046icmpgeCISCnode2 = necessaryCmp.getListHead()->getNextElement()->getData();4047}40484049// analyze ImportantNode(4) - another ificmpxx4050if (P->getImportantNode(4))4051{4052if (icmpgeCISCnode2)4053{4054if (disptrace) traceMsg(comp, "Not support yet more than three if-statements. (1)\n");4055return false;4056}4057icmpgeCISCnode2 = trans->getP2TInLoopIfSingle(P->getImportantNode(4));4058if (!icmpgeCISCnode2)4059{4060if (disptrace) traceMsg(comp, "Not support yet more than three if-statements. 
(2)\n");4061return false;4062}4063}40644065if (icmpgeCISCnode2)4066{4067if (!testExitIF(icmpgeCISCnode2->getOpcode(), &isDecrement, &modLength))4068{4069if (disptrace) traceMsg(comp, "testExitIF for icmpgeCISCnode2 failed\n");4070return false;4071}4072if (isDecrement)4073{4074if (disptrace) traceMsg(comp, "Not support a decrement loop. (icmpgeCISCnode2)\n");4075return false;4076}4077TR_ASSERT(modLength == 0 || modLength == 1, "error");4078lenRepNode2 = createLoad(icmpgeCISCnode2->getChild(1)->getHeadOfTrNode());4079if (modLength) lenRepNode2 = createOP2(comp, TR::isub, lenRepNode2, TR::Node::create(baseRepNode, TR::iconst, 0, -modLength));40804081TR::Node *icmpgeNode2 = icmpgeCISCnode2->getHeadOfTrNode();4082TR_ASSERT(icmpgeNode2->getChild(0)->getOpCode().isLoadVarDirect(), "Please remove this assertion");4083if (!icmpgeNode2->getChild(0)->getOpCode().isLoadVarDirect()) return false;4084icmpgeSymRef2 = icmpgeNode2->getChild(0)->getSymbolReference();40854086TR::Node *lenTmpNode2 = createOP2(comp, TR::isub, lenRepNode2, TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, icmpgeSymRef2));40874088lenTmpNode = createMin(comp, lenTmpNode, lenTmpNode2);4089}4090lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lenTmpNode);40914092// Modify array header constant if necessary4093TR::Node *constLoad;4094if (trans->getOffsetOperand1())4095{4096int32_t offset = trans->getOffsetOperand1() * 2;4097constLoad = modifyArrayHeaderConst(comp, inputNode, offset);4098TR_ASSERT(constLoad, "Not implemented yet");4099if (disptrace) traceMsg(comp,"The array header const of inputNode %p is modified. (offset=%d)\n", inputNode, offset);4100}4101if (trans->getOffsetOperand2())4102{4103int32_t offset = trans->getOffsetOperand2() * (isOutputChar ? 2 : 1);4104constLoad = modifyArrayHeaderConst(comp, outputNode, offset);4105TR_ASSERT(constLoad, "Not implemented yet");4106if (disptrace) traceMsg(comp,"The array header const of outputNode %p is modified. 
(offset=%d)\n", outputNode, offset);4107}41084109// Prepare arraytranslate4110TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, termchar);4111TR::Node * stopCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, stopchar);4112411341144115TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);4116translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());4117translateNode->setAndIncChild(0, inputNode);4118translateNode->setAndIncChild(1, outputNode);4119translateNode->setAndIncChild(2, tableNode);4120translateNode->setAndIncChild(3, termCharNode);4121translateNode->setAndIncChild(4, lengthNode);4122translateNode->setAndIncChild(5, stopCharNode);41234124translateNode->setSourceIsByteArrayTranslate(false);4125translateNode->setTargetIsByteArrayTranslate(!isOutputChar);4126translateNode->setTableBackedByRawStorage(true);4127if (isAllowSourceCellTermChar)4128{4129translateNode->setTermCharNodeIsHint(true);4130//translateNode->setAllowSourceCellIsTermChar(true); // Generated code will check whether the character is a delimiter.4131// determine the use of this flag on the node4132translateNode->setSourceCellIsTermChar(true); // Generated code will check whether the character is a delimiter.4133}4134else4135{4136translateNode->setTermCharNodeIsHint(false);4137translateNode->setSourceCellIsTermChar(false);4138}4139TR::SymbolReference * translateTemp = comp->getSymRefTab()->4140createTemporary(comp->getMethodSymbol(), TR::Int32);4141TR::Node * topOfTranslateNode = TR::Node::createStore(translateTemp, translateNode);41424143// prepare nodes that add the number of elements (which was translated) into the induction variables4144TR::Node *addCountNode = createOP2(comp, TR::iadd,4145TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),4146translateNode);4147if (ivNeedsUpdate)4148addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexRepNode, 
1));41494150TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);4151TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);41524153TR::TreeTop * dstIndVarUpdateTreeTop = NULL;4154TR::Node *dstIndVarInitializer = NULL;4155if (dstIndexRepNode)4156{4157dstIndVarInitializer = areDefsOnlyInsideLoop(comp, trans, outputCISCNode->getHeadOfTrNodeInfo()->_node);41584159TR::Node *dstAddCountNode = NULL;4160if (dstIndexVarSymRef->getSymbol()->getDataType() == TR::Int32)4161{4162dstAddCountNode = createOP2(comp, TR::iadd,4163TR::Node::createWithSymRef(dstIndexRepNode, TR::iload, 0, dstIndexVarSymRef),4164translateNode);4165if (dstIvNeedsUpdate)4166dstAddCountNode = TR::Node::create(TR::iadd, 2, dstAddCountNode, TR::Node::iconst(dstAddCountNode, 1));4167}4168else4169{4170dstAddCountNode = createOP2(comp, TR::ladd,4171TR::Node::createWithSymRef(dstIndexRepNode, TR::lload, 0, dstIndexVarSymRef),4172TR::Node::create(TR::i2l, 1, translateNode));4173if (dstIvNeedsUpdate)4174dstAddCountNode = TR::Node::create(TR::ladd, 2, dstAddCountNode, TR::Node::lconst(dstAddCountNode, 1));4175}4176TR::Node * dstIndVarUpdateNode = TR::Node::createStore(dstIndexVarSymRef, dstAddCountNode);4177dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstIndVarUpdateNode);4178}41794180// create Nodes if there are multiple exit points.4181TR::Node *icmpgeNode = NULL;4182TR::TreeTop *failDest = NULL;4183TR::TreeTop *okDest = NULL;4184TR::Block *compensateBlock0 = NULL;4185TR::Block *compensateBlock1 = NULL;4186if (icmpgeCISCnode2)4187{4188TR_ASSERT(isNeedGenIcmpge, "assumption error?");4189TR::Node *icmpgeNode2 = NULL;4190TR::TreeTop *failDest2 = NULL;4191TR::Block *compensateBlock2 = NULL;4192TR::Block *newBlockForIf2 = NULL;41934194if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical.\n");41954196icmpgeNode = icmpgeCISCnode1->getHeadOfTrNode();4197okDest = retSameExit;4198failDest = 
icmpgeCISCnode1->getDestination();41994200icmpgeNode2 = icmpgeCISCnode2->getHeadOfTrNode();4201failDest2 = icmpgeCISCnode2->getDestination();4202newBlockForIf2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4203// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()4204if (isCompensateCode)4205{4206compensateBlock2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4207compensateBlock2->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest2)));4208failDest2 = compensateBlock2->getEntry();42094210compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4211compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4212compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));4213compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));4214okDest = compensateBlock0->getEntry();4215failDest = compensateBlock1->getEntry();4216}4217if (disptrace)4218{4219if (okDest == NULL) traceMsg(comp,"error, okDest == NULL!\n");4220if (failDest == NULL) traceMsg(comp,"error, failDest == NULL!\n");4221if (failDest2 == NULL) traceMsg(comp,"error, failDest2 == NULL!\n");4222}4223TR_ASSERT(okDest != NULL && failDest != NULL && failDest2 != NULL, "error!");42244225// It generates "ificmpge".4226icmpgeNode = TR::Node::createif(TR::ificmpge,4227cmpIndexNode->duplicateTree(),4228lenRepNode1,4229failDest);4230icmpgeNode2 = TR::Node::createif(TR::ificmpge,4231TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, icmpgeSymRef2),4232lenRepNode2->duplicateTree(),4233failDest2);42344235// Insert nodes and maintain the CFG4236block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());42374238if (P->needsInductionVariableInit())4239{4240TR::TreeTop *storeTree = 
TR::TreeTop::create(comp, dstIndexRepNode->duplicateTree());4241block->prepend(storeTree);4242}42434244// Create the fast path code4245block = trans->insertBeforeNodes(block);4246TR::TreeTop *translateTT = TR::TreeTop::create(comp, topOfTranslateNode);4247block->append(translateTT);4248if (dstIndVarInitializer)4249{4250translateTT->insertBefore(TR::TreeTop::create(comp, dstIndVarInitializer));4251}4252block->append(indVarUpdateTreeTop);4253if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);4254block = trans->insertAfterNodes(block);42554256block->append(TR::TreeTop::create(comp, icmpgeNode));4257newBlockForIf2->append(TR::TreeTop::create(comp, icmpgeNode2));4258TR::CFG *cfg = comp->getFlowGraph();4259cfg->setStructure(NULL);4260TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();4261TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();4262if (isCompensateCode)4263{4264compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]4265compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]4266// Duplicate all insertion nodes in getAfterInsertionIdiomList(1)4267ListElement<TR::Node> *le;4268for (le = trans->getAfterInsertionIdiomList(1)->getListHead(); le; le = le->getNextElement())4269{4270le->setData(le->getData()->duplicateTree());4271}4272compensateBlock2 = trans->insertAfterNodesIdiom(compensateBlock2, 1, true); // ch = base[index-1]4273cfg->insertBefore(compensateBlock0, orgNextBlock);4274cfg->insertBefore(compensateBlock1, compensateBlock0);4275cfg->insertBefore(compensateBlock2, compensateBlock1);4276cfg->insertBefore(newBlockForIf2, compensateBlock2);4277cfg->join(block, newBlockForIf2);4278}4279else4280{4281cfg->insertBefore(newBlockForIf2, orgNextBlock);4282cfg->join(block, 
newBlockForIf2);4283}4284trans->setSuccessorEdges(block,4285newBlockForIf2,4286failDest->getEnclosingBlock());4287trans->setSuccessorEdges(newBlockForIf2,4288okDest->getEnclosingBlock(),4289failDest2->getEnclosingBlock());4290}4291else4292{4293if (isNeedGenIcmpge)4294{4295if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");42964297icmpgeNode = icmpgeCISCnode1->getHeadOfTrNode();4298okDest = retSameExit;4299failDest = icmpgeCISCnode1->getDestination();4300// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()4301if (isCompensateCode)4302{4303compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4304compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);4305compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));4306compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));4307okDest = compensateBlock0->getEntry();4308failDest = compensateBlock1->getEntry();4309}4310TR_ASSERT(okDest != NULL && failDest != NULL && okDest != failDest, "error!");43114312// It actually generates "ificmplt" (NOT ificmpge!) 
in order to suppress a redundant goto block.4313icmpgeNode = TR::Node::createif(TR::ificmplt,4314cmpIndexNode->duplicateTree(), // TR::Node::create(indexRepNode, TR::iload, 0, indexVarSymRef),4315lenRepNode1,4316okDest);4317}43184319// Insert nodes and maintain the CFG4320block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());43214322if (P->needsInductionVariableInit())4323{4324TR::TreeTop *storeTree = TR::TreeTop::create(comp, dstIndexRepNode->duplicateTree());4325block->prepend(storeTree);4326}43274328// Create the fast path code4329block = trans->insertBeforeNodes(block);4330block->append(TR::TreeTop::create(comp, topOfTranslateNode));4331block->append(indVarUpdateTreeTop);4332if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);4333block = trans->insertAfterNodes(block);43344335if (isNeedGenIcmpge)4336{4337block->append(TR::TreeTop::create(comp, icmpgeNode));4338if (isCompensateCode)4339{4340TR::CFG *cfg = comp->getFlowGraph();4341cfg->setStructure(NULL);4342TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();4343TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();4344compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]4345compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]4346cfg->insertBefore(compensateBlock0, orgNextBlock);4347cfg->insertBefore(compensateBlock1, compensateBlock0);4348cfg->join(block, compensateBlock1);4349}4350}4351else if (isCompensateCode)4352{4353block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]4354}43554356// set successor edge(s) to the original block4357if (!isNeedGenIcmpge)4358{4359trans->setSuccessorEdge(block, target);4360}4361else4362{4363trans->setSuccessorEdges(block,4364failDest->getEnclosingBlock(),4365okDest->getEnclosingBlock());4366}4367}43684369return true;4370}43714372bool4373CISCTransform2CopyingTRTxAddDest1(TR_CISCTransformer 
*trans)4374{4375trans->setOffsetOperand2(1); // add offset of destination with 14376return CISCTransform2CopyingTRTx(trans);4377}43784379/****************************************************************************************4380Corresponding Java-like Pseudo Program4381int i, j, end;4382char charArray[ ];4383byte byteArray[ ];4384while(true){4385char c = charArray[i];4386if (booltable(c)) break;4387byteArray[j] = (byte)c;4388i++;4389j++;4390if (j >= end) break;4391}43924393Note 1: It allows that variables v1 and v3 are identical.4394****************************************************************************************/4395TR_PCISCGraph *4396makeCopyingTRTxGraph(TR::Compilation *c, int32_t ctrl, int pattern)4397{4398TR_ASSERT(pattern == 0 || pattern == 1 || pattern == 2, "not implemented");4399char *name = (char *)TR_MemoryBase::jitPersistentAlloc(16);4400sprintf(name, "CopyingTRTx(%d)",pattern);4401TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);4402/*************************************************************************** opc id dagId #cfg #child other/pred/children */4403TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);4404tgt->addNode(charArray); // src array base4405TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0); tgt->addNode(i); // src array index4406TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1);4407tgt->addNode(byteArray); // dst array base4408TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(j); // dst array index4409TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(idx0);4410TR_PCISCNode 
*idx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 1); tgt->addNode(idx1);4411TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(end); // length4412TR_PCISCNode *aHeader0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(aHeader0); // array header4413TR_PCISCNode *aHeader1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(aHeader1); // array header4414TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1); tgt->addNode(increment);4415TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(mulFactor); // Multiply Factor4416TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 2); // element size4417TR_PCISCNode *offset = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 3, 0, 0, 2); tgt->addNode(offset); // optional4418TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);4419TR_PCISCNode *charAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, idx0, aHeader0, elemSize);4420TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr);4421tgt->addNode(c2iNode);4422TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, c2iNode, c2iNode);4423tgt->addNode(exitTest);4424TR_PCISCNode *byteAddr = createIdiomArrayStoreInLoop(tgt, ctrl, 1, exitTest, TR_ibcstore, TR::NoType, byteArray, 
idx1, aHeader1, mulFactor, c2iNode);4425TR_PCISCNode *store1, *store2;4426switch(pattern)4427{4428case 0:4429store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment); // optional (i = i + 1)4430store2 = createIdiomDecVarInLoop(tgt, ctrl, 1, store1, j, idx1, increment); // j = idx1 + 14431store1->getChild(0)->setIsOptionalNode();4432store1->setIsOptionalNode();4433break;4434case 1:4435store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, j, idx1, increment); // j = idx1 + 14436store2 = createIdiomIncVarInLoop(tgt, ctrl, 1, store1, i, j, offset); // optional (i = j + offset)4437store2->getChild(0)->setIsOptionalNode();4438store2->setIsOptionalNode();4439break;4440case 2:4441store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment); // optional (i = i + 1)4442store2 = createIdiomDecVarInLoop(tgt, ctrl, 1, store1, j, j, increment); // j = j + 14443store1->getChild(0)->setIsOptionalNode();4444store1->setIsOptionalNode();4445break;4446}4447TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, store2, j, end); tgt->addNode(loopTest);4448TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);44494450exitTest->setSucc(1, exit);4451loopTest->setSuccs(entry->getSucc(0), exit);44524453i->setIsOptionalNode();4454offset->setIsOptionalNode();44554456c2iNode->setIsChildDirectlyConnected();4457loopTest->setIsChildDirectlyConnected();44584459tgt->setSpecialCareNode(0, exitTest); // TR_booltable4460tgt->setEntryNode(entry);4461tgt->setExitNode(exit);4462tgt->setImportantNodes(exitTest, loopTest, charAddr, byteAddr, NULL);4463tgt->setNumDagIds(16);4464tgt->createInternalData(1);44654466tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);4467tgt->setTransformer(CISCTransform2CopyingTRTx);4468tgt->setInhibitBeforeVersioning();4469tgt->setAspects(isub|mul, ILTypeProp::Size_2, 
existAccess);4470tgt->setNoAspects(call|bndchk|bitop1, 0, 0);4471tgt->setMinCounts(2, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount4472tgt->setHotness(warm, false);4473static char *versionLengthStr = feGetEnv("TR_CopyingTRTxGraph_versionLength");4474static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 15);4475tgt->setVersionLength(versionLength); // depending on each architecture44764477tgt->setPatternType(pattern);44784479return tgt;4480}448144824483/****************************************************************************************4484Corresponding Java-like Pseudo Program4485int i, j, end;4486char charArray[ ];4487byte byteArray[ ];4488while(true){4489char c = charArray[i];4490if (booltable(c)) break;4491if (j > end) break;4492byteArray[j] = (byte)c;4493i++;4494j++;4495if (i >= end) break;4496}44974498Note 1: It allows that variables i and j are identical.4499****************************************************************************************/4500TR_PCISCGraph *4501makeCopyingTRTxThreeIfsGraph(TR::Compilation *c, int32_t ctrl)4502{4503TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTxThreeIfs", 0, 16);4504/*************************************************************************** opc id dagId #cfg #child other/pred/children */4505TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);4506tgt->addNode(charArray); // src array base4507TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(i); // src array index4508TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1);4509tgt->addNode(byteArray); // dst array base4510TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, 
tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(j); // dst array index4511TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(end); // length4512TR_PCISCNode *end2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0); tgt->addNode(end2); // length24513TR_PCISCNode *aHeader0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(aHeader0); // array header4514TR_PCISCNode *aHeader1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(aHeader1); // array header4515TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(increment);4516TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(mulFactor); // Multiply Factor4517TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size4518TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);4519TR_PCISCNode *charAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, i, aHeader0, elemSize);4520TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr);4521tgt->addNode(c2iNode);4522TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, c2iNode, c2iNode);4523tgt->addNode(exitTest);4524TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, exitTest, j, end); tgt->addNode(loopTest);4525TR_PCISCNode 
*byteAddr = createIdiomArrayStoreInLoop(tgt, ctrl, 1, loopTest, TR_ibcstore, TR::NoType, byteArray, j, aHeader1, mulFactor, c2iNode);4526TR_PCISCNode *store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment); // i = i + 14527TR_PCISCNode *store2 = createIdiomDecVarInLoop(tgt, ctrl, 1, store1, j, j, increment); // j = j + 14528TR_PCISCNode *loopTest2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, store2, i, end2); tgt->addNode(loopTest2);4529TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);45304531exitTest->setSucc(1, exit);4532loopTest->setSucc(1, exit);4533loopTest2->setSuccs(entry->getSucc(0), exit);45344535c2iNode->setIsChildDirectlyConnected();4536loopTest->setIsChildDirectlyConnected();4537loopTest2->setIsChildDirectlyConnected();45384539tgt->setSpecialCareNode(0, exitTest); // TR_booltable4540tgt->setEntryNode(entry);4541tgt->setExitNode(exit);4542tgt->setImportantNodes(exitTest, loopTest2, charAddr, byteAddr, loopTest);4543tgt->setNumDagIds(14);4544tgt->createInternalData(1);45454546tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);4547tgt->setTransformer(CISCTransform2CopyingTRTx);4548tgt->setInhibitBeforeVersioning();4549tgt->setAspects(isub|mul, ILTypeProp::Size_2, existAccess);4550tgt->setNoAspects(call|bndchk|bitop1, 0, 0);4551tgt->setMinCounts(3, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount4552tgt->setHotness(warm, false);4553static char *versionLengthStr = feGetEnv("TR_CopyingTRTxThreeIfsGraph_versionLength");4554static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 
0 : 15);4555tgt->setVersionLength(versionLength); // depending on each architecture4556return tgt;4557}455845594560/****************************************************************************************4561****************************************************************************************/4562TR_PCISCGraph *4563makeCopyingTRTOGraphSpecial(TR::Compilation *c, int32_t ctrl)4564{4565TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTOSpecial", 0, 16);4566/************************************ opc id dagId #cfg #child other/pred/children */4567TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);4568tgt->addNode(charArray); // src array base4569TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(i); // src array index4570TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);4571tgt->addNode(byteArray); // dst array base4572TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(j); // dst array index4573TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(end); // length4574TR_PCISCNode *aHeader0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(aHeader0); // array header4575TR_PCISCNode *aHeader1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(aHeader1); // array header4576TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(increment);4577TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(mulFactor); // Multiply Factor4578TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size4579TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);4580TR_PCISCNode *charAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, i, aHeader0, elemSize);4581TR_ASSERT((ctrl & CISCUtilCtl_64Bit) && i->getParents()->isSingleton(), "assumption error");4582TR_PCISCNode *i2lNode = (TR_PCISCNode *)i->getHeadOfParents();4583TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr);4584tgt->addNode(c2iNode);4585TR_PCISCNode *lStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lstore , TR::Int64, tgt->incNumNodes(), 1, 1, 2, c2iNode, i2lNode, j);4586tgt->addNode(lStore);4587TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, lStore, c2iNode); tgt->addNode(exitTest);4588TR_PCISCNode *byteAddr = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, exitTest, TR_ibcstore, TR::NoType, byteArray, j, aHeader1, mulFactor, c2iNode);4589TR_PCISCNode *iStore = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment);4590TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, iStore, i, end); tgt->addNode(loopTest);4591TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);45924593exitTest->setSucc(1, exit);4594loopTest->setSuccs(entry->getSucc(0), exit);45954596c2iNode->setIsChildDirectlyConnected();4597loopTest->setIsChildDirectlyConnected();45984599tgt->setSpecialCareNode(0, c2iNode); // 
// TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exit);
   tgt->setImportantNodes(exitTest, loopTest, charAddr, byteAddr, NULL);
   tgt->setNumDagIds(13);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 1, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTOGraphSpecial_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 15);
   tgt->setVersionLength(versionLength);   // depending on each architecture

   // needs induction variable init
   tgt->setNeedsInductionVariableInit(true);

   return tgt;
   }

/****************************************************************************************
Corresponding Java-like Pseudo Program
int v1, end;
int v3;	// optional
int v4;	// v4 usually has the value of "v3 - v1".
byte v0[ ];
char v2[ ];
while(true){
   char T = (char)v0[v1];
   if (booltable(T)) break;
   v2[v1+v4] = T;
   v3 = (v1+v4)+1;	// optional
   v1++;
   if (v1 >= end) break;
}

NOTE(review): the graph built below loads 16-bit elements from v0 (TR::sloadi / TR::Int16)
and stores 8-bit elements into v2 (TR::bstorei / TR::Int8), and setAspects() is given
Size_2 followed by Size_1. The element types of v0 and v2 in the pseudo program above
therefore appear to be swapped -- confirm.
****************************************************************************************/
// Builds the pattern graph "CopyingTRTOInduction1(<pattern>)" and returns it.
//
// c       - compilation; supplies trMemory() for node allocation and the target CPU query
//           used to pick the default version length.
// ctrl    - control flags, forwarded unchanged to the createIdiom* helpers.
// pattern - 0, 1 or 2; selects which of the three statement orderings (see the comments in
//           each switch case) is encoded in the graph. Pattern 2 also selects a different
//           transformer (CISCTransform2CopyingTRTxAddDest1).
//
// Every node takes its id from tgt->incNumNodes() at the point of creation, so the order of
// the statements below is significant.
TR_PCISCGraph *
makeCopyingTRTOInduction1Graph(TR::Compilation *c, int32_t ctrl, int32_t pattern)
   {
   TR_ASSERT(pattern == 0 || pattern == 1 || pattern == 2, "not implemented");
   // The formatted name is 22 fixed characters + the digits of pattern + ')' + NUL, i.e. 25
   // bytes for a single-digit pattern; the 26-byte buffer relies on the range asserted above.
   char *name = (char *)TR_MemoryBase::jitPersistentAlloc(26);
   sprintf(name, "CopyingTRTOInduction1(%d)",pattern);
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);
   /*********************************************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *v0  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v0); // src array base
   TR_PCISCNode *v1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v1); // src array index
   TR_PCISCNode *v2  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1); tgt->addNode(v2); // dst array base
   TR_PCISCNode *v3  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v3); // actual dst array index (optional)
   TR_PCISCNode *v4  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 2); tgt->addNode(v4); // difference of dst array index from src array index
   TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(vorc); // length
   TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(cmah0); // array header
   TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(cmah1); // array header
   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(cm1);
   TR_PCISCNode *c1  = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 1); // element size
   TR_PCISCNode *c2  = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
   TR_PCISCNode *n2  = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR::sloadi, TR::Int16, v0, v1, cmah0, c2);
   TR_PCISCNode *n3  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);
   TR_PCISCNode *n4  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3); tgt->addNode(n4); // optional
   // NOTE(review): the switch has no default case, so for any pattern other than 0, 1 or 2
   // these four pointers stay uninitialized; the TR_ASSERT at the top is the only guard.
   TR_PCISCNode *n5, *nn0, *op1, *n6;
   switch(pattern)
      {
      case 0: {
         // v2[v1+v4] = T;
         // v3 = (v1+v4)+1 (optional)
         // v1++;
         TR_PCISCNode *op0;
         n5  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n4, v1, v4); tgt->addNode(n5);
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n5, TR::bstorei, TR::Int8, v2, n5, cmah1, c1, n3);
         op0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nn0, n5, cm1); tgt->addNode(op0); // (optional)
         op1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, op0,op0, v3); tgt->addNode(op1); // (optional)
         n6  = createIdiomDecVarInLoop(tgt, ctrl, 1, op1, v1, cm1);
         op0->setIsOptionalNode();
         op1->setIsOptionalNode();
         op1->setIsChildDirectlyConnected();
         break; }

      case 1: {
         // v2[v3] = T;
         // v1++;
         // v3 = v1+v4;
         op1 = NULL;
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n4, TR::bstorei, TR::Int8, v2, v3, cmah1, c1, n3);
         n5  = createIdiomDecVarInLoop(tgt, ctrl, 1, nn0, v1, cm1);
         n6  = createIdiomIncVarInLoop(tgt, ctrl, 1, n5, v3, v1, v4);
         break; }

      case 2: {
         // v1++;
         // v2[v1+v4] = T; In this case, we need to add 1 to the destination index, because v1 was incremented.
         // v3 = v1+v4;
         TR_PCISCNode *n45;
         n45 = createIdiomDecVarInLoop(tgt, ctrl, 1, n4, v1, cm1);
         n5  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n45, v1, v4); tgt->addNode(n5);
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n5, TR::bstorei, TR::Int8, v2, n5, cmah1, c1, n3);
         op1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, nn0, n5, v3); tgt->addNode(op1);
         n6  = op1;
         op1->setIsChildDirectlyConnected();
         break; }
      }
   TR_PCISCNode *n7  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc); tgt->addNode(n7);
   TR_PCISCNode *n8  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);

   // Both the booltable test and the loop test exit to n8; the loop test's other successor
   // is the first node after the entry node (the back edge).
   n4->setSucc(1, n8);
   n7->setSuccs(ent->getSucc(0), n8);

   n4->setIsOptionalNode();
   v3->setIsOptionalNode();

   n3->setIsChildDirectlyConnected();
   n7->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, n4); // TR_booltable
   tgt->setEntryNode(ent);
   tgt->setExitNode(n8);
   tgt->setImportantNodes(n4, n7, n2, nn0, NULL);
   tgt->setNumDagIds(14);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(pattern != 2 ? CISCTransform2CopyingTRTx : CISCTransform2CopyingTRTxAddDest1);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   // The version length can be overridden through the environment; both statics are
   // initialized once, on the first call.
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTOInduction1Graph_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);
   tgt->setVersionLength(versionLength);   // depending on each architecture
   return tgt;
   }


/****************************************************************************************
Corresponding Java-like Pseudo Program
int v1, v3, end;
char v0[ ];
char v2[ ];
while(true){
   if (booltable(v0[v1])) break;
   v2[v3] = v0[v1];
   v1++;
   v3++;
   if (v1 >= end) break;
}

Note 1: It allows that variables v1 and v3 are identical.
****************************************************************************************/
// Builds the pattern graph "CopyingTRTTSpecial" and returns it.
//
// c    - compilation; supplies trMemory() and the target CPU query for the default version length.
// ctrl - control flags, forwarded to the createIdiom* helpers (the store additionally gets
//        CISCUtilCtl_NoConversion).
//
// Node ids are assigned in creation order through tgt->incNumNodes().
TR_PCISCGraph *
makeCopyingTRTTSpecialGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTTSpecial", 0, 16);
   /************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *v0  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v0); // src array base
   TR_PCISCNode *v1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v1); // src array index
   TR_PCISCNode *v2  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v2); // dst array base
   TR_PCISCNode *v3  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(v3); // dst array index
   TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(idx0);
   TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(idx1);
   TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),6, 0, 0); tgt->addNode(vorc); // length
   TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah); // array header
   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(cm1);
   TR_PCISCNode *c2  = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
   TR_PCISCNode *n2  = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, ent, v0, idx0, cmah, c2);
   TR_PCISCNode *n3  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);
   TR_PCISCNode *n4  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3); tgt->addNode(n4);
   // Second load of v0[idx0]: it feeds the store, matching "v2[v3] = v0[v1]" in the pseudo program.
   TR_PCISCNode *n5  = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, n4, v0, idx0, cmah, c2);
   TR_PCISCNode *nn0 = createIdiomCharArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_NoConversion, 1, n5, v2, idx1, cmah, c2, n5);
   TR_PCISCNode *n6  = createIdiomDecVarInLoop(tgt, ctrl, 1, nn0, v1, cm1);
   TR_PCISCNode *nn6 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v3, cm1);
   TR_PCISCNode *n7  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nn6, v3, vorc); tgt->addNode(n7);
   TR_PCISCNode *n8  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n8);

   n4->setSucc(1, n8);
   n7->setSuccs(ent->getSucc(0), n8);

   n3->setIsChildDirectlyConnected();
   n7->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, n4); // TR_booltable
   tgt->setEntryNode(ent);
   tgt->setExitNode(n8);
   tgt->setImportantNodes(n4, n7, n2, nn0, NULL);
   tgt->setNumDagIds(13);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul|sameTypeLoadStore, ILTypeProp::Size_2, ILTypeProp::Size_2);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 2, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTTSpecialGraph_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 20);
   tgt->setVersionLength(versionLength);   // depending on each architecture
   return tgt;
   }

//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////

//*****************************************************************************************
// IL code generation for exploiting the TRTO instruction
// This is the case where the function table is prepared by the user program.
// Input: ImportantNode(0) - ificmpeq (booltable)
//        ImportantNode(1) - ificmpge
//        ImportantNode(2) - address of the source array
//        ImportantNode(3) - address of the destination array
//        ImportantNode(4) - optional ificmpge for limit checking
//
// Returns false, without inserting anything into the block, when any of the early checks
// below fails (non-empty after-insertion idiom lists, no first node/block found, preheader
// in the last block of the method, missing input/output array node, no good array index,
// or index variables inconsistent with the array accesses).
// Returns true once the arraytranslate tree, the induction variable updates and the
// successor edges have been installed.
//*****************************************************************************************
bool
CISCTransform2TRTOArray(TR_CISCTransformer *trans)
   {
   TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
   const bool disptrace = DISPTRACE(trans);
   TR::Node *trNode;
   TR::TreeTop *trTreeTop;
   TR::Block *block;
   TR_CISCGraph *P = trans->getP();
   List<TR_CISCNode> *P2T = trans->getP2T();
   TR::Compilation *comp = trans->comp();

   // The assert documents the expectation; the explicit test is what makes the function
   // bail out when asserts are not active.
   TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
   if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;

   trans->findFirstNode(&trTreeTop, &trNode, &block);
   if (!block) return false;    // cannot find

   if (isLoopPreheaderLastBlockInMethod(comp, block))
      {
      traceMsg(comp, "Bailing CISCTransform2TRTOArray due to null TT - might be a preheader in last block of method\n");
      return false;
      }

   TR_CISCNode * inputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
   TR_CISCNode * outputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
   if (!inputCISCNode || !outputCISCNode) return false;
   // Work on copies of the address trees; they become children 0 and 1 of the arraytranslate node.
   TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
   TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();

   TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *mapBaseRepNode;
   getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &mapBaseRepNode);
   TR::Node *cmpRepNode = trans->getP2TRep(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;
   TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
   TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
   if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;
   // Source and destination share one index variable: treat it as a single induction variable.
   if (dstIndexVarSymRef == indexVarSymRef)
      {
      dstIndexRepNode = NULL;
      dstIndexVarSymRef = NULL;
      }
   if (dstIndexVarSymRef)
      {
      if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0) return false;
      }
   TR_ScratchList<TR::Node> variableList(comp->trMemory());
   variableList.add(indexRepNode);
   if (dstIndexRepNode) variableList.add(dstIndexRepNode);
   if (!isIndexVariableInList(inputNode, &variableList) ||
       !isIndexVariableInList(outputNode, &variableList))
      {
      dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);
      return false;
      }

   // check if the induction variable needs to be updated by 1
   // this depends on whether the induction variable is incremented
   // before the boolTable exit or after (ie. before the loop driving test)
   //
   TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;
   bool ivNeedsUpdate = false;
   bool dstIvNeedsUpdate = false;
   // The condition is constant-false, so this block never runs and both flags stay false
   // for the rest of the function.
   if (0 && boolTableExit)
      {
      TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;
      ///traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());
      ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);
      if (dstIndexVarSymRef)
         dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);
      }


   // A non-NULL target selects the "single successor block" paths below; NULL selects the
   // "multiple successor blocks" paths.
   TR::Block *target = trans->analyzeSuccessorBlock();

   // Prepare arraytranslate node
   TR::Node * tableNode = createLoad(mapBaseRepNode);
   if (tableNode->getOpCode().isLong() && comp->target().is32Bit())
      tableNode = TR::Node::create(TR::l2i, 1, tableNode);
   TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
   // length = (second operand of the loop test) - (current source index)
   TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(),
                                                createOP2(comp, TR::isub, cmpRepNode->getChild(1)->duplicateTree(), indexNode));
   TR::Node * termCharNode = createLoad(trans->getP2TRep(P->getImportantNode(0)->getChild(1))->getHeadOfTrNodeInfo()->_node);
   TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);

   TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
   translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
   translateNode->setAndIncChild(0, inputNode);
   translateNode->setAndIncChild(1, outputNode);
   translateNode->setAndIncChild(2, tableNode);
   translateNode->setAndIncChild(3, termCharNode);
   translateNode->setAndIncChild(4, lengthNode);
   translateNode->setAndIncChild(5, stoppingNode);

   translateNode->setSourceIsByteArrayTranslate(false);
   translateNode->setTargetIsByteArrayTranslate(true);
   translateNode->setTermCharNodeIsHint(false);
   translateNode->setSourceCellIsTermChar(false);
   translateNode->setTableBackedByRawStorage(trans->isTableBackedByRawStorage());
   TR::Node * topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);
   // Amount added to the induction variables; replaced below in the multiple-successor case.
   TR::Node * lengthTRxx = translateNode;

   TR_CISCNode *ifeqCiscNode = NULL;
   TR::Node *ifeqNode = NULL;
   if (target)  // single successor block
      {
      // prepare nodes that add the number of elements (which was translated) into the induction variables

      /*lengthTRxx = createOP2(comp, TR::isub,
                             translateNode,
                             TR::Node::create(translateNode, TR::iconst, 0, -1)); */
      }
   else
      {        // multiple successor blocks
      // A loop may have multiple successor blocks (i.e. break from a test character match)
      // First, we need to identify the node that we will try to match. We have one of two
      // scenarios:
      //   1. b2i node (commoned with the b2i load of the translation table character)
      //   2. iload of an auto - the same auto should have a preceding store with the
      //      translation table character.
      // In case 2, we'll try to replace the load with the RHS expression of the corresponding
      // store (expect b2i node).
      //
      // Once we have the b2i node in hand, we attempt to break the commoning of that node between
      // the store and test comparison node.
      TR_CISCNode *b2iCiscNode = NULL;
      TR::Node *b2iNode = NULL;

      ifeqCiscNode = trans->getP2TRep(P->getImportantNode(0));
      b2iCiscNode = ifeqCiscNode->getChild(0);
      TR_CISCNode *store;
      ifeqNode = ifeqCiscNode->getHeadOfTrNodeInfo()->_node;
      // try to find a tree including the array load
      switch(b2iCiscNode->getOpcode())
         {
         case TR::b2i:
            // b2iNode stays NULL here, so the child of ifeqNode is not replaced further below.
            break;
         case TR::iload:
            TR_ASSERT(b2iCiscNode->getChains()->isSingleton(), "Not implemented yet");
            store = b2iCiscNode->getChains()->getListHead()->getData();
            b2iCiscNode = store->getChild(0);
            TR_ASSERT(b2iCiscNode->getOpcode() == TR::b2i, "Not implemented yet");
            b2iNode = b2iCiscNode->getHeadOfTrNodeInfo()->_node;
            break;
         case TR_variable:
            if (ifeqCiscNode->isEmptyHint()) return false;
            b2iCiscNode = ifeqCiscNode->getHintChildren()->getListHead()->getData();
            TR_ASSERT(b2iCiscNode->getOpcode() == TR::b2i, "Not implemented yet");
            store = b2iCiscNode->getHeadOfParents();
            TR_ASSERT(store->getOpcode() == TR::istore, "Not implemented yet");
            TR_ASSERT(store->getChild(1) == ifeqCiscNode->getChild(0), "Not implemented yet");
            b2iNode = b2iCiscNode->getHeadOfTrNodeInfo()->_node;
            break;
         default:
            TR_ASSERT(0, "Not implemented yet");
            break;
         }
      // Expect b2iCiscNode has the tree.
      TR_CISCNode *ixload, *aload, *iload;
      if (getThreeNodesForArray(b2iCiscNode, &ixload, &aload, &iload))
         {
         // Try to replace "iload" with a RHS expression of the single store.
         if (iload->getOpcode() == TR::iload &&
             iload->getChains()->isSingleton() &&
             iload->getParents()->isSingleton())
            {   // simple copy propagation
            TR_ASSERT(iload->getChains()->isSingleton(), "Not implemented yet");
            store = iload->getChains()->getListHead()->getData();
            TR::Node *storeTR = store->getHeadOfTrNode();
            TR::Node *iloadTR = iload->getHeadOfTrNode();

            TR_ASSERT(iload->getParents()->isSingleton(), "Not implemented yet");
            TR_CISCNode *iloadParent = iload->getHeadOfParents();
            TR::Node *iloadParentTR = iloadParent->getHeadOfTrNodeInfo()->_node;

            // Replace whichever child slot of the parent currently holds the load.
            if (iloadParentTR->getChild(0) == iloadTR)
               {
               iloadParentTR->setAndIncChild(0, storeTR->getChild(0)->duplicateTree());
               }
            else if (iloadParentTR->getChild(1) == iloadTR)
               {
               iloadParentTR->setAndIncChild(1, storeTR->getChild(0)->duplicateTree());
               }
            else
               {
               TR_ASSERT(false, "Not implemented yet");
               }
            }
         }
      if (b2iNode)
         {
         ifeqNode->getAndDecChild(0);
         ifeqNode->setAndIncChild(0, b2iNode->duplicateTree());
         }

      // For Multiple Successor Blocks, we have a test character condition in the
      // loop, which may lead to a different successor block than the fallthrough.
      // We need to be able to distinguish the following two scenarios, which both
      // would load the last character in the source array:
      //   1. no test character found (translateNode == lengthNode).
      //   2. test character found in the last element(translateNode < lengthNode).
      // The final IV value is always (IV + translateNode).
      // However, under case 1, the element loaded is at index (IV + translateNode - 1).
      // Under case 2, the element loaded is at index (IV + translateNode).
      // As such, we will subtract 1 in the existing final IV calculation for case 1,
      // so that any array accesses will be correctly indexed.  The final IV value will
      // be increased by 1 again before we hit the exit test.
      lengthTRxx = TR::Node::create(TR::isub, 2, translateNode,
                                    TR::Node::create(TR::icmpeq, 2, translateNode,
                                                     lengthNode->getOpCodeValue() == TR::i2l ? lengthNode->getChild(0)
                                                                                             : lengthNode));
      }

   // prepare nodes that add the number of elements (which was translated) into the induction variables
   TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), lengthTRxx);
   if (ivNeedsUpdate)
      addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexNode, 1));
   TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);
   TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);

   TR::TreeTop * dstIndVarUpdateTreeTop = NULL;
   if (dstIndexRepNode)
      {
      TR::Node *dstAddCountNode = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd,
                                                 dstIndexVarSymRef, lengthTRxx, dstIndexRepNode);
      // NOTE(review): dstAddCountNode is the result of createStoreOP2 (presumably a store);
      // this iadd would wrap that node rather than the value being stored. The branch is
      // unreachable while dstIvNeedsUpdate can only be false -- confirm before re-enabling.
      if (dstIvNeedsUpdate)
         dstAddCountNode = TR::Node::create(TR::iadd, 2, dstAddCountNode, TR::Node::iconst(dstAddCountNode, 1));

      dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstAddCountNode);
      }

   // Insert nodes and maintain the CFG
   TR_CISCNode *optionalIficmpge = NULL;
   if (P->getImportantNode(4)) optionalIficmpge = trans->getP2TRepInLoop(P->getImportantNode(4));
   TR_ScratchList<TR::Node> guardList(comp->trMemory());
   if (optionalIficmpge)
      {
      // Extra versioning guard built from the limit operand of the optional range check and
      // the constant 65535.
      TR_CISCNode *limitCISCNode = optionalIficmpge->getChild(1);
      guardList.add(TR::Node::createif(TR::ificmple, convertStoreToLoad(comp, limitCISCNode->getHeadOfTrNode()),
                                       TR::Node::create(lengthNode, TR::iconst, 0, 65535)));
      }
   TR::Node* alignmentCheck = createTableAlignmentCheck(comp, tableNode, false, true, trans->isTableBackedByRawStorage());
   if (alignmentCheck)
      guardList.add(alignmentCheck);
   block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);

   // Create the fast path code
   block = trans->insertBeforeNodes(block);
   block->append(TR::TreeTop::create(comp, topOfTranslateNode));
   block->append(indVarUpdateTreeTop);
   if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);

   // Insert java/nio/Bits.keepAlive() calls into fastpath, if any.
   trans->insertBitsKeepAliveCalls(block);

   block = trans->insertAfterNodes(block);

   if (target)
      {
      // A single successor block
      trans->setSuccessorEdge(block, target);
      }
   else
      {
      // Multiple successor blocks
      // Generate the if-statement to jump to the correct destinations.
      TR::SymbolReference * translateTemp = comp->getSymRefTab()->
            createTemporary(comp->getMethodSymbol(), TR::Int32);
      TR::Node *tempStore;
      // Evaluate the tested value into a temporary first, then compare a load of that
      // temporary, so the index fix-ups inserted in between do not affect the test.
      ifeqNode = ifeqNode->duplicateTree();
      tempStore = TR::Node::createStore(translateTemp, ifeqNode->getAndDecChild(0));
      ifeqNode->setAndIncChild(0, TR::Node::createLoad(ifeqNode, translateTemp));
      TR::TreeTop *tempStoreTTop = TR::TreeTop::create(comp, tempStore);
      TR::TreeTop *ifeqTTop = TR::TreeTop::create(comp, ifeqNode);
      // Fix up the IV value by adding 1 if translateNode == lengthNode (where no test char was found).  See comment above.
      TR::Node *incIndex = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthTRxx->getChild(1), indexRepNode);
      ///TR::Node *icmpeqNode = TR::Node::create(TR::icmpeq, 2, TR::Node::createLoad(indexNode, statusCheckTemp), TR::Node::iconst(indexNode, 0));
      ///TR::Node *incNode = TR::Node::create(TR::iadd, 2, TR::Node::createLoad(indexNode, indexVarSymRef), icmpeqNode);
      ///TR::Node *incIndex = TR::Node::createStore(indexVarSymRef, incNode);
      TR::TreeTop *incIndexTTop = TR::TreeTop::create(comp, incIndex);

      TR::TreeTop *last = block->getLastRealTreeTop();
      last->join(tempStoreTTop);
      tempStoreTTop->join(incIndexTTop);
      if (dstIndVarUpdateTreeTop)
         {
         // NOTE(review): the source index above is advanced by the icmpeq result
         // (lengthTRxx->getChild(1)), whereas the destination index is adjusted with the
         // constant -1 under TR::isub, i.e. apparently by a fixed 1 -- confirm that this
         // asymmetry is intended.
         TR::Node * incDstIndex = createStoreOP2(comp, dstIndexVarSymRef, TR::isub, dstIndexVarSymRef, -1, dstIndexRepNode);
         TR::TreeTop *incDstIndexTTop = TR::TreeTop::create(comp, incDstIndex);
         incIndexTTop->join(incDstIndexTTop);
         last = incDstIndexTTop;
         }
      else
         {
         last = incIndexTTop;
         }
      last->join(ifeqTTop);
      ifeqTTop->join(block->getExit());
      // If the duplicated branch no longer has the opcode recorded on the CISC node, restore
      // that opcode together with the CISC node's branch destination.
      if (ifeqCiscNode->getOpcode() != ifeqNode->getOpCodeValue())
         {
         ifeqNode->setBranchDestination(ifeqCiscNode->getDestination());
         TR::Node::recreate(ifeqNode, (TR::ILOpCodes)ifeqCiscNode->getOpcode());
         }
      TR::Block *okDest = ifeqNode->getBranchDestination()->getEnclosingBlock();
      TR::Block *failDest = NULL;
      TR::Block *optionalDest = NULL;
      if (optionalIficmpge) optionalDest = optionalIficmpge->getDestination()->getEnclosingBlock();
      failDest = trans->searchOtherBlockInSuccBlocks(okDest, optionalDest);
      TR_ASSERT(failDest, "error");
      trans->setSuccessorEdges(block, failDest, okDest);
      }

   return true;
   }

// Variant of CISCTransform2TRTOArray for the raw-storage case: it sets the transformer's
// table-backed-by-raw-storage flag and then runs the regular transformation, returning its result.
bool
CISCTransform2TRTOArrayTableRaw(TR_CISCTransformer *trans)
   {
   trans->setTableBackedByRawStorage();
   return CISCTransform2TRTOArray(trans);
   }

/****************************************************************************************
Corresponding Java-like pseudocode

int i, j, end, exitValue;
char charArray[ ];
byte byteArray[ ], map[ ];
while(true){
   byte b = map[charArray[i]];
   if (b == exitValue) break;
   byteArray[j] = b;
   i++;
   j++;
   if (i >= end) break;
}

Note 1: Idiom allows variables i and j to be identical.
****************************************************************************************/
// Builds the pattern graph "TRTOArray" and returns it.
//
// c    - compilation; supplies trMemory() and the target CPU query for the version length.
// ctrl - control flags; CISCUtilCtl_64Bit selects su2l/Int64 instead of su2i/Int32 for the
//        conversion node, and the flags are forwarded to the createIdiom* helpers.
//
// Node ids are assigned in creation order through tgt->incNumNodes().
TR_PCISCGraph *
makeTRTOArrayGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRTOArray", 0, 16);
   /************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0);
   tgt->addNode(charArray); // src array base
   TR_PCISCNode *i         = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(i); // src array index
   TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(byteArray); // dst array base
   TR_PCISCNode *j         = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(j); // dst array index
   TR_PCISCNode *map       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2); tgt->addNode(map); // map array base
   TR_PCISCNode *idx0      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(idx0);
   TR_PCISCNode *idx1      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 1); tgt->addNode(idx1);
   TR_PCISCNode *end       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),9, 0, 0); tgt->addNode(end); // length
   TR_PCISCNode *exitValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(exitValue); // exitvalue
   TR_PCISCNode *aHeader   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);
   tgt->addNode(aHeader); // array header const
   TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1); tgt->addNode(increment);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(mulFactor); // Multiply Factor
   TR_PCISCNode *elemSize  = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 2); // element size
   TR_PCISCNode *offset    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),3, 0, 0); tgt->addNode(offset); // optional
   TR_PCISCNode *entry     = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);
   TR_PCISCNode *charAddr  = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, idx0, aHeader, elemSize);
   TR_PCISCNode *convNode  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), (ctrl & CISCUtilCtl_64Bit) ? TR::su2l : TR::su2i,
                                                               (ctrl & CISCUtilCtl_64Bit) ? TR::Int64 : TR::Int32,
                                                               tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr); tgt->addNode(convNode);
   // The converted character is used directly as the index into map (b = map[charArray[i]]).
   TR_PCISCNode *mapAddr   = createIdiomArrayLoadInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, convNode, TR::bloadi, TR::Int8, map, convNode, aHeader, mulFactor);
   TR_PCISCNode *b         = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, mapAddr, mapAddr); tgt->addNode(b);
   TR_PCISCNode *exitTest  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, b, b, exitValue); tgt->addNode(exitTest);
   TR_PCISCNode *byteAddr  = createIdiomArrayStoreInLoop(tgt, ctrl, 1, exitTest, TR::bstorei, TR::Int8, byteArray, idx1, aHeader, mulFactor, b);
   TR_PCISCNode *iStore    = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment);
   TR_PCISCNode *jStore    = createIdiomIncVarInLoop(tgt, ctrl, 1, iStore, j, i, offset); // optional
   TR_PCISCNode *loopTest  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end); tgt->addNode(loopTest);
   TR_PCISCNode *exit      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);

   exitTest->setSucc(1, exit);
   loopTest->setSuccs(entry->getSucc(0), exit);

   // The separate destination index update (j), its child and its operands are optional.
   jStore->getChild(0)->setIsOptionalNode();
   jStore->setIsOptionalNode();
   j->setIsOptionalNode();
   offset->setIsOptionalNode();

   convNode->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, exitTest); // TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exit);
   tgt->setImportantNodes(exitTest, loopTest, charAddr->getChild(0), byteAddr->getChild(0), NULL);
   // NOTE(review): the highest dagId assigned above is 16 (charArray), yet 16 is passed here;
   // makeCopyingTRTOInduction1Graph (13 -> 14) and makeCopyingTRTTSpecialGraph (12 -> 13)
   // pass the highest dagId + 1 -- confirm whether this should be 17.
   tgt->setNumDagIds(16);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2TRTOArray);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 2, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setVersionLength(c->target().cpu.isPower() ? 0 : 11);   // depending on each architecture
   return tgt;
   }


/****************************************************************************************
Corresponding Java-like pseudocode
int i, j, end, exitValue;
char charArray[ ];
byte byteArray[ ], *map;
while(true){
   int T = charArray[i];
   if (T >= limit) break;	// optional
   byte b = *(map + T);		- (1)
   if (b == exitValue) break;
   byteArray[j] = b;
   i++;
   j++;
   if (i >= end) break;
}

Note 1: Idiom allows variables i and j to be identical.
Note 2: This pattern is found in "sun/io/CharToByteSingleByte.JITintrinsicConvert".
        I don't know how we can write (1) in a Java program.
From a log file, it seems5238that the map table is in java.nio.DirectByteBuffer and is treated as a pointer5239of C; the address (1) can be computed without adding the array header size.5240****************************************************************************************/5241TR_PCISCGraph *5242makeTRTOArrayGraphSpecial(TR::Compilation *c, int32_t ctrl)5243{5244TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRTOArraySpecial", 0, 16);5245/*************************************************************************** opc id dagId #cfg #child other/pred/children */5246TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0);5247tgt->addNode(charArray); // src array base5248TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(i); // src array index5249TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);5250tgt->addNode(byteArray); // dst array base5251TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(j); // dst array index5252TR_PCISCNode *map = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2); tgt->addNode(map); // map array base5253TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(idx0);5254TR_PCISCNode *idx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 1); tgt->addNode(idx1);5255TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),9, 0, 0); tgt->addNode(end); // length5256TR_PCISCNode *exitValue = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(exitValue); // exitvalue5257TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),7, 0, 0); tgt->addNode(limit); // optional5258TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0);5259tgt->addNode(aHeader); // array header const5260TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1); tgt->addNode(increment);5261TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(mulFactor); // Multiply Factor5262TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size5263TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(entry);5264TR_PCISCNode *charAddr = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, idx0, aHeader, elemSize);5265TR_PCISCNode *c2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr); tgt->addNode(c2iNode);5266TR_PCISCNode *limitChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, c2iNode, c2iNode, limit);5267tgt->addNode(limitChk); // optional5268TR_PCISCNode *mapAddr = createIdiomByteDirectArrayLoadInLoop(tgt, ctrl, 1, limitChk, map, c2iNode);5269TR_PCISCNode *b2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, mapAddr, mapAddr); tgt->addNode(b2iNode);5270TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, b2iNode, b2iNode, 
exitValue);5271tgt->addNode(exitTest);5272TR_PCISCNode *byteAddr = createIdiomArrayStoreInLoop(tgt, ctrl, 1, exitTest, TR::bstorei, TR::Int8, byteArray, idx1, aHeader, mulFactor, b2iNode);5273TR_PCISCNode *iStore = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment);5274TR_PCISCNode *jStore = createIdiomDecVarInLoop(tgt, ctrl, 1, iStore, j, increment);5275TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end); tgt->addNode(loopTest);5276TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);52775278exitTest->setSucc(1, exit);5279limitChk->setSucc(1, exit);5280loopTest->setSuccs(entry->getSucc(0), exit);52815282c2iNode->setIsChildDirectlyConnected();5283loopTest->setIsChildDirectlyConnected();52845285limit->setIsOptionalNode();5286limitChk->setIsOptionalNode();52875288tgt->setSpecialCareNode(0, exitTest); // TR_booltable5289tgt->setEntryNode(entry);5290tgt->setExitNode(exit);5291tgt->setImportantNodes(exitTest, loopTest, charAddr->getChild(0), byteAddr->getChild(0), limitChk);5292tgt->setNumDagIds(16);5293tgt->createInternalData(1);52945295tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);5296tgt->setTransformer(CISCTransform2TRTOArrayTableRaw);5297tgt->setInhibitBeforeVersioning();5298tgt->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_1);5299tgt->setNoAspects(call|bndchk|bitop1, 0, 0);5300tgt->setMinCounts(2, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount5301tgt->setHotness(warm, false);5302tgt->setVersionLength(c->target().cpu.isPower() ? 
0 : 11); // depending on each architecture5303return tgt;5304}530553065307enum StatusArrayStore5308{5309NO_NEED_TO_CHECK = 0,5310ABANDONING_REDUCTION = 1,5311GENERATE_ARRAY_ALIAS_TEST = 2,5312GENERATE_SUBRANGE_OVERLAP_TEST = 3,5313};53145315static StatusArrayStore checkArrayStore(TR::Compilation *comp, TR::Node *inputNode, TR::Node *outputNode, int elementSize, bool isForward)5316{5317// if the src and dest objects are the same, return5318// a) x86 arraycopy helpers dont handle byte copies well5319// (they use SSE so at least 4 bytes are copied at a time for perf)5320// b) this idiom cannot distinguish between loops of the type5321// for (i=0; i<N; i++)5322// a[l+i] = a[l-1]5323// and a typical arraycopy loop. loops like above cannot be reduced to an arraycopy5324// (look at the java semantics for arraycopy)5325//5326if (comp->target().cpu.isZ() || comp->target().cpu.isARM64())5327return NO_NEED_TO_CHECK; // On 390, MVC (which performs byte copies) is generated. ARM64 arraycopy can handle byte copies.53285329if (inputNode->getFirstChild()->getSymbol()->getRegisterMappedSymbol() == outputNode->getFirstChild()->getSymbol()->getRegisterMappedSymbol())5330{5331static bool disableArraycopyOverlapTest = feGetEnv("TR_disableArraycopyOverlapTest") != NULL;5332if (!disableArraycopyOverlapTest &&5333comp->getOptLevel() >= hot &&5334performTransformation(comp, "%sNot abandoning reduction due to src == dest, generating element range overlap test\n", OPT_DETAILS))5335{5336dumpOptDetails(comp, "src and dest are the same, generating guard code 'if (src/dest subranges nonoverlapping)'\n");5337return GENERATE_SUBRANGE_OVERLAP_TEST;5338}5339dumpOptDetails(comp, "src and dest are the same, abandoning reduction\n");5340return ABANDONING_REDUCTION;5341}5342if (!inputNode->getFirstChild()->getOpCode().hasSymbolReference() ||5343!outputNode->getFirstChild()->getOpCode().hasSymbolReference())5344{5345dumpOptDetails(comp, "src and dest may be the same, generating guard code 'if (src != 
dst)'\n");5346return GENERATE_ARRAY_ALIAS_TEST;5347}53485349//the only safe thing to do, in general, is to dynamically check the arrays5350return GENERATE_ARRAY_ALIAS_TEST;5351}53525353namespace5354{5355// Subrange overlap test: Arraycopy is incorrect for aliased arrays iff5356// |d - s| < n,5357// where d is the destination offset, s is the source offset,5358// and n is the length (all in bytes).5359// (Note that it would be safe to relax this condition, and run the loop5360// instead of arraycopy more often.)5361class SubrangeOverlapTestGenerator5362{5363public:5364SubrangeOverlapTestGenerator(TR::Compilation *comp, TR::Node *arraycopy, TR::Node *byteLength, bool is64Bit, int elementSize);5365TR::Node *generate();53665367private:5368TR::ILOpCodes ifxcmplt() { return _is64Bit ? TR::iflcmplt : TR::ificmplt; }5369TR::ILOpCodes xabs() { return _is64Bit ? TR::labs : TR::iabs; }5370TR::ILOpCodes xconst() { return _is64Bit ? TR::lconst : TR::iconst; }5371TR::ILOpCodes xmul() { return _is64Bit ? TR::lmul : TR::imul; }5372TR::ILOpCodes xsub() { return _is64Bit ? 
TR::lsub : TR::isub; }53735374// probably better to teach simplifier how to do these5375void simplifyConstSub();5376void simplifyConstMul();5377void simplifyI2L();53785379TR::Node *byteOffset(TR::Node *addr);5380void checkTypes();5381void checkType(const char *desc, TR::Node *node);53825383TR::Compilation *_comp;5384bool _is64Bit;5385TR::Node *_dst;5386TR::Node *_src;5387TR::Node *_len;5388int _elementSize;5389};53905391SubrangeOverlapTestGenerator::SubrangeOverlapTestGenerator(5392TR::Compilation *comp,5393TR::Node *arraycopy,5394TR::Node *byteLength,5395bool is64Bit,5396int elementSize)5397: _comp(comp)5398, _is64Bit(is64Bit)5399, _dst(byteOffset(arraycopy->getChild(1)))5400, _src(byteOffset(arraycopy->getChild(0)))5401, _len(byteLength)5402, _elementSize(elementSize)5403{5404checkTypes();5405simplifyConstSub();5406checkTypes();5407simplifyConstMul();5408checkTypes();5409simplifyI2L();5410checkTypes();5411}54125413// Generate the test: if |d - s| < n5414TR::Node *SubrangeOverlapTestGenerator::generate()5415{5416_dst = _dst->duplicateTree();5417_src = _src->duplicateTree();5418_len = _len->duplicateTree();54195420TR::Node *diff = TR::Node::create(xsub(), 2, _dst, _src);5421TR::Node *separation = TR::Node::create(xabs(), 1, diff);54225423return TR::Node::createif(ifxcmplt(), separation, _len);5424}54255426// For all k, TFAE:5427// 1. |(d - k) - (s - k)| < n5428// 2. 
|d - s| < n5429void SubrangeOverlapTestGenerator::simplifyConstSub()5430{5431static bool disableArraycopyOverlapTestSubSimplification =5432feGetEnv("TR_disableArraycopyOverlapTestSubSimplification") != NULL;5433if (disableArraycopyOverlapTestSubSimplification)5434return;54355436// Check that both are sub.5437if (_dst->getOpCodeValue() != xsub())5438return;5439if (_src->getOpCodeValue() != xsub())5440return;54415442// Check that both subtrahends are constant.5443TR::Node *dstSubtrahend = _dst->getChild(1);5444TR::Node *srcSubtrahend = _src->getChild(1);5445if (dstSubtrahend->getOpCodeValue() != xconst())5446return;5447if (srcSubtrahend->getOpCodeValue() != xconst())5448return;54495450// Check that the constants are equal.5451if (dstSubtrahend->getConstValue() != srcSubtrahend->getConstValue())5452return;54535454// Ask permission to transform.5455if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through sub const\n", OPT_DETAILS))5456return;54575458// Transform.5459_dst = _dst->getChild(0);5460_src = _src->getChild(0);5461}54625463// For all k > 0,5464// if dk, sk, nk don't overflow,5465// and if dk, sk >= 0, then TFAE:5466// 1. |dk - sk| < nk5467// 2. 
|d - s| < n.5468void SubrangeOverlapTestGenerator::simplifyConstMul()5469{5470static bool disableArraycopyOverlapTestMulSimplification =5471feGetEnv("TR_disableArraycopyOverlapTestMulSimplification") != NULL;5472if (disableArraycopyOverlapTestMulSimplification)5473return;54745475// Check that all three are mul.5476if (_dst->getOpCodeValue() != xmul())5477return;5478if (_src->getOpCodeValue() != xmul())5479return;5480if (_len->getOpCodeValue() != xmul())5481return;54825483// Check that all three multiplicands are constant.5484TR::Node *dstMultiplicand = _dst->getChild(1);5485TR::Node *srcMultiplicand = _src->getChild(1);5486TR::Node *lenMultiplicand = _len->getChild(1);5487if (dstMultiplicand->getOpCodeValue() != xconst())5488return;5489if (srcMultiplicand->getOpCodeValue() != xconst())5490return;5491if (lenMultiplicand->getOpCodeValue() != xconst())5492return;54935494// Check that all constants are equal, and positive.5495int64_t k = dstMultiplicand->getConstValue();5496if (k <= 0)5497return;5498if (srcMultiplicand->getConstValue() != k)5499return;5500if (lenMultiplicand->getConstValue() != k)5501return;55025503// Check that the multiplications don't overflow.5504if (!_dst->cannotOverflow() || !_src->cannotOverflow())5505return;5506// NB. when _elementSize > 1, _len is expected to start as a5507// newly-created multiply by _elementSize. It won't yet be marked5508// as cannotOverflow, but overflow would mean serious trouble.5509if (!_len->cannotOverflow() && !(k == _elementSize && k > 1))5510return;55115512// Check that src, dst >= 0.5513if (!_dst->isNonNegative() || !_src->isNonNegative())5514return;55155516// Ask permission to transform.5517if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through mul const\n", OPT_DETAILS))5518return;55195520// Transform.5521_dst = _dst->getChild(0);5522_src = _src->getChild(0);5523_len = _len->getChild(0);5524_elementSize = 1;5525}55265527// When d, s >= 0, TFAE:5528// 1. 
|i2l(d) - i2l(s)| < i2l(n)5529// 2. |d - s| < n5530void SubrangeOverlapTestGenerator::simplifyI2L()5531{5532static bool disableArraycopyOverlapTestI2LSimplification =5533feGetEnv("TR_disableArraycopyOverlapTestI2LSimplification") != NULL;5534if (disableArraycopyOverlapTestI2LSimplification)5535return;55365537// Check that we are operating on 64-bit numbers,5538// and that all three are i2l.5539if (!_is64Bit)5540return;5541if (_dst->getOpCodeValue() != TR::i2l)5542return;5543if (_src->getOpCodeValue() != TR::i2l)5544return;5545if (_len->getOpCodeValue() != TR::i2l)5546return;55475548// Check that dst, src >= 0.5549// Note that x and i2l(x) have identical signs,5550// so it's good enough if the child is >= 0.5551if (!_dst->isNonNegative() && !_dst->getFirstChild()->isNonNegative())5552return;5553if (!_src->isNonNegative() && !_src->getFirstChild()->isNonNegative())5554return;55555556// Ask permission to transform.5557if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through i2l\n", OPT_DETAILS))5558return;55595560// Transform5561_dst = _dst->getFirstChild();5562_src = _src->getFirstChild();5563_len = _len->getFirstChild();5564_is64Bit = false;5565}55665567// Get the byte offset from an array element address calculation.5568TR::Node *SubrangeOverlapTestGenerator::byteOffset(TR::Node *addr)5569{5570TR::ILOpCodes op = addr->getOpCodeValue();5571TR_ASSERT(op == TR::aladd || op == TR::aiadd,5572"unexpected arraycopy child opcode %s",5573addr->getOpCode().getName());5574return addr->getChild(1);5575}55765577// Assert that all nodes involved in the test have the expected data type.5578void SubrangeOverlapTestGenerator::checkTypes()5579{5580checkType("destination index", _dst);5581checkType("source index", _src);5582checkType("length", _len);5583}55845585// Assert that a single node has the expected data type.5586void SubrangeOverlapTestGenerator::checkType(const char *desc, TR::Node *node)5587{5588TR::DataType expectedType = 
_is64Bit ? TR::Int64 : TR::Int32;5589TR::DataType actualType = node->getDataType();5590TR_ASSERT(5591actualType == expectedType,5592"expected %s node to have type %s, but found %s (%d)",5593desc,5594TR::DataType::getName(expectedType),5595TR::DataType::getName(actualType),5596(int)actualType);5597}5598}55995600//////////////////////////////////////////////////////////////////////////5601//////////////////////////////////////////////////////////////////////////5602//////////////////////////////////////////////////////////////////////////5603//*****************************************************************************************5604// IL code generation for copying memory5605// Input: ImportantNode(0) - array load5606// ImportantNode(1) - array store5607// ImportantNode(2) - the size of elements (NULL for the byte array)5608// ImportantNode(3) - exit if node5609// ImportantNode(4) - optional iistore5610//*****************************************************************************************5611static bool5612CISCTransform2ArrayCopySub(TR_CISCTransformer *trans, TR::Node *indexRepNode, TR::Node *dstIndexRepNode,5613TR::Node *exitVarRepNode, TR::Node *variableORconstRepNode)5614{5615TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");5616TR::Node *trNode;5617TR::TreeTop *trTreeTop;5618TR::Block *block;5619TR_CISCGraph *P = trans->getP();5620List<TR_CISCNode> *P2T = trans->getP2T();5621TR::Compilation *comp = trans->comp();5622bool isDecrement = trans->isMEMCPYDec();5623const bool disptrace = DISPTRACE(trans);56245625TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");5626if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;56275628trans->findFirstNode(&trTreeTop, &trNode, &block);5629if (!block) return false; // cannot find56305631if (isLoopPreheaderLastBlockInMethod(comp, 
block))5632{5633traceMsg(comp, "Bailing CISCTransform2ARrayCopySub due to null TT - might be a preheader in last block of method\n");5634return false;5635}56365637TR::Block *target = trans->analyzeSuccessorBlock();5638// Currently, it allows only a single successor.5639if (!target) return false;56405641TR_CISCNode * inLoadCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));5642TR_CISCNode * inStoreCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));5643if (!inLoadCISCNode || !inStoreCISCNode)5644{5645if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy failed. inLoadCISCNode %x inStoreCISCNode %x\n",inLoadCISCNode,inStoreCISCNode);5646return false;5647}56485649// The transformation can support one exit if-stmt5650TR_CISCGraph *T = trans->getT();5651if (T && T->getAspects()->getIfCount() > 1)5652{5653traceMsg(comp,"CISCTransform2ArrayCopySub detected %d if-stmts in loop (> 1). Not transforming.\n", T->getAspects()->getIfCount());5654return false;5655}56565657TR::Node * inLoadNode = inLoadCISCNode->getHeadOfTrNodeInfo()->_node;5658TR::Node * inStoreNode = inStoreCISCNode->getHeadOfTrNodeInfo()->_node;5659TR::Node * mulFactorNode;5660int elementSize;56615662TR::Node * inputNode = inLoadNode->getChild(0)->duplicateTree();5663TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();56645665// Get the size of elements5666if (!getMultiplier(trans, P->getImportantNode(2), &mulFactorNode, &elementSize, inLoadNode->getType()))5667{5668if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy getMultiplier failed.\n");5669return false;5670}5671if (elementSize != inLoadNode->getSize() || elementSize != inStoreNode->getSize())5672{5673traceMsg(comp, "CISCTransform2ArrayCopy failed - Size Mismatch. 
Element Size: %d InLoadSize: %d inStoreSize: %d\n", elementSize, inLoadNode->getSize(), inStoreNode->getSize());5674return false; // Size is mismatch!5675}56765677// if the src and dest objects are the same, return5678//5679StatusArrayStore statusArrayStore;5680if ((statusArrayStore = checkArrayStore(comp, inputNode, outputNode, elementSize, !isDecrement)) == ABANDONING_REDUCTION)5681return false;56825683if (indexContainsArrayAccess(comp, inLoadNode->getFirstChild()) ||5684indexContainsArrayAccess(comp, inStoreNode->getFirstChild()))5685{5686traceMsg(comp, "inputNode %p or outputnode %p contains another arrayaccess, no reduction\n", inLoadNode, inStoreNode);5687return false;5688}56895690TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));5691int modStartIdx = 0;5692int modLength = 0;5693bool isDecrementRet;5694if (!testExitIF(cmpIfAllCISCNode->getOpcode(), &isDecrementRet, &modLength, &modStartIdx))5695{5696if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy testExitIF failed.\n");5697return false;5698}5699if (isDecrement != isDecrementRet) return false;570057015702TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();5703if (!trans->analyzeArrayIndex(indexVarSymRef))5704{5705if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",indexRepNode);5706return false;5707}5708TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;5709if (indexVarSymRef == dstIndexVarSymRef) dstIndexVarSymRef = NULL;5710indexRepNode = convertStoreToLoad(comp, indexRepNode);5711if (!trans->searchNodeInTrees(inputNode, indexRepNode))5712{5713if (DISPTRACE(trans)) traceMsg(comp, "searchNodeInTrees for inputNode failed.\n");5714return false;5715}5716if (!trans->searchNodeInTrees(outputNode, dstIndexVarSymRef ? 
convertStoreToLoad(comp, dstIndexRepNode) : indexRepNode))5717{5718if (DISPTRACE(trans)) traceMsg(comp, "searchNodeInTrees for outputNode failed.\n");5719return false;5720}5721TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();5722if (indexVarSymRef != exitVarSymRef && dstIndexVarSymRef != exitVarSymRef)5723{5724if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy cannot find exitVarSymRef correctly %x.\n", exitVarRepNode);5725return false;5726}57275728TR::Node *optionalIistore = NULL;5729if (P->getImportantNode(4))5730{5731TR_CISCNode *optionalCISCIistore = trans->getP2TInLoopIfSingle(P->getImportantNode(4));5732if (!optionalCISCIistore)5733return false;5734optionalIistore = optionalCISCIistore->getHeadOfTrNode()->duplicateTree();5735}57365737TR::Node * exitVarNode = createLoad(exitVarRepNode);5738variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);573957405741int32_t postIncrement = checkForPostIncrement(comp, block, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, exitVarSymRef->getSymbol());57425743if (disptrace)5744traceMsg(comp, "detected postIncrement %d modLength %d modStartIdx %d\n", postIncrement, modLength, modStartIdx);57455746TR::Node * lengthNode;5747if (isDecrement)5748{5749TR_ASSERT(dstIndexVarSymRef == NULL, "not implemented yet");5750TR_CISCNode *ixloadORstore, *aload, *iload;5751if (postIncrement &&5752(modStartIdx > 0))5753modStartIdx = 0;5754TR::Node *startIdx = modStartIdx ? 
createOP2(comp, TR::isub, variableORconstRepNode,5755TR::Node::create(trNode, TR::iconst, 0, -modStartIdx)) :5756variableORconstRepNode;5757if (!getThreeNodesForArray(inLoadCISCNode, &ixloadORstore, &aload, &iload)) return false;5758if ((inputNode = replaceIndexInAddressTree(comp, ixloadORstore->getChild(0)->getHeadOfTrNodeInfo()->_node->duplicateTree(),5759indexVarSymRef, startIdx)) == NULL) return false;5760if (!getThreeNodesForArray(inStoreCISCNode, &ixloadORstore, &aload, &iload)) return false;5761if ((outputNode = replaceIndexInAddressTree(comp, ixloadORstore->getChild(0)->getHeadOfTrNodeInfo()->_node->duplicateTree(),5762dstIndexVarSymRef ? dstIndexVarSymRef : indexVarSymRef, startIdx)) == NULL) return false;5763lengthNode = createOP2(comp, TR::isub, exitVarNode, variableORconstRepNode);5764}5765else5766{5767TR_ASSERT(modStartIdx == 0, "error");5768inputNode = inputNode->duplicateTree();5769outputNode = outputNode->duplicateTree();5770lengthNode = createOP2(comp, TR::isub, variableORconstRepNode, exitVarNode);5771}57725773if (postIncrement != 0)5774lengthNode = createOP2(comp, TR::iadd, lengthNode, TR::Node::create(lengthNode, TR::iconst, 0, postIncrement));57755776if (modLength) lengthNode = createOP2(comp, TR::isub, lengthNode, TR::Node::create(trNode, TR::iconst, 0, -modLength));5777TR::Node * diff = lengthNode;57785779lengthNode = createBytesFromElement(comp, trans->isGenerateI2L(), lengthNode, elementSize);57805781// Prepare the arraycopy node.5782bool needWriteBarrier = false;5783if (inStoreNode->getOpCodeValue() == TR::awrtbari)5784{5785switch (TR::Compiler->om.writeBarrierType())5786{5787case gc_modron_wrtbar_oldcheck:5788case gc_modron_wrtbar_cardmark:5789case gc_modron_wrtbar_cardmark_and_oldcheck:5790case gc_modron_wrtbar_cardmark_incremental:5791needWriteBarrier = true;5792break;5793default:5794break;5795}5796}57975798if (!comp->cg()->getSupportsReferenceArrayCopy() && needWriteBarrier)5799{5800if (DISPTRACE(trans)) traceMsg(comp, 
"CISCTransform2ArrayCopy: needWriteBarrier but not getSupportsReferenceArrayCopy().\n");5801return false;5802}58035804TR::Node * arraycopy;5805if (!needWriteBarrier)5806{5807arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);5808}5809else5810{5811arraycopy = TR::Node::createArraycopy(inputNode->getFirstChild(), outputNode->getFirstChild(), inputNode, outputNode, lengthNode);5812}5813arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());5814if (isDecrement)5815{5816arraycopy->setBackwardArrayCopy(true); /* bit available only to primitive arraycopy */5817}5818else5819{5820arraycopy->setForwardArrayCopy(true);5821}5822arraycopy->setArrayCopyElementType(inStoreNode->getDataType());58235824switch(elementSize)5825{5826case 2:5827arraycopy->setHalfWordElementArrayCopy(true);5828break;58295830case 4:5831case 8:5832arraycopy->setWordElementArrayCopy(true);5833break;5834}58355836TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);58375838// Insert nodes and maintain the CFG5839if (statusArrayStore == GENERATE_ARRAY_ALIAS_TEST)5840{5841// devinmp: Should this also check the index ranges?5842List<TR::Node> guardList(comp->trMemory());5843guardList.add(TR::Node::createif(TR::ifacmpeq, inputNode->getFirstChild()->duplicateTree(),5844outputNode->getFirstChild()->duplicateTree()));5845block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);5846}5847else if (statusArrayStore == GENERATE_SUBRANGE_OVERLAP_TEST)5848{5849// We know that the arrays alias, so only test the index ranges.5850bool is64Bit = trans->isGenerateI2L();5851SubrangeOverlapTestGenerator overlapTestGen(comp, arraycopy, lengthNode, is64Bit, elementSize);5852List<TR::Node> guardList(comp->trMemory());5853guardList.add(overlapTestGen.generate());5854block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);5855}5856else5857{5858TR_ASSERT(statusArrayStore == 
NO_NEED_TO_CHECK, "unexpected statusArrayStore value %d", (int)statusArrayStore);5859block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());5860}5861block = trans->insertBeforeNodes(block);5862block->append(TR::TreeTop::create(comp, topArraycopy));5863TR::Node * finalValue = variableORconstRepNode;5864if (modLength || (postIncrement != 0))5865{5866int32_t incr = 0;5867if (modLength)5868incr = modLength;5869else if (postIncrement != 0)5870incr = postIncrement;58715872finalValue = createOP2(comp, TR::isub, finalValue,5873TR::Node::create(trNode, TR::iconst, 0, isDecrement ? incr : -incr));5874}5875TR::TreeTop * exitVarUpdateTreeTop = TR::TreeTop::create(comp,5876TR::Node::createStore(exitVarSymRef, finalValue));58775878block->append(exitVarUpdateTreeTop);5879TR_ASSERT(indexVarSymRef == exitVarSymRef || dstIndexVarSymRef == exitVarSymRef, "error!");5880TR::Node * theOtherVarUpdateNode = NULL;5881if (dstIndexVarSymRef != NULL)5882{5883// If there are two induction variables, we need to maintain the other one.5884TR::SymbolReference * theOtherSymRef = (indexVarSymRef == exitVarSymRef ? dstIndexVarSymRef : indexVarSymRef);5885TR::Node * result = createOP2(comp, isDecrement ? 
TR::isub : TR::iadd,5886TR::Node::createLoad(trNode, theOtherSymRef),5887diff);5888theOtherVarUpdateNode = TR::Node::createStore(theOtherSymRef, result);5889TR::TreeTop * theOtherVarUpdateTreeTop = TR::TreeTop::create(comp, theOtherVarUpdateNode);5890block->append(theOtherVarUpdateTreeTop);5891}58925893if (optionalIistore)5894{5895TR_ASSERT(theOtherVarUpdateNode != NULL, "error!");5896optionalIistore->setAndIncChild(1, theOtherVarUpdateNode->getChild(0));5897block->append(TR::TreeTop::create(comp, optionalIistore));5898}58995900trans->insertAfterNodes(block);59015902trans->setSuccessorEdge(block, target);5903return true;5904}59055906bool5907CISCTransform2ArrayCopy(TR_CISCTransformer *trans)5908{5909TR::Node *indexRepNode, *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode;5910getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode);5911return CISCTransform2ArrayCopySub(trans, indexRepNode, dstIndexRepNode, exitVarRepNode, variableORconstRepNode);5912}59135914bool5915CISCTransform2ArrayCopySpecial(TR_CISCTransformer *trans)5916{5917TR::Node *indexRepNode, *dstIndexRepNode, *variableORconstRepNode;5918getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &variableORconstRepNode);5919return CISCTransform2ArrayCopySub(trans, indexRepNode, dstIndexRepNode, indexRepNode, variableORconstRepNode);5920}59215922bool5923CISCTransform2ArrayCopyDec(TR_CISCTransformer *trans)5924{5925trans->setMEMCPYDec();5926return CISCTransform2ArrayCopy(trans);5927}592859295930/****************************************************************************************5931Corresponding Java-like Pseudo Program5932int v1, v3, end;5933byte v0[ ], v2[ ];5934while(true){5935v2[v3] = v0[v1];5936v1++;5937v3++;5938if (v1 >= end) break;5939}5940****************************************************************************************/5941TR_PCISCGraph *5942makeMemCpySpecialGraph(TR::Compilation *c, int32_t ctrl)5943{5944TR_PCISCGraph *tgt = new 
(PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpySpecial", 0, 16);5945/******************************************************************** opc id dagId #cfg #child other/pred/children */5946TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0); tgt->addNode(v1); // src array index5947TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(v3); // dst array index5948TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),12, 0, 0); tgt->addNode(vorc); // length59495950TR_PCISCNode *v5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 2); tgt->addNode(v5); // dst array index5951TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 0); tgt->addNode(idx0);5952TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(idx1);5953TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(v0); // src array base5954TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(v2); // dst array base5955TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah0); // array header5956TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 1); tgt->addNode(cmah1); // array header5957TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(cm1);5958TR_PCISCNode *mulFactor 
= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 3, 0, 0); tgt->addNode(mulFactor); // Multiply Factor5959TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);5960TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ent, v3, cm1);5961n6->getChild(0)->setIsSuccDirectlyConnected(false);5962TR_PCISCNode *iis = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n6, v5, n6->getChild(0)); tgt->addNode(iis);5963TR_PCISCNode *n0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, iis, idx1, cmah1, mulFactor);5964TR_PCISCNode *n1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, n0, v2, n0);5965TR_PCISCNode *n2 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, n1, idx0, cmah0, mulFactor);5966TR_PCISCNode *n3 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, n2, v0, n2);5967TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n3, n3); tgt->addNode(n4);5968TR_PCISCNode *nn1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, n4, n1, n4); tgt->addNode(nn1);5969TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, nn1, v1, cm1);5970TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc); tgt->addNode(n8);5971TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);59725973n8->setSuccs(ent->getSucc(0), n9);59745975n4->setIsChildDirectlyConnected();5976nn1->setIsChildDirectlyConnected();5977n8->setIsChildDirectlyConnected();59785979tgt->setEntryNode(ent);5980tgt->setExitNode(n9);5981tgt->setImportantNodes(n4, nn1, NULL, n8, 
iis);5982tgt->setNumDagIds(15);5983tgt->createInternalData(1);59845985tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);5986tgt->setTransformer(CISCTransform2ArrayCopySpecial);5987tgt->setAspects(isub|sameTypeLoadStore, existAccess, existAccess);5988tgt->setNoAspects(call|bndchk|bitop1, 0, 0);5989tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount5990tgt->setHotness(warm, false);5991tgt->setInhibitBeforeVersioning();5992return tgt;5993}59945995/****************************************************************************************5996Corresponding Java-like Pseudo Program5997int v1, v3, end;5998v0[ ], v2[ ]; // char, int, float, long, and so on5999while(true){6000v2[v3] = v0[v1];6001v1++;6002v3++;6003if (v1 >= end) break;6004}6005****************************************************************************************/6006TR_PCISCGraph *6007makeMemCpyGraph(TR::Compilation *c, int32_t ctrl)6008{6009TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpy", 0, 16);6010/************************************ opc id dagId #cfg #child other/pred/children */6011TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0); tgt->addNode(v1); // src array index6012TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(v3); // dst array index6013TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2); tgt->addNode(v4); // exit checking6014TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),11, 0, 0); tgt->addNode(vorc); // length60156016TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 0); tgt->addNode(idx0);6017TR_PCISCNode *idx1= new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(idx1);6018TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(v0); // src array base6019TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(v2); // dst array base6020TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(iall); // Multiply Factor6021TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah0); // array header6022TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 1); tgt->addNode(cmah1); // array header6023TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);6024TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6025TR_PCISCNode *n1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ent, v2, idx1, cmah1, iall);6026TR_PCISCNode *n3 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, n1, v0, idx0, cmah0, iall);6027TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n3, n3); tgt->addNode(n4);6028TR_PCISCNode *nn1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, n4, n1, n4); tgt->addNode(nn1);6029TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, nn1, v3, cm1);6030TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v1, cm1);6031TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 
2, 2, n7, v4, vorc); tgt->addNode(n8);6032TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);60336034n8->setSuccs(ent->getSucc(0), n9);60356036n4->setIsChildDirectlyConnected();6037nn1->setIsChildDirectlyConnected();6038n8->setIsChildDirectlyConnected();60396040tgt->setEntryNode(ent);6041tgt->setExitNode(n9);6042tgt->setImportantNodes(n4, nn1, iall, n8, NULL);6043tgt->setNumDagIds(15);6044tgt->createInternalData(1);60456046tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);6047tgt->setTransformer(CISCTransform2ArrayCopy);6048tgt->setAspects(isub|mul | sameTypeLoadStore, existAccess, existAccess);6049tgt->setNoAspects(call|bndchk|bitop1, 0, 0);6050tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount6051tgt->setHotness(warm, true);6052tgt->setInhibitBeforeVersioning();6053return tgt;6054}60556056/****************************************************************************************6057Corresponding Java-like Pseudo Program6058int v1, v3, end;6059v0[ ], v2[ ]; // char, int, float, long, and so on6060while(true){6061v2[v1] = v0[v1];6062v1--;6063if (v1 <= end) break;6064}6065****************************************************************************************/6066TR_PCISCGraph *6067makeMemCpyDecGraph(TR::Compilation *c, int32_t ctrl)6068{6069TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyDec", 0, 16);6070/********************************************************************* opc id dagId #cfg #child other/pred/children */6071TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0); tgt->addNode(v1); // src array index6072TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(v3); // dst array index6073TR_PCISCNode *v4 = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2); tgt->addNode(v4); // exit checking6074TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),11, 0, 0); tgt->addNode(vorc); // length60756076TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 0); tgt->addNode(idx0);6077TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(idx1);6078TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(v0); // src array base6079TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(v2); // dst array base6080TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(iall); // Multiply Factor6081TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah0); // array header6082TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 1); tgt->addNode(cmah1); // array header6083TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);6084TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6085TR_PCISCNode *n1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ent, v2, idx1, cmah1, iall);6086TR_PCISCNode *n3 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, n1, v0, idx0, cmah0, iall);6087TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, 
tgt->incNumNodes(), 1, 1, 1, n3, n3); tgt->addNode(n4);6088TR_PCISCNode *nn1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, n4, n1, n4); tgt->addNode(nn1);6089TR_PCISCNode *n6 = createIdiomIncVarInLoop(tgt, ctrl, 1, nn1, v3, cm1);6090TR_PCISCNode *n7 = createIdiomIncVarInLoop(tgt, ctrl, 1, n6, v1, cm1);6091TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v4, vorc); tgt->addNode(n8);6092TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);60936094n8->setSuccs(ent->getSucc(0), n9);60956096n4->setIsChildDirectlyConnected();6097nn1->setIsChildDirectlyConnected();6098n8->setIsChildDirectlyConnected();60996100tgt->setEntryNode(ent);6101tgt->setExitNode(n9);6102tgt->setImportantNodes(n4, nn1, iall, n8, NULL);6103tgt->setNumDagIds(15);6104tgt->createInternalData(1);61056106tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);6107tgt->setTransformer(CISCTransform2ArrayCopyDec);6108tgt->setAspects(iadd|mul | sameTypeLoadStore, existAccess, existAccess);6109tgt->setNoAspects(call|bndchk|bitop1, 0, 0);6110tgt->setMinCounts(1, 1, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount6111tgt->setHotness(warm, false);6112tgt->setInhibitBeforeVersioning();6113return tgt;6114}611561166117//////////////////////////////////////////////////////////////////////////6118//////////////////////////////////////////////////////////////////////////6119//////////////////////////////////////////////////////////////////////////6120//*****************************************************************************************6121// IL code generation for copying memory (ByteToChar or CharToByte version)6122// Input: ImportantNodes(0) - array load6123// ImportantNodes(1) - array 
store6124//*****************************************************************************************6125bool6126CISCTransform2ArrayCopyB2CorC2B(TR_CISCTransformer *trans)6127{6128TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");6129TR::Node *trNode;6130TR::TreeTop *trTreeTop;6131TR::Block *block;6132TR_CISCGraph *P = trans->getP();6133List<TR_CISCNode> *P2T = trans->getP2T();6134TR::Compilation *comp = trans->comp();61356136bool bigEndian = comp->target().cpu.isBigEndian();61376138TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");61396140TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");6141if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;61426143trans->findFirstNode(&trTreeTop, &trNode, &block);6144if (!block) return false; // cannot find61456146if (isLoopPreheaderLastBlockInMethod(comp, block))6147{6148traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2CorC2B due to null TT - might be a preheader in last block of method\n");6149return false;6150}61516152TR::Block *target = trans->analyzeSuccessorBlock();6153// Currently, it allows only a single successor.6154if (!target) return false;61556156TR::Node *indexRepNode, *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode;6157getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode);6158TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();6159TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();6160TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();6161TR_ASSERT(indexVarSymRef == exitVarSymRef || dstIndexVarSymRef == exitVarSymRef, "error!");6162TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error!");61636164TR::Node * inputMemNode = 
trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();6165TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node->duplicateTree();6166TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node;6167TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();61686169// check whether the transformation is valid6170//6171if (outputMemNode->getOpCode().isShort())6172{6173TR::Node * iorNode = trans->getP2TRepInLoop(P->getImportantNode(2))->getHeadOfTrNodeInfo()->_node;6174if (!checkByteToChar(comp, iorNode, inputNode, bigEndian))6175{6176dumpOptDetails(comp, "byte loads in [%p] are not compatible with endian-ness %d\n", iorNode, bigEndian);6177return false;6178}6179}6180inputNode = inputNode->duplicateTree();61816182TR::Node * exitVarNode = createLoad(exitVarRepNode);6183variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);6184TR::Node * lengthNode = createOP2(comp, TR::isub,6185variableORconstRepNode,6186exitVarNode);6187TR::Node * updateTree1, *updateTree2;6188TR::Node * c2 = TR::Node::create(exitVarRepNode, TR::iconst, 0, 2);6189bool isExitVarChar;6190isExitVarChar = (dstIndexVarSymRef == exitVarSymRef) ? outputMemNode->getSize() == 2 :6191inputMemNode->getSize() == 2;6192//there are 2 scenarios6193// dstIndexVarSymRef is a char (ie. outputMemNode size == 2, consequently inputMemNode == 1 and indexVarSymRef is a byte)6194// or6195// indexVarSymRef is a char (ie. inputMemNode size == 2, consequently outputMemNode == 1 and dstIndexVarSymRef is a byte)6196// in each of these cases, its possible that each induction variable could be the loop controlling variable (ie. 
exitVarSymRef) ; thereby creating 4 possible conditions6197//6198if (outputMemNode->getSize() == 2) // type is a byteToChar copy6199{6200// for a byteToChar copy, the length needs to be expressed in number of bytes6201if (dstIndexVarSymRef == exitVarSymRef)6202{6203// dstIndex is the char array's index and length should be multiplied by 2 because the6204// arraycopy length should be expressed in bytes6205//6206lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);6207updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);6208updateTree2 = TR::Node::createStore(dstIndexVarSymRef, variableORconstRepNode);6209}6210else6211{6212// byte array's index is the loop controlling variable. this means length is already in bytes6213// nothing to do for length6214updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);6215updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef,6216TR::Node::create(TR::idiv, 2, lengthNode, c2), trNode);6217}6218}6219else // type is a charToByte copy6220{6221// For a charToByte copy, the length needs to be expressed in number of bytes6222if (dstIndexVarSymRef == exitVarSymRef)6223{6224// dstIndex is the byte array's index and length is already in bytes.6225// index is the char array's index and needs to be adjusted by # of chars (byte / 2).6226updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, TR::Node::create(TR::idiv, 2, lengthNode, c2), trNode);6227updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);6228}6229else6230{6231// char array's index is the loop controlling variable, so length needs to be adjusted by 2.6232// index is the char array's index and should be added to original length value.6233// dstIndex is the byte array's index and needs to be added to 2 * length (or updated lengthNode).6234updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, 
indexVarSymRef, lengthNode, trNode);6235lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);6236updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);6237}6238}62396240#if 06241// Prepare nodes for byte length and induction variable updates6242if (isExitVarChar) // The variable that checks the exit condition is for a 2-byte array.6243{6244TR::Node * diff = lengthNode;6245lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);6246// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)6247updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);6248updateTree2 = TR::Node::createStore(dstIndexVarSymRef, variableORconstRepNode);6249}6250else6251{6252///TR::Node * div2 = TR::Node::create(TR::idiv, 2, lengthNode, c2);6253lengthNode = TR::Node::create(TR::idiv, 2, lengthNode, c2);6254///lengthNode = TR::Node::create(TR::imul, 2, div2, c2); // to make the length even6255// lengthNode has the byte size, and div2 has the char-based size (that is, lengthNode = div2 * 2)6256updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);6257updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef,6258TR::Node::create(TR::imul, 2, lengthNode, c2), trNode);6259}6260#endif62616262lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);62636264// Prepare the arraycopy node6265TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);6266arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());6267arraycopy->setForwardArrayCopy(true);6268arraycopy->setArrayCopyElementType(TR::Int8);62696270TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);6271TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);6272TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);62736274// 
Insert nodes and maintain the CFG6275TR::TreeTop *last;6276last = trans->removeAllNodes(trTreeTop, block->getExit());6277last->join(block->getExit());6278block = trans->insertBeforeNodes(block);6279last = block->getLastRealTreeTop();6280last->join(trTreeTop);6281trTreeTop->setNode(topArraycopy);6282trTreeTop->join(updateTreeTop1);6283updateTreeTop1->join(updateTreeTop2);6284updateTreeTop2->join(block->getExit());62856286trans->insertAfterNodes(block);62876288trans->setSuccessorEdge(block, target);6289return true;6290}62916292/****************************************************************************************6293Corresponding Java-like Pseudo Program (for big endian)6294int v1, v3, end;6295byte v0[ ];6296char v2[ ];6297while(true){6298v2[v3] = ((v0[v1] & 0xFF) << 8) | (v0[v1+1] & 0xFF))6299v1+=2;6300v3++;6301if (v3 >= end) break;6302}63036304Note 1: This idiom also supports little endian.6305****************************************************************************************/6306TR_PCISCGraph *6307makeMemCpyByteToCharGraph(TR::Compilation *c, int32_t ctrl)6308{6309TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyByteToChar", 0, 16);6310bool isBigEndian = (ctrl & CISCUtilCtl_BigEndian);6311/************************************ opc id dagId #cfg #child other/pred/children */6312TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(v1); // src array index6313TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1); tgt->addNode(v3); // dst array index6314TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 2); tgt->addNode(v4); // exit checking6315TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),12, 0, 0); tgt->addNode(vorc); // 
length6316TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v0); // src array base6317TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v2); // dst array base6318TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(cmah); // array header6319TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+16320TR_PCISCNode *ah1 = isBigEndian ? cmah : cmah1;6321TR_PCISCNode *ah2 = isBigEndian ? cmah1 : cmah;6322TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1); tgt->addNode(cm1);6323TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2); tgt->addNode(cm2);6324TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size6325TR_PCISCNode *c256= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 256); tgt->addNode(c256);6326TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size6327TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6328TR_PCISCNode *ns0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ent, v3, cmah, c2);6329TR_PCISCNode *ns1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns0, v2, ns0);6330TR_PCISCNode *nl00;6331TR_PCISCNode *nl10;6332if (ctrl & CISCUtilCtl_64Bit)6333{6334nl00= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2l, TR::Int64, tgt->incNumNodes(), 1, 1, 1, ns1, v1); tgt->addNode(nl00);6335nl10= 
createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, nl00, nl00, ah1, c1);6336}6337else6338{6339nl00= v1;6340nl10= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, ns1, nl00, ah1, c1);6341}6342TR_PCISCNode *nl11= createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl10, v0, nl10);6343TR_PCISCNode *nl12= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl11, nl11); tgt->addNode(nl12);6344TR_PCISCNode *nl13= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl12, nl12); tgt->addNode(nl13);6345TR_PCISCNode *nl14= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl13, nl13, c256); tgt->addNode(nl14);6346TR_PCISCNode *nl20= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, nl14, nl00, ah2, c1);6347TR_PCISCNode *nl21= createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl20, v0, nl20);6348TR_PCISCNode *nl22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl21, nl21); tgt->addNode(nl22);6349TR_PCISCNode *nl23= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl22, nl22); tgt->addNode(nl23);6350TR_PCISCNode *ns2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl23, nl14, nl23); tgt->addNode(ns2);6351TR_PCISCNode *ns3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, tgt->incNumNodes(), 1, 1, 1, ns2, ns2); tgt->addNode(ns3);6352TR_PCISCNode *ns4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sstorei, TR::Int16, tgt->incNumNodes(), 1, 1, 2, ns3, ns1, ns3); tgt->addNode(ns4);6353TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns4, v1, cm2);6354TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v3, cm1);6355TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), 
TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v4, vorc); tgt->addNode(n8);6356TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);63576358n8->setSuccs(ent->getSucc(0), n9);6359n8->setIsChildDirectlyConnected();63606361tgt->setEntryNode(ent);6362tgt->setExitNode(n9);6363tgt->setImportantNodes(nl12, ns4, ns2);6364tgt->setNumDagIds(16);6365tgt->createInternalData(1);63666367tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);6368tgt->setTransformer(CISCTransform2ArrayCopyB2CorC2B);6369tgt->setAspects(isub|mul|bitop1, ILTypeProp::Size_1, ILTypeProp::Size_2);6370tgt->setNoAspects(call|bndchk, 0, 0);6371tgt->setMinCounts(1, 2, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount6372tgt->setHotness(warm, false);6373tgt->setInhibitBeforeVersioning();6374return tgt;6375}637663776378//////////////////////////////////////////////////////////////////////////6379//////////////////////////////////////////////////////////////////////////6380//////////////////////////////////////////////////////////////////////////638163826383/****************************************************************************************6384Corresponding Java-like Pseudo Program (for big endian)6385int v1, v3, end;6386char v0[ ];6387byte v2[ ];6388while(true){6389v2[v3] = (byte)(v0[v1] >> 8);6390v2[v3+1] = (byte)(v0[v1] & 0xff);6391v1++;6392v3+=2;6393if (v1 >= end) break;6394}63956396Note 1: This idiom also supports little endian.6397****************************************************************************************/6398TR_PCISCGraph *6399makeMemCpyCharToByteGraph(TR::Compilation *c, int32_t ctrl)6400{6401TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyCharToByte", 0, 16);6402/************************************ opc id dagId #cfg #child other/pred/children */6403TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, 
TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(v1); // src array index6404TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1); tgt->addNode(v3); // dst array index6405TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 2); tgt->addNode(v4); // exit checking6406TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),12, 0, 0); tgt->addNode(vorc); // length6407TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v0); // src array base6408TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v2); // dst array base6409TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(cmah); // array header6410TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+16411TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1); tgt->addNode(cm1);6412TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2); tgt->addNode(cm2);6413TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size6414TR_PCISCNode *c8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8); tgt->addNode(c8);6415TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size6416TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, 
tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6417TR_PCISCNode *ns10= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ent, v3, cmah, c1);6418TR_PCISCNode *ns11= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns10, v2, ns10);6419TR_PCISCNode *nl0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ns11, v1, cmah, c2);6420TR_PCISCNode *nl1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl0, v0, nl0);6421TR_PCISCNode *nl2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sloadi, TR::Int16, tgt->incNumNodes(), 1, 1, 1, nl1, nl1); tgt->addNode(nl2);6422TR_PCISCNode *cvt0, *cvt1;6423if ((ctrl & CISCUtilCtl_BigEndian))6424{6425TR_PCISCNode *nc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl2, nl2); tgt->addNode(nc2i);6426TR_PCISCNode *ns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8); tgt->addNode(ns22);6427cvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22); tgt->addNode(cvt0);6428}6429else6430{6431cvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl2, nl2); tgt->addNode(cvt0);6432}6433TR_PCISCNode *ns14= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt0, ns11, cvt0); tgt->addNode(ns14);6434TR_PCISCNode *ns20= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, ns14, ns10->getChild(0)->getChild(0), cmah1, c1);6435TR_PCISCNode *ns21= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns20, v2, ns20);6436if ((ctrl & CISCUtilCtl_BigEndian))6437{6438cvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns21, nl2); tgt->addNode(cvt1);6439}6440else6441{6442TR_PCISCNode *nc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ns21, nl2); 
tgt->addNode(nc2i);6443TR_PCISCNode *ns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8); tgt->addNode(ns22);6444cvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22); tgt->addNode(cvt1);6445}6446TR_PCISCNode *ns24= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt1, ns21, cvt1); tgt->addNode(ns24);6447TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns24, v3, cm2);6448TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v1, cm1);6449TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v4, vorc); tgt->addNode(n8);6450TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);64516452n8->setSuccs(ent->getSucc(0), n9);64536454n8->setIsChildDirectlyConnected();64556456tgt->setEntryNode(ent);6457tgt->setExitNode(n9);6458tgt->setImportantNodes(nl2, ns14);6459tgt->setNumDagIds(16);6460tgt->createInternalData(1);64616462tgt->setSpecialCareNode(0, cvt0); // conversion (possibly i2b)6463tgt->setSpecialCareNode(1, cvt1); // conversion (possibly i2b)6464tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);64656466tgt->setTransformer(CISCTransform2ArrayCopyB2CorC2B);6467tgt->setAspects(isub|mul|shr, ILTypeProp::Size_2, ILTypeProp::Size_1);6468tgt->setNoAspects(call|bndchk, 0, 0);6469tgt->setMinCounts(1, 1, 2); // minimum ifCount, indirectLoadCount, indirectStoreCount6470tgt->setHotness(warm, false);6471tgt->setInhibitBeforeVersioning();6472return 
tgt;6473}647464756476//////////////////////////////////////////////////////////////////////////6477//////////////////////////////////////////////////////////////////////////6478//////////////////////////////////////////////////////////////////////////6479//*****************************************************************************************6480// IL code generation for copying memory (ByteToChar or CharToByte version)6481// Input: ImportantNode(0) - array load6482// ImportantNode(1) - array store6483// ImportantNode(2) - indirect load of the array index for the array load6484//*****************************************************************************************6485bool6486CISCTransform2ArrayCopyB2CBndchk(TR_CISCTransformer *trans)6487{6488TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");6489TR::Node *trNode;6490TR::TreeTop *trTreeTop;6491TR::Block *block;6492TR_CISCGraph *P = trans->getP();6493List<TR_CISCNode> *P2T = trans->getP2T();6494TR::Compilation *comp = trans->comp();64956496TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");6497if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;64986499trans->findFirstNode(&trTreeTop, &trNode, &block);6500if (!block) return false; // cannot find65016502if (isLoopPreheaderLastBlockInMethod(comp, block))6503{6504traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2CBndchk due to null TT - might be a preheader in last block of method\n");6505return false;6506}65076508TR::Block *target = trans->analyzeSuccessorBlock();6509// Currently, it allows only a single successor.6510if (!target) return false;65116512TR::Node *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode, *arrayLenRepNode;6513getP2TTrRepNodes(trans, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode, &arrayLenRepNode);6514TR::SymbolReference * dstIndexVarSymRef = 
dstIndexRepNode->getSymbolReference();6515TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();6516if (!trans->analyzeArrayIndex(dstIndexVarSymRef))6517{6518if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",dstIndexRepNode);6519return false;6520}65216522TR::Node * inputMemNode = trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node;6523TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;6524TR::Node * indexLoadNode = trans->getP2TRepInLoop(P->getImportantNode(2))->getHeadOfTrNodeInfo()->_node;6525TR_ASSERT(inputMemNode && outputMemNode && indexLoadNode, "error");6526TR::Node * inputNode = inputMemNode->getChild(0)->duplicateTree();6527TR::Node * outputNode = outputMemNode->getChild(0)->duplicateTree();65286529TR::Node * exitVarNode = createLoad(exitVarRepNode);6530variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);6531TR::Node * lengthNode = createOP2(comp, TR::isub,6532variableORconstRepNode,6533exitVarNode);6534TR::Node * updateTree1, *updateTree2, *updateTree3;6535TR::Node * c2 = TR::Node::create(exitVarRepNode, TR::iconst, 0, 2);6536bool isExitVarChar = (outputMemNode->getSize() == 2);6537// Prepare nodes for byte length and induction variable updates6538indexLoadNode = indexLoadNode->duplicateTree();6539TR::Node * endIndex;6540if (isExitVarChar) // The variable that checks the exit condition is for a 2-byte array.6541{6542TR::Node * diff = lengthNode;6543lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);6544// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)6545endIndex = createOP2(comp, TR::iadd, indexLoadNode, lengthNode);6546updateTree1 = TR::Node::createWithSymRef(TR::istorei, 2, 2, indexLoadNode->getChild(0), endIndex, indexLoadNode->getSymbolReference());6547updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, diff, 
trNode);6548}6549else6550{6551TR::Node * div2 = TR::Node::create(TR::idiv, 2, lengthNode, c2);6552lengthNode = TR::Node::create(TR::imul, 2, div2, c2); // to make the length even6553// lengthNode has the byte size, and div2 has the char-based size (that is, lengthNode = div2 * 2)6554endIndex = createOP2(comp, TR::iadd, indexLoadNode, lengthNode);6555updateTree1 = TR::Node::createWithSymRef(TR::istorei, 2, 2, indexLoadNode->getChild(0), endIndex, indexLoadNode->getSymbolReference());6556updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, div2, trNode);6557}6558updateTree3 = TR::Node::createStore(exitVarSymRef, variableORconstRepNode);65596560lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);6561// Prepare the arraycopy node6562TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);6563arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());6564arraycopy->setForwardArrayCopy(true);6565arraycopy->setArrayCopyElementType(TR::Int8);65666567TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);6568TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);6569TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);6570TR::TreeTop * updateTreeTop3 = TR::TreeTop::create(comp, updateTree3);65716572// Insert nodes and maintain the CFG6573List<TR::Node> guardList(comp->trMemory());6574guardList.add(TR::Node::createif(TR::ifiucmpgt, endIndex->duplicateTree(), createLoad(arrayLenRepNode)));6575guardList.add(TR::Node::createif(TR::ifiucmpge, indexLoadNode->duplicateTree(), createLoad(arrayLenRepNode)));6576block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);65776578block = trans->insertBeforeNodes(block);65796580block->append(TR::TreeTop::create(comp, 
topArraycopy));6581block->append(updateTreeTop1);6582block->append(updateTreeTop2);6583block->append(updateTreeTop3);65846585block = trans->insertAfterNodes(block);65866587trans->setSuccessorEdge(block, target);6588return true;6589}65906591/****************************************************************************************6592Corresponding Java-like Pseudo Program6593int indIndex2, end;6594byte v0[ ];6595char v2[ ];6596while(true){6597v2[v1++] = ((v0[this.indeIndex1++] & 0xFF) << 8) + (v0[this.indIndex1++] & 0xFF))6598v3++;6599if (v3 >= end) break;6600}66016602Note 1: One of target methods is com/ibm/rmi/iiop/CDRInputStream.read_wstring().6603****************************************************************************************/6604TR_PCISCGraph *6605makeMemCpyByteToCharBndchkGraph(TR::Compilation *c, int32_t ctrl)6606{6607TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyByteToCharBndchk", 0, 16);6608bool isBigEndian = (ctrl & CISCUtilCtl_BigEndian);6609/******************************************************************* opc id dagId #cfg #child other/pred/children */6610TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 17, 0, 0, 0); tgt->addNode(v3); // dst array index6611TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 1); tgt->addNode(v4); // exit checking6612TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),15, 0, 0); tgt->addNode(vorc); // length6613TR_PCISCNode *alen= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),14, 0, 0); tgt->addNode(alen); // arraylength6614TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v0); // src array base6615TR_PCISCNode *v2 = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // dst array base6616TR_PCISCNode *ths = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 2); tgt->addNode(ths); // this object6617TR_PCISCNode *aidx= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),10, 0, 0, 0); tgt->addNode(aidx);6618TR_PCISCNode *cmah= createIdiomArrayHeaderConst (tgt, ctrl, tgt->incNumNodes(), 9, c);// array header6619TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+16620TR_PCISCNode *ah1 = isBigEndian ? cmah : cmah1;6621TR_PCISCNode *ah2 = isBigEndian ? cmah1 : cmah;6622TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1); tgt->addNode(cm1);6623TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2); tgt->addNode(cm2);6624TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size6625TR_PCISCNode *c256= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 256); tgt->addNode(c256);66266627TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size6628TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6629TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iloadi, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ent, ths); tgt->addNode(idx0);6630TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, idx0, idx0, cm1); tgt->addNode(idx1);6631TR_PCISCNode *idx2= new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, tgt->incNumNodes(), 1, 1, 2, idx1, ths, idx1); tgt->addNode(idx2);6632TR_PCISCNode *idx3= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, idx2, alen,idx0); tgt->addNode(idx3);6633TR_PCISCNode *idx4= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, idx3, idx0, cm2); tgt->addNode(idx4);6634TR_PCISCNode *idx5= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, tgt->incNumNodes(), 1, 1, 2, idx4, ths, idx4); tgt->addNode(idx5);6635TR_PCISCNode *idx6= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, idx5, alen,idx1); tgt->addNode(idx6);6636TR_PCISCNode *ns0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, idx6, aidx, cmah, c2);6637TR_PCISCNode *ns1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns0, v2, ns0);6638TR_PCISCNode *nl00;6639TR_PCISCNode *nl10;6640if (ctrl & CISCUtilCtl_64Bit)6641{6642nl00= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2l, TR::Int64, tgt->incNumNodes(), 1, 1, 1, ns1, idx0); tgt->addNode(nl00);6643nl10= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, nl00, nl00, ah1, c1);6644}6645else6646{6647nl00= idx0;6648nl10= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, ns1, nl00, ah1, c1);6649}6650TR_PCISCNode *nl11= createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl10, v0, nl10);6651TR_PCISCNode *nl12= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl11, nl11); tgt->addNode(nl12);6652TR_PCISCNode *nl13= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl12, nl12); tgt->addNode(nl13);6653TR_PCISCNode *nl14= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl13, nl13, c256); tgt->addNode(nl14);6654TR_PCISCNode *nl20= 
createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, nl14, nl00, ah2, c1);6655TR_PCISCNode *nl21= createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl20, v0, nl20);6656TR_PCISCNode *nl22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl21, nl21); tgt->addNode(nl22);6657TR_PCISCNode *nl23= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl22, nl22); tgt->addNode(nl23);6658TR_PCISCNode *ns2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl23, nl14, nl23); tgt->addNode(ns2);6659TR_PCISCNode *ns3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, tgt->incNumNodes(), 1, 1, 1, ns2, ns2); tgt->addNode(ns3);6660TR_PCISCNode *ns4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sstorei, TR::Int16, tgt->incNumNodes(), 1, 1, 2, ns3, ns1, ns3); tgt->addNode(ns4);6661TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns4, v3, cm1);6662TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v4, cm1);6663TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v4, vorc); tgt->addNode(n8);6664TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);66656666n8->setSuccs(ent->getSucc(0), n9);6667n8->setIsChildDirectlyConnected();6668idx3->setIsChildDirectlyConnected();6669idx6->setIsChildDirectlyConnected();66706671tgt->setEntryNode(ent);6672tgt->setExitNode(n9);6673tgt->setImportantNodes(isBigEndian ? 
nl12 : nl22, ns4, idx0);6674tgt->setNumDagIds(18);6675tgt->createInternalData(1);66766677tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);6678tgt->setTransformer(CISCTransform2ArrayCopyB2CBndchk);6679tgt->setAspects(isub|iadd|mul|bndchk|sameTypeLoadStore, ILTypeProp::Size_1|ILTypeProp::Size_4, ILTypeProp::Size_2|ILTypeProp::Size_4);6680tgt->setNoAspects(call, 0, 0);6681tgt->setMinCounts(1, 3, 3); // minimum ifCount, indirectLoadCount, indirectStoreCount6682tgt->setHotness(warm, false);6683tgt->setInhibitBeforeVersioning();6684return tgt;6685}6686668766886689//////////////////////////////////////////////////////////////////////////6690//////////////////////////////////////////////////////////////////////////6691//////////////////////////////////////////////////////////////////////////6692//*****************************************************************************************6693// IL code generation for copying memory (ByteToChar or CharToByte version)6694// Input: ImportantNode(0) - array load in the little endian path6695// ImportantNode(1) - array store in the little endian path6696// ImportantNode(2) - array load in the big endian path6697// ImportantNode(3) - array store in the big endian path6698// ImportantNode(4) - if statement of the flag checking6699// ImportantNode(5) - if statement of back edge6700//*****************************************************************************************6701bool6702CISCTransform2ArrayCopyC2BMixed(TR_CISCTransformer *trans)6703{6704TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");6705TR::Node *trNode;6706TR::TreeTop *trTreeTop;6707TR::Block *block;6708TR_CISCGraph *P = trans->getP();6709List<TR_CISCNode> *P2T = trans->getP2T();6710TR::Compilation *comp = trans->comp();67116712TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");6713if (!trans->isEmptyAfterInsertionIdiomList(0) || 
!trans->isEmptyAfterInsertionIdiomList(1)) return false;67146715trans->findFirstNode(&trTreeTop, &trNode, &block);6716if (!block) return false; // cannot find67176718if (isLoopPreheaderLastBlockInMethod(comp, block))6719{6720traceMsg(comp, "Bailing CISCTransform2ArrayCopyC2BMixed due to null TT - might be a preheader in last block of method\n");6721return false;6722}67236724TR::Block *target = trans->analyzeSuccessorBlock();6725// Currently, it allows only a single successor.6726if (!target) return false;67276728TR::Node *indexRepNode, *dstIndexRepNode, *arrayLenRepNode;6729getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &arrayLenRepNode);6730TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();6731TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();6732if (trans->countGoodArrayIndex(indexVarSymRef) == 0)6733{6734if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",indexRepNode);6735return false;6736}6737TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error");6738if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0)6739{6740if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. 
%x\n",dstIndexRepNode);6741return false;6742}67436744TR_CISCNode * BEloadMem = trans->getP2TInLoopIfSingle(P->getImportantNode(2));6745TR_CISCNode * BEstoreMem = trans->getP2TInLoopIfSingle(P->getImportantNode(3));6746TR_CISCNode * LEloadMem = trans->getP2TRepInLoop(P->getImportantNode(0), BEloadMem);6747TR_CISCNode * LEstoreMem = trans->getP2TInLoopIfSingle(P->getImportantNode(1));6748TR_CISCNode * flagIf = trans->getP2TInLoopIfSingle(P->getImportantNode(4));6749TR_CISCNode * backIf = trans->getP2TInLoopIfSingle(P->getImportantNode(5));67506751if (DISPTRACE(trans)) traceMsg(comp, "All parameters: %x %x %x %x %x %x\n",6752LEloadMem, LEstoreMem, BEloadMem, BEstoreMem, flagIf, backIf);6753if (!LEloadMem || !LEstoreMem || !BEloadMem || !BEstoreMem || !flagIf || !backIf) return false;6754if (flagIf->getOpcode() != TR::ificmpeq && flagIf->getOpcode() != TR::ificmpne) return false;67556756TR_ASSERT(searchNodeInBlock(flagIf->getSucc(1), LEloadMem) ||6757searchNodeInBlock(flagIf->getSucc(1), BEloadMem), "error");6758TR_ASSERT(!searchNodeInBlock(flagIf->getSucc(1), LEloadMem) ||6759!searchNodeInBlock(flagIf->getSucc(1), BEloadMem), "error");6760bool LEalongJumpPath = searchNodeInBlock(flagIf->getSucc(1), LEloadMem);6761bool isBig = comp->target().cpu.isBigEndian();6762if (!isBig) LEalongJumpPath = !LEalongJumpPath;6763if (DISPTRACE(trans)) traceMsg(comp, "LEalongJumpPath = %d\n",LEalongJumpPath);67646765TR::Block *blockBE = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency()/2, block);6766TR::Block *blockLE = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency()/2, block);6767TR::Block *blockAfter = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);67686769TR::Node * LEloadMemNode = LEloadMem->getHeadOfTrNode();6770TR::Node * LEstoreMemNode = LEstoreMem->getHeadOfTrNode();6771TR::Node * BEloadMemNode = BEloadMem->getHeadOfTrNode();6772TR::Node * BEstoreMemNode = BEstoreMem->getHeadOfTrNode();6773TR::Node * flagIfNode = 
flagIf->getHeadOfTrNode()->duplicateTree();6774TR::Node * backIfNode = backIf->getHeadOfTrNode();67756776TR::Node * variableORconstRepNode = backIfNode->getChild(1)->duplicateTree();6777indexRepNode = createLoad(indexRepNode);6778TR::Node * lengthNode = createOP2(comp, TR::isub, variableORconstRepNode, indexRepNode);6779TR::Node * c2 = TR::Node::create(indexRepNode, TR::iconst, 0, 2);6780TR::Node * diff = lengthNode;6781lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);6782// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)6783TR::Node * indexLoadNode = backIfNode->getChild(0)->duplicateTree();67846785//6786// Big Endian Path6787//6788TR::Node * BELoadAddrTree = BEloadMemNode->getChild(0)->duplicateTree();6789TR::Node * BEStoreAddrTree = BEstoreMemNode->getChild(0)->duplicateTree();6790TR::Node * BEMemCpy = TR::Node::createArraycopy(BELoadAddrTree, BEStoreAddrTree, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));6791BEMemCpy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());6792BEMemCpy->setForwardArrayCopy(true);6793BEMemCpy->setArrayCopyElementType(TR::Int8);6794blockBE->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, BEMemCpy)));6795TR::Node * updateTree1 = TR::Node::createStore(indexVarSymRef, variableORconstRepNode->duplicateTree());6796TR::Node * updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);6797blockBE->append(TR::TreeTop::create(comp, updateTree2));6798blockBE->append(TR::TreeTop::create(comp, updateTree1));6799blockBE->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, blockAfter->getEntry())));68006801//6802// Little Endian Path6803//6804TR::Node * LELoadTree = LEloadMemNode->duplicateTree();6805TR::Node * LEStoreAddrTree = LEstoreMemNode->getChild(0)->duplicateTree();6806if (comp->cg()->supportsByteswap())6807{6808TR::Node * LEReverseStore = 
TR::Node::createWithSymRef(TR::sstorei, 2, 2,6809LEStoreAddrTree,6810TR::Node::create(TR::sbyteswap, 1, LELoadTree),6811comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));6812blockLE->append(TR::TreeTop::create(comp, LEReverseStore));6813}6814else6815{6816TR::Node *replaceParent = NULL;6817int childNum = -1;6818bool ret;6819TR::Node * LEStoreAddrTree2 = LEStoreAddrTree->duplicateTree();6820TR::Node *arrayHeaderConst = createArrayHeaderConst(comp, comp->target().is64Bit(), trNode);6821ret = trans->searchNodeInTrees(isBig ? LEStoreAddrTree2 : LEStoreAddrTree,6822arrayHeaderConst, &replaceParent, &childNum);6823TR_ASSERT(ret, "error");6824if (comp->target().is64Bit())6825{6826arrayHeaderConst->setLongInt(arrayHeaderConst->getLongInt()-1);6827}6828else6829{6830arrayHeaderConst->setInt(arrayHeaderConst->getInt()-1);6831}6832replaceParent->setAndIncChild(childNum, arrayHeaderConst);68336834TR::Node * LEc2b0 = TR::Node::create(TR::s2b, 1, LELoadTree);6835TR::Node * LEstore0 = TR::Node::createWithSymRef(TR::bstorei, 2, 2, LEStoreAddrTree, LEc2b0,6836comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));6837blockLE->append(TR::TreeTop::create(comp, LEstore0));68386839TR::Node * LEand1 = createOP2(comp, TR::iushr, LELoadTree, TR::Node::create(indexRepNode, TR::iconst, 0, 0x8));6840TR::Node * LEi2b1 = TR::Node::create(TR::i2b, 1, LEand1);6841TR::Node * LEstore1 = TR::Node::createWithSymRef(TR::bstorei, 2, 2, LEStoreAddrTree2, LEi2b1,6842comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));6843blockLE->append(TR::TreeTop::create(comp, LEstore1));6844}6845TR::Node * c1 = TR::Node::create(indexRepNode, TR::iconst, 0, 1);6846TR::Node * indexUpdateLE = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, c1, trNode);6847blockLE->append(TR::TreeTop::create(comp, indexUpdateLE));6848blockLE->append(TR::TreeTop::create(comp, createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, c2->duplicateTree(), 
trNode)));6849TR::Node *backIfLE = TR::Node::createif(TR::ificmplt, indexUpdateLE->getChild(0), variableORconstRepNode->duplicateTree(),6850blockLE->getEntry());6851blockLE->append(TR::TreeTop::create(comp, backIfLE));68526853// after these two paths6854//6855// Currently, blockAfter has no nodes.6856//68576858//6859// Insert nodes and maintain the CFG6860List<TR::Node> guardList(comp->trMemory());6861guardList.add(TR::Node::createif(TR::ifiucmpgt, updateTree2->getChild(0)->duplicateTree(), createLoad(arrayLenRepNode)));6862guardList.add(TR::Node::createif(TR::ifiucmpge, createLoad(dstIndexRepNode), createLoad(arrayLenRepNode)));6863block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);6864block = trans->insertBeforeNodes(block);6865flagIfNode->setBranchDestination(blockLE->getEntry());6866if (!LEalongJumpPath) TR::Node::recreate(flagIfNode, flagIfNode->getOpCode().getOpCodeForReverseBranch());6867block->append(TR::TreeTop::create(comp, flagIfNode));68686869TR::CFG *cfg = comp->getFlowGraph();6870cfg->setStructure(NULL);6871TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();6872if (orgNextTreeTop)6873{6874cfg->insertBefore(blockAfter, orgNextTreeTop->getNode()->getBlock());6875}6876else6877{6878cfg->addNode(blockAfter);6879}6880cfg->insertBefore(blockLE, blockAfter);6881cfg->insertBefore(blockBE, blockLE);6882cfg->join(block, blockBE);68836884blockAfter = trans->insertAfterNodes(blockAfter);68856886trans->setSuccessorEdges(block, blockBE, blockLE);6887trans->setSuccessorEdge(blockAfter, target);68886889return true;6890}68916892/****************************************************************************************6893Corresponding Java-like Pseudo Program (for big endian)6894char v0[ ];6895byte v2[ ];6896while (true)6897{6898if(flag)6899{6900v2[i++] = (byte)(v0[j] & 0xff);6901v2[i++] = (byte)(v0[j] >>> 8 & 0xff);6902}6903else6904{6905v2[i++] = (byte)(v0[j] >>> 8 & 0xff);6906v2[i++] = (byte)(v0[j] & 
0xff);6907}6908j++;6909if (j >= len) break;6910}69116912Note 1: One of target methods is com/ibm/rmi/iiop/CDROutputStream.read_wstring().6913****************************************************************************************/6914TR_PCISCGraph *6915makeMEMCPYChar2ByteMixedGraph(TR::Compilation *c, int32_t ctrl)6916{6917TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYChar2ByteMixed", 0, 16);6918/******************************************************************** opc id dagId #cfg #child other/pred/children */6919TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 18, 0, 0, 0); tgt->addNode(v1); // src array index6920TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 17, 0, 0, 1); tgt->addNode(v3); // dst array index6921TR_PCISCNode *alen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),16, 0, 0); tgt->addNode(alen); // arraylength6922TR_PCISCNode *vorc = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),15, 0, 0); tgt->addNode(vorc); // length6923TR_PCISCNode *flag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 2); tgt->addNode(flag); // flag6924TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v0); // src array base6925TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // dst array base6926TR_PCISCNode *aidx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(aidx0);6927TR_PCISCNode *cmah = createIdiomArrayHeaderConst (tgt, ctrl, tgt->incNumNodes(), 10, c);// array header6928TR_PCISCNode 
*cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 9, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+16929TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 8, 0, 0, -1); tgt->addNode(cm1);6930TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -2); tgt->addNode(cm2);6931TR_PCISCNode *c0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(c0);6932TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size6933TR_PCISCNode *c8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8); tgt->addNode(c8);6934TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size6935TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);6936TR_PCISCNode *fchk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ent, flag, c0); tgt->addNode(fchk);69376938// big endian path6939TR_PCISCNode *bbck0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, fchk, alen, v3); tgt->addNode(bbck0);6940TR_PCISCNode *bld0 = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, bbck0, v0, aidx0, cmah, c2);6941TR_PCISCNode *bnc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, bld0, bld0); tgt->addNode(bnc2i);6942TR_PCISCNode *bns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iushr, TR::Int32, tgt->incNumNodes(), 1, 1, 2, bnc2i, bnc2i, c8); tgt->addNode(bns22);6943//TR_PCISCNode *bcvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iu2i, 
TR::Int32, tgt->incNumNodes(), 1, 1, 1, bns22, bns22); tgt->addNode(bcvt0);6944TR_PCISCNode *bns0 = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_ChildDirectConnected, 1, bns22, TR::bstorei, TR::Int8, v2, v3, cmah, c1, bns22);6945TR_PCISCNode *ba1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, bns0, v3, cm1); tgt->addNode(ba1);6946TR_PCISCNode *bbck1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ba1, alen, ba1); tgt->addNode(bbck1);6947TR_PCISCNode *bns10= createIdiomArrayAddressInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, bbck1, v2, v3, cmah1, c1);6948TR_PCISCNode *bcvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, bns10, bld0); tgt->addNode(bcvt1);6949TR_PCISCNode *bns11= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, bcvt1, bns10, bcvt1); tgt->addNode(bns11);6950TR_PCISCNode *bn6 = createIdiomDecVarInLoop(tgt, ctrl, 1, bns11, v3, cm2);69516952// little endian path6953TR_PCISCNode *lbck0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, fchk, alen, v3); tgt->addNode(lbck0);6954TR_PCISCNode *lld0 = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, lbck0, v0, aidx0, cmah, c2);6955TR_PCISCNode *lns10= createIdiomArrayAddressInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, lld0, v2, v3, cmah, c1);6956TR_PCISCNode *lcvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, lns10, lld0); tgt->addNode(lcvt1);6957TR_PCISCNode *lns11= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, lcvt1, lns10, lcvt1); tgt->addNode(lns11);6958TR_PCISCNode *la1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, lns11, v3, 
cm1); tgt->addNode(la1);6959TR_PCISCNode *lbck1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, la1, alen, la1); tgt->addNode(lbck1);6960TR_PCISCNode *lnc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, lbck1, lld0); tgt->addNode(lnc2i);6961TR_PCISCNode *lns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iushr, TR::Int32, tgt->incNumNodes(), 1, 1, 2, lnc2i, lnc2i, c8); tgt->addNode(lns22);6962//TR_PCISCNode *lcvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, lns22, lns22); tgt->addNode(lcvt0);6963TR_PCISCNode *lns0 = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_ChildDirectConnected, 1, lns22, TR::bstorei, TR::Int8, v2, v3, cmah1, c1, lns22);6964TR_PCISCNode *ln6 = createIdiomDecVarInLoop(tgt, ctrl, 1, lns0, v3, cm2);69656966// merge two paths6967TR_PCISCNode *addv1= createIdiomDecVarInLoop(tgt, ctrl, 1, ln6, v1, cm1);6968TR_PCISCNode *topAddV1 = addv1->getChild(0);6969TR_PCISCNode *back = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, addv1, v1, vorc); tgt->addNode(back);6970TR_PCISCNode *ext = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(ext);69716972fchk->setSuccs(lbck0, bbck0);6973bn6->setSucc(0, topAddV1);6974back->setSuccs(ent->getSucc(0), 
ext);69756976bbck0->setIsChildDirectlyConnected();6977bbck1->setIsChildDirectlyConnected();6978bnc2i->setIsChildDirectlyConnected();6979bns22->setIsChildDirectlyConnected();6980//bcvt0->setIsChildDirectlyConnected();6981bcvt1->setIsChildDirectlyConnected();6982bns10->setIsChildDirectlyConnected();6983bns11->setIsChildDirectlyConnected();69846985lbck0->setIsChildDirectlyConnected();6986lbck1->setIsChildDirectlyConnected();6987lnc2i->setIsChildDirectlyConnected();6988lns22->setIsChildDirectlyConnected();6989//lcvt0->setIsChildDirectlyConnected();6990lcvt1->setIsChildDirectlyConnected();6991lns10->setIsChildDirectlyConnected();6992lns11->setIsChildDirectlyConnected();69936994fchk->setIsChildDirectlyConnected();6995back->setIsChildDirectlyConnected();69966997bld0->setIsSuccDirectlyConnected();69986999tgt->setEntryNode(ent);7000tgt->setExitNode(ext);7001tgt->setImportantNodes(lld0, lns11, bld0, bns0, fchk, back);7002tgt->setNumDagIds(18);7003tgt->createInternalData(1);70047005tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);70067007tgt->setTransformer(CISCTransform2ArrayCopyC2BMixed);7008tgt->setAspects(isub|mul|shr|bndchk, ILTypeProp::Size_2, ILTypeProp::Size_1);7009tgt->setNoAspects(call, 0, 0);7010tgt->setMinCounts(2, 2, 4); // minimum ifCount, indirectLoadCount, indirectStoreCount7011tgt->setHotness(warm, false);7012tgt->setInhibitBeforeVersioning();7013return tgt;7014}70157016701770187019//////////////////////////////////////////////////////////////////////////7020//////////////////////////////////////////////////////////////////////////7021//////////////////////////////////////////////////////////////////////////7022//*****************************************************************************************7023// IL code generation for copying memory for CharToByte with two if-statements version7024// Input: ImportantNodes(0) - array load7025// ImportantNodes(1) - array store7026// ImportantNodes(2) - the first if7027// ImportantNodes(3) - the second 
if7028//*****************************************************************************************7029bool7030CISCTransform2ArrayCopyC2BIf2(TR_CISCTransformer *trans)7031{7032TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");7033TR::Node *trNode;7034TR::TreeTop *trTreeTop;7035TR::Block *block;7036TR_CISCGraph *P = trans->getP();7037List<TR_CISCNode> *P2T = trans->getP2T();7038TR::Compilation *comp = trans->comp();70397040TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");70417042TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");7043if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;70447045trans->findFirstNode(&trTreeTop, &trNode, &block);7046if (!block) return false; // cannot find70477048if (isLoopPreheaderLastBlockInMethod(comp, block))7049{7050traceMsg(comp, "Bailing CISCTransform2ArrayCopyC2BIf2 due to null TT - might be a preheader in last block of method\n");7051return false;7052}70537054TR::Block *target = trans->analyzeSuccessorBlock();70557056TR::Node *indexRepNode, *dstIndexRepNode, *variableORconstRepNode, *variableORconstRepNode2;7057getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &variableORconstRepNode, &variableORconstRepNode2);7058TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();7059TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();7060TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error!");70617062TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();7063TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();70647065//**********************************************************************7066// For this idiom, because there 
are two if-statements, we need to check7067// which if-statement will trigger the loop exit.7068// Based on this, it will compute the length of copy, which will be7069// stored into the variable "lengthByteTemp".7070//**********************************************************************7071//7072TR::CFG *cfg = comp->getFlowGraph();7073TR::Node * c2 = TR::Node::create(indexRepNode, TR::iconst, 0, 2);7074indexRepNode = convertStoreToLoad(comp, indexRepNode)->duplicateTree();7075dstIndexRepNode = convertStoreToLoad(comp, dstIndexRepNode)->duplicateTree();7076variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode)->duplicateTree();7077variableORconstRepNode2 = convertStoreToLoad(comp, variableORconstRepNode2)->duplicateTree();70787079// Compute length7080TR::Block *chkLen1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);7081TR::Block *chkLen2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);7082TR::Block *bodyBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);7083TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();70847085TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();7086// chkLen17087TR::SymbolReference * lengthCharTemp = comp->getSymRefTab()->7088createTemporary(comp->getMethodSymbol(), TR::Int32);7089TR::SymbolReference * lengthByteTemp = comp->getSymRefTab()->7090createTemporary(comp->getMethodSymbol(), TR::Int32);7091TR::SymbolReference * selectLen2 = comp->getSymRefTab()->7092createTemporary(comp->getMethodSymbol(), TR::Int32);70937094// use the formula to compute the number of iterations7095// the number of times the loop is executed7096// n1 => C1 = ceiling[(N1 - i)/incr(i)] = (N1 - i) // entry valueof i ; increment is 1 & lt condition7097// C2 = floor[(N2 - j)/incr(j)] = floor[(N2 - j)/2] // entry valueof j ; increment is 2 & le condition7098// n2 => C2 + 1 // which necessitates adding an extra iteration7099//7100// so the 
lesser(C1, C2) will decide which test exits the loop.7101//7102//7103TR::Node * lengthSrcNode = createOP2(comp, TR::isub,7104variableORconstRepNode,7105indexRepNode);7106TR::Node * storeSrcCharLen = TR::Node::createStore(lengthCharTemp, lengthSrcNode);7107TR::Node * storeSrcByteLen = TR::Node::createStore(lengthByteTemp,7108TR::Node::create(TR::imul, 2, lengthSrcNode, c2));7109TR::Node *zeroConst = TR::Node::create(indexRepNode, TR::iconst, 0, 0);7110TR::Node * storeSelectLen = TR::Node::createStore(selectLen2,7111zeroConst);7112TR::Node * lengthDstNode = createOP2(comp, TR::isub,7113variableORconstRepNode2,7114dstIndexRepNode);71157116TR::Node * c1 = TR::Node::create(indexRepNode, TR::iconst, 0, 1);71177118TR::Node *incr = c1->duplicateTree();7119lengthDstNode = TR::Node::create(TR::ishr, 2, lengthDstNode, incr);7120TR::Node * lengthDstDiv2Node = TR::Node::create(TR::isub, 2, lengthDstNode, TR::Node::create(indexRepNode, TR::iconst, 0, -1));71217122TR::Node *cmpMin = TR::Node::createif(TR::ificmpge, lengthDstDiv2Node, lengthSrcNode, bodyBlock->getEntry());7123chkLen1->append(TR::TreeTop::create(comp, storeSrcCharLen));7124chkLen1->append(TR::TreeTop::create(comp, storeSrcByteLen));7125chkLen1->append(TR::TreeTop::create(comp, storeSelectLen));7126chkLen1->append(TR::TreeTop::create(comp, cmpMin));71277128// chkLen27129c1 = c1->duplicateTree();7130lengthDstDiv2Node = lengthDstDiv2Node->duplicateTree();7131TR::Node * storeSrcCharLen2 = TR::Node::createStore(lengthCharTemp, lengthDstDiv2Node);7132TR::Node * storeSrcByteLen2 = TR::Node::createStore(lengthByteTemp,7133TR::Node::create(TR::ishl, 2, lengthDstDiv2Node, c1->duplicateTree()));7134TR::Node * storeSelectLen2 = TR::Node::createStore(selectLen2, c1);7135chkLen2->append(TR::TreeTop::create(comp, storeSrcCharLen2));7136chkLen2->append(TR::TreeTop::create(comp, storeSrcByteLen2));7137chkLen2->append(TR::TreeTop::create(comp, storeSelectLen2));71387139// body7140c2 = c2->duplicateTree();7141TR::Node * updateTree1, 
*updateTree2;7142updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthCharTemp, trNode);7143updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthByteTemp, trNode);71447145// Prepare the node arraycopy7146TR::Node *lenNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, lengthByteTemp));7147TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lenNode);7148arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());7149arraycopy->setForwardArrayCopy(true);7150arraycopy->setArrayCopyElementType(TR::Int8);71517152TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);7153TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);7154TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);7155TR::Node * cmpExit = NULL;7156TR::TreeTop *failDest = NULL;7157TR::TreeTop *okDest = NULL;7158if (!target) // multiple successor blocks7159{7160TR_CISCNode *cmpgeCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));7161TR_CISCNode *cmpgtCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));7162failDest = cmpgtCISCNode->getDestination();7163okDest = cmpgeCISCNode->getDestination();71647165cmpExit = TR::Node::createif(TR::ificmpeq,7166TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, selectLen2),7167TR::Node::create(indexRepNode, TR::iconst, 0, 0),7168okDest);7169}71707171//7172// Insert nodes and maintain the CFG7173//7174TR::TreeTop *last;7175last = trans->removeAllNodes(trTreeTop, block->getExit());7176last->join(block->getExit());7177block = trans->insertBeforeNodes(block);71787179cfg->setStructure(NULL);71807181trTreeTop->setNode(topArraycopy);7182bodyBlock->append(trTreeTop);7183bodyBlock->append(updateTreeTop1);7184bodyBlock->append(updateTreeTop2);7185trans->insertAfterNodes(bodyBlock);7186cfg->insertBefore(bodyBlock, 
orgNextBlock);7187cfg->insertBefore(chkLen2, bodyBlock);7188cfg->insertBefore(chkLen1, chkLen2);7189cfg->join(block, chkLen1);7190if (target) // single successor block7191{7192trans->setSuccessorEdge(bodyBlock, target);7193}7194else7195{ // multiple successor blocks7196bodyBlock->append(TR::TreeTop::create(comp, cmpExit));7197trans->setSuccessorEdges(bodyBlock,7198failDest->getEnclosingBlock(),7199okDest->getEnclosingBlock());7200}7201trans->setSuccessorEdge(block, chkLen1);7202return true;7203}720472057206/****************************************************************************************7207Corresponding Java-like Pseudo Program (for big endian)7208int v1, v3, end, end2;7209char v0[ ];7210byte v2[ ];7211while(true){7212if (v1 >= end) break;7213if (v3 > end2) break;7214char T = v0[v1++];7215v2[v3++] = (byte)(T >> 8);7216v2[v3++] = (byte)(T & 0xff);7217}72187219Note 1: This idiom also supports little endian.7220****************************************************************************************/7221TR_PCISCGraph *7222makeMEMCPYChar2ByteGraph2(TR::Compilation *c, int32_t ctrl)7223{7224TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYChar2Byte2", 0, 16);7225/************************************ opc id dagId #cfg #child other/pred/children */7226TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(v1); // src array index7227TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1); tgt->addNode(v3); // dst array index7228TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),13, 0, 0); tgt->addNode(vorc); // length7229TR_PCISCNode *vorc2=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),12, 0, 0); tgt->addNode(vorc2); // length27230TR_PCISCNode *v0 = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v0); // src array base7231TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v2); // dst array base7232TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(cmah); // array header7233TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+17234TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1); tgt->addNode(cm1);7235TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2); tgt->addNode(cm2);7236TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size7237TR_PCISCNode *c8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8); tgt->addNode(c8);7238TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size7239TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);7240TR_PCISCNode *lv1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iload, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ent, v1); tgt->addNode(lv1);7241TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, lv1, lv1, cm1);7242TR_PCISCNode *ns10= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, n7, v3, cmah, c1);7243TR_PCISCNode *ns11= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns10, v2, ns10);7244TR_PCISCNode *nl0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ns11, lv1, cmah, c2);7245TR_PCISCNode *nl1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, 
nl0, v0, nl0);7246TR_PCISCNode *nl2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sloadi, TR::Int16, tgt->incNumNodes(), 1, 1, 1, nl1, nl1); tgt->addNode(nl2);7247TR_PCISCNode *cvt0, *cvt1;7248if ((ctrl & CISCUtilCtl_BigEndian))7249{7250TR_PCISCNode *nc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl2, nl2); tgt->addNode(nc2i);7251TR_PCISCNode *ns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8); tgt->addNode(ns22);7252cvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22); tgt->addNode(cvt0);7253}7254else7255{7256cvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl2, nl2); tgt->addNode(cvt0);7257}7258TR_PCISCNode *ns14= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt0, ns11, cvt0); tgt->addNode(ns14);7259TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns14, v3, cm2);7260TR_PCISCNode *ns20= createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, n6, ns10->getChild(0)->getChild(0), cmah1, c1);7261TR_PCISCNode *ns21= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns20, v2, ns20);7262if ((ctrl & CISCUtilCtl_BigEndian))7263{7264cvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns21, nl2); tgt->addNode(cvt1);7265}7266else7267{7268TR_PCISCNode *nc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ns21, nl2); tgt->addNode(nc2i);7269TR_PCISCNode *ns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8); tgt->addNode(ns22);7270cvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22); tgt->addNode(cvt1);7271}7272TR_PCISCNode 
*ns24= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt1, ns21, cvt1); tgt->addNode(ns24);7273TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ns24, v1, vorc); tgt->addNode(n8);7274TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpgt, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n8, v3, vorc2); tgt->addNode(n9);7275TR_PCISCNode *n10 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n10);72767277n8->setSucc(1, n10);7278n9->setSuccs(ent->getSucc(0), n10);72797280n8->setIsChildDirectlyConnected();7281n9->setIsChildDirectlyConnected();72827283tgt->setEntryNode(ent);7284tgt->setExitNode(n10);7285tgt->setImportantNodes(nl2, ns14, n8, n9);7286tgt->setNumDagIds(16);7287tgt->createInternalData(1);72887289tgt->setSpecialCareNode(0, cvt0); // conversion (possibly i2b)7290tgt->setSpecialCareNode(1, cvt1); // conversion (possibly i2b)7291tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);72927293tgt->setTransformer(CISCTransform2ArrayCopyC2BIf2);7294tgt->setAspects(isub|mul|shr, ILTypeProp::Size_2, ILTypeProp::Size_1);7295tgt->setNoAspects(call|bndchk, 0, 0);7296tgt->setMinCounts(1, 1, 2); // minimum ifCount, indirectLoadCount, indirectStoreCount7297tgt->setHotness(warm, false);7298tgt->setInhibitBeforeVersioning();7299return tgt;7300}73017302730373047305//////////////////////////////////////////////////////////////////////////7306//////////////////////////////////////////////////////////////////////////7307//////////////////////////////////////////////////////////////////////////7308//*****************************************************************************************7309// IL code generation for copying memory (ByteToInt or IntToByte version)7310// Input: ImportantNodes(0) - array load7311// ImportantNodes(1) - array 
store7312//*****************************************************************************************7313bool7314CISCTransform2ArrayCopyB2I(TR_CISCTransformer *trans)7315{7316TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");7317TR::Node *trNode;7318TR::TreeTop *trTreeTop;7319TR::Block *block;7320TR_CISCGraph *P = trans->getP();7321List<TR_CISCNode> *P2T = trans->getP2T();7322TR::Compilation *comp = trans->comp();73237324TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");73257326TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");7327if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;73287329trans->findFirstNode(&trTreeTop, &trNode, &block);7330if (!block) return false; // cannot find73317332if (isLoopPreheaderLastBlockInMethod(comp, block))7333{7334traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2I due to null TT - might be a preheader in last block of method\n");7335return false;7336}73377338TR::Block *target = trans->analyzeSuccessorBlock();7339// Currently, it allows only a single successor.7340if (!target) return false;73417342TR::Node *indexRepNode, *variableORconstRepNode;7343getP2TTrRepNodes(trans, &indexRepNode, &variableORconstRepNode);7344TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();73457346TR::Node * inputMemNode = trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();7347TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node->duplicateTree();7348TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();7349TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();73507351TR::Node * 
exitVarNode = createLoad(indexRepNode);7352variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);7353TR::Node * lengthNode = createOP2(comp, TR::isub,7354variableORconstRepNode,7355exitVarNode);7356TR::Node * updateTree1;7357TR::Node * c4 = TR::Node::create(indexRepNode, TR::iconst, 0, 4);7358TR::Node * diff = lengthNode;7359lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c4);7360// lengthNode has the byte size, and diff has the int-based size (that is, lengthNode = diff * 4)7361updateTree1 = TR::Node::createStore(indexVarSymRef, variableORconstRepNode);73627363lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);73647365// Prepare the arraycopy node7366TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);7367arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());7368arraycopy->setForwardArrayCopy(true);7369arraycopy->setArrayCopyElementType(TR::Int8);73707371TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);7372TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);73737374// Insert nodes and maintain the CFG7375TR::TreeTop *last;7376last = trans->removeAllNodes(trTreeTop, block->getExit());7377last->join(block->getExit());7378block = trans->insertBeforeNodes(block);7379last = block->getLastRealTreeTop();7380last->join(trTreeTop);7381trTreeTop->setNode(topArraycopy);7382trTreeTop->join(updateTreeTop1);7383updateTreeTop1->join(block->getExit());73847385trans->insertAfterNodes(block);73867387trans->setSuccessorEdge(block, target);7388return true;7389}739073917392/****************************************************************************************7393Corresponding Java-like Pseudo Program7394int v1, end;7395byte v0[ ];7396int v2[ ];7397while(true){7398v2[v1] = ((v0[v1*4] & 0xFF) << 24) | (v0[v1*4+1] & 0xFF) << 16) |7399(v0[v1*4+2] & 0xFF) << 8) | (v0[v1*4+3] & 0xFF));7400v1++;7401if (v1 >= end) 
break;7402}7403****************************************************************************************/7404TR_PCISCGraph *7405makeMEMCPYByte2IntGraph(TR::Compilation *c, int32_t ctrl)7406{7407TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYByte2Int", 0, 16);7408/************************************ opc id dagId #cfg #child other/pred/children */7409TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0); tgt->addNode(v1); // array index of src and dst7410TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 15, 0, 0); tgt->addNode(vorc); // length7411TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0); tgt->addNode(v0); // src array base7412TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1); tgt->addNode(v2); // dst array base7413TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(cmah); // array header7414TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),12, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+17415TR_PCISCNode *cmah2=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),11, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+2));// array header+27416TR_PCISCNode *cmah3=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),10, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+3));// array header+37417TR_PCISCNode *ah1 = (ctrl & CISCUtilCtl_BigEndian) ? cmah : cmah3;7418TR_PCISCNode *ah2 = (ctrl & CISCUtilCtl_BigEndian) ? cmah1 : cmah2;7419TR_PCISCNode *ah3 = (ctrl & CISCUtilCtl_BigEndian) ? cmah2 : cmah1;7420TR_PCISCNode *ah4 = (ctrl & CISCUtilCtl_BigEndian) ? 
cmah3 : cmah;7421TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 9, 0, 0, -1); tgt->addNode(cm1);7422TR_PCISCNode *c4 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, 4); // element size7423TR_PCISCNode *ci4 = c4;7424if (ctrl & CISCUtilCtl_64Bit)7425{7426ci4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, 4); tgt->addNode(ci4);7427}7428TR_PCISCNode *cs8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 0x100); tgt->addNode(cs8);7429TR_PCISCNode *cs16= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, 0x10000); tgt->addNode(cs16);7430TR_PCISCNode *cs24= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 0x1000000); tgt->addNode(cs24);7431TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size7432TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);7433TR_PCISCNode *ns0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ent, v1, cmah, c4);7434TR_PCISCNode *ns1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns0, v2, ns0);7435TR_PCISCNode *nmul= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ns1, v1, ci4); tgt->addNode(nmul);7436TR_PCISCNode *nl12= createIdiomArrayLoadInLoop(tgt, ctrl, 1, nmul, TR::bloadi, TR::Int8, v0, nmul, ah1, c1);7437TR_PCISCNode *nl13= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl12, nl12); tgt->addNode(nl13);7438TR_PCISCNode *nl14= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl13, nl13, cs24); tgt->addNode(nl14);7439TR_PCISCNode *nl22= 
createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl14, TR::bloadi, TR::Int8, v0, nmul, ah2, c1);7440TR_PCISCNode *nl23= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl22, nl22); tgt->addNode(nl23);7441TR_PCISCNode *nl24= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl23, nl23, cs16); tgt->addNode(nl24);7442TR_PCISCNode *nl25= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl24, nl14, nl24); tgt->addNode(nl25);7443TR_PCISCNode *nl32= createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl25, TR::bloadi, TR::Int8, v0, nmul, ah3, c1);7444TR_PCISCNode *nl33= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl32, nl32); tgt->addNode(nl33);7445TR_PCISCNode *nl34= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl33, nl33, cs8); tgt->addNode(nl34);7446TR_PCISCNode *nl35= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl34, nl25, nl34); tgt->addNode(nl35);7447TR_PCISCNode *nl42= createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl35, TR::bloadi, TR::Int8, v0, nmul, ah4, c1);7448TR_PCISCNode *nl43= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl42, nl42); tgt->addNode(nl43);7449TR_PCISCNode *nl45= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl43, nl35, nl43); tgt->addNode(nl45);7450TR_PCISCNode *ns4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl45, ns1, nl45); tgt->addNode(ns4);7451TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns4, v1, cm1);7452TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc); tgt->addNode(n8);7453TR_PCISCNode *n9 = 
new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);74547455n8->setSuccs(ent->getSucc(0), n9);7456n8->setIsChildDirectlyConnected();74577458tgt->setEntryNode(ent);7459tgt->setExitNode(n9);7460tgt->setImportantNodes((ctrl & CISCUtilCtl_BigEndian) ? nl12 : nl42, ns4);7461tgt->setNumDagIds(17);7462tgt->createInternalData(1);74637464tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);7465tgt->setTransformer(CISCTransform2ArrayCopyB2I);7466tgt->setAspects(isub|mul|bitop1, ILTypeProp::Size_1, ILTypeProp::Size_4);7467tgt->setNoAspects(call|bndchk, 0, 0);7468tgt->setMinCounts(1, 4, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount7469tgt->setHotness(hot, false);7470tgt->setInhibitBeforeVersioning();7471return tgt;7472}747374747475/****************************************************************************************7476Corresponding Java-like Pseudo Program7477int v1, end, end2;7478int v0[ ];7479byte v2[ ];7480while(true){7481v2[v1*4] = (byte)(v0[v1] >>> 24) & 0xFF;7482v2[v1*4+1] = (byte)(v0[v1] >>> 16) & 0xFF;7483v2[v1*4+2] = (byte)(v0[v1] >>> 8) & 0xFF;7484v2[v1*4+3] = (byte)(v0[v1] & 0xff);7485v1++;7486if (v1 >= end) break;7487}7488****************************************************************************************/7489TR_PCISCGraph *7490makeMEMCPYInt2ByteGraph(TR::Compilation *c, int32_t ctrl)7491{7492TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYInt2Byte", 0, 16);7493/************************************ opc id dagId #cfg #child other/pred/children */7494TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 17, 0, 0, 0); tgt->addNode(v1); // array index of src and dst7495TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),16, 0, 0); tgt->addNode(vorc); // length7496TR_PCISCNode *v0 = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(v0); // src array base7497TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1); tgt->addNode(v2); // dst array base7498TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(cmah); // array header7499TR_PCISCNode *cmah1=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),12, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));// array header+17500TR_PCISCNode *cmah2=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),11, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+2));// array header+27501TR_PCISCNode *cmah3=createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(),10, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+3));// array header+37502TR_PCISCNode *ah1 = (ctrl & CISCUtilCtl_BigEndian) ? cmah : cmah3;7503TR_PCISCNode *ah2 = (ctrl & CISCUtilCtl_BigEndian) ? cmah1 : cmah2;7504TR_PCISCNode *ah3 = (ctrl & CISCUtilCtl_BigEndian) ? cmah2 : cmah1;7505TR_PCISCNode *ah4 = (ctrl & CISCUtilCtl_BigEndian) ? 
cmah3 : cmah;7506TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 9, 0, 0, -1); tgt->addNode(cm1);7507TR_PCISCNode *cs4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 8, 0, 0, 4); tgt->addNode(cs4); // element size7508TR_PCISCNode *cl4 = cs4;7509if (ctrl & CISCUtilCtl_64Bit)7510{7511cl4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, tgt->incNumNodes(), 7, 0, 0, 4); tgt->addNode(cl4); // element size for 64-bit7512}7513TR_PCISCNode *cs8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 8); tgt->addNode(cs8);7514TR_PCISCNode *cs16= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, 16); tgt->addNode(cs16);7515TR_PCISCNode *cs24= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 24); tgt->addNode(cs24);7516TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size7517TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);7518TR_PCISCNode *nmul= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ent, v1, cs4); tgt->addNode(nmul);7519TR_PCISCNode *ns00= createIdiomArrayAddressInLoop(tgt, ctrl, 1, nmul, v2, nmul, ah1, c1);7520TR_PCISCNode *nl00;7521if (ctrl & CISCUtilCtl_64Bit)7522{7523nl00 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns00, v0, v1, cmah, cl4);7524}7525else7526{7527nl00 = createIdiomArrayAddressInLoop (tgt, ctrl, 1, ns00, v0, nmul, cmah, c1);7528}7529TR_PCISCNode *nl01= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iloadi, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl00, nl00); tgt->addNode(nl01);7530TR_PCISCNode *ns01= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, 
TR::NoType, tgt->incNumNodes(), 1, 1, 2, nl01, nl01, cs24); tgt->addNode(ns01);7531TR_PCISCNode *ns02= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns01, ns01); tgt->addNode(ns02);7532TR_PCISCNode *ns03= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns02, ns00, ns02); tgt->addNode(ns03);7533TR_PCISCNode *ns10= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns03, v2, nmul, ah2, c1);7534TR_PCISCNode *ns11= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ns10, nl01, cs16); tgt->addNode(ns11);7535TR_PCISCNode *ns12= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns11, ns11); tgt->addNode(ns12);7536TR_PCISCNode *ns13= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns12, ns10, ns12); tgt->addNode(ns13);7537TR_PCISCNode *ns20= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns13, v2, nmul, ah3, c1);7538TR_PCISCNode *ns21= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ns20, nl01, cs8); tgt->addNode(ns21);7539TR_PCISCNode *ns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns21, ns21); tgt->addNode(ns22);7540TR_PCISCNode *ns23= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns22, ns20, ns22); tgt->addNode(ns23);7541TR_PCISCNode *ns30= createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns23, v2, nmul, ah4, c1);7542TR_PCISCNode *ns32= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns30, nl01); tgt->addNode(ns32);7543TR_PCISCNode *ns33= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns32, ns30, ns32); tgt->addNode(ns33);7544TR_PCISCNode *n6 = 
createIdiomDecVarInLoop(tgt, ctrl, 1, ns33, v1, cm1);7545TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc); tgt->addNode(n8);7546TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);75477548n8->setSuccs(ent->getSucc(0), n9);7549n8->setIsChildDirectlyConnected();75507551tgt->setEntryNode(ent);7552tgt->setExitNode(n9);7553tgt->setImportantNodes(nl01, (ctrl & CISCUtilCtl_BigEndian) ? ns03 : ns33);7554tgt->setNumDagIds(18);7555tgt->createInternalData(1);75567557tgt->setSpecialCareNode(0, ns02); // i2b7558tgt->setSpecialCareNode(1, ns12); // i2b7559tgt->setSpecialCareNode(2, ns22); // i2b7560tgt->setSpecialCareNode(3, ns32); // i2b7561tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);75627563tgt->setTransformer(CISCTransform2ArrayCopyB2I);7564tgt->setAspects(isub|mul|shr, ILTypeProp::Size_4, ILTypeProp::Size_1);7565tgt->setNoAspects(call|bndchk, 0, 0);7566tgt->setMinCounts(1, 1, 4); // minimum ifCount, indirectLoadCount, indirectStoreCount7567tgt->setHotness(hot, false);7568tgt->setInhibitBeforeVersioning();7569return tgt;7570}757175727573//////////////////////////////////////////////////////////////////////////7574//////////////////////////////////////////////////////////////////////////7575//////////////////////////////////////////////////////////////////////////7576//*****************************************************************************************7577// IL code generation for filling memory7578// Input: ImportantNode(0) - astore of aiadd or aladd for address induction variable7579// ImportantNode(1) - array element store7580// ImportantNode(2) - exit if7581//*****************************************************************************************7582static int32_t getAbs(int32_t val)7583{7584return val < 0 ? 
-val : val;7585}7586bool7587CISCTransform2PtrArraySet(TR_CISCTransformer *trans)7588{7589bool trace = trans->trace();7590TR::Node *trNode = NULL;7591TR::TreeTop *trTreeTop = NULL;7592TR::Block *block = NULL;7593TR_CISCGraph *p = trans->getP();7594List<TR_CISCNode> *P2T = trans->getP2T();7595TR::Compilation *comp = trans->comp();7596trans->findFirstNode(&trTreeTop, &trNode, &block);7597if (!block)7598return false; // cannot find75997600// Currently, it allows only a single successor.7601TR::Block *target = trans->analyzeSuccessorBlock();7602if (!target)7603return false;76047605// Only handle very simple loops.7606if (trans->getNumOfBBlistBody() > 1)7607{7608if (trace) traceMsg(comp, "Need exactly 1 basic block\n");7609return false;7610}76117612// Should have 3 treetops in body. See makePtrArraySetGraph7613if (block->getNumberOfRealTreeTops() != 3)7614{7615if (trace) traceMsg(comp, "Need exactly 3 real treetops\n");7616return false;7617}76187619auto astore = trans->getP2TRepInLoop(p->getImportantNode(0));7620auto Store = trans->getP2TRepInLoop(p->getImportantNode(1));7621auto ifcmp = trans->getP2TRepInLoop(p->getImportantNode(2));76227623if (!astore)7624{7625if (trace) traceMsg(comp, "astore missing\n");7626return false;7627}7628if (!Store)7629{7630if (trace) traceMsg(comp, "array element store missing\n");7631return false;7632}7633if (!ifcmp)7634{7635if (trace) traceMsg(comp, "if compare missing\n");7636return false;7637}76387639auto astoreNode = astore->getHeadOfTrNode();7640auto StoreNode = Store->getHeadOfTrNode();7641auto ifcmpNode = ifcmp->getHeadOfTrNode();76427643if (!(astoreNode->getChild(0)->getChild(0) == StoreNode->getChild(0) &&7644astoreNode->getChild(0) == ifcmpNode->getChild(0)))7645{7646if (trace) traceMsg(comp, "node trees not in required form\n");7647return false;7648}76497650if (!ifcmpNode->getChild(0)->getOpCode().isLoadVar() &&7651!ifcmpNode->getChild(1)->getOpCode().isLoadVar())7652{7653if (trace) traceMsg(comp, "neither comparands are 
loadvar\n");7654return false;7655}76567657if (ifcmpNode->getChild(0)->getOpCode().isLoadVar() ^7658ifcmpNode->getChild(1)->getOpCode().isLoadVar())7659{7660auto nonLoadChild = (ifcmpNode->getChild(0)->getOpCode().isLoadVar()) ?7661ifcmpNode->getChild(1) : ifcmpNode->getChild(0);7662if (astoreNode->getChild(0) != nonLoadChild)7663{7664if (trace) traceMsg(comp, "iv is not a commoned child in if comparand\n");7665return false;7666}7667}76687669// Only ordered compare {lt,le,ge,gt} and ne allowed7670if (!ifcmpNode->getOpCode().isCompareForOrder() &&7671!(!ifcmpNode->getOpCode().isCompareTrueIfEqual() && ifcmpNode->getOpCode().isCompareForEquality()))7672{7673if (trace) traceMsg(comp, "invalid compare condition\n");7674return false;7675}76767677if (!StoreNode->getOpCode().isStoreIndirect() ||7678(StoreNode->getChild(0)->getOpCode().isLoadVar() &&7679StoreNode->getChild(0)->getSymbolReference() != astoreNode->getSymbolReference()))7680{7681if (trace) traceMsg(comp, "array element store node is neither indirect store "7682"nor matched with addr iv\n");7683return false;7684}76857686switch(StoreNode->getSize())7687{7688case 1:7689case 2:7690case 4:7691case 8: break;7692default:7693if (trace)7694traceMsg(comp, "element size is not power-of-2 <= 8\n");7695return false;7696}76977698if (StoreNode->getDataType() == TR::Aggregate)7699{7700if (trace)7701traceMsg(comp, "arrayset can't handle aggregate elem type\n");7702return false;7703}77047705auto increment = astoreNode->getChild(0)->getChild(1)->getConst<int32_t>();7706if (StoreNode->getSize() != getAbs(increment))7707{7708if (trace) traceMsg(comp, "increment size does not match element size\n");7709return false;7710}77117712TR::Node *endPtr = NULL;7713if (ifcmpNode->getChild(0)->getOpCode().isLoadVar() &&7714ifcmpNode->getChild(0)->getSymbolReference() != astoreNode->getSymbolReference())7715endPtr = ifcmpNode->getChild(0);7716else if (ifcmpNode->getChild(1)->getOpCode().isLoadVar() 
&&7717ifcmpNode->getChild(1)->getSymbolReference() != astoreNode->getSymbolReference())7718endPtr = ifcmpNode->getChild(1);77197720if (!endPtr)7721{7722if (trace) traceMsg(comp, "Could not get end pointer\n");7723return false;7724}77257726// all good.. now actual transformations7727auto startPtr = TR::Node::createWithSymRef(TR::aload, 0, astoreNode->getSymbolReference());7728TR::Node *length, *arrayset;7729bool use64bit = comp->target().is64Bit();7730bool equal = ifcmpNode->getOpCode().isCompareTrueIfEqual(); // fix off by one.7731if (increment < 0)7732{7733length = TR::Node::create(use64bit ? TR::a2l : TR::a2i, 1, TR::Node::create(TR::asub, 2, startPtr, endPtr));7734if (equal)7735{7736length = TR::Node::create(use64bit ? TR::ladd : TR::iadd, 2, length,7737use64bit ? TR::Node::lconst(1) : TR::Node::iconst(1));7738}7739arrayset = TR::Node::create(TR::arrayset, 3, endPtr, StoreNode->getChild(1), length);7740}7741else7742{7743length = TR::Node::create(use64bit ? TR::a2l : TR::a2i, 1, TR::Node::create(TR::asub, 2, endPtr, startPtr));7744if (equal)7745{7746length = TR::Node::create(use64bit ? TR::ladd : TR::iadd, 2, length,7747use64bit ? TR::Node::lconst(1) : TR::Node::iconst(1));7748}7749arrayset = TR::Node::create(TR::arrayset, 3, startPtr, StoreNode->getChild(1), length);7750}7751arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());77527753//reset block7754block->getEntry()->join(block->getExit());7755block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, arrayset)));77567757ifcmpNode->recursivelyDecReferenceCount();7758StoreNode->recursivelyDecReferenceCount();7759auto tmpastoreChild = astoreNode->getChild(0);77607761// set startPtr as if it got to the end of the loop7762if (equal)7763{7764int offsetAtEnd = (increment < 0) ? -1 : 1;7765auto newEnd = TR::Node::create(use64bit ? TR::aladd : TR::aiadd, 2, endPtr,7766use64bit ? 
TR::Node::lconst(offsetAtEnd) : TR::Node::iconst(offsetAtEnd));7767astoreNode->setAndIncChild(0, newEnd);7768}7769else7770{7771astoreNode->setAndIncChild(0, endPtr);7772}7773tmpastoreChild->recursivelyDecReferenceCount();77747775block->append(TR::TreeTop::create(comp, astoreNode));7776trans->setSuccessorEdge(block, target);7777return true;7778}77797780//////////////////////////////////////////////////////////////////////////7781//////////////////////////////////////////////////////////////////////////7782//////////////////////////////////////////////////////////////////////////7783//*****************************************************************************************7784// IL code generation for filling memory7785// Input: ImportantNode(0) - array store7786// ImportantNode(1) - Store of iadd or isub for induction variable7787// ImportantNode(2) - Store of iadd or isub for induction variable 17788// ImportantNode(3) - exit if7789//*****************************************************************************************7790bool7791CISCTransform2ArraySet(TR_CISCTransformer *trans)7792{7793TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");7794const bool disptrace = DISPTRACE(trans);7795TR::Node *trNode = NULL;7796TR::TreeTop *trTreeTop = NULL;7797TR::Block *block = NULL;7798TR_CISCGraph *P = trans->getP();7799List<TR_CISCNode> *P2T = trans->getP2T();7800TR::Compilation *comp = trans->comp();7801bool ctrl = trans->isGenerateI2L();78027803TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");7804if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;78057806trans->findFirstNode(&trTreeTop, &trNode, &block);7807if (!block) return false; // cannot find78087809if (isLoopPreheaderLastBlockInMethod(comp, block))7810{7811traceMsg(comp, "Bailing CISCTransform2ArraySet due to null TT - might be a preheader in 
last block of method\n");7812return false;7813}78147815TR::Block *target = trans->analyzeSuccessorBlock();7816// Currently, it allows only a single successor.7817if (!target) return false;78187819TR_CISCNode *ivStoreCISCNode = trans->getP2TRepInLoop(P->getImportantNode(1));7820TR_CISCNode *ivStore1CISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));7821TR_CISCNode *addORsubCISCNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0));7822TR_CISCNode *addORsub1CISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));7823TR_CISCNode *cmpIfAllCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));78247825TR_ScratchList<TR::Node> storeList(comp->trMemory());7826TR_ASSERT(ivStoreCISCNode, "Expected induction variable store node in Transform2ArraySet");7827storeList.add(ivStoreCISCNode->getHeadOfTrNode());78287829if (ivStore1CISCNode && ivStore1CISCNode != ivStoreCISCNode)7830storeList.add(ivStore1CISCNode->getHeadOfTrNode());78317832if (!cmpIfAllCISCNode)7833{7834if (disptrace) traceMsg(comp, "Not implemented yet for multiple-if\n");7835return false;7836}7837TR_ASSERT(addORsubCISCNode->getOpcode() == TR::isub || addORsubCISCNode->getOpcode() == TR::iadd, "error");7838TR_ASSERT(addORsub1CISCNode->getOpcode() == TR::isub || addORsub1CISCNode->getOpcode() == TR::iadd, "error");78397840// Check which count-up or count-down loop7841bool isIncrement0 = (addORsubCISCNode->getOpcode() == TR::isub);7842bool isIncrement1 = (addORsub1CISCNode->getOpcode() == TR::isub);78437844bool isIncrement = isIncrement0;78457846// Depending on the loop exit comparison, we may need to adjust the length of the arrayset.7847int32_t lengthMod = 0;7848TR_CISCNode *retStore = trans->getT()->searchStore(cmpIfAllCISCNode->getChild(0), cmpIfAllCISCNode);7849switch(cmpIfAllCISCNode->getOpcode())7850{7851case TR::ificmpgt:7852lengthMod = 1;7853// fallthrough7854case TR::ificmpge:7855if (!isIncrement) return false;7856if (retStore == ivStoreCISCNode) 
lengthMod++;7857break;7858case TR::ificmplt:7859lengthMod = 1;7860// fallthrough7861case TR::ificmple:7862if (isIncrement) return false;7863if (retStore == ivStoreCISCNode) lengthMod++;7864break;7865default:7866traceMsg(comp, "Bailing CISCTransform2ArraySet due to unrecognized loop exit comparison.\n");7867return false;7868}78697870if (disptrace)7871traceMsg(comp,"Examining exit comparison CICS node %d, and determined required length modifier to be: %d\n", cmpIfAllCISCNode->getID(), lengthMod);78727873TR_ScratchList<TR::Node> listStores(comp->trMemory());7874ListAppender<TR::Node> appenderListStores(&listStores);7875ListIterator<TR_CISCNode> ni(trans->getP2T() + P->getImportantNode(0)->getID());7876TR_CISCNode *inStoreCISCNode;7877TR::Node *inStoreNode;7878for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())7879{7880if (!inStoreCISCNode->isOutsideOfLoop())7881{7882inStoreNode = inStoreCISCNode->getHeadOfTrNodeInfo()->_node;7883if (!isIndexVariableInList(inStoreNode, &storeList))7884{7885dumpOptDetails(comp, "an index used in an array store %p is not consistent with the induction varaible updates\n", inStoreNode);7886return false;7887}7888// this idiom operates in two modes - arrayset for all values or arrayset only for setting to zero7889// if the codegen does not support generic arrayset - make sure we are storing a constant 07890// note the stored value is constrained to a constant by the node matcher7891if (!trans->comp()->cg()->getSupportsArraySet()7892&& !(inStoreNode->getType().isIntegral() && inStoreNode->getSecondChild()->get64bitIntegralValueAsUnsigned() == 0)7893&& !(inStoreNode->getType().isAddress() && inStoreNode->getSecondChild()->getAddress() == 0))7894{7895dumpOptDetails(comp, "the cg only supports arrayset to zero, but found a non-zero or non-constant value\n");7896return false;7897}7898appenderListStores.add(inStoreNode);7899}7900}7901if (listStores.isEmpty()) return false;79027903TR::Node *indexRepNode, 
*index1RepNode, *dstBaseRepNode, *variableORconstRepNode1;7904getP2TTrRepNodes(trans, &indexRepNode, &index1RepNode, &dstBaseRepNode, &variableORconstRepNode1);79057906if (disptrace)7907{7908traceMsg(comp,"Identified target nodes\n\tindexRepNode: %p\n\tindex1RepNode: %p\n\tdstBaseRepNode: %p\n\tvariableOrconstRepNode1: %p\n",7909indexRepNode, index1RepNode, dstBaseRepNode, variableORconstRepNode1);7910}7911TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();7912TR::SymbolReference * indexVar1SymRef = index1RepNode->getSymbolReference();7913if (trans->countGoodArrayIndex(indexVarSymRef) == 0 &&7914trans->countGoodArrayIndex(indexVar1SymRef) == 0) return false;7915if (indexVarSymRef != indexVar1SymRef)7916{7917// there are two induction variables7918if (!listStores.isSingleton())7919{7920dumpOptDetails(comp, "Multiple induction variables with multiple stores not supported for arrayset transformation.\n");7921return false;7922}7923if (!isIncrement1)7924{7925// We do not correctly handle the second induction variable being a decrement.7926// TODO: Things to fix include:7927// Proper Last Value calculation for count-down loop that uses ind var 1.7928// Proper length calculation for count-down loop that uses ind var 1.7929dumpOptDetails(comp, "A decrementing second induction variable is not supported. 
\n");7930return false;7931}7932}79337934//7935// analyze each store7936//7937ListIterator<TR::Node> iteratorStores(&listStores);7938TR::Node * indexNode = createLoad(indexRepNode);79397940// check if the induction variable7941// is being stored into the array7942for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())7943{7944TR::Node * valueNode = inStoreNode->getChild(1);7945if (valueNode->getOpCode().isLoadDirect() && valueNode->getOpCode().hasSymbolReference())7946{7947if (valueNode->getSymbolReference()->getReferenceNumber() == indexNode->getSymbolReference()->getReferenceNumber() ||7948valueNode->getSymbolReference()->getReferenceNumber() == index1RepNode->getSymbolReference()->getReferenceNumber())7949{7950traceMsg(comp, "arraystore tree has induction variable on rhs\n");7951return false;7952}7953}7954}79557956List<TR::Node> listArraySet(comp->trMemory());7957TR::Node * computeIndex = NULL;7958TR::Node * lengthNode = NULL;7959TR::Node * lengthByteNode = NULL;79607961for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())7962{7963TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();7964TR::Node * valueNode = convertStoreToLoad(comp, inStoreNode->getChild(1));79657966uint32_t elementSize = 0;7967if (inStoreNode->getType().isAddress())7968elementSize = TR::Compiler->om.sizeofReferenceField();7969else7970elementSize = inStoreNode->getSize();79717972// Depending on the induction variable used in the loop, determine if it's count up or count down.7973bool loopIsIncrement = false;7974if (findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), NULL, indexVarSymRef->getReferenceNumber()))7975{7976loopIsIncrement = isIncrement0;7977}7978else7979{7980TR_ASSERT(findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), NULL, indexVar1SymRef->getReferenceNumber()), "Unable to find matching array access induction 
variable.\n");7981loopIsIncrement = isIncrement1;7982}79837984if (!loopIsIncrement) // count-down loop7985{7986// This case covers a backwards counting loops of the following general forms:79877988// A) Induction variable update BEFORE the array store.7989// i = i_init;7990// do {7991// i--;7992// a [i + c] = d;7993// } while ( i >= i_last );7994//7995// B) Induction variable update AFTER the array store.7996// i = i_init;7997// do {7998// a [i + c] = d;7999// i--;8000// } while ( i >= i_last );8001//8002// The loops can be transformed into an equivalent forward counting loop:8003// i = i_last';8004// do {8005// a [i + c] = d;8006// i++;8007// } while (i <= i_init')8008//8009// Where:8010// A) Induction variable update BEFORE the array store.8011// i_init' = i_init - 18012// i_last' = i_last - 18013// B) Induction variable update AFTER the array store.8014// i_init' = i_init8015// i_last' = i_last8016//8017// This forward version can be reduced to an arrayset8018// arrayset8019// a[i_last' + c] // Address of first element to set (forward sense)8020// bconst d // Element to set.8021// i_init - i_last (+1) // Length8022// Calculate the last value of the induction variable in the original count-down loop.8023// This value becomes the index of the first element in the count-up version, and hence8024// the first element of the arrayset.80258026TR::Node * lastValueNode = convertStoreToLoad(comp, variableORconstRepNode1);80278028// Determine if the induction variable update is before the arrayset8029bool isIndexVarUpdateBeforeArrayset = (trans->findStoreToSymRefInInsertBeforeNodes(indexVarSymRef->getReferenceNumber()) != NULL);80308031// Adjust for the index based on exit condition (i.e. 
> vs >= ) and whether the induction8032// variable update is before/after the array stores.8033// i_last': > (lengthMod=0) >= (lengthMod=1)8034// --------------- ----------------8035// Before i_last i_last - 18036// After i_last + 1 i_last8037int32_t lastLegalValueAdjustment = -lengthMod;8038if (!isIndexVarUpdateBeforeArrayset)8039lastLegalValueAdjustment++;80408041// If the induction variable update is before the arrayset, we need to validate whether the array access8042// commoned the node with the iadd/isub of the induction variable. i.e.8043//8044// istore #indvar8045// iadd (A)8046// iload #indvar (B)8047// iconst -18048// istore8049// aiadd8050// aload arraybase8051// aiadd8052// index8053// iconst array_header_size8054//8055// where index could be:8056// (A) commoned to iadd, effectively using new value of #indvar8057// (B) commoned to iload, effectively using old value of #indvar8058// (C) a new iload using new value of #indvar8059//8060// Case (A) is problematic, as the induction variable store is still before the arrayset, but8061// the array access pattern is using the original value of #indvar.8062// Case (B) is okay, in that topological embedding will recognize that to be equivalent to8063// updating induction variable after the arraystore.8064// Case (C) is handled correctly.8065int32_t arrayStoreCommoningAdjustment = 0;8066if (isIndexVarUpdateBeforeArrayset)8067{8068TR::Node *origIndVarStore = ivStoreCISCNode->getHeadOfTrNodeInfo()->_node;8069TR::Node *origIndVarLoad = origIndVarStore->getChild(0)->getChild(0);80708071TR::Node *origArrayIndVarLoad = findLoadWithMatchingSymRefNumber(inStoreNode->getChild(0)->getSecondChild(), indexVarSymRef->getReferenceNumber());80728073// If they match, we have case (B), so we need to readjust by +1.8074if (origIndVarLoad == origArrayIndVarLoad)8075{8076traceMsg(comp, "Identified array index to have been referencing original induction variable value: %p\n",origIndVarLoad);8077arrayStoreCommoningAdjustment = 
1;8078}8079}80808081TR::Node *lastLegalValue = createOP2(comp, TR::iadd, lastValueNode,8082TR::Node::create(indexNode, TR::iconst, 0, lastLegalValueAdjustment + arrayStoreCommoningAdjustment));80838084// Search for the induction variable in the array access sub-tree and replace that node8085// with the last value index we just calculated.8086bool isFound = findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), lastLegalValue, indexVarSymRef->getReferenceNumber());8087if (!isFound && (indexVarSymRef != indexVar1SymRef))8088isFound = findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), lastLegalValue, indexVar1SymRef->getReferenceNumber());80898090TR_ASSERT(isFound, "Count down arrayset was unable to find and replace array access induction variable.\n");80918092// Determine the length of the arrayset (# of elements to set) and adjusting it based on exit condition.8093// In the case of the induction variable update is before the array store, the indexNode value has already been8094// decremented by 1 once already (since i--; is inserted before the final arrayset. 
We need to readjust that.8095// length: > (lengthMod=0) >= (lengthMod=1)8096// --------------- ----------------8097// Before i_init - i_last + 1 i_init - i_last +28098// After i_init - i_last i_init - i_last +18099int32_t lengthAdjustment = lengthMod + ((isIndexVarUpdateBeforeArrayset)?1:0);81008101lengthNode = createOP2(comp, TR::isub, indexNode, lastValueNode);8102lengthNode = createOP2(comp, TR::iadd, lengthNode, TR::Node::create(indexNode, TR::iconst, 0, lengthAdjustment));81038104// Determine the final induction variable value on loop exit.8105// If the induction variable update is before the arrayset,8106// it will be the last value we access.8107// If the induction variable update is after the arrayset,8108// it will always be one less than the last index (count-down sense) that we access.8109computeIndex = createOP2(comp, TR::iadd, lastLegalValue, TR::Node::create(indexRepNode, TR::iconst, 0, ((isIndexVarUpdateBeforeArrayset)?0:-1) - arrayStoreCommoningAdjustment));81108111}8112else // count-up loop8113{8114TR::Node * lastValue = convertStoreToLoad(comp, variableORconstRepNode1);8115lastValue = createOP2(comp, isIncrement0 ? TR::iadd : TR::isub, lastValue,8116TR::Node::create(indexNode, TR::iconst, 0, lengthMod));81178118// Induction variable 0 is always part of the loop exit condition based on idiom graph.8119if (isIncrement0)8120{8121lengthNode = createOP2(comp, TR::isub, lastValue, indexNode);8122}8123else8124{8125lengthNode = createOP2(comp, TR::isub, indexNode, lastValue);8126}8127computeIndex = lastValue;8128}81298130lengthByteNode = lengthNode;8131const bool longOffsets = trans->isGenerateI2L();8132lengthByteNode = createI2LIfNecessary(comp, longOffsets, lengthByteNode);8133if (elementSize > 1)8134{8135TR::Node *elementSizeNode = NULL;8136if (longOffsets)8137elementSizeNode = TR::Node::lconst(inStoreNode, elementSize);8138else8139elementSizeNode = TR::Node::iconst(inStoreNode, elementSize);81408141lengthByteNode = TR::Node::create(8142longOffsets ? 
TR::lmul : TR::imul,81432,8144lengthByteNode,8145elementSizeNode);8146}81478148TR::Node * arrayset = TR::Node::create(TR::arrayset, 3, outputNode, valueNode, lengthByteNode);8149arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());81508151listArraySet.add(TR::Node::create(TR::treetop, 1, arrayset));8152}81538154TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, computeIndex);8155TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);8156TR::Node * indVar1UpdateNode = NULL;8157TR::TreeTop * indVar1UpdateTreeTop = NULL;8158if (indexVarSymRef != indexVar1SymRef)8159{8160indVar1UpdateNode = createStoreOP2(comp, indexVar1SymRef, TR::iadd, indexVar1SymRef, lengthNode, trNode);8161indVar1UpdateTreeTop = TR::TreeTop::create(comp, indVar1UpdateNode);8162}81638164// Insert nodes and maintain the CFG8165TR::TreeTop *last;8166ListIterator<TR::Node> iteratorArraySet(&listArraySet);8167TR::Node *arrayset = NULL;8168TR_ASSERT(lengthByteNode, "Expected at least one set of arrayset.");8169block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthByteNode->duplicateTree());8170block = trans->insertBeforeNodes(block);8171last = block->getLastRealTreeTop();8172for (arrayset = iteratorArraySet.getFirst(); arrayset; arrayset = iteratorArraySet.getNext())8173{8174TR::TreeTop *newTop = TR::TreeTop::create(comp, arrayset);8175last->join(newTop);8176last = newTop;8177}8178last->join(indVarUpdateTreeTop);8179indVarUpdateTreeTop->join(block->getExit());8180if (indVar1UpdateTreeTop)8181{8182block->append(indVar1UpdateTreeTop);8183}81848185trans->insertAfterNodes(block);81868187trans->setSuccessorEdge(block, target);8188return true;8189}81908191bool CISCTransform2Strlen16(TR_CISCTransformer *trans)8192{8193bool trace = trans->trace();8194TR::Node *trNode = NULL;8195TR::TreeTop *trTreeTop = NULL;8196TR::Block *block = NULL;8197TR_CISCGraph *p = trans->getP();8198List<TR_CISCNode> *P2T = 
trans->getP2T();8199TR::Compilation *comp = trans->comp();8200trans->findFirstNode(&trTreeTop, &trNode, &block);8201if (!block)8202return false; // cannot find82038204// Currently, it allows only a single successor.8205TR::Block *target = trans->analyzeSuccessorBlock();8206if (!target)8207return false;82088209// Only handle very simple loops.8210if (trans->getNumOfBBlistBody() > 1)8211{8212if (trace) traceMsg(comp, "Need exactly 1 basic block\n");8213return false;8214}82158216// Should have 2 treetops in body. See makeStrlen16Graph8217if (block->getNumberOfRealTreeTops() != 2)8218{8219if (trace) traceMsg(comp, "Need exactly 2 real treetops\n");8220return false;8221}82228223auto astore = trans->getP2TRepInLoop(p->getImportantNode(0));8224auto loopTest = trans->getP2TRepInLoop(p->getImportantNode(1));8225auto astoreNode = astore->getHeadOfTrNode();8226auto ificmpne = loopTest->getHeadOfTrNode();82278228if (!astore || !loopTest || !astoreNode || !ificmpne)8229return false;82308231auto ptr = astoreNode->getChild(0)->getChild(0);8232auto increment = astoreNode->getChild(0)->getChild(1)->getConst<int32_t>();82338234TR::Node *iconst=NULL, *conv=NULL;8235if (ificmpne->getChild(0)->getOpCodeValue() == TR::iconst)8236{8237iconst = ificmpne->getChild(0);8238conv = ificmpne->getChild(1);8239}8240else if (ificmpne->getChild(1)->getOpCodeValue() == TR::iconst)8241{8242iconst = ificmpne->getChild(1);8243conv = ificmpne->getChild(0);8244}82458246if (trace) traceMsg(comp, "Failed one of the requirements\n");8247return false;8248}82498250/*********************************************************************************************8251* Catch very simple case of strlen168252n170n BBStart <block_30> (freq 1682) (in loop 30)8253n177n astore <auto slot 14>[id=384:"pszTmp"] [#65 Auto] [flags 0x7 0x0 ]8254n176n aladd (X>=0 internalPtr sharedMemory )8255n172n aload <auto slot 14>[id=384:"pszTmp"] [#65 Auto] [flags 0x7 0x0 ]8256n175n lconst 2 (highWordZero X!=0 X>=0 )8257n185n ificmpne --> 
block_30 BBStart at n170n ()8258n184n su2i (X>=0 )8259n181n sloadi <refined-array-shadow>[id=185:"(unsigned short)"] [#61 Shadow]8260n176n ==>aladd8261n183n iconst 0 (X==0 X>=0 X<=0 )8262n179n BBEnd </block_30> =====8263*/82648265TR_PCISCGraph *8266makeStrlen16Graph(TR::Compilation *c, int32_t ctrl)8267{8268auto tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "Strlen16", 0, 10);8269auto entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 9, 1, 0);8270tgt->addNode(entry);82718272auto ptr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::Address, tgt->incNumNodes(), 8, 0, 0, 0);8273tgt->addNode(ptr);8274auto increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, tgt->incNumNodes(), 7, 0, 0, 2);8275tgt->addNode(increment);8276auto addrAdd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::aladd, TR::Address, tgt->incNumNodes(), 6, 1, 2, entry, ptr, increment);8277tgt->addNode(addrAdd);8278auto addrStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::astore, TR::Address, tgt->incNumNodes(), 5, 1, 2, addrAdd, addrAdd, ptr);8279tgt->addNode(addrStore);82808281auto Load = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::Int16, tgt->incNumNodes(), 4, 1, 1, addrStore, ptr);8282Load->addHint(addrAdd);8283tgt->addNode(Load);8284auto conversion = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::Int32, tgt->incNumNodes(), 3, 1, 1, Load, Load);8285tgt->addNode(conversion);8286auto nullChar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 2, 0, 0, 0);8287tgt->addNode(nullChar);8288auto loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::Int32, tgt->incNumNodes(), 1, 2, 2, conversion, conversion, nullChar);8289tgt->addNode(loopTest);82908291auto exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 
0);8292tgt->addNode(exit);82938294loopTest->setSuccs(entry->getSucc(0), exit);8295loopTest->setIsChildDirectlyConnected();82968297tgt->setEntryNode(entry);8298tgt->setExitNode(exit);8299tgt->setImportantNodes(addrStore, loopTest);8300tgt->setNumDagIds(10);8301tgt->createInternalData(1);83028303tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);8304tgt->setTransformer(CISCTransform2Strlen16);8305//tgt->setAspects(storeMasks); // not sure which to set, but do want astore aload for ptr incr and any size ptr deref store8306tgt->setNoAspects(call|bndchk|bitop1, 0, 0);8307tgt->setMinCounts(1, 1, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount8308tgt->setHotness(warm, false);8309tgt->setInhibitBeforeVersioning();8310return tgt;8311}831283138314/****************************************************************************************8315* Corresponding C-like pseudocode with loop with pointer increment8316* T* start = arr; // char, short, int long ptr. Array can be any type container8317* T* end = arr + size // arr8318* while(start < end)8319* *start++ = 0;8320*8321n16n BBStart <block_5> (freq 10000) (in loop 5) [0x00000000823bf380]8322n22n astore <auto slot 2>[id=3:"start"] [#49 Auto] [flags 0x7 0x0 ] [0x00000000823bf590]8323n21n aladd (internalPtr sharedMemory ) [0x00000000823bf538]8324n18n aload <auto slot 2>[id=3:"start"] [#49 Auto] [flags 0x7 0x0 ] (X>=0 sharedMemory ) [0x00000000823b8325n20n lconst 1 (highWordZero X!=0 X>=0 ) [0x00000000823bf4e0]8326n26n bstorei <refined-array-shadow>[id=7:"(char)"] [#51 Shadow] [flags 0x80000601 0x0 ] [0x00000000823bf6f08327n18n ==>aload8328n25n bconst 0 (Unsigned X==0 X>=0 X<=0 ) [0x00000000823bf698]8329n31n ifacmpne --> block_5 BBStart at n16n () [0x00000000823bf8a8]8330n21n ==>aladd8331n30n aload <auto slot 0>[id=5:"end"] [#50 Auto] [flags 0x7 0x0 ] [0x00000000823bf850]8332n28n BBEnd </block_5> ===== [0x00000000823bf7a0]8333*8334*/8335TR_PCISCGraph *8336makePtrArraySetGraph(TR::Compilation *c, int32_t 
ctrl)8337{8338bool is64bit = c->target().is64Bit();8339TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "PtrArraySet", 0, 10);8340/****************************************************************************** opc id dagId #cfg #child other/pred/children */8341auto entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 9, 1, 0);8342tgt->addNode(entry);8343auto ptr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::Address, tgt->incNumNodes(), 8, 0, 0, 0);8344tgt->addNode(ptr);8345auto increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, tgt->incNumNodes(), 7, 0, 0);8346tgt->addNode(increment);8347auto addrAdd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), is64bit ? TR::aladd : TR::aiadd, TR::Address, tgt->incNumNodes(), 6, 1, 2, entry, ptr, increment);8348tgt->addNode(addrAdd);8349auto addrStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::astore, TR::Address, tgt->incNumNodes(), 5, 1, 2, addrAdd, addrAdd, ptr);8350tgt->addNode(addrStore);8351auto value = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0);8352tgt->addNode(value); // set value8353auto Store = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 3, 1, 2, addrStore, ptr, value);8354tgt->addNode(Store);8355auto endPtr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::Address, tgt->incNumNodes(), 2, 0, 0, 0);8356tgt->addNode(endPtr);8357auto loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::Address, tgt->incNumNodes(), 1, 2, 2, Store, ptr, endPtr);8358tgt->addNode(loopTest);8359auto exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);8360tgt->addNode(exit);83618362loopTest->setSuccs(entry->getSucc(0), 
exit);8363loopTest->setIsChildDirectlyConnected();83648365tgt->setEntryNode(entry);8366tgt->setExitNode(exit);8367tgt->setImportantNodes(addrStore, Store, loopTest);8368tgt->setNumDagIds(10);8369tgt->createInternalData(1);83708371tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);8372tgt->setTransformer(CISCTransform2PtrArraySet);8373//tgt->setAspects(storeMasks); // not sure which to set, but do want astore aload for ptr incr and any size ptr deref store8374tgt->setNoAspects(call|bndchk|bitop1, 0, 0);8375tgt->setMinCounts(1, 0, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount8376tgt->setHotness(warm, false);8377tgt->setInhibitBeforeVersioning();8378return tgt;8379}83808381/****************************************************************************************8382Corresponding Java-like pseudocode8383int i, end, value;8384Array[ ]; // char, int, float, long, and so on8385while(true){8386Array[i] = value;8387iaddORisub(i, -1)8388ifcmpall(i, end) break;8389}83908391Note 1: This idiom matches both count up and down loops.8392Note 2: The wildcard node iaddORisub matches iadd or isub.8393Note 3: The wildcard node ifcmpall matches all types of if-instructions.8394****************************************************************************************/8395TR_PCISCGraph *8396makeMemSetGraph(TR::Compilation *c, int32_t ctrl)8397{8398TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemSet", 0, 16);8399/****************************************************************************** opc id dagId #cfg #child other/pred/children */8400TR_PCISCNode *iv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0); tgt->addNode(iv); // array index8401TR_PCISCNode *iv1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(),10, 0, 0, 1); tgt->addNode(iv1); // array index8402TR_PCISCNode *Array = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), 
TR_arraybase, TR::NoType, tgt->incNumNodes(),9, 0, 0, 0); // array base8403tgt->addNode(Array);8404TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),8, 0, 0); tgt->addNode(end); // length8405// if cg only supports arrayset to zero only match constant nodes8406TR_PCISCNode *value = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), c->cg()->getSupportsArraySet() ? TR_variableORconst : TR_allconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0); tgt->addNode(value); // set value8407TR_PCISCNode *mulConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0);8408tgt->addNode(mulConst); // Multiplicative factor for index into non-byte arrays8409TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),5, 0, 0, 0); tgt->addNode(idx0);8410TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 0); tgt->addNode(aHeader); // array header8411TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(increment);8412TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(),2, 1, 0); tgt->addNode(entry);8413TR_PCISCNode *Addr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, entry, Array, idx0, aHeader, mulConst);8414TR_PCISCNode *i2x = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, Addr, value); tgt->addNode(i2x);8415TR_PCISCNode *Store = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, i2x, Addr, i2x);8416tgt->addNode(Store);8417TR_PCISCNode *ivStore = createIdiomIOP2VarInLoop(tgt, ctrl, 1, Store, TR_iaddORisub, iv, increment);8418TR_PCISCNode *iv1Store = createIdiomIOP2VarInLoop(tgt, ctrl, 1, 
ivStore, TR_iaddORisub, iv1, increment);8419TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, iv1Store, iv, end);8420tgt->addNode(loopTest);8421TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);84228423loopTest->setSuccs(entry->getSucc(0), exit);8424loopTest->setIsChildDirectlyConnected();84258426i2x->setIsOptionalNode();8427i2x->setIsChildDirectlyConnected();84288429tgt->setEntryNode(entry);8430tgt->setExitNode(exit);8431tgt->setImportantNodes(Store, ivStore, iv1Store, loopTest);8432tgt->setNumDagIds(12);8433tgt->createInternalData(1);84348435tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);8436tgt->setTransformer(CISCTransform2ArraySet);8437tgt->setAspects(mul, 0, existAccess);8438tgt->setNoAspects(call|bndchk|bitop1, 0, 0);8439tgt->setMinCounts(1, 0, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount8440tgt->setHotness(warm, false);8441tgt->setInhibitBeforeVersioning();8442return tgt;8443}84448445//////////////////////////////////////////////////////////////////////////8446//////////////////////////////////////////////////////////////////////////8447//////////////////////////////////////////////////////////////////////////84488449//*****************************************************************************************8450// IL code generation for filling memory8451// Input: ImportantNode(0) - non-byte array store8452// ImportantNode(1) - byte array store8453// ImportantNode(2) - iadd or isub for induction variable8454// ImportantNode(3) - exit if8455// ImportantNode(4) - the size of elements8456//*****************************************************************************************8457bool8458CISCTransform2MixedArraySet(TR_CISCTransformer *trans)8459{8460TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented 
yet");8461TR::Node *trNode;8462TR::TreeTop *trTreeTop;8463TR::Block *block;8464TR_CISCGraph *P = trans->getP();8465List<TR_CISCNode> *P2T = trans->getP2T();8466TR::Compilation *comp = trans->comp();8467bool ctrl = trans->isGenerateI2L();84688469TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");84708471TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");8472if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;84738474trans->findFirstNode(&trTreeTop, &trNode, &block);8475if (!block) return false; // cannot find84768477if (isLoopPreheaderLastBlockInMethod(comp, block))8478{8479traceMsg(comp, "Bailing CISCTransform2MixedArraySet due to null TT - might be a preheader in last block of method\n");8480return false;8481}84828483TR::Block *target = trans->analyzeSuccessorBlock();8484// Currently, it allows only a single successor.8485if (!target) return false;84868487TR_CISCNode *addORsubCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));8488TR_CISCNode *cmpIfAllCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));8489if (!cmpIfAllCISCNode) return false;8490TR_ASSERT(addORsubCISCNode->getOpcode() == TR::isub || addORsubCISCNode->getOpcode() == TR::iadd, "error");84918492// Check which count-up or count-down loop8493bool isIncrement = (addORsubCISCNode->getOpcode() == TR::isub);8494int lengthMod = 0;8495switch(cmpIfAllCISCNode->getOpcode())8496{8497case TR::ificmpgt:8498lengthMod = 1;8499// fallthrough8500case TR::ificmpge:8501if (!isIncrement) return false;8502break;8503case TR::ificmple:8504lengthMod = -1;8505// fallthrough8506case TR::ificmplt:8507if (isIncrement) return false;8508break;8509default:8510return false;8511}85128513List<TR::Node> listStores(comp->trMemory());8514ListAppender<TR::Node> appenderListStores(&listStores);8515ListIterator<TR_CISCNode> ni(trans->getP2T() + 
P->getImportantNode(0)->getID());8516TR_CISCNode *inStoreCISCNode;8517for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())8518{8519if (!inStoreCISCNode->isOutsideOfLoop())8520appenderListStores.add(inStoreCISCNode->getHeadOfTrNodeInfo()->_node);8521}8522ni.set(trans->getP2T() + P->getImportantNode(1)->getID());8523for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())8524{8525if (!inStoreCISCNode->isOutsideOfLoop())8526appenderListStores.add(inStoreCISCNode->getHeadOfTrNodeInfo()->_node);8527}8528if (listStores.isEmpty()) return false;85298530TR::Node *indexRepNode, *variableORconstRepNode1;8531getP2TTrRepNodes(trans, &indexRepNode, &variableORconstRepNode1);8532TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();8533if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;85348535//8536// analyze each store8537//8538ListIterator<TR::Node> iteratorStores(&listStores);8539TR::Node *inStoreNode;8540TR::Node * indexNode = createLoad(indexRepNode);85418542// check if the induction variable8543// is being stored into the array8544for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())8545{8546TR::Node * valueNode = inStoreNode->getChild(1);8547if (valueNode->getOpCode().isLoadDirect() && valueNode->getOpCode().hasSymbolReference())8548{8549if (valueNode->getSymbolReference()->getReferenceNumber() == indexNode->getSymbolReference()->getReferenceNumber())8550{8551dumpOptDetails(comp, "arraystore tree has induction variable on rhs\n");8552return false;8553}8554}8555}85568557List<TR::Node> listArraySet(comp->trMemory());8558TR::Node * computeIndex = NULL;8559for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())8560{8561TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();8562TR::Node * valueNode = convertStoreToLoad(comp, inStoreNode->getChild(1));8563int elementSize = 
inStoreNode->getSize();85648565TR::Node * lengthNode;8566if (!isIncrement) // count-down loop8567{8568// exit variable is zero or not8569bool isInitOffset0 = (variableORconstRepNode1->getOpCodeValue() == TR::iconst && (variableORconstRepNode1->getInt()-lengthMod) == 0);8570bool done = false;8571TR::Node * constm1 = TR::Node::create(indexRepNode, TR::iconst, 0, -1);8572TR::Node * lastValue = NULL;8573if (isInitOffset0)8574{8575// When the array index is zero, it will modify the address computation to "base + size of header".8576TR::Node *arrayheader = outputNode->getSecondChild()->getSecondChild();8577switch (outputNode->getSecondChild()->getOpCodeValue())8578{8579case TR::iadd:8580case TR::ladd:8581outputNode->setSecond(arrayheader);8582done = true;8583break;8584case TR::isub:8585if (arrayheader->getOpCodeValue() == TR::iconst)8586{8587arrayheader->setInt(-arrayheader->getInt());8588outputNode->setSecond(arrayheader);8589done = true;8590}8591break;8592case TR::lsub:8593if (arrayheader->getOpCodeValue() == TR::lconst)8594{8595arrayheader->setLongInt(-arrayheader->getLongInt());8596outputNode->setSecond(arrayheader);8597done = true;8598}8599break;8600default:8601break;8602}8603lengthNode = indexNode;8604computeIndex = constm1;8605}8606else8607{8608lastValue = convertStoreToLoad(comp, variableORconstRepNode1);8609if (lengthMod)8610{8611lastValue = createOP2(comp, TR::isub,8612lastValue,8613TR::Node::create(indexNode, TR::iconst, 0, lengthMod));8614}8615lengthNode = createOP2(comp, TR::isub, indexNode, lastValue);8616computeIndex = createOP2(comp, TR::iadd, lastValue, constm1);8617}8618lengthNode = createOP2(comp, TR::isub, lengthNode, TR::Node::create(indexNode, TR::iconst, 0, -(lengthMod+1)));86198620if (!done)8621{8622if (!lastValue) lastValue = convertStoreToLoad(comp, variableORconstRepNode1);8623TR::Node *termNode = createI2LIfNecessary(comp, ctrl, lastValue);8624TR::Node *mulNode = outputNode->getSecondChild()->getFirstChild();8625mulNode = 
mulNode->skipConversions();8626if (elementSize > 1)8627mulNode->setAndIncChild(0, termNode);8628else8629outputNode->getSecondChild()->setAndIncChild(0, termNode);8630}8631}8632else // count-up loop8633{8634TR::Node * lastValue = convertStoreToLoad(comp, variableORconstRepNode1);8635if (lengthMod)8636{8637lastValue = createOP2(comp, TR::iadd,8638lastValue,8639TR::Node::create(indexNode, TR::iconst, 0, lengthMod));8640}8641lengthNode = createOP2(comp, TR::isub, lastValue, indexNode);8642computeIndex = lastValue;8643}86448645if (elementSize > 1)8646lengthNode = TR::Node::create(TR::imul, 2,8647lengthNode,8648TR::Node::create(inStoreNode, TR::iconst, 0, elementSize));86498650lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);86518652TR::Node * arrayset = TR::Node::create(TR::arrayset, 3, outputNode, valueNode, lengthNode);8653arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());86548655listArraySet.add(TR::Node::create(TR::treetop, 1, arrayset));8656}86578658TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, computeIndex);8659TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);86608661// Insert nodes and maintain the CFG8662TR::TreeTop *last;8663ListIterator<TR::Node> iteratorArraySet(&listArraySet);8664TR::Node *arrayset;8665last = trans->removeAllNodes(trTreeTop, block->getExit());8666last->join(block->getExit());8667block = trans->insertBeforeNodes(block);8668last = block->getLastRealTreeTop();8669for (arrayset = iteratorArraySet.getFirst(); arrayset; arrayset = iteratorArraySet.getNext())8670{8671TR::TreeTop *newTop = TR::TreeTop::create(comp, arrayset);8672last->join(newTop);8673last = newTop;8674}8675last->join(indVarUpdateTreeTop);8676indVarUpdateTreeTop->join(block->getExit());86778678trans->insertAfterNodes(block);86798680trans->setSuccessorEdge(block, target);8681return 
true;8682}86838684/****************************************************************************************8685Corresponding Java-like pseudocode8686int i, end, value;8687byte byteArray[ ];8688Array[ ]; // char, int, float, long, and so on8689while(true){8690Array[i] = value1;8691byteArray[i] = value2;8692iaddORisub(i, -1)8693ifcmpall(i, end) break;8694}86958696Note 1: This idiom matches both count up and down loops.8697Note 2: The wildcard node iaddORisub matches iadd or isub.8698Note 3: The wildcard node ifcmpall matches all types of if-instructions.8699****************************************************************************************/8700TR_PCISCGraph *8701makeMixedMemSetGraph(TR::Compilation *c, int32_t ctrl)8702{8703TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MixedMemSet", 0, 16);8704/************************************ opc id dagId #cfg #child other/pred/children */8705TR_PCISCNode *iv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(),12, 0, 0, 0); tgt->addNode(iv); // array index8706TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),11, 0, 0); tgt->addNode(end); // length8707TR_PCISCNode *Array = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(),10, 0, 0, 0); // array base8708tgt->addNode(Array);8709TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); // array base8710tgt->addNode(byteArray);8711TR_PCISCNode *value1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, tgt->incNumNodes(), 8, 0, 0); tgt->addNode(value1); // set value8712TR_PCISCNode *value2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0); tgt->addNode(value2); // set value8713TR_PCISCNode *mulConst = new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0);8714tgt->addNode(mulConst); // Multiplicative factor for index into non-byte arrays8715TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),5, 0, 0, 0); tgt->addNode(idx0);8716TR_PCISCNode *aHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 0); tgt->addNode(aHeader); // array header8717TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(increment);8718TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size8719TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(),2, 1, 0); tgt->addNode(entry);8720TR_PCISCNode *Addr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, entry, Array, idx0, aHeader, mulConst);8721TR_PCISCNode *i2x = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, Addr, value1); tgt->addNode(i2x);8722TR_PCISCNode *Store = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_inbstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, i2x, Addr, i2x);8723tgt->addNode(Store);8724TR_PCISCNode *byteAddr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, Store, byteArray, idx0, aHeader, c1);8725TR_PCISCNode *i2b = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, byteAddr, value2); tgt->addNode(i2b);8726TR_PCISCNode *byteStore =new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, i2b, byteAddr, i2b);8727tgt->addNode(byteStore);8728TR_PCISCNode *ivStore = createIdiomIOP2VarInLoop(tgt, ctrl, 1, byteStore, TR_iaddORisub, iv, increment);8729TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, 
tgt->incNumNodes(), 1, 2, 2, ivStore, iv, end);8730tgt->addNode(loopTest);8731TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(exit);87328733loopTest->setSuccs(entry->getSucc(0), exit);8734loopTest->setIsChildDirectlyConnected();87358736i2x->setIsOptionalNode();8737i2x->setIsChildDirectlyConnected();8738i2b->setIsOptionalNode();8739i2b->setIsChildDirectlyConnected();87408741tgt->setEntryNode(entry);8742tgt->setExitNode(exit);8743tgt->setImportantNodes(Store, byteStore, ivStore->getChild(0), loopTest, mulConst);8744tgt->setNumDagIds(13);8745tgt->createInternalData(1);87468747tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);8748tgt->setTransformer(CISCTransform2MixedArraySet);8749tgt->setAspects(mul, 0, existAccess);8750tgt->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_2, 0);8751tgt->setMinCounts(1, 0, 2); // minimum ifCount, indirectLoadCount, indirectStoreCount8752tgt->setHotness(warm, false);8753tgt->setInhibitBeforeVersioning();8754return tgt;8755}87568757875887598760//////////////////////////////////////////////////////////////////////////8761//////////////////////////////////////////////////////////////////////////8762//*****************************************************************************************8763// IL code generation for 2 if-statement version of comparing memory (using CLCL)8764// Input: ImportantNode(0) - array load for src18765// ImportantNode(1) - array load for src28766// ImportantNode(2) - exit-if for checking the length8767// ImportantNode(3) - exit-if for comparing two arrays8768// ImportantNode(4) - increment the array index for src18769// ImportantNode(5) - increment the array index for src28770// ImportantNode(6) - the size of elements (NULL for byte arrays)8771//8772// Note: If we need to know the position where characters are different (flag generateArraycmplen),8773// we generate the CLCL instruction. 
Otherwise, we generate the CLC instruction.8774//*****************************************************************************************8775bool8776CISCTransform2ArrayCmp2Ifs(TR_CISCTransformer *trans)8777{8778TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");8779TR::Node *trNode;8780TR::TreeTop *trTreeTop;8781TR::Block *block;8782TR_CISCGraph *P = trans->getP();8783List<TR_CISCNode> *P2T = trans->getP2T();8784TR::Compilation *comp = trans->comp();8785bool ctrl = trans->isGenerateI2L();87868787TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");87888789trans->findFirstNode(&trTreeTop, &trNode, &block);8790if (!block) return false; // cannot find87918792if (isLoopPreheaderLastBlockInMethod(comp, block))8793{8794traceMsg(comp, "Bailing CISCTransform2ArrayCmp2Ifs due to null TT - might be a preheader in last block of method\n");8795return false;8796}87978798TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));8799TR::TreeTop *okDest = NULL; // Target treetop of the array length check.8800TR_CISCNode *cmpneIfAllCISCNode[2]; // CISCNodes for the arraycmp checks.8801TR::TreeTop *topCmpIfNonEqual[2]; // Treetops of the arraycmp checks.8802TR::Node *cmpIfNonEqual[2]; // Nodes of the arraycmp checks.8803TR::TreeTop *failDest[2]; // Target treetops of the arraycmp checks.88048805int32_t count = 0;8806// Extract all the CISCNodes corresponding to the two exit if-stmts8807// for comparing the two arrays.8808ListIterator <TR_CISCNode> ci;8809ci.set(trans->getP2T() + P->getImportantNode(3)->getID());8810for (TR_CISCNode *c = ci.getFirst(); c; c = ci.getNext())8811{8812if (!c->isOutsideOfLoop())8813{8814// Checks exit-if for comparing two arrays8815switch(c->getOpcode())8816{8817case TR::ificmpgt:8818case TR::ificmplt:8819case TR::iflcmpgt:8820case TR::iflcmplt:8821if (count >= 2) return false;8822cmpneIfAllCISCNode[count] = c;8823topCmpIfNonEqual[count] = 
c->getHeadOfTrNodeInfo()->_treeTop;8824cmpIfNonEqual[count] = c->getHeadOfTrNodeInfo()->_node;8825failDest[count] = c->getDestination();8826count++;8827break;8828default:8829return false;8830}8831}8832}8833if (count != 2) return false;88348835// Checks exit-if for checking the length8836switch(cmpIfAllCISCNode->getOpcode())8837{8838case TR::ificmpge:8839break;8840default:8841return false;8842}8843okDest = cmpIfAllCISCNode->getDestination();88448845//8846// obtain a CISCNode of each store for incrementing induction variables8847TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));8848TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));8849if (!storeSrc2) storeSrc2 = storeSrc1;8850TR_ASSERT(storeSrc1 != NULL && storeSrc2 != NULL, "error");88518852//8853// checking a set of all uses for each index8854TR_ASSERT(storeSrc1->getDagID() == storeSrc2->getDagID(), "error");88558856TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));8857TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));8858if (!src1CISCNode || !src2CISCNode) return false;8859TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;8860TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;8861// check the indices used in the array loads and8862// the store nodes8863//8864if (!indicesAndStoresAreConsistent(comp, inSrc1Node, inSrc2Node, storeSrc1, storeSrc2))8865{8866dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node);8867return false;8868}8869TR::Node * mulFactorNode;8870int32_t elementSize;88718872// Get the size of elements8873if (!getMultiplier(trans, P->getImportantNode(6), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;8874if (elementSize != inSrc1Node->getSize() || elementSize != inSrc2Node->getSize())8875{8876traceMsg(comp, "CISCTransform2ArrayCmp2Ifs failed - Size Mismatch. 
Element Size: %d InSrc1Size: %d inSrc2Size: %d\n", elementSize, inSrc1Node->getSize(), inSrc2Node->getSize());8877return false; // Size is mismatch!8878}88798880TR::Node *src1IdxRepNode, *src2IdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *variableORconstRepNode;8881getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &variableORconstRepNode);8882if (!src2IdxRepNode) src2IdxRepNode = src1IdxRepNode;8883TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();8884if (!trans->analyzeArrayIndex(src1IdxSymRef)) return false;8885TR::SymbolReference * src2IdxSymRef = src2IdxRepNode->getSymbolReference();8886TR::Node *start1Idx, *start2Idx, *end1Idx, *end2Idx, *diff2;8887TR_CISCNode *arrayindex0, *arrayindex1;8888arrayindex0 = trans->getP()->getCISCNode(TR_arrayindex, true, 0);8889bool indexOf = trans->isIndexOf();8890if (indexOf && arrayindex0)8891{8892// more analysis for String.indexOf(Ljava/lang/String;I)I8893TR_CISCNode *a0;8894ListIterator<TR_CISCNode> pi(arrayindex0->getParents());8895for (a0 = pi.getFirst(); a0; a0 = pi.getNext())8896{8897if (a0->getOpcode() == TR::isub)8898{8899if (trans->getP2TRepInLoop(a0)) arrayindex0 = a0;8900break;8901}8902}8903}8904arrayindex1 = trans->getP()->getCISCNode(TR_arrayindex, true, 1);89058906bool useSrc1 = usedInLoopTest(comp, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, src1IdxSymRef);8907end2Idx = convertStoreToLoad(comp, variableORconstRepNode);8908start2Idx = convertStoreToLoad(comp, useSrc1 ? 
src1IdxRepNode : src2IdxRepNode);8909diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);8910start1Idx = convertStoreToLoad(comp, src1IdxRepNode);8911end1Idx = NULL;89128913if (arrayindex0) start1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;8914if (arrayindex1) start2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;89158916// Prepare effective addresses for arraycmplen8917TR::Node * input1Node = inSrc1Node->getChild(0)->duplicateTree();8918TR::Node * input2Node = inSrc2Node->getChild(0)->duplicateTree();8919TR::Node * lengthNode;8920lengthNode = diff2;89218922// an extra compare is going to be generated after the arrayCmp8923// to determine where to branch. if the arrayCmp found a mismatch8924// between the two array elements, the induction variable will be8925// updated correctly and the extra compare will test the element8926// at the index where the mismatch occurred.8927// however, if the two arrays are the same, the arrayCmp will terminate8928// after searching lengthNode bytes causing the extra compare to test8929// the index at lengthNode+1 which is incorrect.8930//8931lengthNode = TR::Node::create(TR::isub, 2, lengthNode, TR::Node::create(mulFactorNode, TR::iconst, 0, 1));89328933int shrCount = 0;8934TR::Node * elementSizeNode = NULL;8935if (elementSize > 1)8936{8937//FIXME: enable this code for 64-bit8938// currently disabled until all uses of lengthNode are8939// sign-extended correctly8940//8941TR::ILOpCodes mulOp = TR::imul;8942#if 08943if (comp->target().is64Bit())8944{8945elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);8946elementSizeNode->setLongInt(elementSize);8947lengthNode = TR::Node::create(TR::i2l, 1, lengthNode);8948mulOp = TR::lmul;8949}8950else8951#endif8952elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);8953lengthNode = TR::Node::create(mulOp, 2,8954lengthNode,8955elementSizeNode);8956switch(elementSize)8957{8958case 2: shrCount = 1; break;8959case 4: 
shrCount = 2; break;8960case 8: shrCount = 3; break;8961default: TR_ASSERT(false, "error");8962}8963}89648965// Currently, it is inserted by reorderTargetNodesInBB()8966bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);8967TR::Block *compensateBlock0[2];8968compensateBlock0[0] = compensateBlock0[1] = NULL;8969TR::Block *compensateBlock1 = NULL;89708971// create two empty blocks for inserting compensation code prepared by reorderTargetNodesInBB()8972if (isCompensateCode)8973{8974compensateBlock0[0] = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);8975compensateBlock0[1] = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);8976compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);8977compensateBlock0[0]->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest[0])));8978compensateBlock0[1]->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest[1])));8979compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));8980failDest[0] = compensateBlock0[0]->getEntry();8981failDest[1] = compensateBlock0[1]->getEntry();8982okDest = compensateBlock1->getEntry();8983}8984TR_ASSERT(okDest != NULL && failDest[0] != NULL && failDest[1] != NULL, "error!");89858986TR::Node * topArraycmp;8987TR::TreeTop * newFirstTreeTop[2];8988TR::TreeTop * newLastTreeTop[2];89898990// Using the CLCL instruction8991lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);8992TR::Node * arraycmplen = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, lengthNode);8993arraycmplen->setArrayCmpLen(true);8994arraycmplen->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());89958996TR::SymbolReference * resultSymRef = comp->getSymRefTab()->8997createTemporary(comp->getMethodSymbol(), TR::Int32);8998topArraycmp = 
TR::Node::createStore(resultSymRef, arraycmplen);89999000TR::Node * resultLoad = TR::Node::createLoad(topArraycmp, resultSymRef);9001TR::Node * equalLen = resultLoad;9002if (shrCount != 0)9003{9004equalLen = TR::Node::create(TR::ishr, 2,9005equalLen,9006TR::Node::create(equalLen, TR::iconst, 0, shrCount));9007}90089009TR::Node *tmpNode = createStoreOP2(comp, src1IdxSymRef, TR::iadd, src1IdxSymRef, equalLen, trNode);9010newFirstTreeTop[0] = TR::TreeTop::create(comp, tmpNode);9011newLastTreeTop[0] = newFirstTreeTop[0];9012TR::TreeTop * tmpTreeTop = NULL;90139014if (src1IdxSymRef != src2IdxSymRef)9015{9016tmpNode = createStoreOP2(comp, src2IdxSymRef, TR::iadd, src2IdxSymRef, equalLen, trNode);9017tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9018newLastTreeTop[0]->join(tmpTreeTop);9019newLastTreeTop[0] = tmpTreeTop;9020}90219022//9023// Generate 2 if-statements90249025// First One9026TR_CISCNode *ifChild[2];9027ifChild[0] = trans->getP2TInLoopAllowOptionalIfSingle(P->getImportantNode(3)->getChild(0));9028ifChild[1] = trans->getP2TInLoopAllowOptionalIfSingle(P->getImportantNode(3)->getChild(1));9029TR::DataType dataType = cmpIfNonEqual[0]->getChild(0)->getDataType();9030TR_ASSERT(dataType == TR::Int32 || dataType == TR::Int64, "error!");9031TR::SymbolReference * diffSymRef = comp->getSymRefTab()->9032createTemporary(comp->getMethodSymbol(), dataType);9033tmpNode = TR::Node::createStore(diffSymRef,9034TR::Node::create(dataType == TR::Int32 ? 
TR::isub : TR::lsub, 2,9035ifChild[0]->getHeadOfTrNodeInfo()->_node,9036ifChild[1]->getHeadOfTrNodeInfo()->_node));9037tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9038newLastTreeTop[0]->join(tmpTreeTop);9039newLastTreeTop[0] = tmpTreeTop;9040TR::Node * loadNode = convertStoreToLoad(comp, tmpNode);9041TR::Node * constNode;9042if (dataType == TR::Int32)9043{9044constNode = TR::Node::create(loadNode, TR::iconst, 0, 0);9045}9046else9047{9048constNode = TR::Node::create(loadNode, TR::lconst, 0, 0);9049constNode->setLongInt(0);9050}90519052tmpNode = TR::Node::createif((TR::ILOpCodes)cmpneIfAllCISCNode[0]->getOpcode(),9053loadNode,9054constNode,9055failDest[0]);9056tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9057newLastTreeTop[0]->join(tmpTreeTop);9058newLastTreeTop[0] = tmpTreeTop;90599060// Second One9061tmpNode = TR::Node::createif((TR::ILOpCodes)cmpneIfAllCISCNode[1]->getOpcode(),9062loadNode->duplicateTree(),9063constNode->duplicateTree(),9064failDest[1]);9065tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9066newFirstTreeTop[1] = tmpTreeTop;9067newLastTreeTop[1] = newFirstTreeTop[1];90689069// Transform CFG9070TR::CFG *cfg = comp->getFlowGraph();9071TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();9072cfg->setStructure(NULL);9073TR::TreeTop *last;90749075last = trans->removeAllNodes(trTreeTop, block->getExit());9076last->join(block->getExit());9077block = trans->insertBeforeNodes(block);9078last = block->getLastRealTreeTop();9079last->join(trTreeTop);9080trTreeTop->setNode(topArraycmp);9081trTreeTop->join(newFirstTreeTop[0]);9082newLastTreeTop[0]->join(block->getExit());90839084block = trans->insertAfterNodes(block);90859086TR::Block *if1Block = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9087if1Block->getEntry()->join(newFirstTreeTop[1]);9088newLastTreeTop[1]->join(if1Block->getExit());9089if (orgNextTreeTop != NULL) {9090cfg->insertBefore(if1Block, orgNextTreeTop->getNode()->getBlock());9091} else {9092// Block 
returned by findFirstNode is the last BB of the method.9093cfg->addNode(if1Block);9094}9095cfg->join(block, if1Block);90969097trans->setSuccessorEdges(if1Block,9098okDest->getEnclosingBlock(),9099failDest[1]->getEnclosingBlock());91009101trans->setSuccessorEdges(block,9102if1Block,9103failDest[0]->getEnclosingBlock());91049105block = if1Block;9106if (isCompensateCode)9107{9108TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();9109TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();9110compensateBlock0[0] = trans->insertAfterNodesIdiom(compensateBlock0[0], 0, true);9111compensateBlock0[1] = trans->insertAfterNodesIdiom(compensateBlock0[1], 0, true);9112compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);9113cfg->insertBefore(compensateBlock1, orgNextBlock);9114cfg->insertBefore(compensateBlock0[1], compensateBlock1);9115cfg->insertBefore(compensateBlock0[0], compensateBlock0[1]);9116cfg->join(block, compensateBlock0[0]);9117}91189119return true;9120}9121912291239124//*****************************************************************************************9125// IL code generation for comparing memory (using CLC or CLCL)9126// Input: ImportantNode(0) - array load for src19127// ImportantNode(1) - array load for src29128// ImportantNode(2) - exit-if for checking the length9129// ImportantNode(3) - exit-if for comparing two arrays9130// ImportantNode(4) - increment the array index for src19131// ImportantNode(5) - increment the array index for src29132// ImportantNode(6) - the size of elements (NULL for byte arrays)9133// ImportantNode(7) - additional node for analyzing MEMCMPCompareTo. Not used for the others.9134//9135// Note: If we need to know the position where characters are different (flag generateArraycmplen),9136// we generate the CLCL instruction. 
Otherwise, we generate the CLC instruction.9137//*****************************************************************************************9138bool9139CISCTransform2ArrayCmp(TR_CISCTransformer *trans)9140{91419142TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");9143const bool disptrace = DISPTRACE(trans);9144TR::Node *trNode;9145TR::TreeTop *trTreeTop;9146TR::Block *block;9147TR_CISCGraph *P = trans->getP();9148List<TR_CISCNode> *P2T = trans->getP2T();9149TR::Compilation *comp = trans->comp();9150bool ctrl = trans->isGenerateI2L();91519152trans->findFirstNode(&trTreeTop, &trNode, &block);9153if (!block) return false; // cannot find91549155TR::Block *preHeader = NULL;9156if (isLoopPreheaderLastBlockInMethod(comp, block, &preHeader))9157{9158traceMsg(comp, "Bailing CISCTransform2ArrayCmp due to null TT - might be a preheader in last block of method\n");9159return false;9160}91619162TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));9163TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));9164if (!storeSrc2) storeSrc2 = storeSrc1;9165TR_ASSERT(storeSrc1 != NULL && storeSrc2 != NULL, "error");916691679168if (preHeader)9169{9170if (disptrace)9171traceMsg(comp, "found preheader to be %d\n", preHeader->getNumber());9172//9173// obtain a CISCNode of each store for incrementing induction variables91749175//check if any of the loop indices are defined between the preheader first tree and the first node found to match idiom9176TR::Node * inStoreSrc1= storeSrc1->getHeadOfTrNodeInfo()->_node;9177TR::Node * inStoreSrc2= storeSrc2->getHeadOfTrNodeInfo()->_node;91789179int32_t index1SymRefNum = inStoreSrc1->getSymbolReference()->getReferenceNumber();9180int32_t index2SymRefNum = inStoreSrc2->getSymbolReference()->getReferenceNumber();91819182if (disptrace)9183traceMsg(comp, "searching for stores to loop indices between preheader first tree %p and first matching tree %p, looking for symrefnum 
%d %d\n", preHeader->getFirstRealTreeTop()->getNode(),trTreeTop->getNode(),index1SymRefNum,index2SymRefNum);918491859186TR::Node * tempNode;9187for (TR::TreeTop * tt = preHeader->getFirstRealTreeTop();tt && tt != trTreeTop; tt = tt->getNextRealTreeTop())9188{9189tempNode = tt->getNode();9190if (tempNode->getOpCode().isStore() && tempNode->getOpCode().hasSymbolReference() &&9191((tempNode->getSymbolReference()->getReferenceNumber() == index1SymRefNum) ||9192(tempNode->getSymbolReference()->getReferenceNumber() == index2SymRefNum)))9193{9194traceMsg(comp, "Bailing CISCTransform2ArrayCmp due to unexpected store (%p) of one of the indices prior to the idiom\n",tempNode);9195return false;9196}9197}9198}91999200TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));9201TR_CISCNode *cmpneIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));9202ListIterator <TR_CISCNode> ci;9203TR_CISCNode *c;9204bool isDecrement = false;9205bool needVersioned = false;9206bool addLength1 = true;9207TR::TreeTop *failDest = NULL, *okDest = NULL;9208bool generateArraycmplen;9209bool generateArraycmpsign;9210bool compareTo;9211bool indexOf = trans->isIndexOf();92129213// The transformation can support two if-stmts: array comparison exit and one loop ending condition.9214TR_CISCGraph *T = trans->getT();9215if (T && T->getAspects()->getIfCount() > 2)9216{9217traceMsg(comp,"CISCTransform2ArrayCmp detected %d if-stmts in loop (> 2). 
Not transforming.\n", T->getAspects()->getIfCount());9218return false;9219}92209221// Checks exit-if for comparing two arrays9222switch(cmpneIfAllCISCNode->getOpcode())9223{9224case TR::ificmpne:9225case TR::ifbcmpne:9226case TR::ifscmpne:9227case TR::iflcmpne:9228case TR::ifacmpne:9229case TR::iffcmpne:9230case TR::ifdcmpne:9231case TR::iffcmpneu:9232case TR::ifdcmpneu:9233break;9234case TR::ificmpgt:9235case TR::ificmplt:9236case TR::iflcmpgt:9237case TR::iflcmplt:9238return CISCTransform2ArrayCmp2Ifs(trans); // Use 2 if-statements version9239default:9240return false;9241}92429243failDest = cmpneIfAllCISCNode->getDestination();92449245// We will fail this pattern if the comparison 'exit' is in fact not an exit out of the loop9246if (trans->isBlockInLoopBody(failDest->getNode()->getBlock()))9247{9248if (disptrace)9249traceMsg(comp, "CISCTransform2ArrayCmp failing transformer, ifcmpall test branch does not exit the loop.\n");9250return false;9251}92529253// Checks exit-if for checking the length9254switch(cmpIfAllCISCNode->getOpcode())9255{9256case TR::ificmplt:9257if (cmpIfAllCISCNode->isEmptyHint()) return false;9258c = cmpIfAllCISCNode->getHintChildren()->getListHead()->getData();9259if (c->getOpcode() != TR::iadd) return false;9260isDecrement = true;9261needVersioned = true;9262addLength1 = true;9263break;9264case TR::ificmple:{9265TR_CISCNode *child = cmpIfAllCISCNode->getChild(0);9266ci.set(child->getParents());9267for (c = ci.getFirst(); c; c = ci.getNext())9268if (c->getOpcode() == TR::iadd) break;9269if (!c) return false;9270isDecrement = true;9271needVersioned = true;9272addLength1 = true;9273break;}9274case TR::ificmpgt:9275isDecrement = false;9276needVersioned = false;9277addLength1 = true;9278break;9279case TR::ificmpge:9280isDecrement = false;9281needVersioned = false;9282addLength1 = false;9283break;9284default:9285return false;9286}92879288okDest = cmpIfAllCISCNode->getDestination();92899290//9291// checking a set of all uses for each 
index9292TR_ASSERT(storeSrc1->getDagID() == storeSrc2->getDagID(), "error");9293generateArraycmplen = false;9294generateArraycmpsign = false;9295if (storeSrc1 == storeSrc2)9296{9297if (!storeSrc1->checkDagIdInChains())9298{9299// there is an use outside of the loop.9300if (isDecrement)9301return false;9302else9303generateArraycmplen = true;9304}9305}9306else9307{9308if (!storeSrc1->checkDagIdInChains() || !storeSrc2->checkDagIdInChains())9309{9310// there is an use outside of the loop.9311if (isDecrement)9312return false;9313else9314generateArraycmplen = true;9315}9316}9317List<TR::TreeTop> compareIfs(comp->trMemory());9318if (true == (compareTo = trans->isCompareTo()))9319{9320if (!generateArraycmplen)9321{9322bool canConvertToArrayCmp = false;9323if (trans->canConvertArrayCmpSign(trans->getP2TRep(P->getImportantNode(7))->getHeadOfTrNode(),9324&compareIfs, &canConvertToArrayCmp))9325{9326if (!canConvertToArrayCmp)9327generateArraycmpsign = true;9328}9329else9330{9331generateArraycmplen = true;9332}9333}9334}93359336TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));9337TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));9338if (!src1CISCNode || !src2CISCNode || src1CISCNode == src2CISCNode) return false;9339TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;9340TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;93419342if (generateArraycmpsign)9343{9344if (!comp->cg()->getSupportsArrayCmpSign() ||9345!((inSrc1Node->getType().isIntegral() && src1CISCNode->getIlOpCode().isUnsigned()) || inSrc1Node->getType().isAddress()))9346{9347// arrayCmpLen can be reduced to arrayCmpSign, but either codegen does not support it9348// or we can't guarantee it works with byte-by-byte comparisons (only allow addresses and unsigned integrals)9349generateArraycmpsign = false;9350generateArraycmplen = true;9351}9352}93539354// check the indices used in the array loads and9355// the store 
nodes9356//9357if (!indicesAndStoresAreConsistent(comp, inSrc1Node, inSrc2Node, storeSrc1, storeSrc2))9358{9359dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node);9360return false;9361}93629363if (!areArraysInvariant(comp, inSrc1Node, inSrc2Node, T))9364{9365traceMsg(comp, "input array bases %p and %p are not invariant, no reduction\n", inSrc1Node, inSrc2Node);9366return false;9367}93689369TR::Node * mulFactorNode;9370int elementSize;93719372// Get the size of elements9373if (!getMultiplier(trans, P->getImportantNode(6), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;93749375if (inSrc1Node->getType() != inSrc2Node->getType())9376{9377traceMsg(comp,9378"CISCTransform2ArrayCmp failed - Array access types differ. inSrc1: %s, inSrc2: %s\n",9379TR::DataType::getName(inSrc1Node->getType()),9380TR::DataType::getName(inSrc2Node->getType()));9381return false; // Size is mismatch!9382}93839384const uint32_t expectedSize = inSrc1Node->getType().isAddress()9385? TR::Compiler->om.sizeofReferenceField()9386: inSrc1Node->getSize();93879388if (elementSize != expectedSize)9389{9390traceMsg(comp,9391"CISCTransform2ArrayCmp failed - Size Mismatch. 
Element Size: %d, Expected Size: %d\n",9392elementSize,9393expectedSize);9394return false; // Size is mismatch!9395}93969397TR::Node *src1IdxRepNode, *src2IdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *variableORconstRepNode;9398getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &variableORconstRepNode);9399if (!src2IdxRepNode) src2IdxRepNode = src1IdxRepNode;9400TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();9401if (!trans->analyzeArrayIndex(src1IdxSymRef)) return false;9402TR::SymbolReference * src2IdxSymRef = src2IdxRepNode->getSymbolReference();9403TR::Node *start1Idx, *start2Idx, *end1Idx, *end2Idx, *diff2;9404TR_CISCNode *arrayindex0, *arrayindex1;9405arrayindex0 = trans->getP()->getCISCNode(TR_arrayindex, true, 0);9406if (indexOf && arrayindex0)9407{9408// more analysis for String.indexOf(Ljava/lang/String;I)I9409TR_CISCNode *a0;9410ListIterator<TR_CISCNode> pi(arrayindex0->getParents());9411for (a0 = pi.getFirst(); a0; a0 = pi.getNext())9412{9413if (a0->getOpcode() == TR::isub)9414{9415if (trans->getP2TRepInLoop(a0)) arrayindex0 = a0;9416break;9417}9418}9419}9420arrayindex1 = trans->getP()->getCISCNode(TR_arrayindex, true, 1);94219422bool useSrc1 = usedInLoopTest(comp, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, src1IdxSymRef);94239424TR::Node * input1Node;9425TR::Node * input2Node;9426TR::Node *startNode = NULL;9427TR::Node *endNode = NULL;9428if (isDecrement) // count-down loop9429{9430start2Idx = convertStoreToLoad(comp, variableORconstRepNode);9431end2Idx = convertStoreToLoad(comp, useSrc1 ? 
src1IdxRepNode : src2IdxRepNode);9432diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);9433end1Idx = convertStoreToLoad(comp, src1IdxRepNode);9434start1Idx = createOP2(comp, TR::isub, end1Idx, diff2);94359436if (disptrace)9437traceMsg(comp, "isDecrement start1Idx %p start2Idx %p end1Idx %p end2Idx %p\n", start1Idx, start2Idx, end1Idx, end2Idx);9438startNode = start2Idx->duplicateTree();9439endNode = useSrc1 ? end1Idx->duplicateTree() : end2Idx->duplicateTree();94409441if (arrayindex0) end1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;9442if (arrayindex1) end2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;9443input1Node = createArrayAddressTree(comp, ctrl, src1BaseRepNode, start1Idx, elementSize);9444input2Node = createArrayAddressTree(comp, ctrl, src2BaseRepNode, start2Idx, elementSize);9445}9446else9447{ // count-up loop9448end2Idx = convertStoreToLoad(comp, variableORconstRepNode);9449start2Idx = convertStoreToLoad(comp, useSrc1 ? src1IdxRepNode : src2IdxRepNode);9450diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);9451start1Idx = convertStoreToLoad(comp, src1IdxRepNode);9452end1Idx = needVersioned ? createOP2(comp, TR::iadd, start1Idx, diff2) : NULL;94539454if (disptrace)9455traceMsg(comp, "start1Idx %p start2Idx %p end1Idx %p end2Idx %p\n", start1Idx, start2Idx, end1Idx, end2Idx);9456startNode = useSrc1 ? 
start1Idx->duplicateTree() : start2Idx->duplicateTree();9457endNode = end2Idx->duplicateTree();94589459if (arrayindex0) start1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;9460if (arrayindex1) start2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;9461input1Node = inSrc1Node->getChild(0)->duplicateTree();9462input2Node = inSrc2Node->getChild(0)->duplicateTree();9463}94649465// Prepare effective addresses for arraycmp(len)9466TR::Node * lengthNode;9467if (addLength1)9468{9469lengthNode = createOP2(comp, TR::isub,9470diff2,9471TR::Node::create(src1BaseRepNode, TR::iconst, 0, -1));9472}9473else9474{9475lengthNode = diff2;9476}94779478int shrCount = 0;9479TR::Node * elementSizeNode = NULL;9480if (elementSize > 1)9481{9482//FIXME: enable this code for 64-bit9483// currently disabled until all uses of lengthNode are9484// sign-extended correctly9485//9486TR::ILOpCodes mulOp = TR::imul;9487#if 09488if (comp->target().is64Bit())9489{9490elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);9491elementSizeNode->setLongInt(elementSize);9492mulOp = TR::lmul;9493lengthNode = TR::Node::create(TR::i2l, 1, lengthNode);9494}9495else9496#endif9497elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);9498lengthNode = TR::Node::create(mulOp, 2,9499lengthNode,9500elementSizeNode);9501switch(elementSize)9502{9503case 2: shrCount = 1; break;9504case 4: shrCount = 2; break;9505case 8: shrCount = 3; break;9506default: TR_ASSERT(false, "error");9507}9508}95099510TR_ASSERT(!generateArraycmplen || !generateArraycmpsign, "error");95119512// Prepare compensation code9513if (compareTo)9514{9515if (generateArraycmplen)9516{9517TR::Node *tmpNode;9518TR_CISCNode *storeResult = P->getImportantNode(7);9519tmpNode = trans->getP2TRep(storeResult)->getHeadOfTrNodeInfo()->_node->duplicateTree();9520trans->getAfterInsertionIdiomList(0)->add(tmpNode);95219522tmpNode = 
TR::Node::createStore(tmpNode->getSymbolReference(),9523TR::Node::create(tmpNode, TR::iconst, 0, 0));9524trans->getAfterInsertionIdiomList(1)->add(tmpNode);9525}9526}95279528bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);9529TR::Block *compensateBlock0 = NULL;9530TR::Block *compensateBlock1 = NULL;95319532// create two empty blocks for inserting compensation code prepared by reorderTargetNodesInBB()9533if (isCompensateCode)9534{9535compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9536compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9537compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));9538compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));9539failDest = compensateBlock0->getEntry();9540okDest = compensateBlock1->getEntry();9541}9542TR_ASSERT(okDest != NULL && failDest != NULL, "error!");95439544TR::Node * topArraycmp;9545TR::TreeTop * newFirstTreeTop;9546TR::TreeTop * newLastTreeTop;95479548TR::Node *storeCompareToResult = NULL;95499550if (generateArraycmplen)9551{9552// Using the CLCL instruction95539554TR::Node * arraycmplen = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));9555arraycmplen->setArrayCmpLen(true);9556arraycmplen->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());95579558TR::SymbolReference * resultSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Int32);9559topArraycmp = TR::Node::createStore(resultSymRef, arraycmplen);95609561TR::Node * resultLoad = TR::Node::createLoad(topArraycmp, resultSymRef);9562TR::Node * equalLen = resultLoad;9563if (shrCount != 0)9564{9565equalLen = TR::Node::create(TR::ishr, 2,9566equalLen,9567TR::Node::create(equalLen, TR::iconst, 0, 
shrCount));9568}95699570TR::Node *tmpNode = createStoreOP2(comp, src1IdxSymRef, TR::iadd, src1IdxSymRef, equalLen, trNode);9571newFirstTreeTop = TR::TreeTop::create(comp, tmpNode);9572newLastTreeTop = newFirstTreeTop;9573TR::TreeTop * tmpTreeTop = NULL;95749575if (src1IdxSymRef != src2IdxSymRef)9576{9577tmpNode = createStoreOP2(comp, src2IdxSymRef, TR::iadd, src2IdxSymRef, equalLen, trNode);9578tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9579newLastTreeTop->join(tmpTreeTop);9580newLastTreeTop = tmpTreeTop;9581}95829583tmpNode = TR::Node::createif(TR::ificmpeq,9584lengthNode,9585resultLoad,9586okDest);9587tmpTreeTop = TR::TreeTop::create(comp, tmpNode);9588newLastTreeTop->join(tmpTreeTop);9589newLastTreeTop = tmpTreeTop;9590}9591else9592{9593// Using the CLC instruction9594TR::Node * arraycmp = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));9595arraycmp->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());95969597TR::Node * cmpIfNode;9598if (compareTo)9599{9600storeCompareToResult = trans->getP2TRep(P->getImportantNode(7))->getHeadOfTrNode();9601if (generateArraycmpsign)9602{9603TR_ASSERT(comp->cg()->getSupportsArrayCmpSign(), "error");9604arraycmp->setArrayCmpSign(true);96059606topArraycmp = TR::Node::createStore(storeCompareToResult->getSymbolReference(), arraycmp);9607cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,9608TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),9609okDest);9610}9611else9612{9613if (disptrace) traceMsg(comp, "ArrayCmp: Convert compareTo into equals!\n");9614topArraycmp = TR::Node::createStore(storeCompareToResult->getSymbolReference(),9615createOP2(comp, TR::isub,9616TR::Node::create(src1BaseRepNode, TR::iconst, 0, 1),9617TR::Node::create(TR::iand, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 1)), TR::Node::create(TR::ishr, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 2)), 
TR::Node::iconst(arraycmp, 1)))));96189619cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,9620TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),9621okDest);96229623}9624}9625else9626{9627topArraycmp = TR::Node::createStore(src1IdxSymRef, TR::Node::create(TR::iand, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 1)), TR::Node::create(TR::ishr, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 2)), TR::Node::iconst(arraycmp, 1))));9628cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,9629TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),9630okDest);9631}9632newFirstTreeTop = TR::TreeTop::create(comp, cmpIfNode);9633newLastTreeTop = newFirstTreeTop;9634}96359636TR::TreeTop *last;96379638if (needVersioned) // Need to version the loop to eliminate array bounds checking9639{9640TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");96419642// making two versions (safe and non-safe).9643TR::CFG *cfg = comp->getFlowGraph();9644TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();9645TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();9646TR::Block *chkSrc1a = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9647TR::Block *chkSrc1b = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9648TR::Block *chkSrc2a = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9649TR::Block *chkSrc2b = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9650TR::Block *fastpath = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);9651TR::Block *slowpad = block->split(trTreeTop, cfg, true);9652TR::Block *gotoBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);96539654TR::Node *cmp, *len1, *len2;9655len1 = TR::Node::create(TR::arraylength, 1,9656convertStoreToLoad(comp, src1BaseRepNode));9657cmp = TR::Node::createif(TR::ifiucmpge, 
start1Idx->duplicateTree(), len1, slowpad->getEntry());9658chkSrc1a->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));9659cmp = TR::Node::createif(TR::ifiucmpge, end1Idx->duplicateTree(), len1->duplicateTree(), slowpad->getEntry());9660chkSrc1b->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));9661len2 = TR::Node::create(TR::arraylength, 1,9662convertStoreToLoad(comp, src2BaseRepNode));9663cmp = TR::Node::createif(TR::ifiucmpge, start2Idx->duplicateTree(), len2, slowpad->getEntry());9664chkSrc2a->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));9665cmp = TR::Node::createif(TR::ifiucmpge, end2Idx->duplicateTree(), len2->duplicateTree(), slowpad->getEntry());9666chkSrc2b->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));96679668TR::TreeTop * branchTreeTop = TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest));9669gotoBlock->append(branchTreeTop);96709671cfg->setStructure(NULL);9672cfg->insertBefore(gotoBlock, slowpad);9673cfg->insertBefore(fastpath, gotoBlock);9674cfg->insertBefore(chkSrc2b, fastpath);9675cfg->insertBefore(chkSrc2a, chkSrc2b);9676cfg->insertBefore(chkSrc1b, chkSrc2a);9677cfg->insertBefore(chkSrc1a, chkSrc1b);96789679fastpath = trans->insertBeforeNodes(fastpath);9680last = fastpath->getLastRealTreeTop();9681TR::TreeTop *arrayCmpTreeTop = TR::TreeTop::create(comp, topArraycmp);9682last->join(arrayCmpTreeTop);9683arrayCmpTreeTop->join(newFirstTreeTop);9684newLastTreeTop->join(fastpath->getExit());9685fastpath = trans->insertAfterNodes(fastpath);96869687if (isCompensateCode)9688{9689cfg->setStructure(NULL);9690TR::TreeTop * orgNextTreeTop = fastpath->getExit()->getNextTreeTop();9691TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();9692compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true);9693compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);9694cfg->insertBefore(compensateBlock1, orgNextBlock);9695cfg->insertBefore(compensateBlock0, 
compensateBlock1);9696cfg->join(fastpath, compensateBlock0);9697}96989699fastpath->getExit()->join(gotoBlock->getEntry());9700trans->setSuccessorEdges(fastpath,9701gotoBlock,9702okDest->getEnclosingBlock());97039704block->getExit()->join(chkSrc1a->getEntry());9705cfg->addEdge(block, chkSrc1a);9706cfg->removeEdge(block, slowpad);9707trans->setColdLoopBody();9708}9709else9710{9711// making only the bound-check free version9712block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());9713block = trans->insertBeforeNodes(block);9714block->append(TR::TreeTop::create(comp, topArraycmp));9715last = block->getLastRealTreeTop();9716last->join(newFirstTreeTop);9717newLastTreeTop->join(block->getExit());97189719block = trans->insertAfterNodes(block);97209721if (isCompensateCode)9722{9723TR::CFG *cfg = comp->getFlowGraph();9724cfg->setStructure(NULL);9725TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();9726TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();9727compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true);9728compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);9729cfg->insertBefore(compensateBlock1, orgNextBlock);9730cfg->insertBefore(compensateBlock0, compensateBlock1);9731cfg->join(block, compensateBlock0);9732}97339734trans->setSuccessorEdges(block,9735failDest->getEnclosingBlock(),9736okDest->getEnclosingBlock());9737}97389739if (0 && isCompensateCode)9740{9741// create control flow as below9742// --start preheader--9743// if (i reverseopcode N)9744// goto compensateblock09745// --end preheader--9746// else9747// arraycmp9748// ...9749traceMsg(comp, "cmpifallciscnode %d ifcmpge %d\n", cmpIfAllCISCNode->getOpcode(), TR::ificmpge);9750TR::Node *compareNode = TR::Node::createif((TR::ILOpCodes)cmpIfAllCISCNode->getOpcode(), startNode, endNode, compensateBlock0->getEntry());9751TR::TreeTop *compareTree = TR::TreeTop::create(comp, compareNode);9752if 
(!preHeader)9753preHeader = trans->addPreHeaderIfNeeded(trans->getCurrentLoop());9754preHeader->append(compareTree);9755comp->getFlowGraph()->addEdge(preHeader, compensateBlock0);9756}97579758return true;9759}97609761bool9762CISCTransform2ArrayCmpCompareTo(TR_CISCTransformer *trans)9763{9764trans->setCompareTo();9765return CISCTransform2ArrayCmp(trans);9766}976797689769bool9770CISCTransform2ArrayCmpIndexOf(TR_CISCTransformer *trans)9771{9772trans->setIndexOf();9773return CISCTransform2ArrayCmp(trans);9774}97759776/****************************************************************************************9777Corresponding Java-like Pseudo Program9778int v1, v2, end;9779v3[ ], v4[ ]; // char, int, float, long, and so on9780while(true){9781ifcmpall (v3[v1], v4[v2] ) break;9782v1++;9783v2++;9784ifcmpall(v1, end) break;9785}97869787Note 1: It allows that variables v1 and v2 are identical.9788Note 2: The wildcard node ifcmpall matches all types of if-instructions.9789****************************************************************************************/9790TR_PCISCGraph *9791makeMemCmpGraph(TR::Compilation *c, int32_t ctrl)9792{9793TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmp", 0, 16);9794/************************************ opc id dagId #cfg #child other/pred/children */9795TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v1); // array index for src19796TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // array index for src29797TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v3); // src1 array base9798TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v4); // src2 array 
base9799TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(vorc1); // length9800TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(idx0);9801TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(idx1);9802TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah0); // array header9803TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 1); tgt->addNode(cmah1); // array header9804TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(iall); // Multiply Factor9805TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);9806TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);9807TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR_indload, TR::NoType, v3, idx0, cmah0, iall);9808TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);9809TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_indload, TR::NoType, v4, idx1, cmah1, iall);9810TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);9811TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n3, n1, n3); tgt->addNode(ncmp);9812TR_PCISCNode *n6 = 
createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);9813TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);9814TR_PCISCNode *ncmpge = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);9815TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);98169817ncmpge->setSuccs(ent->getSucc(0), n9);9818ncmp->setSucc(1, n9);98199820n1->setIsOptionalNode();9821n3->setIsOptionalNode();98229823tgt->setEntryNode(ent);9824tgt->setExitNode(n9);9825tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character9826tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall);9827tgt->setNumDagIds(14);9828tgt->createInternalData(1);98299830tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);9831tgt->setTransformer(CISCTransform2ArrayCmp);9832tgt->setAspects(isub|mul, existAccess, 0);9833tgt->setNoAspects(call|bndchk|bitop1, 0, existAccess);9834tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount9835tgt->setInhibitBeforeVersioning();9836tgt->setHotness(warm, false);9837return tgt;9838}983998409841/****************************************************************************************9842Corresponding Java-like Pseudo Program9843int v1, v2, end;9844v3[ ], v4[ ]; // char, int, float, long, and so on9845while(true){9846ifcmpall (v3[v1], v4[v2] ) break;9847v1++;9848v2++;9849ifcmpall(v1, end) break;9850}98519852Note 1: It allows that variables v1 and v2 are identical.9853Note 2: The wildcard node ifcmpall matches all types of if-instructions.9854****************************************************************************************/9855TR_PCISCGraph *9856makeMemCmpIndexOfGraph(TR::Compilation *c, int32_t ctrl)9857{9858TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmpIndexOf", 0, 
16);9859/************************************ opc id dagId #cfg #child other/pred/children */9860TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v1); // array index for src19861TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1); tgt->addNode(v2); // array index for src29862TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 0); tgt->addNode(v3); // src1 array base9863TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(v4); // src2 array base9864TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0); tgt->addNode(vorc1); // length9865TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(idx0);9866TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(idx1);9867TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah); // array header9868TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(iall); // Multiply Factor9869TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);9870TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);9871TR_PCISCNode *a1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ent, idx0, 
cm1); tgt->addNode(a1);9872TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, a1, TR_inbload, TR::NoType, v3, a1, cmah, iall);9873a1->getHeadOfParents()->setIsChildDirectlyConnected();9874TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);9875TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_inbload, TR::NoType, v4, idx1, cmah, iall);9876TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);9877TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n3, n1, n3); tgt->addNode(ncmp);9878TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);9879TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);9880TR_PCISCNode *ncmpge = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);9881TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);98829883ncmpge->setSuccs(ent->getSucc(0), n9);9884ncmp->setSucc(1, n9);98859886n1->setIsOptionalNode();9887n3->setIsOptionalNode();98889889tgt->setEntryNode(ent);9890tgt->setExitNode(n9);9891tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character9892tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall);9893tgt->setNumDagIds(13);9894tgt->createInternalData(1);98959896tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);9897tgt->setTransformer(CISCTransform2ArrayCmpIndexOf);9898tgt->setAspects(isub|mul, existAccess, 0);9899tgt->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_1, existAccess);9900tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, 
indirectStoreCount9901tgt->setInhibitBeforeVersioning();9902tgt->setHotness(warm, false);9903return tgt;9904}990599069907/****************************************************************************************9908Corresponding Java-like Pseudo Program9909int v1, v2, end, v5;9910v3[ ], v4[ ]; // char, int, float, long, and so on9911while(true){9912v5 = v3[v1++] - v4[v2++];9913if (v5 != 0) break;9914if (v1 >= end) break;9915}99169917Note 1: It allows that variables v1 and v2 are identical.9918****************************************************************************************/9919TR_PCISCGraph *9920makeMemCmpSpecialGraph(TR::Compilation *c, int32_t ctrl)9921{9922TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmpSpecial", 0, 16);9923/************************************ opc id dagId #cfg #child other/pred/children */9924TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v1); // array index for src19925TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // array index for src29926TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v3); // src1 array base9927TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v4); // src2 array base9928TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(vorc1); // length9929TR_PCISCNode *v5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 2); tgt->addNode(v5); // result9930TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); 
tgt->addNode(idx0);9931TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah); // array header9932TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(iall); // Multiply Factor9933TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(cm1);9934TR_PCISCNode *c0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, 0); tgt->addNode(c0);9935TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);9936TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR_inbload, TR::NoType, v3, v1, cmah, iall);9937TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);9938TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_inbload, TR::NoType, v4, idx0, cmah, iall);9939TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);9940TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n3, n1, n3); tgt->addNode(n4);9941TR_PCISCNode *n5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n4, n4, v5); tgt->addNode(n5);9942TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpne, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n5, v5, c0); tgt->addNode(ncmp);9943TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);9944TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);9945TR_PCISCNode *ncmpge= new (PERSISTENT_NEW) 
TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);9946TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);99479948ncmpge->setSuccs(ent->getSucc(0), n9);9949ncmp->setSucc(1, n9);99509951n1->setIsOptionalNode();9952n3->setIsOptionalNode();99539954tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character9955tgt->setEntryNode(ent);9956tgt->setExitNode(n9);9957tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall, n5);9958tgt->setNumDagIds(14);9959tgt->createInternalData(1);99609961tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);9962tgt->setTransformer(CISCTransform2ArrayCmpCompareTo);9963tgt->setAspects(isub|mul, existAccess, 0);9964tgt->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_1, existAccess);9965tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount9966tgt->setInhibitBeforeVersioning();9967tgt->setHotness(warm, false);9968return tgt;9969}997099719972//////////////////////////////////////////////////////////////////////////9973//////////////////////////////////////////////////////////////////////////9974//////////////////////////////////////////////////////////////////////////9975// Utilities for BitOpMem99769977static void9978setSubopBitOpMem(TR::Compilation *comp, TR::Node *bitOpMem, TR_CISCNode *opCISCNode)9979{9980if (opCISCNode->getIlOpCode().isAnd())9981{9982bitOpMem->setAndBitOpMem(true);9983}9984else if (opCISCNode->getIlOpCode().isXor())9985{9986bitOpMem->setXorBitOpMem(true);9987}9988else9989{9990TR_ASSERT(opCISCNode->getIlOpCode().isOr(), "error");9991bitOpMem->setOrBitOpMem(true);9992}9993}99949995static TR::AutomaticSymbol *9996setPinningArray(TR::Compilation *comp, TR::Node *internalPtrStore, TR::Node *base, TR::Block *appendBlock)9997{9998TR::AutomaticSymbol *pinningArray = NULL;9999if (base->getOpCode().isLoadVarDirect() 
&&10000base->getSymbolReference()->getSymbol()->isAuto())10001{10002pinningArray = (base->getSymbolReference()->getSymbol()->castToAutoSymbol()->isInternalPointer()) ?10003base->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->getPinningArrayPointer() :10004base->getSymbolReference()->getSymbol()->castToAutoSymbol();10005}10006else10007{10008TR::SymbolReference *newRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address);10009appendBlock->append(TR::TreeTop::create(comp, TR::Node::createStore(newRef, createLoad(base))));10010pinningArray = newRef->getSymbol()->castToAutoSymbol();10011}10012pinningArray->setPinningArrayPointer();10013internalPtrStore->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->setPinningArrayPointer(pinningArray);10014if (internalPtrStore->isInternalPointer()) internalPtrStore->setPinningArrayPointer(pinningArray);10015return pinningArray;10016}1001710018//*****************************************************************************************10019// IL code generation for bit operations for memory to memory (dest = src1 op src2)10020// Input: ImportantNode(0) - array load for src110021// ImportantNode(1) - array load for src210022// ImportantNode(2) - array store for dest10023// ImportantNode(3) - a bit operation (XOR, AND, or OR)10024// ImportantNode(4) - increment the array index for src110025// ImportantNode(5) - increment the array index for src210026// ImportantNode(6) - increment the array index for dest10027// ImportantNode(7) - the size of elements (NULL for byte arrays)10028//*****************************************************************************************10029// This transformer will generate the following code.10030// if (dest.addr == src1.addr)10031// {10032// // dest and src1 are identical10033// bitOpMem(dest.addr, src2.addr, len); // three children (dest op= src2)10034// }10035// else if (dest.addr == src2.addr)10036// {10037// // dest and src2 are 
identical10038// bitOpMem(dest.addr, src1.addr, len); // three children (dest op= src1)10039// }10040// else if (dest.obj == src1.obj || dest.obj == src2.obj)10041// {10042// // the destination may overlap to src1 or src2.10043// <go to the original loop>10044// }10045// else10046// {10047// // We can guarantee the destination NEVER overlaps to src1 or src2.10048// bitOpMem(dest.addr, src1.addr, src2.addr, len); // four children (dest = src1 op src2)10049// }10050bool10051CISCTransform2BitOpMem(TR_CISCTransformer *trans)10052{10053TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");10054const bool disptrace = DISPTRACE(trans);10055TR::Node *trNode;10056TR::TreeTop *trTreeTop;10057TR::Block *block;10058TR_CISCGraph *P = trans->getP();10059List<TR_CISCNode> *P2T = trans->getP2T();10060TR::Compilation *comp = trans->comp();10061bool ctrl = trans->isGenerateI2L();1006210063TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");10064if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;1006510066trans->findFirstNode(&trTreeTop, &trNode, &block);10067if (!block) return false; // cannot find1006810069if (isLoopPreheaderLastBlockInMethod(comp, block))10070{10071traceMsg(comp, "Bailing CISCTransform2BitOpMem due to null TT - might be a preheader in last block of method\n");10072return false;10073}1007410075TR::Block *target = trans->analyzeSuccessorBlock();10076// Currently, it allows only a single successor.10077if (!target) return false;1007810079//10080// obtain a CISCNode of each store for incrementing induction variables10081TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));10082TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));10083if (!src1CISCNode || !src2CISCNode || src1CISCNode == src2CISCNode) return false;10084TR_CISCNode *destCISCNode = 
trans->getP2TInLoopIfSingle(P->getImportantNode(2));10085if (!destCISCNode) return false;10086TR_CISCNode *opCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));10087TR_ASSERT(opCISCNode, "error");10088TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;10089TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;10090TR::Node * inDestNode = destCISCNode->getHeadOfTrNodeInfo()->_node;10091TR::Node * inputNode1 = inSrc1Node->getChild(0);10092TR::Node * inputNode2 = inSrc2Node->getChild(0);10093TR::Node * outputNode = inDestNode->getChild(0);1009410095TR::Node * mulFactorNode;10096int elementSize;1009710098// Get the size of elements10099if (!getMultiplier(trans, P->getImportantNode(7), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;10100if (elementSize != inSrc1Node->getSize() || elementSize != inSrc2Node->getSize())10101{10102traceMsg(comp, "CISCTransform2BitOpMem failed - Size Mismatch. Element Size: %d InSrc1Size: %d inSrc2Size: %d\n", elementSize, inSrc1Node->getSize(), inSrc2Node->getSize());10103return false; // Size is mismatch!10104}1010510106TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));10107TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));10108TR_CISCNode *storeDest = trans->getP2TRepInLoop(P->getImportantNode(6));1010910110// check the indices used in the array loads and10111// the store nodes10112//10113List<TR::Node> storeList(comp->trMemory());10114TR_ASSERT(storeSrc1, "error");10115storeList.add(storeSrc1->getHeadOfTrNode());10116if (storeSrc2 && storeSrc2 != storeSrc1) storeList.add(storeSrc2->getHeadOfTrNode());10117if (storeDest && storeDest != storeSrc1) storeList.add(storeDest->getHeadOfTrNode());10118if (!isIndexVariableInList(inSrc1Node, &storeList) ||10119!isIndexVariableInList(inSrc2Node, &storeList) ||10120!isIndexVariableInList(inDestNode, &storeList))10121{10122dumpOptDetails(comp, "indices used in array loads %p, %p, and %p are not 
consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node, inDestNode);10123return false;10124}1012510126TR::Node *src1IdxRepNode, *src2IdxRepNode, *destIdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *destBaseRepNode, *variableORconstRepNode;10127getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &destIdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &destBaseRepNode, &variableORconstRepNode);10128TR_ASSERT(src1IdxRepNode != 0, "error");10129TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();10130TR::SymbolReference * src2IdxSymRef = 0;10131TR::SymbolReference * destIdxSymRef = 0;10132if (src2IdxRepNode)10133src2IdxSymRef = src2IdxRepNode->getSymbolReference();10134if (destIdxRepNode)10135destIdxSymRef = destIdxRepNode->getSymbolReference();10136if (src1IdxSymRef == destIdxSymRef) destIdxSymRef = 0;10137if (src1IdxSymRef == src2IdxSymRef) src2IdxSymRef = 0;10138if (trans->countGoodArrayIndex(src1IdxSymRef) == 0) return false;10139if (src2IdxSymRef && (trans->countGoodArrayIndex(src2IdxSymRef) == 0)) return false;10140if (destIdxSymRef && (trans->countGoodArrayIndex(destIdxSymRef) == 0)) return false;10141TR::Node *startSrc1Idx, *endSrc1Idx, *diff2;10142endSrc1Idx = convertStoreToLoad(comp, variableORconstRepNode);10143startSrc1Idx = convertStoreToLoad(comp, src1IdxRepNode);10144diff2 = createOP2(comp, TR::isub, endSrc1Idx, startSrc1Idx);10145TR::Node * elementSizeNode = NULL;1014610147TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), diff2);10148if (elementSize > 1)10149{10150TR::ILOpCodes mulOp = TR::imul;10151if (comp->target().is64Bit())10152{10153elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);10154elementSizeNode->setLongInt(elementSize);10155mulOp = TR::lmul;10156}10157else10158elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);10159lengthNode = TR::Node::create(mulOp, 2,10160lengthNode,10161elementSizeNode);10162}1016310164TR::Node * src1Update = 
TR::Node::createStore(src1IdxSymRef, endSrc1Idx->duplicateTree());10165TR::Node * destUpdate = NULL;10166if (destIdxSymRef != NULL && src1IdxSymRef != destIdxSymRef)10167{10168// If there are two induction variables, we need to maintain the other one.10169TR::Node * result = createOP2(comp, TR::iadd,10170TR::Node::createLoad(trNode, destIdxSymRef),10171diff2->duplicateTree());10172destUpdate = TR::Node::createStore(destIdxSymRef, result);10173}10174TR::Node * src2Update = NULL;10175if (src2IdxSymRef != NULL && src2IdxSymRef != destIdxSymRef && src2IdxSymRef != src1IdxSymRef)10176{10177// If there are three induction variables, we need to maintain the other one.10178TR::Node * result = createOP2(comp, TR::iadd,10179TR::Node::createLoad(trNode, src2IdxSymRef),10180diff2->duplicateTree());10181src2Update = TR::Node::createStore(src2IdxSymRef, result);10182}1018310184TR::Node * bitOpMem = NULL;10185if (outputNode == inputNode1 || outputNode == inputNode2)10186{10187bitOpMem = TR::Node::create(TR::bitOpMem, 3,10188outputNode->duplicateTree(),10189(outputNode == inputNode1 ? 
inputNode2 : inputNode1)->duplicateTree(),10190lengthNode);10191bitOpMem->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());10192setSubopBitOpMem(comp, bitOpMem, opCISCNode);10193}1019410195//********************10196// Modify actual code10197//********************10198if (bitOpMem)10199{ // src1 or src2 is equal to dest10200block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());10201block = trans->insertBeforeNodes(block);10202block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem)));10203}10204else10205{10206TR::CFG *cfg = comp->getFlowGraph();10207cfg->setStructure(0);10208TR::Block *slowpad;10209TR::Block *orgPrevBlock = 0;10210TR::Block *checkSrc1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10211TR::Block *fastpath1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10212TR::Block *checkSrc2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10213TR::Block *fastpath2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10214TR::Block *checkSrc3 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10215TR::Block *checkSrc4 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10216TR::Block *fastpath3 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10217TR::Block *lastpath = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);1021810219// find orgPrevBlock and slowpad10220if (block->getFirstRealTreeTop() == trTreeTop)10221{10222// search the entry pad10223orgPrevBlock = trans->searchPredecessorOfBlock(block);10224}1022510226slowpad = block;10227if (!orgPrevBlock)10228{10229orgPrevBlock = block;10230slowpad = block->split(trTreeTop, cfg, true);10231}1023210233// checkSrc1: if (dest.addr != src1.addr) goto checkSrc210234TR::SymbolReference *destAddrSymRef = 
comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);10235TR::SymbolReference *src1AddrSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);10236TR::SymbolReference *src2AddrSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);10237TR::Node *destStore = TR::Node::createStore(destAddrSymRef, outputNode->duplicateTree());10238TR::Node *src1Store = TR::Node::createStore(src1AddrSymRef, inputNode1->duplicateTree());10239TR::Node *src2Store = TR::Node::createStore(src2AddrSymRef, inputNode2->duplicateTree());1024010241setPinningArray(comp, destStore, destBaseRepNode, checkSrc1);10242setPinningArray(comp, src1Store, src1BaseRepNode, checkSrc1);10243setPinningArray(comp, src2Store, src2BaseRepNode, checkSrc1);1024410245checkSrc1->append(TR::TreeTop::create(comp, destStore));10246checkSrc1->append(TR::TreeTop::create(comp, src1Store));10247checkSrc1->append(TR::TreeTop::create(comp, src2Store));10248checkSrc1->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpne,10249TR::Node::createLoad(trNode, destAddrSymRef),10250TR::Node::createLoad(trNode, src1AddrSymRef),10251checkSrc2->getEntry())));1025210253// fastpath1: bitOpMem(dest, src2, length); goto lastpath;10254TR::Node *bitOpMem1 = TR::Node::create(TR::bitOpMem, 3,10255TR::Node::createLoad(trNode, destAddrSymRef),10256TR::Node::createLoad(trNode, src2AddrSymRef),10257lengthNode->duplicateTree());10258bitOpMem1->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());10259setSubopBitOpMem(comp, bitOpMem1, opCISCNode);10260///fastpath1 = trans->insertBeforeNodes(fastpath1);10261fastpath1->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem1)));10262fastpath1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, lastpath->getEntry())));1026310264// checkSrc2: if (dest.addr != src2.addr) goto checkSrc310265checkSrc2->append(TR::TreeTop::create(comp, 
TR::Node::createif(TR::ifacmpne,10266TR::Node::createLoad(trNode, destAddrSymRef),10267TR::Node::createLoad(trNode, src2AddrSymRef),10268checkSrc3->getEntry())));1026910270// fastpath2: bitOpMem(dest, src1, length); goto lastpath;10271TR::Node *bitOpMem2 = TR::Node::create(TR::bitOpMem, 3,10272TR::Node::createLoad(trNode, destAddrSymRef),10273TR::Node::createLoad(trNode, src1AddrSymRef),10274lengthNode->duplicateTree());10275bitOpMem2->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());10276setSubopBitOpMem(comp, bitOpMem2, opCISCNode);10277///fastpath2 = trans->insertBeforeNodes(fastpath2);10278fastpath2->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem2)));10279fastpath2->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, lastpath->getEntry())));1028010281// checkSrc3: if (dest.obj == src1.obj) goto slowpad10282checkSrc3->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpeq,10283createLoad(destBaseRepNode),10284createLoad(src1BaseRepNode),10285slowpad->getEntry())));1028610287// checkSrc4: if (dest.obj == src2.obj) goto slowpad10288checkSrc4->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpeq,10289createLoad(destBaseRepNode),10290createLoad(src2BaseRepNode),10291slowpad->getEntry())));1029210293// fastpath3: bitOpMem(dest, src1, src2, length);10294// We can guarantee the destination NEVER overlaps to src1 or src2.10295bitOpMem = TR::Node::create(TR::bitOpMem, 4,10296TR::Node::createLoad(trNode, destAddrSymRef),10297TR::Node::createLoad(trNode, src1AddrSymRef),10298TR::Node::createLoad(trNode, src2AddrSymRef),10299lengthNode->duplicateTree());10300bitOpMem->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());10301setSubopBitOpMem(comp, bitOpMem, opCISCNode);10302///fastpath3 = trans->insertBeforeNodes(fastpath3);10303fastpath3->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem)));1030410305// Insert new blocks10306TR::TreeTop * 
orgPrevTreeTop = orgPrevBlock->getExit();10307TR::Node *lastOrgPrevRealNode = orgPrevBlock->getLastRealTreeTop()->getNode();10308TR::TreeTop * orgNextTreeTop = orgPrevTreeTop->getNextTreeTop();10309if (orgNextTreeTop)10310{10311TR::Block * orgNextBlock = orgNextTreeTop->getNode()->getBlock();10312cfg->insertBefore(lastpath, orgNextBlock);10313}10314else10315{10316cfg->addNode(lastpath);10317}10318cfg->insertBefore(fastpath3, lastpath);10319cfg->insertBefore(checkSrc4, fastpath3);10320cfg->insertBefore(checkSrc3, checkSrc4);10321cfg->insertBefore(fastpath2, checkSrc3);10322cfg->insertBefore(checkSrc2, fastpath2);10323cfg->insertBefore(fastpath1, checkSrc2);10324cfg->insertBefore(checkSrc1, fastpath1);1032510326TR::Block *extraBlock = NULL;10327if (!trans->isEmptyBeforeInsertionList())10328{10329extraBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);10330cfg->insertBefore(extraBlock, checkSrc1);10331orgPrevTreeTop->join(extraBlock->getEntry());10332cfg->addEdge(orgPrevBlock, extraBlock);10333TR::Block *newBlock = trans->insertBeforeNodes(extraBlock);10334}10335else10336{10337orgPrevTreeTop->join(checkSrc1->getEntry());10338cfg->addEdge(orgPrevBlock, checkSrc1);10339}10340cfg->removeEdge(orgPrevBlock, slowpad);10341block = lastpath;1034210343if (disptrace) traceMsg(comp, "CISCTransform2BitOpMem: orgPrevBlock=%d checkSrc1=%d lastpath=%d slowpad=%d orgNextTreeTop=%x\n",10344orgPrevBlock->getNumber(), checkSrc1->getNumber(), lastpath->getNumber(), slowpad->getNumber(), orgNextTreeTop);1034510346if (lastOrgPrevRealNode->getOpCode().getOpCodeValue() == TR::Goto)10347{10348TR_ASSERT(lastOrgPrevRealNode->getBranchDestination() == slowpad->getEntry(), "Error");10349if (!extraBlock)10350lastOrgPrevRealNode->setBranchDestination(checkSrc1->getEntry());10351else10352lastOrgPrevRealNode->setBranchDestination(extraBlock->getEntry());10353}10354}1035510356if (src2Update) block->append(TR::TreeTop::create(comp, src2Update));10357if (destUpdate) 
block->append(TR::TreeTop::create(comp, destUpdate));10358// Original value of first induction variable used in the updates of the two induction variables above10359// Update this one last10360block->append(TR::TreeTop::create(comp, src1Update));1036110362trans->insertAfterNodes(block);1036310364trans->setSuccessorEdge(block, target);10365return true;10366}1036710368/****************************************************************************************10369Corresponding Java-like Pseudo Program10370int v1, v2, end;10371v3[ ], v4[ ], v5[ ];10372while(true){10373v5[v2] = v3[v1] op v4[v1]; // op will match one of AND, OR, and XOR operations.10374v1++;10375v2++;10376if (v1 >= end) break;10377}1037810379Note 1: It allows that variables v1 and v2 are identical.10380****************************************************************************************/10381TR_PCISCGraph *10382makeBitOpMemGraph(TR::Compilation *c, int32_t ctrl)10383{10384TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "BitOpMem", 0, 16);10385/************************************ opc id dagId #cfg #child other/pred/children */10386TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0); tgt->addNode(v1); // array index for src110387TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 1); tgt->addNode(v2); // array index for src210388TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 2); tgt->addNode(v3); // array index for dest10389TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v4); // src1 array base10390TR_PCISCNode *v5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v5); // src2 array 
base10391TR_PCISCNode *v6 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 2); tgt->addNode(v6); // dest array base10392TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),10, 0, 0); tgt->addNode(vorc1); // length10393TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(iall); // Multiply Factor10394TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(idx0);10395TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(idx1);10396TR_PCISCNode *idx2= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 2); tgt->addNode(idx2);10397TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah0); // array header10398TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 1); tgt->addNode(cmah1); // array header10399TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);10400TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);10401TR_PCISCNode *sn0 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ent, v6, idx0, cmah1, iall);10402TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, sn0, TR_indload, TR::NoType, v4, idx1, cmah0, iall);10403TR_PCISCNode *cv0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(cv0); // optional10404TR_PCISCNode *n2 = 
createIdiomArrayLoadInLoop(tgt, ctrl, 1, cv0, TR_indload, TR::NoType, v5, idx2, cmah0, iall);10405TR_PCISCNode *cv1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(cv1); // optional10406TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_bitop1, TR::NoType, tgt->incNumNodes(), 1, 1, 2, cv1, cv0, cv1); tgt->addNode(n4);10407TR_PCISCNode *cv2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n4, n4); tgt->addNode(cv2); // optional10408TR_PCISCNode *sn1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, cv2, sn0, cv2); tgt->addNode(sn1);10409TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, sn1, v1, cm1);10410TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);10411TR_PCISCNode *n8 = createIdiomDecVarInLoop(tgt, ctrl, 1, n7, v3, cm1);10412TR_PCISCNode *ncmpge= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n8, v1, vorc1); tgt->addNode(ncmpge);10413TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);1041410415ncmpge->setSuccs(ent->getSucc(0), n9);1041610417cv0->setIsOptionalNode();10418cv1->setIsOptionalNode();10419cv2->setIsOptionalNode();1042010421tgt->setEntryNode(ent);10422tgt->setExitNode(n9);10423tgt->setImportantNodes(n0, n2, sn1, n4, n6, n7, n8, iall);10424tgt->setNumDagIds(17);10425tgt->createInternalData(1);1042610427tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);10428tgt->setTransformer(CISCTransform2BitOpMem);10429tgt->setAspects(isub|mul|sameTypeLoadStore|bitop1, existAccess, existAccess);10430tgt->setNoAspects(call|bndchk, 0, 0);10431tgt->setMinCounts(1, 2, 1); // minimum ifCount, indirectLoadCount, 
indirectStoreCount10432tgt->setInhibitBeforeVersioning();10433tgt->setHotness(warm, false);10434return tgt;10435}10436104371043810439//////////////////////////////////////////////////////////////////////////10440//////////////////////////////////////////////////////////////////////////10441//////////////////////////////////////////////////////////////////////////10442// Counts number of digits (Not count the character '-' (minus))10443//10444// e.g. do { count ++; } while((l /= 10) != 0);10445//10446// numDigit = countdigit10(int i, void *work)10447// numDigit = countdigit10(long i, void *work)10448//10449// Use of work area depends on each platform. (e.g. 16 bytes for zSeries)10450// The work area for some platforms may be NULL.10451static const int64_t digit10Table[] =10452{10453-10L, // 010454-100L, // 110455-1000L, // 210456-10000L, // 310457-100000L, // 410458-1000000L, // 510459-10000000L, // 610460-100000000L, // 710461-1000000000L // 8 (32-bit)10462#ifdef TR_TARGET_64BIT10463,-10000000000L, // 910464-100000000000L, // 1010465-1000000000000L, // 1110466-10000000000000L, // 1210467-100000000000000L, // 1310468-1000000000000000L, // 1410469-10000000000000000L, // 1510470-100000000000000000L,// 1610471-1000000000000000000L// 17 (64-bit)10472#endif10473};1047410475#if 010476struct ppcDigit10TableEnt10477{10478int32_t digits;10479uint32_t limit; // 10^digits-110480uint64_t limitLong; // 10^digits-110481};1048210483// For CountDecimalDigitInt, use ppcDigit10Table[32..64]10484static const struct ppcDigit10TableEnt ppcDigit10Table[64 + 1] =10485{10486//digits limit limitLong zeros32 zeros64 min max limit limitLong incr10487//---------------------------------------------------------------------------------------------------------------------------10488{19, 0u, 9999999999999999999llu}, // 0 0 [0x8000000000000000,0xffffffffffffffff] 0x00000000 0x8ac7230489e7ffff *10489{19, 0u, 9999999999999999999llu}, // 0 1 [0x4000000000000000,0x7fffffffffffffff] 0x00000000 
#if 0
// NOTE: everything between "#if 0" and "#endif" is compiled out; it is kept
// for reference only.
//
// One table entry: a decimal digit count together with the largest value
// that has that many digits, as a 32-bit and as a 64-bit quantity.
struct ppcDigit10TableEnt
   {
   int32_t digits;     // decimal digit count for this row
   uint32_t limit;     // 10^digits-1 (0 in the rows 0..31, 0xffffffff where 10^digits-1 does not fit in 32 bits)
   uint64_t limitLong; // 10^digits-1
   };

// For CountDecimalDigitInt, use ppcDigit10Table[32..64]
//
// Per the column header below, row i corresponds to a 64-bit value with i
// leading zero bits ("zeros64"); "zeros32" is the matching count for a 32-bit
// value, and [min,max] is the value range covered by the row.  The trailing
// hex columns repeat limit/limitLong in hexadecimal.
// NOTE(review): rows tagged '*' ("incr" column) appear to be the rows whose
// [min,max] range crosses limitLong, i.e. where a value may need digits+1
// digits; row 64 (the value 0) is tagged as well -- confirm against the
// consumer of this table before re-enabling it.
static const struct ppcDigit10TableEnt ppcDigit10Table[64 + 1] =
   {
   //digits limit limitLong                    zeros32 zeros64 min max                                  limit      limitLong          incr
   //---------------------------------------------------------------------------------------------------------------------------
   {19, 0u, 9999999999999999999llu}, // 0 0 [0x8000000000000000,0xffffffffffffffff] 0x00000000 0x8ac7230489e7ffff *
   {19, 0u, 9999999999999999999llu}, // 0 1 [0x4000000000000000,0x7fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
   {19, 0u, 9999999999999999999llu}, // 0 2 [0x2000000000000000,0x3fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
   {19, 0u, 9999999999999999999llu}, // 0 3 [0x1000000000000000,0x1fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
   {18, 0u, 999999999999999999llu}, // 0 4 [0x0800000000000000,0x0fffffffffffffff] 0x00000000 0x0de0b6b3a763ffff *
   {18, 0u, 999999999999999999llu}, // 0 5 [0x0400000000000000,0x07ffffffffffffff] 0x00000000 0x0de0b6b3a763ffff
   {18, 0u, 999999999999999999llu}, // 0 6 [0x0200000000000000,0x03ffffffffffffff] 0x00000000 0x0de0b6b3a763ffff
   {17, 0u, 99999999999999999llu}, // 0 7 [0x0100000000000000,0x01ffffffffffffff] 0x00000000 0x016345785d89ffff *
   {17, 0u, 99999999999999999llu}, // 0 8 [0x0080000000000000,0x00ffffffffffffff] 0x00000000 0x016345785d89ffff
   {17, 0u, 99999999999999999llu}, // 0 9 [0x0040000000000000,0x007fffffffffffff] 0x00000000 0x016345785d89ffff
   {16, 0u, 9999999999999999llu}, // 0 10 [0x0020000000000000,0x003fffffffffffff] 0x00000000 0x002386f26fc0ffff *
   {16, 0u, 9999999999999999llu}, // 0 11 [0x0010000000000000,0x001fffffffffffff] 0x00000000 0x002386f26fc0ffff
   {16, 0u, 9999999999999999llu}, // 0 12 [0x0008000000000000,0x000fffffffffffff] 0x00000000 0x002386f26fc0ffff
   {16, 0u, 9999999999999999llu}, // 0 13 [0x0004000000000000,0x0007ffffffffffff] 0x00000000 0x002386f26fc0ffff
   {15, 0u, 999999999999999llu}, // 0 14 [0x0002000000000000,0x0003ffffffffffff] 0x00000000 0x00038d7ea4c67fff *
   {15, 0u, 999999999999999llu}, // 0 15 [0x0001000000000000,0x0001ffffffffffff] 0x00000000 0x00038d7ea4c67fff
   {15, 0u, 999999999999999llu}, // 0 16 [0x0000800000000000,0x0000ffffffffffff] 0x00000000 0x00038d7ea4c67fff
   {14, 0u, 99999999999999llu}, // 0 17 [0x0000400000000000,0x00007fffffffffff] 0x00000000 0x00005af3107a3fff *
   {14, 0u, 99999999999999llu}, // 0 18 [0x0000200000000000,0x00003fffffffffff] 0x00000000 0x00005af3107a3fff
   {14, 0u, 99999999999999llu}, // 0 19 [0x0000100000000000,0x00001fffffffffff] 0x00000000 0x00005af3107a3fff
   {13, 0u, 9999999999999llu}, // 0 20 [0x0000080000000000,0x00000fffffffffff] 0x00000000 0x000009184e729fff *
   {13, 0u, 9999999999999llu}, // 0 21 [0x0000040000000000,0x000007ffffffffff] 0x00000000 0x000009184e729fff
   {13, 0u, 9999999999999llu}, // 0 22 [0x0000020000000000,0x000003ffffffffff] 0x00000000 0x000009184e729fff
   {13, 0u, 9999999999999llu}, // 0 23 [0x0000010000000000,0x000001ffffffffff] 0x00000000 0x000009184e729fff
   {12, 0u, 999999999999llu}, // 0 24 [0x0000008000000000,0x000000ffffffffff] 0x00000000 0x000000e8d4a50fff *
   {12, 0u, 999999999999llu}, // 0 25 [0x0000004000000000,0x0000007fffffffff] 0x00000000 0x000000e8d4a50fff
   {12, 0u, 999999999999llu}, // 0 26 [0x0000002000000000,0x0000003fffffffff] 0x00000000 0x000000e8d4a50fff
   {11, 0u, 99999999999llu}, // 0 27 [0x0000001000000000,0x0000001fffffffff] 0x00000000 0x000000174876e7ff *
   {11, 0u, 99999999999llu}, // 0 28 [0x0000000800000000,0x0000000fffffffff] 0x00000000 0x000000174876e7ff
   {11, 0u, 99999999999llu}, // 0 29 [0x0000000400000000,0x00000007ffffffff] 0x00000000 0x000000174876e7ff
   {10, 0u, 9999999999llu}, // 0 30 [0x0000000200000000,0x00000003ffffffff] 0x00000000 0x00000002540be3ff *
   {10, 0u, 9999999999llu}, // 0 31 [0x0000000100000000,0x00000001ffffffff] 0x00000000 0x00000002540be3ff
   {10, 4294967295u, 9999999999llu}, // 0 32 [0x0000000080000000,0x00000000ffffffff] 0xffffffff 0x00000002540be3ff
   {10, 4294967295u, 9999999999llu}, // 1 33 [0x0000000040000000,0x000000007fffffff] 0xffffffff 0x00000002540be3ff
   { 9, 999999999u, 999999999llu}, // 2 34 [0x0000000020000000,0x000000003fffffff] 0x3b9ac9ff 0x000000003b9ac9ff *
   { 9, 999999999u, 999999999llu}, // 3 35 [0x0000000010000000,0x000000001fffffff] 0x3b9ac9ff 0x000000003b9ac9ff
   { 9, 999999999u, 999999999llu}, // 4 36 [0x0000000008000000,0x000000000fffffff] 0x3b9ac9ff 0x000000003b9ac9ff
   { 8, 99999999u, 99999999llu}, // 5 37 [0x0000000004000000,0x0000000007ffffff] 0x05f5e0ff 0x0000000005f5e0ff *
   { 8, 99999999u, 99999999llu}, // 6 38 [0x0000000002000000,0x0000000003ffffff] 0x05f5e0ff 0x0000000005f5e0ff
   { 8, 99999999u, 99999999llu}, // 7 39 [0x0000000001000000,0x0000000001ffffff] 0x05f5e0ff 0x0000000005f5e0ff
   { 7, 9999999u, 9999999llu}, // 8 40 [0x0000000000800000,0x0000000000ffffff] 0x0098967f 0x000000000098967f *
   { 7, 9999999u, 9999999llu}, // 9 41 [0x0000000000400000,0x00000000007fffff] 0x0098967f 0x000000000098967f
   { 7, 9999999u, 9999999llu}, // 10 42 [0x0000000000200000,0x00000000003fffff] 0x0098967f 0x000000000098967f
   { 7, 9999999u, 9999999llu}, // 11 43 [0x0000000000100000,0x00000000001fffff] 0x0098967f 0x000000000098967f
   { 6, 999999u, 999999llu}, // 12 44 [0x0000000000080000,0x00000000000fffff] 0x000f423f 0x00000000000f423f *
   { 6, 999999u, 999999llu}, // 13 45 [0x0000000000040000,0x000000000007ffff] 0x000f423f 0x00000000000f423f
   { 6, 999999u, 999999llu}, // 14 46 [0x0000000000020000,0x000000000003ffff] 0x000f423f 0x00000000000f423f
   { 5, 99999u, 99999llu}, // 15 47 [0x0000000000010000,0x000000000001ffff] 0x0001869f 0x000000000001869f *
   { 5, 99999u, 99999llu}, // 16 48 [0x0000000000008000,0x000000000000ffff] 0x0001869f 0x000000000001869f
   { 5, 99999u, 99999llu}, // 17 49 [0x0000000000004000,0x0000000000007fff] 0x0001869f 0x000000000001869f
   { 4, 9999u, 9999llu}, // 18 50 [0x0000000000002000,0x0000000000003fff] 0x0000270f 0x000000000000270f *
   { 4, 9999u, 9999llu}, // 19 51 [0x0000000000001000,0x0000000000001fff] 0x0000270f 0x000000000000270f
   { 4, 9999u, 9999llu}, // 20 52 [0x0000000000000800,0x0000000000000fff] 0x0000270f 0x000000000000270f
   { 4, 9999u, 9999llu}, // 21 53 [0x0000000000000400,0x00000000000007ff] 0x0000270f 0x000000000000270f
   { 3, 999u, 999llu}, // 22 54 [0x0000000000000200,0x00000000000003ff] 0x000003e7 0x00000000000003e7 *
   { 3, 999u, 999llu}, // 23 55 [0x0000000000000100,0x00000000000001ff] 0x000003e7 0x00000000000003e7
   { 3, 999u, 999llu}, // 24 56 [0x0000000000000080,0x00000000000000ff] 0x000003e7 0x00000000000003e7
   { 2, 99u, 99llu}, // 25 57 [0x0000000000000040,0x000000000000007f] 0x00000063 0x0000000000000063 *
   { 2, 99u, 99llu}, // 26 58 [0x0000000000000020,0x000000000000003f] 0x00000063 0x0000000000000063
   { 2, 99u, 99llu}, // 27 59 [0x0000000000000010,0x000000000000001f] 0x00000063 0x0000000000000063
   { 1, 9u, 9llu}, // 28 60 [0x0000000000000008,0x000000000000000f] 0x00000009 0x0000000000000009 *
   { 1, 9u, 9llu}, // 29 61 [0x0000000000000004,0x0000000000000007] 0x00000009 0x0000000000000009
   { 1, 9u, 9llu}, // 30 62 [0x0000000000000002,0x0000000000000003] 0x00000009 0x0000000000000009
   { 1, 9u, 9llu}, // 31 63 [0x0000000000000001,0x0000000000000001] 0x00000009 0x0000000000000009
   { 1, 9u, 9llu}, // 32 64 [0x0000000000000000,0xffffffffffffffff] 0x00000009 0x0000000000000009 *
   };
#endif
0x000003e7 0x00000000000003e710544{ 3, 999u, 999llu}, // 24 56 [0x0000000000000080,0x00000000000000ff] 0x000003e7 0x00000000000003e710545{ 2, 99u, 99llu}, // 25 57 [0x0000000000000040,0x000000000000007f] 0x00000063 0x0000000000000063 *10546{ 2, 99u, 99llu}, // 26 58 [0x0000000000000020,0x000000000000003f] 0x00000063 0x000000000000006310547{ 2, 99u, 99llu}, // 27 59 [0x0000000000000010,0x000000000000001f] 0x00000063 0x000000000000006310548{ 1, 9u, 9llu}, // 28 60 [0x0000000000000008,0x000000000000000f] 0x00000009 0x0000000000000009 *10549{ 1, 9u, 9llu}, // 29 61 [0x0000000000000004,0x0000000000000007] 0x00000009 0x000000000000000910550{ 1, 9u, 9llu}, // 30 62 [0x0000000000000002,0x0000000000000003] 0x00000009 0x000000000000000910551{ 1, 9u, 9llu}, // 31 63 [0x0000000000000001,0x0000000000000001] 0x00000009 0x000000000000000910552{ 1, 9u, 9llu}, // 32 64 [0x0000000000000000,0xffffffffffffffff] 0x00000009 0x0000000000000009 *10553};10554#endif1055510556static TR::SymbolReference *10557getSymrefDigit10(TR::Compilation *comp, TR::Node *trNode)10558{10559if (comp->target().cpu.isZ())10560{10561return comp->getSymRefTab()->createKnownStaticDataSymbolRef((void *)digit10Table, TR::Address);10562}1056310564return NULL;10565}1056610567static TR::Node *10568createNodeLoadDigit10Table(TR::Compilation *comp, TR::Node *trNode)10569{10570TR_ASSERT(trNode->getDataType() == TR::Int32 || trNode->getDataType() == TR::Int64, "Unexpected datatype for trNode for CountDigits10.");10571TR::SymbolReference *symRef = getSymrefDigit10(comp, trNode);10572return symRef ? 
// Builds the "work/table" operand for a TR::countDigits node.
//
// When getSymrefDigit10() yields a symbol reference, the result is a
// TR::loadaddr of that symbol; otherwise it is a TR::aconst with value 0.
//
// comp   - current compilation
// trNode - node the new node is created from; expected to be Int32 or Int64
static TR::Node *
createNodeLoadDigit10Table(TR::Compilation *comp, TR::Node *trNode)
   {
   TR_ASSERT(trNode->getDataType() == TR::Int32 || trNode->getDataType() == TR::Int64, "Unexpected datatype for trNode for CountDigits10.");

   TR::SymbolReference *tableSymRef = getSymrefDigit10(comp, trNode);
   if (tableSymRef == NULL)
      {
      // No table for this target: pass a null address instead.
      return TR::Node::create(trNode, TR::aconst, 0, 0);
      }

   return TR::Node::createWithSymRef(trNode, TR::loadaddr, 0, tableSymRef);
   }
(!constNode->getIlOpCode().isLoadConst())10617{10618if (disptrace) traceMsg(comp, "%p is not isLoadConst().\n",constNode);10619return false;10620}1062110622TR::Node *countVarRepNode, *inputVarRepNode;10623getP2TTrRepNodes(trans, &countVarRepNode, &inputVarRepNode);10624TR::SymbolReference * countVarSymRef = countVarRepNode->getSymbolReference();10625TR::SymbolReference * inputVarSymRef = inputVarRepNode->getSymbolReference();10626TR::Node *countVar, *inputVar;10627TR::Node *workNode, *digitNode;10628countVar = createLoad(countVarRepNode);10629inputVar = createLoad(inputVarRepNode);1063010631TR_ASSERT(inputVar->getDataType() == TR::Int32 || inputVar->getDataType() == TR::Int64, "error");106321063310634// The countDigitsEvaluator does not handle long (register pairs) on 31-bit.10635if (inputVar->getDataType() == TR::Int64 && (!comp->target().cpu.isPower() && comp->target().is32Bit()))10636{10637return false;10638}1063910640TR::Node *versionNode = 0;10641int modificationResult = 0;10642switch(ifcmp->getOpcode())10643{10644case TR::ificmpeq:10645case TR::iflcmpeq:10646if (constNode->getOtherInfo() != 0)10647{10648if (disptrace) traceMsg(comp, "The exit-if is TR::if*cmpeq but the constant value is %d.\n",constNode->getOtherInfo());10649return false;10650}10651break;10652case TR::ificmplt:10653case TR::iflcmplt:10654if (constNode->getOtherInfo() != 10)10655{10656if (disptrace) traceMsg(comp, "The exit-if is TR::if*cmplt but the constant value is %d.\n",constNode->getOtherInfo());10657return false;10658}10659versionNode = TR::Node::createif((TR::ILOpCodes)ifcmp->getOpcode(), inputVar->duplicateTree(),10660constNode->getHeadOfTrNode()->duplicateTree());10661modificationResult = -1;10662break;10663default:10664if (disptrace) traceMsg(comp, "The exit-if %p is not as expected. 
We may be able to implement this case.\n",ifcmp);10665return false;10666}1066710668//workNode = createNodeLoadDigit10Table(comp, trNode);10669workNode = createNodeLoadDigit10Table(comp, inputVarRepNode);1067010671digitNode = TR::Node::create(trNode, TR::countDigits, 2);10672digitNode->setAndIncChild(0, inputVar);10673digitNode->setAndIncChild(1, workNode);10674if (modificationResult != 0)10675{10676digitNode = createOP2(comp, TR::isub, digitNode,10677TR::Node::create(digitNode, TR::iconst, 0, -modificationResult));10678}1067910680TR::Node *top = TR::Node::createStore(countVarSymRef,10681createOP2(comp, TR::iadd, countVar, digitNode));1068210683// Insert nodes and maintain the CFG10684if (versionNode)10685{10686List<TR::Node> guardList(comp->trMemory());10687guardList.add(versionNode);10688block = trans->modifyBlockByVersioningCheck(block, trTreeTop, &guardList);10689}10690else10691{10692block = trans->modifyBlockByVersioningCheck(block, trTreeTop, (List<TR::Node>*)0);10693}1069410695block = trans->insertBeforeNodes(block);10696block->append(TR::TreeTop::create(comp, top));10697trans->insertAfterNodes(block);1069810699trans->setSuccessorEdge(block, target);10700return true;10701}1070210703/****************************************************************************************10704Corresponding Java-like Pseudo Program10705int v1;10706long v2;10707while(true){10708v1++;10709v2 = v2 / 10;10710if (v2 == 0) break;10711}1071210713Note 1: This idiom already supported both division and multiplication versions.10714****************************************************************************************/10715TR_PCISCGraph *10716makeCountDecimalDigitLongGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)10717{10718TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CountDecimalDigitLong", 0, 16);10719TR_PCISCNode *ent, *ncmp, *v2, *cexit, *n9, *ndiv;10720if (isDiv2Mul)10721{10722/************************************ opc id dagId #cfg #child 
/****************************************************************************************
Corresponding Java-like Pseudo Program
int v1;
long v2;
while(true){
   v1++;
   v2 = v2 / 10;
   if (v2 == 0) break;
}

Note 1: Both the division form and the multiplication form of "v2 / 10" are supported;
        isDiv2Mul selects which one this pattern graph describes.
****************************************************************************************/
// Builds the pattern graph "CountDecimalDigitLong".
//
// c         - compilation whose trMemory() is used for the graph and its nodes
// ctrl      - control flags forwarded to the createIdiom* helpers
// isDiv2Mul - true:  match lmulh / lshr / (optional lushr + ladd), i.e. the division
//                    already rewritten as a multiplication by a constant
//             false: match a plain ldiv by lconst 10
//
// The graph registers CISCTransform2CountDecimalDigit as its transformer and the exit
// compare (ncmp) as important node 0.
TR_PCISCGraph *
makeCountDecimalDigitLongGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CountDecimalDigitLong", 0, 16);
   TR_PCISCNode *ent, *ncmp, *v2, *cexit, *n9, *ndiv;
   if (isDiv2Mul)
      {
      // Constructor arguments after trMemory(), in the order given by the banner below.
      // Node ids come from tgt->incNumNodes(), so the order of these statements matters.
      /************************************ opc id dagId #cfg #child other/pred/children */
      TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(v1); // count
      v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(v2); // long var
      cexit=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0); tgt->addNode(cexit); // all constant
      TR_PCISCNode *c2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 5, 0, 0, 2); tgt->addNode(c2); // iconst 2
      TR_PCISCNode *c63 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 4, 0, 0, 63); tgt->addNode(c63);// iconst 63 (optional)
      TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
      ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
      // Update of the count variable using the constant -1 (the "v1++" of the pseudo program).
      TR_PCISCNode *n1 = createIdiomDecVarInLoop(tgt, ctrl, 1, ent, v1, cm1);
      // The lconst below is created inside the loop body (dagId 1, predecessor n1) and,
      // unlike the other constants, without an explicit value argument.
      TR_PCISCNode *mag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst , TR::Int64, tgt->incNumNodes(), 1, 1, 0, n1); tgt->addNode(mag); // lconst 7378697629483820647
      TR_PCISCNode *nmul= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lmulh , TR::Int64, tgt->incNumNodes(), 1, 1, 2, mag, v2, mag); tgt->addNode(nmul);
      TR_PCISCNode *nshr= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lshr , TR::Int64, tgt->incNumNodes(), 1, 1, 2, nmul, nmul, c2); tgt->addNode(nshr);
      TR_PCISCNode *ushr= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lushr , TR::Int64, tgt->incNumNodes(), 1, 1, 2, nshr, v2, c63); tgt->addNode(ushr); // optional
      ndiv= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ladd , TR::Int64, tgt->incNumNodes(), 1, 1, 2, ushr, nshr, ushr); tgt->addNode(ndiv); // optional
      // The "v2 >>> 63" correction and the ladd that applies it may be absent in the target.
      c63->setIsOptionalNode();
      ushr->setIsOptionalNode();
      ushr->setSkipParentsCheck();
      ndiv->setIsOptionalNode();
      tgt->setNumDagIds(9);
      tgt->setAspects(isub|mul|shr);
      }
   else
      {
      /************************************ opc id dagId #cfg #child other/pred/children */
      TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(v1); // count
      v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(v2); // long var
      cexit=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(cexit); // all constant
      TR_PCISCNode *c10 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst , TR::Int64, tgt->incNumNodes(), 4, 0, 0, 10); tgt->addNode(c10);// lconst 10
      TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
      ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
      TR_PCISCNode *n1 = createIdiomDecVarInLoop(tgt, ctrl, 1, ent, v1, cm1);
      ndiv= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ldiv , TR::Int64, tgt->incNumNodes(), 1, 1, 2, n1, v2, c10); tgt->addNode(ndiv);
      tgt->setNumDagIds(8);
      tgt->setAspects(isub|division);
      }
   // Common tail: store the quotient back to v2, then the exit test "v2 <cmp> constant".
   TR_PCISCNode *nst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lstore , TR::Int64, tgt->incNumNodes(), 1, 1, 2, ndiv, ndiv, v2); tgt->addNode(nst);
   ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nst, v2, cexit); tgt->addNode(ncmp);
   n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);

   // Successors of the compare: the first node of the loop body and the exit node.
   ncmp->setSuccs(ent->getSucc(0), n9);

   tgt->setEntryNode(ent);
   tgt->setExitNode(n9);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setImportantNode(0, ncmp); // read back by the transformer as ImportantNode(0)
   tgt->setTransformer(CISCTransform2CountDecimalDigit);
   tgt->setInhibitAfterVersioning();
   tgt->setNoAspects(call|bndchk, existAccess, existAccess);
   tgt->setMinCounts(1, 0, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   return tgt;
   }
/****************************************************************************************
Corresponding Java-like Pseudo Program (Division version)
int v1, v2;
while(true){
   v1++;
   v2 = v2 / 10;
   if (v2 == 0) break;
}

Note 1: Both the division form and the multiplication form of "v2 / 10" are supported;
        isDiv2Mul selects which one this pattern graph describes.
****************************************************************************************/
// Builds the pattern graph "CountDecimalDigitInt".
//
// c         - compilation whose trMemory() is used for the graph and its nodes
// ctrl      - control flags forwarded to the createIdiom* helpers
// isDiv2Mul - true:  the division by 10 is expected in its "converted to multiply" form
//                    (constants 1717986919, 2 and 31 are passed to createIdiomIDiv10InLoop)
//             false: the division by 10 is expected as a plain divide (constant 10)
//
// The graph registers CISCTransform2CountDecimalDigit as its transformer and the exit
// compare (ncmp) as important node 0.
TR_PCISCGraph *
makeCountDecimalDigitIntGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CountDecimalDigitInt", 0, 16);
   TR_PCISCNode *ent, *ncmp, *v2, *cexit, *n9, *ndiv;
   if (isDiv2Mul)
      {
      // Node ids come from tgt->incNumNodes(), so the order of these statements matters.
      /************************************ opc id dagId #cfg #child other/pred/children */
      TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0); tgt->addNode(v1); // count
      v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 1); tgt->addNode(v2); // int var
      cexit=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0); tgt->addNode(cexit); // all constant
      TR_PCISCNode *c2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 6, 0, 0, 2); tgt->addNode(c2); // iconst 2
      TR_PCISCNode *c31 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 5, 0, 0, 31); tgt->addNode(c31); // iconst 31 (optional)
      TR_PCISCNode *mag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 4, 0, 0, 1717986919); tgt->addNode(mag);// iconst 1717986919
      TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
      ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
      // Update of the count variable using the constant -1 (the "v1++" of the pseudo program).
      TR_PCISCNode *n1 = createIdiomDecVarInLoop(tgt, ctrl, 1, ent, v1, cm1);
      // Multiplication form of "v2 / 10"; the helper receives the constants it needs.
      ndiv= createIdiomIDiv10InLoop(tgt, ctrl, true, 1, n1, v2, mag, c2, c31);
      tgt->setAspects(isub|mul|shr);
      tgt->setNumDagIds(10);
      }
   else
      {
      /************************************ opc id dagId #cfg #child other/pred/children */
      TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(v1); // count
      v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(v2); // int var
      cexit=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(cexit); // all constant (the transformer accepts only 0 or 10)
      TR_PCISCNode *mag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 4, 0, 0, 10); tgt->addNode(mag); // iconst 10
      TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
      ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
      TR_PCISCNode *n1 = createIdiomDecVarInLoop(tgt, ctrl, 1, ent, v1, cm1);
      // Plain division form: "mag" is simply the divisor 10, no shift constants are needed.
      ndiv= createIdiomIDiv10InLoop(tgt, ctrl, false, 1, n1, v2, mag, NULL, NULL);
      tgt->setAspects(isub|division);
      tgt->setNumDagIds(8);
      }
   // Common tail: store the quotient back to v2, then the exit test "v2 <cmp> constant".
   TR_PCISCNode * nst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, ndiv, ndiv, v2); tgt->addNode(nst);
   ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nst, v2, cexit); tgt->addNode(ncmp);
   n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);

   // Successors of the compare: the first node of the loop body and the exit node.
   ncmp->setSuccs(ent->getSucc(0), n9);
   tgt->setEntryNode(ent);
   tgt->setExitNode(n9);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setImportantNode(0, ncmp); // read back by the transformer as ImportantNode(0)
   tgt->setTransformer(CISCTransform2CountDecimalDigit);
   tgt->setInhibitAfterVersioning();
   tgt->setNoAspects(call|bndchk, existAccess, existAccess);
   tgt->setMinCounts(1, 0, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   return tgt;
   }
NULL);10821tgt->setAspects(isub|division);10822tgt->setNumDagIds(8);10823}10824TR_PCISCNode * nst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, ndiv, ndiv, v2); tgt->addNode(nst);10825ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nst, v2, cexit); tgt->addNode(ncmp);10826n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);1082710828ncmp->setSuccs(ent->getSucc(0), n9);10829tgt->setEntryNode(ent);10830tgt->setExitNode(n9);10831tgt->createInternalData(1);1083210833tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);10834tgt->setImportantNode(0, ncmp);10835tgt->setTransformer(CISCTransform2CountDecimalDigit);10836tgt->setInhibitAfterVersioning();10837tgt->setNoAspects(call|bndchk, existAccess, existAccess);10838tgt->setMinCounts(1, 0, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount10839tgt->setHotness(warm, false);10840return tgt;10841}10842108431084410845//////////////////////////////////////////////////////////////////////////10846//////////////////////////////////////////////////////////////////////////10847//////////////////////////////////////////////////////////////////////////10848// Convert long to string10849/* Example10850int v2, v3;10851while(true) {10852int num = v3 / 10;10853int ch = v3 - num * 10;10854v1[v2] = (char) ('0' - ch);10855v2--;10856v3 = num;10857} while (v3 != 0);10858*/1085910860static TR::SymbolReference *10861getSymrefLocalArray(TR::Compilation *comp, int size)10862{10863if (comp->target().cpu.isZ())10864{10865TR::SymbolReference *workSymRef = comp->getSymRefTab()->createLocalPrimArray(size, comp->getMethodSymbol(), 8); // work area for CVD(G)10866workSymRef->setStackAllocatedArrayAccess();10867return workSymRef;10868}10869return NULL;10870}1087110872static TR::Node *10873createNodeLoadLocalArray(TR::Compilation *comp, TR::Node 
*trNode, int size)10874{10875TR::SymbolReference *symRef = getSymrefLocalArray(comp, size);10876return symRef ? TR::Node::createWithSymRef(trNode, TR::loadaddr, 0, symRef) :10877TR::Node::create(trNode, TR::aconst, 0, 0);10878}1087910880//*****************************************************************************************10881// IL code generation for converting integer to string (using CVD and UNPKU)10882// Input: ImportantNode(0) - istore node for index (V2)10883// ImportantNode(1) - (i/l)store node for input value (V3)10884// ImportantNode(2) - array store node10885// ImportantNode(3) - null check (optional)10886//*****************************************************************************************10887bool10888CISCTransform2LongToStringDigit(TR_CISCTransformer *trans)10889{10890TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");10891TR::Node *trNode;10892TR::TreeTop *trTreeTop;10893TR::Block *block;10894TR_CISCGraph *P = trans->getP();10895List<TR_CISCNode> *P2T = trans->getP2T();10896TR::Compilation *comp = trans->comp();10897bool ctrl = trans->isGenerateI2L();1089810899TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");1090010901TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");10902if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;1090310904trans->findFirstNode(&trTreeTop, &trNode, &block);10905if (!block) return false; // cannot find1090610907if (isLoopPreheaderLastBlockInMethod(comp, block))10908{10909traceMsg(comp, "Bailing CISCTransform2LongToStringDigit due to null TT - might be a preheader in last block of method\n");10910return false;10911}1091210913TR::Block *target = trans->analyzeSuccessorBlock();10914// Currently, it allows only a single successor.10915if (!target) return false;10916TR_CISCNode *arrayStoreCISC = 
trans->getP2TInLoopIfSingle(P->getImportantNode(2));10917if (!arrayStoreCISC) return false;10918TR::Node *arrayStoreAddress = arrayStoreCISC->getHeadOfTrNode()->getChild(0)->duplicateTree();1091910920TR::Node *baseVarRepNode, *countVarRepNode, *inputVarRepNode;10921getP2TTrRepNodes(trans, &baseVarRepNode, &countVarRepNode, &inputVarRepNode);10922TR::SymbolReference * countVarSymRef = countVarRepNode->getSymbolReference();10923TR::SymbolReference * inputVarSymRef = inputVarRepNode->getSymbolReference();10924TR::Node *countVar, *inputVar;10925countVar = createLoad(countVarRepNode);10926inputVar = createLoad(inputVarRepNode);10927TR::Node *replaceParent = NULL;10928int childNum = -1;10929if (!trans->searchNodeInTrees(arrayStoreAddress, countVar, &replaceParent, &childNum))10930return false;1093110932TR_ASSERT(inputVar->getDataType() == TR::Int32 || inputVar->getDataType() == TR::Int64, "error");1093310934//10935// obtain a CISCNode of each store10936TR_CISCNode *storeV2 = trans->getP2TRepInLoop(P->getImportantNode(0));10937TR_CISCNode *storeV3 = trans->getP2TRepInLoop(P->getImportantNode(1));10938TR_ASSERT(storeV2 != NULL && storeV3 != NULL, "error");10939TR::Node *nullchk = 0;10940if (P->getImportantNode(3))10941{10942TR_CISCNode *nullchkCISC = trans->getP2TInLoopIfSingle(P->getImportantNode(3));10943if (nullchkCISC) nullchk = nullchkCISC->getHeadOfTrNode()->duplicateTree();10944}1094510946//10947// checking a set of all uses for each index10948TR_ASSERT(storeV2->getDagID() == storeV3->getDagID(), "error");10949#if 110950TR::Node *digit = TR::Node::create(TR::countDigits, 2,10951inputVar,10952createNodeLoadDigit10Table(comp, inputVarRepNode));10953#else10954TR::Node *digit = TR::Node::create(TR::countDigits, 2,10955inputVar,10956createNodeLoadDigit10Table(comp, trNode));10957#endif10958TR::Node *resultV2 = createOP2(comp, TR::isub, countVar, digit);10959replaceParent->setAndIncChild(childNum, createOP2(comp, TR::isub, resultV2,10960TR::Node::create(trNode, 
TR::iconst, 0, -1)));10961TR::Node *storeResultV3 = 0;10962if (!storeV3->checkDagIdInChains())10963{10964TR::DataType dataType = storeV3->getDataType();10965TR::Node * constNode;10966if (dataType == TR::Int32)10967{10968constNode = TR::Node::create(trNode, TR::iconst, 0, 0);10969}10970else10971{10972constNode = TR::Node::create(trNode, TR::lconst, 0, 0);10973constNode->setLongInt(0);10974}10975storeResultV3 = TR::Node::createStore(inputVarSymRef, constNode);10976}1097710978TR::Node *l2s = TR::Node::create(trNode, TR::long2String, 4);10979l2s->setSymbolReference(comp->getSymRefTab()->findOrCreatelong2StringSymbol());10980l2s->setAndIncChild(0, inputVar);10981l2s->setAndIncChild(1, arrayStoreAddress);10982l2s->setAndIncChild(2, digit);10983l2s->setAndIncChild(3, createNodeLoadLocalArray(comp, trNode, 16));10984TR::Node *storeResultV2 = TR::Node::createStore(countVarSymRef, resultV2);1098510986// Insert nodes and maintain the CFG10987TR::TreeTop *last;10988last = trans->removeAllNodes(trTreeTop, block->getExit());10989last->join(block->getExit());10990block = trans->insertBeforeNodes(block);10991if (nullchk) block->append(TR::TreeTop::create(comp, nullchk));10992block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, l2s)));10993block->append(TR::TreeTop::create(comp, storeResultV2));10994if (storeResultV3) block->append(TR::TreeTop::create(comp, storeResultV3));1099510996trans->insertAfterNodes(block);1099710998trans->setSuccessorEdge(block, target);10999return true;11000}110011100211003TR_PCISCGraph *11004makeLongToStringGraph(TR::Compilation *c, int32_t ctrl)11005{11006TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "LongToString", 0, 16);11007/************************************ opc id dagId #cfg #child other/pred/children */11008TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0); tgt->addNode(v1); // array base11009TR_PCISCNode *v2 = new 
// Builds the pattern graph "LongToString": a loop that takes the remainder of a long
// value by 10, turns it into a character value, stores it into a 2-byte-element array
// at a moving index, divides the value by 10 and exits when the value becomes 0.
//
// c    - compilation whose trMemory() is used for the graph and its nodes
// ctrl - control flags forwarded to the createIdiom* helpers
//
// The graph registers CISCTransform2LongToStringDigit as its transformer.  The important
// nodes are set in the order that transformer reads them:
//    0: adm1 (index update), 1: nst3 (store of the input value),
//    2: ncst (array store),  3: nck  (optional null check)
TR_PCISCGraph *
makeLongToStringGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "LongToString", 0, 16);
   // Node ids come from tgt->incNumNodes(), so the order of these statements matters.
   /************************************ opc id dagId #cfg #child other/pred/children */
   TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0); tgt->addNode(v1); // array base
   TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0); tgt->addNode(v2); // count
   TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1); tgt->addNode(v3); // long var
   TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 2); tgt->addNode(v4); // stored value
   TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 12, 0, 0); tgt->addNode(vorc); // length
   TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(cmah); // array header
   TR_PCISCNode *cl0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst , TR::Int64, tgt->incNumNodes(), 10, 0, 0, 0); tgt->addNode(cl0);// lconst 0
   TR_PCISCNode *cl10= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst , TR::Int64, tgt->incNumNodes(), 9, 0, 0, 10); tgt->addNode(cl10);//lconst 10
   TR_PCISCNode *c0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(c0); // iconst 0
   TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 7, 2); // element size
   TR_PCISCNode *c9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 6, 0, 0, 9); tgt->addNode(c9); // iconst 9
   TR_PCISCNode *cm87= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 5, 0, 0, -87); tgt->addNode(cm87);//iconst -87
   TR_PCISCNode *cm48= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 4, 0, 0, -48); tgt->addNode(cm48);//iconst -48
   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
   // v4 = 0 - (int)(v3 % 10)
   TR_PCISCNode *nrem= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lrem , TR::Int64, tgt->incNumNodes(), 1, 1, 2, ent, v3, cl10); tgt->addNode(nrem);
   TR_PCISCNode *nl2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::l2i , TR::Int32, tgt->incNumNodes(), 1, 1, 1, nrem, nrem); tgt->addNode(nl2i);
   TR_PCISCNode *nneg= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub , TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl2i, c0, nl2i); tgt->addNode(nneg);
   TR_PCISCNode *nst4= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, nneg, nneg, v4); tgt->addNode(nst4);
   // Two-way branch on "v4 > 9" (the opcode is ificmpgt although the local is named ifge).
   TR_PCISCNode *ifge= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpgt, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nst4, v4, c9); tgt->addNode(ifge);
   // The two arms adjust v4 with the constants -48 and -87 respectively
   // (presumably v4 - (-48) and v4 - (-87), i.e. the digit/letter character offsets -- TODO confirm).
   TR_PCISCNode *ad48= createIdiomDecVarInLoop(tgt, ctrl, 1, ifge, v4, cm48);
   TR_PCISCNode *ad87= createIdiomDecVarInLoop(tgt, ctrl, 1, ad48, v4, cm87);
   // Index update of v2 with the constant -1.
   TR_PCISCNode *adm1= createIdiomIncVarInLoop(tgt, ctrl, 1, ad87, v2, cm1);
   TR_PCISCNode *nck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, adm1, v1); tgt->addNode(nck); // optional
   TR_PCISCNode *bck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nck, vorc, v2); tgt->addNode(bck);
   // Store of v4 into v1[v2] (element size c2, array header cmah).
   TR_PCISCNode *ncst= createIdiomCharArrayStoreInLoop(tgt, ctrl, 1, bck, v1, v2, cmah, c2, v4);
   // v3 = v3 / 10; exit when v3 == 0
   TR_PCISCNode *ndiv= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ldiv , TR::Int64, tgt->incNumNodes(), 1, 1, 2, ncst, v3, cl10); tgt->addNode(ndiv);
   TR_PCISCNode *nst3= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lstore , TR::Int64, tgt->incNumNodes(), 1, 1, 2, ndiv, ndiv, v3); tgt->addNode(nst3);
   TR_PCISCNode *ifeq= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iflcmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, nst3, v3, cl0); tgt->addNode(ifeq);
   TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);

   // Shape the diamond: successor 1 of the compare goes to ad87, and ad48 continues at
   // adm1 instead of at ad87, so only one of the two adjustments is on each path.
   ifge->setSucc(1, ad87);
   ad48->setSucc(0, adm1);
   // Successors of the exit compare: the first node of the loop body and the exit node.
   ifeq->setSuccs(ent->getSucc(0), n9);
   nck->setIsOptionalNode();

   tgt->setEntryNode(ent);
   tgt->setExitNode(n9);
   tgt->setImportantNodes(adm1, nst3, ncst, nck);
   tgt->setNumDagIds(17);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2LongToStringDigit);
   tgt->setAspects(isub|iadd|bndchk|division|reminder, 0, ILTypeProp::Size_2);
   tgt->setNoAspects(call, 0, 0);
   tgt->setMinCounts(2, 0, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitAfterVersioning();
   return tgt;
   }
TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 11, 0, 0, 10); tgt->addNode(c10);// iconst 1011073if (isDiv2Mul)11074{11075c31 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 10, 0, 0, 31); tgt->addNode(c31);// iconst 3111076if (ctrl & CISCUtilCtl_64Bit)11077{11078ci2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 9, 0, 0, 2); tgt->addNode(ci2);// iconst 211079}11080else11081ci2 = c2;11082mag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 8, 0, 0, 1717986919); tgt->addNode(mag);// iconst 171798691911083otherMask = shr;11084}11085else11086{11087ci2 = c31 = NULL;11088mag = c10;11089otherMask = division;11090}11091TR_PCISCNode *vorc= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(),7, 0, 0); tgt->addNode(vorc); // length11092TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah); // array header11093TR_PCISCNode *c0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(c0); // iconst 011094TR_PCISCNode *c48 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst , TR::Int32, tgt->incNumNodes(), 4, 0, 0, 48); tgt->addNode(c48);//iconst 4811095TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);11096TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);11097TR_PCISCNode *nck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, ent, v1); tgt->addNode(nck); // optional11098TR_PCISCNode *bck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, 
tgt->incNumNodes(), 1, 1, 2, nck, vorc, v2); tgt->addNode(bck);11099TR_PCISCNode *addr= createIdiomArrayAddressInLoop(tgt, ctrl, 1, bck, v1, v2, cmah, c2);11100TR_PCISCNode *ndiv= createIdiomIDiv10InLoop(tgt, ctrl, isDiv2Mul, 1, addr, v3, mag, ci2, c31);11101TR_PCISCNode *nmul= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ndiv, ndiv, c10); tgt->addNode(nmul);11102TR_PCISCNode *nrem= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nmul, v3, nmul); tgt->addNode(nrem);11103TR_PCISCNode *nch = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nrem, c48, nrem); tgt->addNode(nch);11104TR_PCISCNode *ncst= createIdiomCharArrayStoreBodyInLoop(tgt, ctrl, 1, nch, addr, nch);11105TR_PCISCNode *nst3= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore , TR::Int32, tgt->incNumNodes(), 1, 1, 2, ncst, ndiv, v3); tgt->addNode(nst3);11106TR_PCISCNode *adm1= createIdiomIncVarInLoop(tgt, ctrl, 1, nst3, v2, cm1);11107TR_PCISCNode *ifeq= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, adm1, v3, c0); tgt->addNode(ifeq);11108TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);1110911110ifeq->setSuccs(ent->getSucc(0), n9);11111nck->setIsOptionalNode();1111211113tgt->setEntryNode(ent);11114tgt->setExitNode(n9);11115tgt->setImportantNodes(adm1, nst3, ncst, nck);11116tgt->setNumDagIds(16);11117tgt->createInternalData(1);1111811119tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);11120tgt->setTransformer(CISCTransform2LongToStringDigit);11121tgt->setAspects(isub|iadd|bndchk|mul|otherMask, 0, ILTypeProp::Size_2);11122tgt->setNoAspects(call, 0, 0);11123tgt->setMinCounts(1, 0, 1); // minimum ifCount, indirectLoadCount, indirectStoreCount11124tgt->setHotness(warm, 
false);11125tgt->setInhibitAfterVersioning();11126return tgt;11127}111281112911130