Path: blob/master/runtime/compiler/optimizer/J9EstimateCodeSize.cpp
6000 views
/*******************************************************************************1* Copyright (c) 2000, 2021 IBM Corp. and others2*3* This program and the accompanying materials are made available under4* the terms of the Eclipse Public License 2.0 which accompanies this5* distribution and is available at https://www.eclipse.org/legal/epl-2.0/6* or the Apache License, Version 2.0 which accompanies this distribution and7* is available at https://www.apache.org/licenses/LICENSE-2.0.8*9* This Source Code may also be made available under the following10* Secondary Licenses when the conditions for such availability set11* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU12* General Public License, version 2 with the GNU Classpath13* Exception [1] and GNU General Public License, version 2 with the14* OpenJDK Assembly Exception [2].15*16* [1] https://www.gnu.org/software/classpath/license.html17* [2] http://openjdk.java.net/legal/assembly-exception.html18*19* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception20*******************************************************************************/2122#include <algorithm>23#include "codegen/CodeGenerator.hpp"24#include "compile/InlineBlock.hpp"25#include "compile/Method.hpp"26#include "compile/ResolvedMethod.hpp"27#if defined(J9VM_OPT_JITSERVER)28#include "env/j9methodServer.hpp"29#endif /* defined(J9VM_OPT_JITSERVER) */30#include "env/VMJ9.h"31#include "il/Node.hpp"32#include "il/Node_inlines.hpp"33#include "il/ParameterSymbol.hpp"34#include "il/TreeTop.hpp"35#include "il/TreeTop_inlines.hpp"36#include "optimizer/PreExistence.hpp"37#include "optimizer/J9CallGraph.hpp"38#include "optimizer/J9EstimateCodeSize.hpp"39#include "optimizer/InterpreterEmulator.hpp"40#include "ras/LogTracer.hpp"41#include "runtime/J9Profiler.hpp"4243// Empirically determined value44const float TR_J9EstimateCodeSize::STRING_COMPRESSION_ADJUSTMENT_FACTOR = 0.75f;4546// There was no analysis done to determine this factor. It was chosen by intuition.47const float TR_J9EstimateCodeSize::METHOD_INVOKE_ADJUSTMENT_FACTOR = 0.20f;484950/*51DEFINEs are ugly in general, but putting52if (tracer)53heuristicTrace(...)54everywhere in this class seems to be a much worse idea.55Unfortunately, C++98 doesn't have a good way to forward varargs56except for using a DEFINE57*/5859#define heuristicTraceIfTracerIsNotNull(r, ...) \60if (r) { \61if ((r)->heuristicLevel()) { (r)->alwaysTraceM(__VA_ARGS__); } \62}63class NeedsPeekingHeuristic64{65public:6667static const int default_distance = 25;68static const int NUM_LOADS = 4;6970NeedsPeekingHeuristic(TR_CallTarget* calltarget, TR_J9ByteCodeIterator& bci, TR::ResolvedMethodSymbol* methodSymbol, TR::Compilation* comp, int d = default_distance) :71_hasArgumentsInfo(false),72_size(0),73_bci(bci),74_distance(d),75_numOfArgs(0),76_needsPeeking(false),77_tracer(0)78{79TR_PrexArgInfo* argInfo = calltarget->_ecsPrexArgInfo;80//no argInfo available for this caller81if (!argInfo)82return;8384int i = 0;85int32_t numParms = methodSymbol->getParameterList().getSize();86_numOfArgs = numParms;87ListIterator<TR::ParameterSymbol> parmIt(&methodSymbol->getParameterList());88for (TR::ParameterSymbol *p = parmIt.getFirst(); p; p = parmIt.getNext(), i++)89{90int32_t len;91const char *sig = p->getTypeSignature(len);92if (i >= argInfo->getNumArgs() || //not enough slots in argInfo93(*sig != 'L' && *sig != 'Q') || //primitive arg94!argInfo->get(i) || //no arg at the i-th slot95!argInfo->get(i)->getClass() //no classInfo at the i-th slot96)97{98continue;99}100101TR_OpaqueClassBlock *clazz = comp->fej9()->getClassFromSignature(sig, len, methodSymbol->getResolvedMethod());102if (!clazz)103{104continue;105}106107TR_OpaqueClassBlock* argClass = argInfo->get(i)->getClass();108//findCallSiteTarget and validateAndPropagateArgsFromCalleeSymbol109//should take care of incompatible receivers110//this assertion only checks if the receiver is of the right type111//there's no harm in propagating other incompatible arguments112//as soon as one of those becomes a receiver this very same assertion113//should fire114TR_ASSERT(comp->fej9()->isInstanceOf(argClass, clazz, true, true, true) == TR_yes || i != 0 || !calltarget->_myCallSite->_isIndirectCall, "Incompatible receiver should have been handled by findCallSiteTarget");115116// even the arg type propagated from the caller might be not more specific117// than the type got from callee signature, we should still try to118// do peeking. if we don't do peeking here, we will lose the chance to propagate119// the type info to the callsites of this calltarget.120static const bool keepBogusPeekingCondition = feGetEnv("TR_DisableBogusPeekingCondition") ? false: true;121if ( !keepBogusPeekingCondition || clazz != argClass ) //if two classes aren't equal it follows that argClass is more specific122//argsClass can either be equal to or a subclass of clazz123//see validateAndPropagateArgsFromCalleeSymbol124{125_hasArgumentsInfo = true;126_argInfo = argInfo;127}128129/*130if (comp->fej9()->isInstanceOf (argClass, clazz, true, true, true) == TR_yes)131{132if (clazz != argClass)133_hasArgumentsInfo = true;134}135else136{137_hasArgumentsInfo = false;138return; // _hasArgumentsInfo will be equal to false and no propagation is going to happen139// because the incoming type information is not compatible140}141*/142}143};144145void setTracer(TR_InlinerTracer *trc)146{147_tracer = trc;148heuristicTraceIfTracerIsNotNull(_tracer, "NeedsPeekingHeuristic is initialized with the following values: _hasArgumentsInfo = %d, NUM_LOADS = %d, _distance =%d, _needsPeeking = %d", _hasArgumentsInfo, NUM_LOADS, _distance, _needsPeeking);149}150151void checkIfThereIsAParmLoadWithinDistance()152{153for (int i = 0; i < _size; i++)154{155if (_bci.bcIndex() - _loadIndices[i] <= _distance)156{157_needsPeeking = true;158heuristicTraceIfTracerIsNotNull(_tracer, "there is a parm load at %d which is within %d of a call at %d", _loadIndices[i], _distance, _bci.bcIndex());159}160}161};162163void processByteCode()164{165if (!_hasArgumentsInfo)166return;167TR_J9ByteCode bc = _bci.current();168int slotIndex = -1;169switch (bc)170{171case J9BCaload0:172slotIndex = 0;173break;174case J9BCaload1:175slotIndex = 1;176break;177case J9BCaload2:178slotIndex = 2;179break;180case J9BCaload3:181slotIndex = 3;182break;183case J9BCaload:184slotIndex = _bci.nextByte();185TR_ASSERT(slotIndex >= 0 , "a slot shouldn't be negative");186break;187case J9BCaloadw:188slotIndex = _bci.next2Bytes();189TR_ASSERT(slotIndex >= 0 , "a slot shouldn't be negative");190break;191case J9BCinvokevirtual:192case J9BCinvokespecial:193case J9BCinvokestatic:194case J9BCinvokeinterface:195case J9BCinvokedynamic:196case J9BCinvokehandle:197case J9BCinvokehandlegeneric:198checkIfThereIsAParmLoadWithinDistance ();199default :200break;201202}203204if (slotIndex >=0)205{206processParameterLoad(slotIndex);207}208209};210211212void processParameterLoad (int slotIndex)213{214//This heuristic simply checks if we indeed hit a parameter load (as opposed to an auto)215//and if we have an argInfo for this slot we would want to propagate216//Note, _hasArgumentsInfo is checked in processByteCode217//we should not even reach this code unless we have some PrexInfo218if (slotIndex < _numOfArgs && _argInfo->get(slotIndex))219{220heuristicTraceIfTracerIsNotNull(_tracer,"came across of a load of slot %d at %d", slotIndex, _bci.bcIndex());221_loadIndices[_size] = _bci.bcIndex();222_size = (_size + 1) % NUM_LOADS;223}224}225bool doPeeking () { return _needsPeeking; };226227protected:228int32_t _loadIndices [NUM_LOADS];229int _size;230int _numOfArgs;231int _distance;232TR_J9ByteCodeIterator& _bci;233bool _hasArgumentsInfo;234TR_PrexArgInfo * _argInfo;235bool _needsPeeking;236TR_InlinerTracer * _tracer;237238};239#undef heuristicTraceIfTracerIsNotNull240241void242TR_J9EstimateCodeSize::setupNode(TR::Node *node, uint32_t bcIndex,243TR_ResolvedMethod *feMethod, TR::Compilation *comp)244{245node->getByteCodeInfo().setDoNotProfile(0);246node->setByteCodeIndex(bcIndex);247node->setInlinedSiteIndex(-10);248node->setMethod(feMethod->getPersistentIdentifier());249}250251252TR::Block *253TR_J9EstimateCodeSize::getBlock(TR::Compilation *comp, TR::Block * * blocks,254TR_ResolvedMethod *feMethod, int32_t i, TR::CFG & cfg)255{256if (!blocks[i])257{258259TR::TreeTop *startTree = TR::TreeTop::create(comp, TR::Node::create(260NULL, TR::BBStart, 0));261TR::TreeTop *endTree = TR::TreeTop::create(comp, TR::Node::create(262NULL, TR::BBEnd, 0));263264startTree->join(endTree);265blocks[i] = TR::Block::createBlock(startTree, endTree, cfg);266267blocks[i]->setBlockBCIndex(i);268blocks[i]->setNumber(cfg.getNextNodeNumber());269270setupNode(startTree->getNode(), i, feMethod, comp);271setupNode(endTree->getNode(), i, feMethod, comp);272cfg.addNode(blocks[i]);273}274275return blocks[i];276}277278static TR::ILOpCodes convertBytecodeToIL (TR_J9ByteCode bc)279{280switch (bc)281{282case J9BCifeq: return TR::ificmpeq;283case J9BCifne: return TR::ificmpne;284case J9BCiflt: return TR::ificmplt;285case J9BCifge: return TR::ificmpge;286case J9BCifgt: return TR::ificmpgt;287case J9BCifle: return TR::ificmple;288case J9BCifnull: return TR::ifacmpeq;289case J9BCifnonnull: return TR::ifacmpne;290case J9BCificmpeq: return TR::ificmpeq;291case J9BCificmpne: return TR::ificmpne;292case J9BCificmplt: return TR::ificmplt;293case J9BCificmpge: return TR::ificmpge;294case J9BCificmpgt: return TR::ificmpgt;295case J9BCificmple: return TR::ificmple;296case J9BCifacmpeq: return TR::ifacmpeq;297case J9BCifacmpne: return TR::ifacmpne;298case J9BCtableswitch: return TR::table;299case J9BClookupswitch: return TR::lookup;300case J9BCgoto:301case J9BCgotow: return TR::Goto;302case J9BCReturnC: /* fall-through */303case J9BCReturnS: /* fall-through */304case J9BCReturnB: /* fall-through */305case J9BCReturnZ: /* fall-through */306case J9BCgenericReturn: return TR::Return;307case J9BCathrow: return TR::athrow;308default:309TR_ASSERT(0,"Unsupported conversion for now.");310return TR::BadILOp;311}312return TR::BadILOp;313}314315void316TR_J9EstimateCodeSize::setupLastTreeTop(TR::Block *currentBlock, TR_J9ByteCode bc,317uint32_t bcIndex, TR::Block *destinationBlock, TR_ResolvedMethod *feMethod,318TR::Compilation *comp)319{320TR::Node *node = TR::Node::createOnStack(NULL, convertBytecodeToIL(bc), 0);321TR::TreeTop *tree = TR::TreeTop::create(comp, node);322setupNode(node, bcIndex, feMethod, comp);323if (node->getOpCode().isBranch())324node->setBranchDestination(destinationBlock->getEntry());325currentBlock->append(tree);326}327328329//Partial Inlining330bool331TR_J9EstimateCodeSize::isInExceptionRange(TR_ResolvedMethod * feMethod,332int32_t bcIndex)333{334int32_t numExceptionRanges = feMethod->numberOfExceptionHandlers();335336if (numExceptionRanges == 0)337return false;338339int32_t start, end, catchtype;340341for (int32_t i = 0; i < numExceptionRanges; i++)342{343feMethod->exceptionData(i, &start, &end, &catchtype);344if (bcIndex > start && bcIndex < end)345return true;346}347return false;348}349350351static bool cameFromArchetypeSpecimen(TR_ResolvedMethod *method)352{353if (!method)354return false; // end of recursion355else if (method->convertToMethod()->isArchetypeSpecimen())356return true; // Archetypes often call methods that are never called until the archetype is compiled357else358return cameFromArchetypeSpecimen(method->owningMethod());359}360361bool362TR_J9EstimateCodeSize::adjustEstimateForStringCompression(TR_ResolvedMethod* method, int32_t& value, float factor)363{364const uint16_t classNameLength = method->classNameLength();365366if ((classNameLength == 16 && !strncmp(method->classNameChars(), "java/lang/String", classNameLength)) ||367(classNameLength == 22 && !strncmp(method->classNameChars(), "java/lang/StringBuffer", classNameLength)) ||368(classNameLength == 23 && !strncmp(method->classNameChars(), "java/lang/StringBuilder", classNameLength)))369{370// A statistical analysis of the number of places certain methods got inlined yielded results which suggest that the371// following recognized methods incurr several percent worth of increase in compile-time at no benefit to throughput.372// As such we can save additional compile-time by not making adjustments to these methods.373374if (method->getRecognizedMethod() != TR::java_lang_String_regionMatches &&375method->getRecognizedMethod() != TR::java_lang_String_regionMatches_bool &&376method->getRecognizedMethod() != TR::java_lang_String_equals)377{378value *= factor;379380return true;381}382}383384return false;385}386387/** \details388* The `Method.invoke` API contains a call to `Reflect.getCallerClass()` API which when executed will trigger a389* stack walking operation. Performance wise this is quite expensive. The `Reflect.getCallerClass()` API returns390* the class of the method which called `Method.invoke`, so if we can promote inlining of `Method.invoke` we can391* eliminate the `Reflect.getCallerClass()` call with a simple load, thus avoiding the expensive stack walk.392*/393bool394TR_J9EstimateCodeSize::adjustEstimateForMethodInvoke(TR_ResolvedMethod* method, int32_t& value, float factor)395{396if (method->getRecognizedMethod() == TR::java_lang_reflect_Method_invoke)397{398static const char *factorOverrideChars = feGetEnv("TR_MethodInvokeInlinerFactor");399static const int32_t factorOverride = (factorOverrideChars != NULL) ? atoi(factorOverrideChars) : 0;400if (factorOverride != 0)401{402factor = 1.0f / static_cast<float>(factorOverride);403}404405value *= factor;406407return true;408}409410return false;411}412413bool414TR_J9EstimateCodeSize::estimateCodeSize(TR_CallTarget *calltarget, TR_CallStack *prevCallStack, bool recurseDown)415{416if (realEstimateCodeSize(calltarget, prevCallStack, recurseDown, comp()->trMemory()->currentStackRegion()))417{418if (_isLeaf && _realSize > 1)419{420heuristicTrace(tracer(),"Subtracting 1 from sizes because _isLeaf is true");421--_realSize;422--_optimisticSize;423}424return true;425}426427return false;428}429430TR::CFG&431TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget, TR::Region &cfgRegion, TR_J9ByteCodeIterator& bci, NeedsPeekingHeuristic &nph, TR::Block** blocks, flags8_t * flags)432{433434char nameBuffer[1024];435const char *callerName = NULL;436if (tracer()->heuristicLevel())437callerName = comp()->fej9()->sampleSignature(438calltarget->_calleeMethod->getPersistentIdentifier(), nameBuffer,4391024, comp()->trMemory());440441int size = calltarget->_myCallSite->_isIndirectCall ? 5 : 0;442443int32_t maxIndex = bci.maxByteCodeIndex() + 5;444445int32_t *bcSizes = (int32_t *) comp()->trMemory()->allocateStackMemory(446maxIndex * sizeof(int32_t));447memset(bcSizes, 0, maxIndex * sizeof(int32_t));448449bool blockStart = true;450451bool thisOnStack = false;452bool hasThisCalls = false;453bool foundNewAllocation = false;454455bool unresolvedSymbolsAreCold = comp()->notYetRunMeansCold();456457TR_ByteCodeInfo newBCInfo;458newBCInfo.setDoNotProfile(0);459if (_mayHaveVirtualCallProfileInfo)460newBCInfo.setCallerIndex(comp()->getCurrentInlinedSiteIndex());461462// PHASE 1: Bytecode Iteration463464bool callExists = false;465size = calltarget->_myCallSite->_isIndirectCall ? 5 : 0;466TR_J9ByteCode bc = bci.first(), nextBC;467468#if defined(J9VM_OPT_JITSERVER)469if (comp()->isOutOfProcessCompilation())470{471// JITServer optimization:472// request this resolved method to create all of its callee resolved methods473// in a single query.474//475// If the method is unresolved, return NULL for 2 requests without asking the client,476// since they are called almost immediately after this request and are unlikely to477// become resolved.478//479// NOTE: first request occurs in the for loop over bytecodes, immediately after this request,480// second request occurs in InterpreterEmulator::findAndCreateCallsitesFromBytecodes481auto calleeMethod = static_cast<TR_ResolvedJ9JITServerMethod *>(calltarget->_calleeMethod);482calleeMethod->cacheResolvedMethodsCallees(2);483}484#endif /* defined(J9VM_OPT_JITSERVER) */485486for (; bc != J9BCunknown; bc = bci.next())487{488nph.processByteCode();489TR_ResolvedMethod * resolvedMethod;490int32_t cpIndex;491bool isVolatile, isPrivate, isUnresolvedInCP, resolved;492TR::DataType type = TR::NoType;493void * staticAddress;494uint32_t fieldOffset;495496newBCInfo.setByteCodeIndex(bci.bcIndex());497int32_t i = bci.bcIndex();498499if (blockStart) //&& calltarget->_calleeSymbol)500{501flags[i].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);502blockStart = false;503foundNewAllocation = false;504}505506if (bc == J9BCgenericReturn ||507bc == J9BCReturnC ||508bc == J9BCReturnS ||509bc == J9BCReturnB ||510bc == J9BCReturnZ)511{512if (!calltarget->_calleeMethod->isSynchronized())513size += 1;514else515size += bci.estimatedCodeSize();516}517else518size += bci.estimatedCodeSize();519520switch (bc)521{522case J9BCificmpeq:523case J9BCificmpne:524case J9BCificmplt:525case J9BCificmpge:526case J9BCificmpgt:527case J9BCificmple:528case J9BCifacmpeq:529case J9BCifacmpne:530case J9BCifnull:531case J9BCifnonnull:532case J9BCifeq:533case J9BCifne:534case J9BCiflt:535case J9BCifge:536case J9BCifgt:537case J9BCifle:538case J9BCgoto:539case J9BCgotow:540flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);541flags[i + bci.relativeBranch()].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);542blockStart = true;543break;544case J9BCReturnC:545case J9BCReturnS:546case J9BCReturnB:547case J9BCReturnZ:548case J9BCgenericReturn:549flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);550blockStart = true;551break;552case J9BCnew:553case J9BCnewarray:554case J9BCanewarray:555case J9BCmultianewarray:556if (calltarget->_calleeSymbol)557foundNewAllocation = true;558flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);559break;560case J9BCathrow:561_foundThrow = true;562flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);563blockStart = true;564if (!_aggressivelyInlineThrows)565flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);566flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);567break;568case J9BCtableswitch:569{570flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);571int32_t index = bci.defaultTargetIndex();572flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);573int32_t low = bci.nextSwitchValue(index);574int32_t high = bci.nextSwitchValue(index) - low + 1;575for (int32_t j = 0; j < high; ++j)576flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);577blockStart = true;578flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);579break;580}581case J9BClookupswitch:582{583flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);584int32_t index = bci.defaultTargetIndex();585flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);586int32_t tableSize = bci.nextSwitchValue(index);587for (int32_t j = 0; j < tableSize; ++j)588{589index += 4; // match value590flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);591}592blockStart = true;593flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);594break;595}596case J9BCinvokevirtual:597{598if (thisOnStack)599hasThisCalls = true;600cpIndex = bci.next2Bytes();601auto calleeMethod = (TR_ResolvedJ9Method*)calltarget->_calleeMethod;602resolvedMethod = calleeMethod->getResolvedPossiblyPrivateVirtualMethod(comp(), cpIndex, true, &isUnresolvedInCP);603604///if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), true))605if ((isUnresolvedInCP && !resolvedMethod) || (resolvedMethod606&& resolvedMethod->isCold(comp(), true)))607{608609if(tracer()->heuristicLevel())610{611if(resolvedMethod)612{613heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));614}615else616{617TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);618heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));619}620}621if (unresolvedSymbolsAreCold)622flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);623_isLeaf = false;624}625}626627callExists = true;628flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);629break;630case J9BCinvokespecial:631case J9BCinvokespecialsplit:632{633if (thisOnStack)634hasThisCalls = true;635cpIndex = bci.next2Bytes();636resolvedMethod = calltarget->_calleeMethod->getResolvedSpecialMethod(comp(), (bc == J9BCinvokespecialsplit)?cpIndex |= J9_SPECIAL_SPLIT_TABLE_INDEX_FLAG:cpIndex, &isUnresolvedInCP);637bool isIndirectCall = false;638bool isInterface = false;639TR::Method *interfaceMethod = 0;640TR::TreeTop *callNodeTreeTop = 0;641TR::Node *parent = 0;642TR::Node *callNode = 0;643TR::ResolvedMethodSymbol *resolvedSymbol = 0;644if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), false))645{646if(tracer()->heuristicLevel())647{648if(resolvedMethod)649{650heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));651}652else653{654if (bc == J9BCinvokespecialsplit)655cpIndex |= J9_SPECIAL_SPLIT_TABLE_INDEX_FLAG;656TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);657heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));658}659}660if (unresolvedSymbolsAreCold)661flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);662_isLeaf = false;663}664}665callExists = true;666flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);667break;668case J9BCinvokestatic:669case J9BCinvokestaticsplit:670{671cpIndex = bci.next2Bytes();672resolvedMethod = calltarget->_calleeMethod->getResolvedStaticMethod(comp(), (bc == J9BCinvokestaticsplit)?cpIndex |= J9_STATIC_SPLIT_TABLE_INDEX_FLAG:cpIndex, &isUnresolvedInCP);673bool isIndirectCall = false;674bool isInterface = false;675TR::Method *interfaceMethod = 0;676TR::TreeTop *callNodeTreeTop = 0;677TR::Node *parent = 0;678TR::Node *callNode = 0;679TR::ResolvedMethodSymbol *resolvedSymbol = 0;680if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), false))681{682if (unresolvedSymbolsAreCold)683flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);684if(tracer()->heuristicLevel())685{686if(resolvedMethod)687heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));688else689{690if (bc == J9BCinvokestaticsplit)691cpIndex |= J9_STATIC_SPLIT_TABLE_INDEX_FLAG;692TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);693heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));694}695}696}697}698callExists = true;699flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);700break;701case J9BCinvokeinterface:702cpIndex = bci.next2Bytes();703flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);704break;705case J9BCgetfield:706resolved = calltarget->_calleeMethod->fieldAttributes(comp(), bci.next2Bytes(), &fieldOffset, &type, &isVolatile, 0, &isPrivate, false, &isUnresolvedInCP, false);707if (!resolved || isUnresolvedInCP)708{709if (unresolvedSymbolsAreCold)710flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);711if (!resolved)712_isLeaf = false;713}714if (isInExceptionRange(calltarget->_calleeMethod, i))715flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);716break;717case J9BCputfield:718resolved = calltarget->_calleeMethod->fieldAttributes(comp(), bci.next2Bytes(), &fieldOffset, &type, &isVolatile, 0, &isPrivate, true, &isUnresolvedInCP, false);719if (!resolved || isUnresolvedInCP)720{721if (unresolvedSymbolsAreCold)722flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);723if (!resolved)724_isLeaf = false;725}726flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);727break;728case J9BCgetstatic:729resolved = calltarget->_calleeMethod->staticAttributes(comp(), bci.next2Bytes(), &staticAddress, &type, &isVolatile, 0, &isPrivate, false, &isUnresolvedInCP, false);730if (!resolved || isUnresolvedInCP)731{732if (unresolvedSymbolsAreCold)733flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);734if (!resolved)735_isLeaf = false;736}737if (isInExceptionRange(calltarget->_calleeMethod, i))738flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);739break;740case J9BCputstatic:741resolved = calltarget->_calleeMethod->staticAttributes(comp(), bci.next2Bytes(), &staticAddress, &type, &isVolatile, 0, &isPrivate, true, &isUnresolvedInCP, false);742if (!resolved || isUnresolvedInCP)743{744if (unresolvedSymbolsAreCold)745flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);746if (!resolved)747_isLeaf = false;748}749flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);750break;751case J9BCaload0:752if (calltarget->_myCallSite->_isIndirectCall)753thisOnStack = true;754break;755case J9BCiastore:756case J9BClastore:757case J9BCfastore:758case J9BCdastore:759case J9BCaastore:760case J9BCbastore:761case J9BCcastore:762case J9BCsastore: //array stores can change the global state - hence unsanitizeable763flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);764break;765case J9BCiaload:766case J9BClaload:767case J9BCfaload:768case J9BCdaload:769case J9BCaaload:770case J9BCbaload:771case J9BCcaload:772case J9BCsaload:773case J9BCarraylength: //array accesses are ok as long as we don't catch exception774case J9BCidiv:775case J9BCldiv:776case J9BCfdiv:777case J9BCddiv:778case J9BCirem:779case J9BClrem:780case J9BCfrem:781case J9BCdrem:782case J9BCcheckcast:783case J9BCinstanceof:784case J9BCasyncCheck:785if (isInExceptionRange(calltarget->_calleeMethod, i))786flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);787break;788case J9BCinvokedynamic:789case J9BCinvokehandle:790case J9BCinvokehandlegeneric:791// TODO:JSR292: Use getResolvedHandleMethod792case J9BCmonitorenter:793case J9BCmonitorexit:794case J9BCunknown:795flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);796break;797default:798break;799}800801if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable))802debugTrace(tracer(),"BC at index %d is unsanitizeable.", i);803else if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isCold))804debugTrace(tracer(),"BC at index %d is cold.", i);805else806debugTrace(tracer(),"BC iteration at index %d.", i); //only print this index if we are debugging807808bcSizes[i] = size;809}810811auto sizeBeforeAdjustment = size;812813if (adjustEstimateForStringCompression(calltarget->_calleeMethod, size, STRING_COMPRESSION_ADJUSTMENT_FACTOR))814{815heuristicTrace(tracer(), "*** Depth %d: Adjusting size for %s because of string compression from %d to %d", _recursionDepth, callerName, sizeBeforeAdjustment, size);816}817818if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, size, METHOD_INVOKE_ADJUSTMENT_FACTOR))819{820heuristicTrace(tracer(), "*** Depth %d: Adjusting size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, sizeBeforeAdjustment, size);821}822823calltarget->_fullSize = size;824825if (calltarget->_calleeSymbol)826{827TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "inliner/%s/estimatedBytecodeSize/%d", calltarget->_calleeSymbol->signature(comp()->trMemory()), calltarget->_fullSize));828}829830/********* PHASE 2: Generate CFG **********/831832heuristicTrace(tracer(),"--- Done Iterating over Bytecodes in call to %s. size = %d _recursionDepth = %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",callerName, size, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);833834if (hasThisCalls && calltarget->_calleeSymbol)835calltarget->_calleeSymbol->setHasThisCalls(true);836837838TR_Array<TR_J9ByteCodeIterator::TryCatchInfo> tryCatchInfo(839comp()->trMemory(),840calltarget->_calleeMethod->numberOfExceptionHandlers(), true,841stackAlloc);842843int32_t i;844for (i = calltarget->_calleeMethod->numberOfExceptionHandlers() - 1; i845>= 0; --i)846{847int32_t start, end, type;848int32_t handler = calltarget->_calleeMethod->exceptionData(i, &start,849&end, &type);850851flags[start].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);852flags[end + 1].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);853flags[handler].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);854855tryCatchInfo[i].initialize((uint16_t) start, (uint16_t) end,856(uint16_t) handler, (uint32_t) type);857}858859calltarget->_cfg = new (cfgRegion) TR::CFG(comp(), calltarget->_calleeSymbol, cfgRegion);860TR::CFG &cfg = *(calltarget->_cfg);861cfg.setStartAndEnd(TR::Block::createBlock(862TR::TreeTop::create(comp(), TR::Node::create(NULL,863TR::BBStart, 0)), TR::TreeTop::create(comp(),864TR::Node::create(NULL, TR::BBEnd, 0)),865cfg), TR::Block::createBlock(866TR::TreeTop::create(comp(), TR::Node::create(NULL,867TR::BBStart, 0)), TR::TreeTop::create(comp(),868TR::Node::create(NULL, TR::BBEnd, 0)),869cfg));870871cfg.getStart()->asBlock()->getEntry()->join(872cfg.getStart()->asBlock()->getExit());873cfg.getEnd()->asBlock()->getEntry()->join(874cfg.getEnd()->asBlock()->getExit());875cfg.getStart()->setNumber(cfg.getNextNodeNumber());876cfg.allocateNodeNumber();877cfg.getEnd()->setNumber(cfg.getNextNodeNumber());878cfg.allocateNodeNumber();879880cfg.getEnd()->asBlock()->setIsEndBlock();881882TR::Block * currentBlock = cfg.getStart()->asBlock();883currentBlock->setBlockBCIndex(0);884885int32_t endNodeIndex = bci.maxByteCodeIndex() - 1;886if (endNodeIndex < 0)887{888debugTrace(tracer(), "MaxByteCodeIndex <= 0, setting BC index for end node to 0.");889endNodeIndex = 0;890}891892setupNode(cfg.getStart()->asBlock()->getEntry()->getNode(), 0,893calltarget->_calleeMethod, comp());894setupNode(cfg.getStart()->asBlock()->getExit()->getNode(), 0,895calltarget->_calleeMethod, comp());896setupNode(cfg.getEnd()->asBlock()->getEntry()->getNode(),897endNodeIndex, calltarget->_calleeMethod, comp());898setupNode(cfg.getEnd()->asBlock()->getExit()->getNode(),899endNodeIndex, calltarget->_calleeMethod, comp());900901902debugTrace(tracer(),"PECS: startblock %p %d endblock %p %d",cfg.getStart()->asBlock(), cfg.getStart()->getNumber(), cfg.getEnd()->asBlock(), cfg.getEnd()->getNumber());903904bool addFallThruEdge = true;905906debugTrace(tracer(),"PECS: iterating over bc indexes in CFG creation. maxIndex =%d", maxIndex);907int32_t blockStartSize = 0;908int32_t startIndex = 0;909for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown; bc = bci.next())910{911int32_t i = bci.bcIndex();912if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::bbStart))913{914debugTrace(tracer(),"Calling getBlock. blocks[%d] = %p", i, blocks[i]);915TR::Block * newBlock = getBlock(comp(), blocks,916calltarget->_calleeMethod, i, cfg);917918if (i != startIndex)919{920currentBlock->setBlockSize(bcSizes[i] - blockStartSize);921if (cfg.getMethodSymbol())922cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());923}924925if (addFallThruEdge)926{927debugTrace(tracer(),"adding a fallthrough edge between block %p %d and %p %d", currentBlock, currentBlock->getNumber(), newBlock, newBlock->getNumber());928debugTrace(tracer(),"joining nodes between blocks %p %d and %p %d", currentBlock, currentBlock->getNumber(), newBlock, newBlock->getNumber());929currentBlock->getExit()->join(newBlock->getEntry());930cfg.addEdge(currentBlock, newBlock);931}932else933{934addFallThruEdge = true;935}936currentBlock = newBlock;937938startIndex = i;939blockStartSize = bcSizes[i];940}941942if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isCold))943{944partialTrace(tracer(), "Setting block %p[%d] blocks[%d]=%p as cold because bytecode %d was identified as cold",currentBlock, currentBlock->getNumber(), i, blocks[i], i);945currentBlock->setIsCold();946currentBlock->setFrequency(0);947}948if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable))949{950partialTrace(tracer(), "Setting unsanitizeable flag on block %p[%d] blocks[%d]=%p",currentBlock, currentBlock->getNumber(), i, blocks[i]);951currentBlock->setIsUnsanitizeable();952}953954if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isBranch))955{956if (startIndex != i)957{958currentBlock->setBlockSize(bcSizes[i] - blockStartSize);959if (cfg.getMethodSymbol())960cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());961}962else963{964currentBlock->setBlockSize(1); // if there startIndex is the same as the current index then the block consists only of a branch965if (cfg.getMethodSymbol())966cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());967}968969switch (bc)970{971case J9BCificmpeq:972case J9BCificmpne:973case J9BCificmplt:974case J9BCificmpge:975case J9BCificmpgt:976case J9BCificmple:977case J9BCifacmpeq:978case J9BCifacmpne:979case J9BCifeq:980case J9BCifne:981case J9BCiflt:982case J9BCifge:983case J9BCifgt:984case J9BCifle:985case J9BCifnull:986case J9BCifnonnull:987{988debugTrace(tracer(),"if branch.i = %d adding edge between blocks %p %d and %p %d",989i, currentBlock, currentBlock->getNumber(), getBlock(comp(), blocks, calltarget->_calleeMethod, i+ bci.relativeBranch(), cfg),990getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg)->getNumber());991992setupLastTreeTop(currentBlock, bc, i, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg), calltarget->_calleeMethod, comp());993cfg.addEdge(currentBlock, getBlock(comp(), blocks,994calltarget->_calleeMethod, i + bci.relativeBranch(),995cfg));996addFallThruEdge = true;997break;998}999case J9BCgoto:1000case J9BCgotow:1001setupLastTreeTop(currentBlock, bc, i, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg), calltarget->_calleeMethod, comp());1002cfg.addEdge(currentBlock, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg));1003addFallThruEdge = false;1004break;1005case J9BCReturnC:1006case J9BCReturnS:1007case J9BCReturnB:1008case J9BCReturnZ:1009case J9BCgenericReturn:1010case J9BCathrow:1011setupLastTreeTop(currentBlock, bc, i, cfg.getEnd()->asBlock(), calltarget->_calleeMethod, comp());1012cfg.addEdge(currentBlock, cfg.getEnd());1013addFallThruEdge = false;1014break;1015case J9BCtableswitch:1016{1017int32_t index = bci.defaultTargetIndex();1018TR::Block *defaultBlock = getBlock(comp(), blocks,1019calltarget->_calleeMethod, i + bci.nextSwitchValue(1020index), cfg);1021setupLastTreeTop(currentBlock, bc, i, defaultBlock,1022calltarget->_calleeMethod, comp());1023cfg.addEdge(currentBlock, defaultBlock);1024int32_t low = bci.nextSwitchValue(index);1025int32_t high = bci.nextSwitchValue(index) - low + 1;1026for (int32_t j = 0; j < high; ++j)1027cfg.addEdge(currentBlock, getBlock(comp(), blocks,1028calltarget->_calleeMethod, i + bci.nextSwitchValue(1029index), cfg));1030addFallThruEdge = false;1031break;1032}1033case J9BClookupswitch:1034{1035int32_t index = bci.defaultTargetIndex();1036TR::Block *defaultBlock = getBlock(comp(), blocks,1037calltarget->_calleeMethod, i + bci.nextSwitchValue(1038index), cfg);1039setupLastTreeTop(currentBlock, bc, i, defaultBlock,1040calltarget->_calleeMethod, comp());1041cfg.addEdge(currentBlock, defaultBlock);1042int32_t tableSize = bci.nextSwitchValue(index);1043for (int32_t j = 0; j < tableSize; ++j)1044{1045index += 4; // match value1046cfg.addEdge(currentBlock, getBlock(comp(), blocks,1047calltarget->_calleeMethod, i + bci.nextSwitchValue(1048index), cfg));1049}1050addFallThruEdge = false;1051break;1052}1053default:1054break;1055}1056}1057// printf("Iterating through sizes array. bcSizes[%d] = %d maxIndex = %d\n",i,bcSizes[i],maxIndex);1058}10591060for (i = 0; i < (int32_t) tryCatchInfo.size(); ++i)1061{1062TR_J9ByteCodeIterator::TryCatchInfo * handlerInfo = &tryCatchInfo[i];10631064blocks[handlerInfo->_handlerIndex]->setHandlerInfoWithOutBCInfo(1065handlerInfo->_catchType, 0, handlerInfo->_handlerIndex,1066calltarget->_calleeMethod, comp());10671068for (int32_t j = handlerInfo->_startIndex; j <= handlerInfo->_endIndex; ++j)1069if (blocks[j])1070cfg.addExceptionEdge(blocks[j], blocks[handlerInfo->_handlerIndex]);1071}10721073107410751076return cfg;1077}10781079bool1080TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallStack *prevCallStack, bool recurseDown, TR::Region &cfgRegion)1081{1082TR_ASSERT(calltarget->_calleeMethod, "assertion failure");10831084heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- calltarget = %p , _ecsPrexArgInfo = %p",1085_recursionDepth, calltarget, calltarget->_ecsPrexArgInfo);1086108710881089if (tracer()->heuristicLevel() && calltarget->_ecsPrexArgInfo)1090{1091heuristicTrace(tracer(), "ECS CSI -- ArgInfo :");1092calltarget->_ecsPrexArgInfo->dumpTrace();1093}10941095TR_InlinerDelimiter delimiter(tracer(), "realEstimateCodeSize");10961097if (calltarget->_calleeMethod->numberOfExceptionHandlers() > 0)1098_hasExceptionHandlers = true;10991100if (_aggressivelyInlineThrows)1101{1102TR_CatchBlockProfileInfo * catchInfo = TR_CatchBlockProfileInfo::get(comp(), calltarget->_calleeMethod);1103if (catchInfo)1104_throwCount += catchInfo->getThrowCounter();1105}11061107//TR::Compilation * comp = _inliner->comp();11081109char nameBuffer[1024];1110const char *callerName = NULL;1111if (tracer()->heuristicLevel())1112callerName = comp()->fej9()->sampleSignature(1113calltarget->_calleeMethod->getPersistentIdentifier(), nameBuffer,11141024, comp()->trMemory());11151116heuristicTrace(tracer(),1117"*** Depth %d: ECS to begin for target %p signature %s size assuming we can partially inline (optimistic size) = %d total real size so far = %d sizeThreshold %d",1118_recursionDepth, calltarget, callerName, _optimisticSize, _realSize,1119_sizeThreshold);11201121TR_ByteCodeInfo newBCInfo;1122newBCInfo.setDoNotProfile(0);1123TR::ResolvedMethodSymbol* methodSymbol = TR::ResolvedMethodSymbol::create(comp()->trHeapMemory(), calltarget->_calleeMethod, comp());1124if (_mayHaveVirtualCallProfileInfo)1125{1126if (!comp()->incInlineDepth(methodSymbol, calltarget->_myCallSite->_bcInfo, 0, NULL, !calltarget->_myCallSite->_isIndirectCall))1127{1128return false; //this is intentional1129//calling returnCleanup here will result in assertion1130//as incInlineDepth doesn't do anything1131}113211331134newBCInfo.setCallerIndex(comp()->getCurrentInlinedSiteIndex());1135}11361137if( comp()->getVisitCount() > HIGH_VISIT_COUNT )1138{1139heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());1140return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);1141}11421143if (_recursionDepth > MAX_ECS_RECURSION_DEPTH)1144{1145calltarget->_isPartialInliningCandidate = false;1146heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. Exceeded Recursion Depth", _recursionDepth, calltarget, callerName);1147return returnCleanup(ECS_RECURSION_DEPTH_THRESHOLD_EXCEEDED);1148}11491150InterpreterEmulator bci(calltarget, methodSymbol, static_cast<TR_J9VMBase *> (comp()->fej9()), comp(), tracer(), this);11511152int32_t maxIndex = bci.maxByteCodeIndex() + 5;11531154flags8_t * flags = (flags8_t *) comp()->trMemory()->allocateStackMemory(1155maxIndex * sizeof(flags8_t));1156memset(flags, 0, maxIndex * sizeof(flags8_t));11571158TR_CallSite * * callSites =1159(TR_CallSite * *) comp()->trMemory()->allocateStackMemory(maxIndex1160* sizeof(TR_CallSite *));1161memset(callSites, 0, maxIndex * sizeof(TR_CallSite *));11621163bool unresolvedSymbolsAreCold = comp()->notYetRunMeansCold();11641165TR_CallStack callStack(comp(), 0, calltarget->_calleeMethod, prevCallStack, 0);11661167TR_PrexArgInfo* argsFromSymbol = TR_PrexArgInfo::buildPrexArgInfoForMethodSymbol(methodSymbol, tracer());11681169if (!TR_PrexArgInfo::validateAndPropagateArgsFromCalleeSymbol(argsFromSymbol, calltarget->_ecsPrexArgInfo, tracer()))1170{1171heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. Incompatible arguments", _recursionDepth, calltarget, callerName);1172return returnCleanup(ECS_ARGUMENTS_INCOMPATIBLE);1173}11741175NeedsPeekingHeuristic nph(calltarget, bci, methodSymbol, comp());1176//this might be a little bit too verbose, so let's hide the heuristic's output behind this env var1177static char *traceNeedsPeeking = feGetEnv("TR_traceNeedsPeekingHeuristic");1178if (traceNeedsPeeking)1179{1180nph.setTracer(tracer());1181}11821183bool wasPeekingSuccessfull = false;11841185const static bool debugMHInlineWithOutPeeking = feGetEnv("TR_DebugMHInlineWithOutPeeking") ? true: false;1186bool mhInlineWithPeeking = comp()->getOption(TR_DisableMHInlineWithoutPeeking);1187const static bool disableMethodHandleInliningAfterFirstPass = feGetEnv("TR_DisableMethodHandleInliningAfterFirstPass") ? true: false;1188bool inlineArchetypeSpecimen = calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen() &&1189(!disableMethodHandleInliningAfterFirstPass || _inliner->firstPass());1190bool inlineLambdaFormGeneratedMethod = comp()->fej9()->isLambdaFormGeneratedMethod(calltarget->_calleeMethod) &&1191(!disableMethodHandleInliningAfterFirstPass || _inliner->firstPass());11921193// No need to peek LF methods, as we'll always interprete the method with state in order to propagate object info1194// through bytecodes to find call targets1195if (!inlineLambdaFormGeneratedMethod &&1196((nph.doPeeking() && recurseDown) ||1197(inlineArchetypeSpecimen && mhInlineWithPeeking)))1198{11991200heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- needsPeeking is true for calltarget %p",1201_recursionDepth, calltarget);12021203bool ilgenSuccess = (NULL != methodSymbol->getResolvedMethod()->genMethodILForPeekingEvenUnderMethodRedefinition(methodSymbol, comp(), false, NULL));1204if (ilgenSuccess)1205{1206heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- peeking was successfull for calltarget %p", _recursionDepth, calltarget);1207_inliner->getUtil()->clearArgInfoForNonInvariantArguments(calltarget->_ecsPrexArgInfo, methodSymbol, tracer());1208wasPeekingSuccessfull = true;1209}1210}1211else if (inlineArchetypeSpecimen && !mhInlineWithPeeking && debugMHInlineWithOutPeeking)1212{1213traceMsg(comp(), "printing out trees and bytecodes through peeking because DebugMHInlineWithOutPeeking is on\n");1214methodSymbol->getResolvedMethod()->genMethodILForPeekingEvenUnderMethodRedefinition(methodSymbol, comp(), false, NULL);1215}12161217TR::Block * * blocks =1218(TR::Block * *) comp()->trMemory()->allocateStackMemory(maxIndex1219* sizeof(TR::Block *));1220memset(blocks, 0, maxIndex * sizeof(TR::Block *));12211222TR::CFG &cfg = processBytecodeAndGenerateCFG(calltarget, cfgRegion, bci, nph, blocks, flags);1223int size = calltarget->_fullSize;12241225// Adjust call frequency for unknown or direct calls, for which we don't get profiling information1226//1227TR_ValueProfileInfoManager * profileManager = TR_ValueProfileInfoManager::get(comp());1228bool callGraphEnabled = !comp()->getOption(TR_DisableCallGraphInlining);//profileManager->isCallGraphProfilingEnabled(comp());1229if (!_inliner->firstPass() || inlineArchetypeSpecimen || inlineLambdaFormGeneratedMethod)1230callGraphEnabled = false; // TODO: Work out why this doesn't function properly on subsequent passes1231if (callGraphEnabled && recurseDown)1232{1233TR_OpaqueMethodBlock *method = calltarget->_myCallSite->_callerResolvedMethod->getPersistentIdentifier();1234uint32_t bcIndex = calltarget->_myCallSite->_bcInfo.getByteCodeIndex();1235int32_t callCount = profileManager->getCallGraphProfilingCount(method,1236bcIndex, comp());1237cfg._calledFrequency = callCount;12381239if (callCount <= 0 && _lastCallBlockFrequency > 0)1240cfg._calledFrequency = _lastCallBlockFrequency;12411242heuristicTrace(tracer(),1243"Depth %d: Setting called count for caller index %d, bytecode index %d of %d", _recursionDepth,1244calltarget->_myCallSite->_bcInfo.getCallerIndex(),1245calltarget->_myCallSite->_bcInfo.getByteCodeIndex(), callCount);1246}1247else if (callGraphEnabled)1248{1249cfg._calledFrequency = 10000;1250}12511252cfg.propagateColdInfo(callGraphEnabled); // propagate coldness but also generate frequency information1253// for blocks if call graph profiling is enabled12541255if (tracer()->heuristicLevel())1256{1257heuristicTrace(tracer(), "After propagating the coldness info\n");1258heuristicTrace(tracer(), "<cfg>");1259for (TR::CFGNode* node = cfg.getFirstNode(); node; node = node->getNext())1260{1261comp()->findOrCreateDebug()->print(comp()->getOutFile(), node, 6);1262}1263heuristicTrace(tracer(), "</cfg>");1264}12651266bool callsitesAreCreatedFromTrees = false;1267if (wasPeekingSuccessfull1268&& comp()->getOrCreateKnownObjectTable()1269&& calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen())1270{1271TR::Block *currentInlinedBlock = NULL;1272// call sites in method handle thunks are created from trees so skip bci.findAndCreateCallsitesFromBytecodes below1273callsitesAreCreatedFromTrees = true;1274TR::NodeChecklist visited(comp());1275for (TR::TreeTop* tt = methodSymbol->getFirstTreeTop(); tt; tt = tt->getNextTreeTop())1276{1277if (tt->getNode()->getOpCodeValue() == TR::BBStart)1278/*1279* TODO: we should use the proper block with correct block frequency info1280* but profiling for method handle thunks doesn't work yet1281*/1282currentInlinedBlock = tt->getEnclosingBlock();12831284if (tt->getNode()->getNumChildren()>0 &&1285tt->getNode()->getFirstChild()->getOpCode().isCall())1286{1287TR::Node* parent = tt->getNode();1288TR::Node* callNode = tt->getNode()->getFirstChild();1289TR::SymbolReference* symRef = callNode->getSymbolReference();1290if (!callNode->getSymbolReference()->isUnresolved() && !visited.contains(callNode) &&1291!callSites[callNode->getByteCodeIndex()]) // skip if the callsite has already been created for this byte code index1292{1293int i = callNode->getByteCodeIndex();1294visited.add(callNode);1295TR_ResolvedMethod* resolvedMethod = callNode->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod();1296TR::RecognizedMethod rm = resolvedMethod->getRecognizedMethod();12971298TR_CallSite *callsite = TR_CallSite::create(tt, parent, callNode,1299resolvedMethod->classOfMethod(), symRef, resolvedMethod,1300comp(), comp()->trMemory() , heapAlloc, calltarget->_calleeMethod, _recursionDepth, false);13011302TR_PrexArgInfo *argInfo = calltarget->_ecsPrexArgInfo;13031304callsite->_callerBlock = currentInlinedBlock;1305if (isInlineable(&callStack, callsite))1306{1307callSites[i] = callsite;1308bci._inlineableCallExists = true;13091310if (!currentInlinedBlock->isCold())1311_hasNonColdCalls = true;1312for (int j = 0; j < callSites[i]->numTargets(); j++)1313callSites[i]->getTarget(j)->_originatingBlock = currentInlinedBlock;1314}1315else1316{1317//support counters1318calltarget->addDeadCallee(callsite);1319}13201321// clearing the node generated by peeking ilgen1322// _callNode will be filled with node generated by actual ilgen @see TR_InlinerBase::findAndUpdateCallSiteInGraph1323callsite->_callNode = NULL;1324}1325}1326}1327}13281329if (!callsitesAreCreatedFromTrees)1330{1331bci.prepareToFindAndCreateCallsites(blocks, flags, callSites, &cfg, &newBCInfo, _recursionDepth, &callStack);1332bool iteratorWithState = (inlineArchetypeSpecimen && !mhInlineWithPeeking) || inlineLambdaFormGeneratedMethod;13331334if (!bci.findAndCreateCallsitesFromBytecodes(wasPeekingSuccessfull, iteratorWithState))1335{1336heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. bci.findAndCreateCallsitesFromBytecode failed", _recursionDepth, calltarget, callerName);1337return returnCleanup(ECS_CALLSITES_CREATION_FAILED);1338}1339_hasNonColdCalls = bci._nonColdCallExists;1340}13411342if (comp()->isServerInlining())1343{1344int coldCode = 0;1345int executedCode = 0;1346bool isCold = false;1347int coldBorderFrequency = 20;13481349for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown; bc = bci.next())1350{1351int32_t i = bci.bcIndex();1352if (blocks[i])1353{1354if (!blocks[i]->isCold() && blocks[i]->getFrequency() > coldBorderFrequency)1355isCold = false;1356else1357isCold = true;1358}13591360if (isCold)1361coldCode++;1362else1363executedCode++;1364}13651366if (executedCode != 0)1367{1368float ratio = ((float) executedCode) / ((float) (coldCode1369+ executedCode));13701371if (recurseDown)1372{1373if (ratio < 0.7f)1374{1375ratio = 0.7f;1376}1377}1378else1379{1380if (ratio < 0.1f)1381{1382ratio = 0.1f;1383}1384}13851386calltarget->_fullSize = (int) ((float) calltarget->_fullSize * ratio);1387heuristicTrace(tracer(),"Depth %d: Opt Server is reducing size of call to %d",_recursionDepth,calltarget->_fullSize);1388}1389}1390else if (_inliner->getPolicy()->aggressiveSmallAppOpts())1391{1392TR_J9InlinerPolicy *j9inlinerPolicy = (TR_J9InlinerPolicy *) _inliner->getPolicy();1393if (j9inlinerPolicy->aggressivelyInlineInLoops() && calltarget && calltarget->_calleeMethod && strncmp(calltarget->_calleeMethod->classNameChars(),"java/math/BigDecimal",calltarget->_calleeMethod->classNameLength())!=0)1394{1395if ((callStack._inALoop) &&1396(calltarget->_fullSize > 10))1397{1398calltarget->_fullSize = 10;1399heuristicTrace(tracer(),"Opt Server is reducing size of call to %d",calltarget->_fullSize);1400}1401}1402else1403heuristicTrace(tracer(),"Omitting Big Decimal method from size readjustment, calltarget = %p calleemethod = %p",calltarget,calltarget ? calltarget->_calleeMethod : 0);1404}14051406if (_inliner->forceInline(calltarget))1407{1408calltarget->_fullSize = 0;1409calltarget->_partialSize = 0;1410}141114121413/*************** PHASE 3: Optimistically Assume we can partially inline calltarget and add to an optimisticSize ******************/14141415TR_Queue<TR::Block> callBlocks(comp()->trMemory());1416bool isCandidate = trimBlocksForPartialInlining(calltarget, &callBlocks);14171418switch (calltarget->_calleeMethod->getRecognizedMethod())1419{1420case TR::java_util_HashMap_get:1421case TR::java_util_HashMap_findNonNullKeyEntry:1422calltarget->_isPartialInliningCandidate = false;1423isCandidate = false;1424break;1425default:1426break;1427}14281429if (isCandidate)1430_optimisticSize += calltarget->_partialSize;1431else1432_optimisticSize += calltarget->_fullSize;14331434int32_t sizeThreshold = _sizeThreshold;1435if (isCandidate)1436sizeThreshold = std::max(4096, sizeThreshold);1437///if(_optimisticSize > _sizeThreshold) // even optimistically we've blown our budget1438heuristicTrace(tracer(),"--- Depth %d: Checking Optimistic size vs Size Threshold: _optimisticSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _optimisticSize, _sizeThreshold, sizeThreshold);14391440if (_optimisticSize > sizeThreshold) // even optimistically we've blown our budget1441{1442calltarget->_isPartialInliningCandidate = false;1443heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. optimisticSize exceeds Size Threshold", _recursionDepth, calltarget, callerName);1444return returnCleanup(ECS_OPTIMISTIC_SIZE_THRESHOLD_EXCEEDED);1445}14461447if (!recurseDown)1448{1449heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. recurseDown set to false. size = %d _fullSize = %d", _recursionDepth, calltarget, callerName, size, calltarget->_fullSize);1450return returnCleanup(ECS_NORMAL);1451}14521453/****************** Phase 4: Deal with Inlineable Calls **************************/1454TR::Block *currentBlock = NULL;1455for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown && bci._inlineableCallExists; bc = bci.next())1456{1457int32_t i = bci.bcIndex();1458//heuristicTrace(tracer(),"--- Depth %d: Checking _real size vs Size Threshold: _realSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _realSize, _sizeThreshold, sizeThreshold);14591460if (_realSize > sizeThreshold)1461{1462heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. real size %d exceeds sizeThreshold %d", _recursionDepth,calltarget, callerName,_realSize,sizeThreshold);1463return returnCleanup(ECS_REAL_SIZE_THRESHOLD_EXCEEDED);1464}14651466if (blocks[i])1467currentBlock = blocks[i];14681469newBCInfo.setByteCodeIndex(i);1470if (callSites[i])1471{1472callSites[i]->setDepth(_recursionDepth);1473debugTrace(tracer(),"Found a call at bytecode %d, depth = %d", i, _recursionDepth);14741475// TODO: Investigate if we should add BigAppOpts opts here1476for (int32_t j = 0; j < callSites[i]->numTargets(); j++)1477{1478TR_CallTarget *targetCallee = callSites[i]->getTarget(j);14791480char nameBuffer[1024];1481const char *calleeName = NULL;1482if (tracer()->heuristicLevel())1483calleeName = comp()->fej9()->sampleSignature(targetCallee->_calleeMethod->getPersistentIdentifier(), nameBuffer, 1024, comp()->trMemory());14841485if (callGraphEnabled && !currentBlock->isCold())1486{1487// if call-graph profiling is enabled and the call is special or static (!indirect)1488// then update the block frequency information because we don't profile predictable calls1489if (!callSites[i]->isIndirectCall())1490{1491profileManager->updateCallGraphProfilingCount( currentBlock, calltarget->_calleeMethod->getPersistentIdentifier(), i, comp());1492heuristicTrace(tracer(),"Depth %d: Updating Call Graph Profiling Count for calltarget %p count = %d",_recursionDepth, calltarget,profileManager->getCallGraphProfilingCount(calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()));1493}14941495// TODO: This coldCallInfoIsReliable logic should be in a more1496// central place so everyone agrees on it. It shouldn't just be1497// for inliner.1498//1499bool coldCallInfoIsReliable = !cameFromArchetypeSpecimen(calltarget->_calleeMethod);15001501if (_inliner->getPolicy()->tryToInline(targetCallee, &callStack, true))1502{1503heuristicTrace(tracer(),"tryToInline filter matched %s", targetCallee->_calleeMethod->signature(comp()->trMemory()));1504}1505else1506{1507int32_t freqCutoff = 40;1508bool isColdCall = (((comp()->getMethodHotness() <= warm) && profileManager->isColdCall(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp())) || (currentBlock->getFrequency() < freqCutoff)) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL);15091510if (coldCallInfoIsReliable && isColdCall)1511{1512heuristicTrace(tracer(),"Depth %d: Skipping estimate on call %s, with count=%d and block frequency %d, because it's cold.",_recursionDepth,calleeName,profileManager->getCallGraphProfilingCount(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()), currentBlock->getFrequency());1513callSites[i]->removecalltarget(j, tracer(), Cold_Call);1514j--;1515continue;1516}15171518if (comp()->getMethodHotness() <= warm && comp()->isServerInlining() && calltarget->_calleeMethod->isWarmCallGraphTooBig(i, comp()) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))1519{1520heuristicTrace(tracer(), "Depth %d: Skipping estimate on call %s, with count=%d, because its warm call graph is too big.",1521_recursionDepth, calleeName,1522profileManager->getCallGraphProfilingCount(calltarget->_calleeMethod->getPersistentIdentifier(),i, comp())1523);1524callSites[i]->removecalltarget(j, tracer(), Cold_Call);1525j--;1526continue;1527}1528}1529}15301531//inline Native method even if it is cold as the Natives1532//are usually very small and inlining them would not hurt1533if (currentBlock->isCold() && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, callSites[i]->_callNode))1534{1535heuristicTrace(tracer(),"Depth %d: Skipping estimate on call %s, because it's in a cold block.",_recursionDepth, calleeName);1536callSites[i]->removecalltarget(j, tracer(), Cold_Block);1537j--;1538continue;1539}15401541if (_optimisticSize <= sizeThreshold) // for multiple calltargets, is this the desired behaviour?1542{1543_recursionDepth++;1544_numOfEstimatedCalls++;15451546_lastCallBlockFrequency = currentBlock->getFrequency();15471548debugTrace(tracer(),"About to call ecs on call target %p at depth %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",1549targetCallee, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);1550heuristicTrace(tracer(),"--- Depth %d: EstimateCodeSize to recursively estimate call from %s to %s",_recursionDepth, callerName, calleeName);15511552int32_t origOptimisticSize = _optimisticSize;1553int32_t origRealSize = _realSize;1554bool prevNonColdCalls = _hasNonColdCalls;1555bool estimateSuccess = estimateCodeSize(targetCallee, &callStack); //recurseDown = true1556bool calltargetSetTooBig = false;1557bool calleeHasNonColdCalls = _hasNonColdCalls;1558_hasNonColdCalls = prevNonColdCalls;// reset the bool for the parent15591560// update optimisticSize and cull candidates15611562if ((comp()->getMethodHotness() >= warm) && comp()->isServerInlining())1563{1564int32_t bigCalleeThreshold;1565int32_t freqCutoff = comp()->getMethodHotness() <= warm ?1566comp()->getOptions()->getBigCalleeFrequencyCutoffAtWarm() :1567comp()->getOptions()->getBigCalleeFrequencyCutoffAtHot();1568bool isColdCall = ((profileManager->isColdCall(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()) ||1569(currentBlock->getFrequency() <= freqCutoff)) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL));15701571if (comp()->getMethodHotness() <= warm)1572{1573bigCalleeThreshold = isColdCall ?1574comp()->getOptions()->getBigCalleeThresholdForColdCallsAtWarm():1575comp()->getOptions()->getBigCalleeThreshold();1576}1577else // above warm1578{15791580if(isColdCall)1581{1582bigCalleeThreshold = comp()->getOptions()->getBigCalleeThresholdForColdCallsAtHot();1583}1584else1585{1586if (comp()->getMethodHotness() == scorching ||1587(comp()->getMethodHotness() == veryHot && comp()->isProfilingCompilation()))1588{1589bigCalleeThreshold = comp()->getOptions()->getBigCalleeScorchingOptThreshold();1590}1591else1592{1593bigCalleeThreshold = comp()->getOptions()->getBigCalleeHotOptThreshold();1594}1595}1596}159715981599if (_optimisticSize - origOptimisticSize > bigCalleeThreshold)1600{1601///printf("set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo._byteCodeIndex);fflush(stdout);1602calltarget->_calleeMethod->setWarmCallGraphTooBig( newBCInfo.getByteCodeIndex(), comp());1603heuristicTrace(tracer(), "set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo.getByteCodeIndex());1604//_optimisticSize = origOptimisticSize;1605//_realSize = origRealSize;1606calltargetSetTooBig = true;16071608}1609}16101611if (!estimateSuccess && !calltargetSetTooBig)1612{1613int32_t estimatedSize = (_optimisticSize - origOptimisticSize);1614int32_t bytecodeSize = targetCallee->_calleeMethod->maxBytecodeIndex();1615bool inlineAnyway = false;16161617if ((_optimisticSize - origOptimisticSize) < 40)1618inlineAnyway = true;1619else if (estimatedSize < 100)1620{1621if ((estimatedSize < bytecodeSize) || ((bytecodeSize - estimatedSize)< 20))1622inlineAnyway = true;1623}16241625if (inlineAnyway && !calleeHasNonColdCalls)1626{1627_optimisticSize = origOptimisticSize;1628_realSize = origRealSize;1629}1630else if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))1631{1632calltarget->_isPartialInliningCandidate = false;1633callSites[i]->removecalltarget(j, tracer(),1634Callee_Too_Many_Bytecodes);1635_optimisticSize = origOptimisticSize;1636_realSize = origRealSize;1637calltarget->addDeadCallee(callSites[i]);1638j--;1639_numOfEstimatedCalls--;1640}16411642if(comp()->getVisitCount() > HIGH_VISIT_COUNT)1643{1644heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());1645return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);1646}1647}1648else if (calltargetSetTooBig)1649{1650_optimisticSize = origOptimisticSize;1651_realSize = origRealSize;16521653if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))1654{1655calltarget->_isPartialInliningCandidate = false;1656callSites[i]->removecalltarget(j, tracer(),1657Callee_Too_Many_Bytecodes);1658calltarget->addDeadCallee(callSites[i]);1659j--;1660_numOfEstimatedCalls--;1661}16621663if(comp()->getVisitCount() > HIGH_VISIT_COUNT)1664{1665heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());1666return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);1667}1668}16691670_recursionDepth--;1671}1672else1673{1674heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to _optimisticSize: %d > sizeThreshold: %d",_optimisticSize,sizeThreshold);1675break;1676}1677}16781679if (callSites[i]->numTargets()) //only add a callSite once, even though it may have more than one call target.1680{1681calltarget->addCallee(callSites[i]);1682heuristicTrace(tracer(), "Depth %d: Subtracting %d from optimistic and real size to account for eliminating call", _recursionDepth, bci.estimatedCodeSize());1683if (_optimisticSize > bci.estimatedCodeSize())1684_optimisticSize -= bci.estimatedCodeSize(); // subtract what we added before for the size of the call instruction1685if (_realSize > bci.estimatedCodeSize())1686_realSize -= bci.estimatedCodeSize();1687}1688}1689}16901691auto partialSizeBeforeAdjustment = calltarget->_partialSize;16921693if (adjustEstimateForStringCompression(calltarget->_calleeMethod, calltarget->_partialSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))1694{1695heuristicTrace(tracer(), "*** Depth %d: Adjusting partial size for %s because of string compression from %d to %d", _recursionDepth, callerName, partialSizeBeforeAdjustment, calltarget->_partialSize);1696}16971698if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, calltarget->_partialSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))1699{1700heuristicTrace(tracer(), "*** Depth %d: Adjusting partial size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, partialSizeBeforeAdjustment, calltarget->_partialSize);1701}17021703auto fullSizeBeforeAdjustment = calltarget->_fullSize;17041705if (adjustEstimateForStringCompression(calltarget->_calleeMethod, calltarget->_fullSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))1706{1707heuristicTrace(tracer(), "*** Depth %d: Adjusting full size for %s because of string compression from %d to %d", _recursionDepth, callerName, fullSizeBeforeAdjustment, calltarget->_fullSize);1708}17091710if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, calltarget->_fullSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))1711{1712heuristicTrace(tracer(), "*** Depth %d: Adjusting full size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, fullSizeBeforeAdjustment, calltarget->_fullSize);1713}17141715auto realSizeBeforeAdjustment = _realSize;17161717if (adjustEstimateForStringCompression(calltarget->_calleeMethod, _realSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))1718{1719heuristicTrace(tracer(), "*** Depth %d: Adjusting real size for %s because of string compression from %d to %d", _recursionDepth, callerName, realSizeBeforeAdjustment, _realSize);1720}17211722if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, _realSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))1723{1724heuristicTrace(tracer(), "*** Depth %d: Adjusting real size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, realSizeBeforeAdjustment, _realSize);1725}17261727reduceDAAWrapperCodeSize(calltarget);17281729/****************** PHASE 5: Figure out if We're really going to do a partial Inline and add whatever we do to the realSize. *******************/1730if (isPartialInliningCandidate(calltarget, &callBlocks))1731{1732if (comp()->getOption(TR_TraceBFGeneration))1733traceMsg(comp(), "Call Target %s is a partial inline Candidate with a partial size of %d",callerName,calltarget->_partialSize);17341735heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. It is a partial inline Candidate with a partial size of %d", _recursionDepth, calltarget, callerName, calltarget->_partialSize);1736_realSize += calltarget->_partialSize;1737}1738else1739{1740heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. It is a full inline Candidate with a full size of %d", _recursionDepth, calltarget, callerName, calltarget->_fullSize);1741_realSize += calltarget->_fullSize;1742}174317441745heuristicTrace(tracer(),"--- Depth %d: Checking _real size vs Size Threshold A second Time: _realSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _realSize, _sizeThreshold, sizeThreshold);17461747if (_realSize > sizeThreshold)1748{1749heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. real size exceeds Size Threshold", _recursionDepth,calltarget, callerName);1750return returnCleanup(ECS_REAL_SIZE_THRESHOLD_EXCEEDED);1751}17521753return returnCleanup(ECS_NORMAL);1754}17551756bool TR_J9EstimateCodeSize::reduceDAAWrapperCodeSize(TR_CallTarget* target)1757{1758if (target == NULL)1759return false;17601761// DAA Wrappers are basically free if intrinsics are on since all they consist of is the slow and fast paths1762if (target->_calleeMethod)1763{1764bool reduceMarshallingWrapper = target->_calleeMethod->isDAAMarshallingWrapperMethod() &&1765!comp()->getOption(TR_DisableMarshallingIntrinsics);17661767bool reducePackedDecimalWrapper = target->_calleeMethod->isDAAPackedDecimalWrapperMethod() &&1768!comp()->getOption(TR_DisableMarshallingIntrinsics);17691770if (reduceMarshallingWrapper || reducePackedDecimalWrapper)1771{1772target->_fullSize /= 5;1773target->_partialSize /= 5;17741775heuristicTrace(tracer(),"DAA: Reducing target %p fullSize to %d and partialSize to %d to increase likelyhood of successful inlining\n", target, target->_fullSize, target->_partialSize);1776return true;1777}1778}17791780return false;1781}17821783/******************1784* A graph searching algorithm. searchItem is the flag type we're looking for, searchPath is the flag type of the path we can go down1785*1786* ***************/17871788bool1789TR_J9EstimateCodeSize::graphSearch(TR::CFG *cfg, TR::Block *startBlock,1790TR::Block::partialFlags searchItem, TR::Block::partialFlags searchPath)1791{1792TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(1793cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);1794blocksVisited->empty();17951796TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());1797nodesToBeEvaluated.enqueue(startBlock);17981799do1800{1801TR::Block *currentBlock = nodesToBeEvaluated.dequeue();18021803if (blocksVisited->get(currentBlock->getNumber()))1804continue;1805blocksVisited->set(currentBlock->getNumber());18061807if (currentBlock->getPartialFlags().testAny(searchItem))1808return true;18091810for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)1811{1812TR::Block *dest = (*e)->getTo()->asBlock();1813if (dest->getPartialFlags().testAny(searchPath))1814nodesToBeEvaluated.enqueue(dest);1815}1816for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)1817{1818TR::Block *dest = (*e)->getTo()->asBlock();1819if (dest->getPartialFlags().testAny(searchPath))1820nodesToBeEvaluated.enqueue(dest);1821}1822}1823while (!nodesToBeEvaluated.isEmpty());18241825return false; //did not find the search item1826}18271828/*************************1829* A graph labelling algorithm1830* TODO: you can add size information in here1831* ***********************/1832#define MIN_PARTIAL_FREQUENCY 151833int32_t1834TR_J9EstimateCodeSize::labelGraph(TR::CFG *cfg,1835TR_Queue<TR::Block> *unsanitizeableBlocks, TR_Queue<TR::Block> *callBlocks)1836{1837TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(1838cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);1839blocksVisited->empty();18401841int32_t size = 0;1842bool hasAtLeastOneRestartBlock = false;1843TR::Block *startBlock = cfg->getStart()->asBlock();1844TR::Block *endBlock = cfg->getEnd()->asBlock();1845TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());1846TR_Queue<TR::Block> difficultNodesToBeEvaluated(comp()->trMemory());1847nodesToBeEvaluated.enqueue(endBlock);18481849TR::Block *currentBlock = NULL;18501851do1852{1853if (!nodesToBeEvaluated.isEmpty())1854currentBlock = nodesToBeEvaluated.dequeue();1855else if (!difficultNodesToBeEvaluated.isEmpty())1856currentBlock = difficultNodesToBeEvaluated.dequeue();1857else1858TR_ASSERT(0, "Neither Queue has a node left!\n");18591860if (blocksVisited->get(currentBlock->getNumber()))1861continue;1862// blocksVisited->set(currentBlock->getNumber()); // moving this downward a little!18631864if (currentBlock->getBlockSize() == -1 && (currentBlock != startBlock1865&& currentBlock != endBlock))1866TR_ASSERT(0, "labelGraph: a block does not have a valid size!\n");18671868//Part 1: Successor Test: ensure all my successors have been evaluated first and that they are not all restart blocks.18691870bool allRestarts = true;1871bool allVisited = true;1872for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)1873{1874TR::Block *dest = (*e)->getTo()->asBlock();18751876if (!blocksVisited->get(dest->getNumber()))1877{1878allVisited = false;1879break;1880}18811882if (!dest->isRestartBlock())1883{1884allRestarts = false;1885break;1886}1887}1888for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)1889{1890TR::Block *dest = (*e)->getTo()->asBlock();18911892if (!blocksVisited->get(dest->getNumber()))1893{1894allVisited = false;1895// break;1896}18971898if (dest->isPartialInlineBlock()) //(!dest->isRestartBlock())1899{1900// allRestarts=false;1901// break;1902}1903}19041905if (!allVisited && !currentBlock->isDifficultBlock())1906{19071908partialTrace(tracer(), "Requeueing block into difficult Nodes List %p %d because its successors have not been all visited \n", currentBlock, currentBlock->getNumber());1909currentBlock->setIsDifficultBlock();1910difficultNodesToBeEvaluated.enqueue(currentBlock);1911continue;1912}1913else if (currentBlock->isDifficultBlock())1914{1915//assuming all unvisited blocks are restarts.1916//which actually means doing nothing here, since I only mark allRestarts = false if I found a partial inline block.19171918blocksVisited->set(currentBlock->getNumber());19191920}1921else1922blocksVisited->set(currentBlock->getNumber());19231924//Part 2: Setting Flags on the Current Block1925int16_t minpartialfreq = MIN_PARTIAL_FREQUENCY;192619271928if (allRestarts && currentBlock != cfg->getEnd()->asBlock())1929{1930currentBlock->setRestartBlock();1931hasAtLeastOneRestartBlock = true;1932if (currentBlock->isPartialInlineBlock())1933{1934currentBlock->setPartialInlineBlock(false);1935if (currentBlock != startBlock && currentBlock != endBlock)1936{1937if (size > currentBlock->getBlockSize())1938size -= currentBlock->getBlockSize();1939}1940}1941}1942else if ((currentBlock->getFrequency() < minpartialfreq || currentBlock->isCold()) && currentBlock != startBlock && currentBlock != endBlock)1943{1944currentBlock->setRestartBlock();1945hasAtLeastOneRestartBlock = true;1946}1947else1948{1949currentBlock->setPartialInlineBlock();1950if (currentBlock != startBlock && currentBlock != endBlock)1951size += currentBlock->getBlockSize();1952}19531954if (currentBlock->isUnsanitizeable())1955unsanitizeableBlocks->enqueue(currentBlock);1956else if (currentBlock->containsCall()) //only need to enqueue it if its not unsanitizeable already1957callBlocks->enqueue(currentBlock);19581959// Part 3: Enqueue all Predecessors19601961for (auto e = currentBlock->getPredecessors().begin(); e != currentBlock->getPredecessors().end(); ++e)1962{1963TR::Block *dest = (*e)->getFrom()->asBlock();1964nodesToBeEvaluated.enqueue(dest);1965}1966for (auto e = currentBlock->getExceptionPredecessors().begin(); e != currentBlock->getExceptionPredecessors().end();1967++e)1968{1969TR::Block *dest = (*e)->getFrom()->asBlock();1970nodesToBeEvaluated.enqueue(dest);1971}19721973if (currentBlock->isRestartBlock()1974&& currentBlock->isPartialInlineBlock())1975TR_ASSERT(0, "currentBlock is both a restart block AND a partial inline block!\n");19761977}1978while (!nodesToBeEvaluated.isEmpty()1979|| !difficultNodesToBeEvaluated.isEmpty());19801981if (!hasAtLeastOneRestartBlock)1982return -1; // this means I should just do a full inline anyways1983return size;1984}1985#define MIN_PARTIAL_SIZE 10019861987bool1988TR_J9EstimateCodeSize::trimBlocksForPartialInlining(TR_CallTarget *calltarget, TR_Queue<TR::Block> *callBlocks)1989{1990TR_ASSERT(calltarget->_originatingBlock, "trimBlocksForPartialInlining: call target does not have an _originatingBlock set yet!\n");19911992if (comp()->getOption(TR_DisablePartialInlining) || calltarget->_calleeMethod->isSynchronized())1993{1994calltarget->_isPartialInliningCandidate = false;1995return false;1996}19971998TR_Queue<TR::Block> unsanitizeableBlocks(comp()->trMemory());19992000int32_t size = labelGraph(calltarget->_cfg, &unsanitizeableBlocks,2001callBlocks);20022003if (tracer()->partialLevel())2004{2005partialTrace(tracer(),"Dumping CFG for calltarget %p", calltarget);2006comp()->dumpFlowGraph(calltarget->_cfg);2007}20082009int32_t minpartialsize = MIN_PARTIAL_SIZE;20102011if (size > -1 && size + minpartialsize >= calltarget->_fullSize)2012{2013partialTrace(tracer()," Candidate partial size of %d is too close to full Size of %d to be of any benefit. Doing a full inline.",size, calltarget->_fullSize);2014}2015else if (size > -1) // a size of -1 means we didn't have any restart blocks - so no sense in doing a 'partial' inline2016{2017bool gs = true;2018while (!unsanitizeableBlocks.isEmpty())2019{2020TR::Block *aBlock = unsanitizeableBlocks.dequeue();2021if (!aBlock->isRestartBlock()) // if the unsanitizeable block is also a restart block, I don't care who it reaches.2022{2023calltarget->_originatingBlock->setIsUnsanitizeable(); // An unsanitizeable block remains in the inline20242025gs = !(graphSearch(calltarget->_cfg, aBlock,2026TR::Block::_restartBlock,2027(TR::Block::partialFlags) (TR::Block::_partialInlineBlock2028| TR::Block::_restartBlock)));2029if (!gs)2030{2031partialTrace(tracer(),"TrimBlocksForPartialInlining: Unsanitizeable block %p %d can reach a restart block.",aBlock, aBlock->getNumber());2032break;2033}2034}2035else2036partialTrace(tracer(),"TrimBlocksForPartialinlining: Unsanitizeable block %p %d is a restart block.",aBlock, aBlock->getNumber());2037}20382039if (gs)2040{2041gs = graphSearch(calltarget->_cfg,2042calltarget->_cfg->getStart()->asBlock(), TR::Block::_endBlock,2043TR::Block::_partialInlineBlock);2044if (!gs)2045{2046partialTrace(tracer(),"TrimBlocksForPartialInlining: No Complete Path from Start to End");2047}2048}20492050if (!gs)2051{2052calltarget->_isPartialInliningCandidate = false;2053return false;2054}20552056partialTrace(tracer(), "TrimBlocksForPartialInlining Found a Candidate. Setting PartialSize to %d. full size = %d",size, calltarget->_fullSize);2057calltarget->_partialSize = size;20582059return true;2060}2061else2062{2063if (!unsanitizeableBlocks.isEmpty())2064calltarget->_originatingBlock->setIsUnsanitizeable(); // A Full Inline with unsanitizeable blocks2065partialTrace(tracer(),"TrimBlocksForPartialInlining: No restart blocks found in candidate. Doing a full inline");2066}20672068calltarget->_isPartialInliningCandidate = false;2069return false;2070}20712072void2073TR_J9EstimateCodeSize::processGraph(TR_CallTarget *calltarget)2074{2075TR::CFG *cfg = calltarget->_cfg;2076calltarget->_partialInline = new (comp()->trHeapMemory()) TR_InlineBlocks(2077_inliner->fe(), _inliner->comp());2078TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(2079cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);2080blocksVisited->empty();20812082TR::Block *startBlock = cfg->getStart()->asBlock();2083TR::Block *endBlock = cfg->getEnd()->asBlock();2084TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());2085nodesToBeEvaluated.enqueue(startBlock);20862087do2088{2089TR::Block *currentBlock = nodesToBeEvaluated.dequeue();20902091if (blocksVisited->get(currentBlock->getNumber()))2092continue;2093blocksVisited->set(currentBlock->getNumber());20942095if (currentBlock != startBlock && currentBlock != endBlock)2096calltarget->_partialInline->addBlock(currentBlock);20972098for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)2099{2100TR::Block *dest = (*e)->getTo()->asBlock();2101if (dest->isPartialInlineBlock())2102nodesToBeEvaluated.enqueue(dest);2103}2104for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)2105{2106TR::Block *dest = (*e)->getTo()->asBlock();2107if (dest->isPartialInlineBlock())2108nodesToBeEvaluated.enqueue(dest);21092110calltarget->_partialInline->addExceptionBlock(dest); //only partial blocks will be processed. any exception block reachable from a partial block needs to be dealt with.2111}21122113}2114while (!nodesToBeEvaluated.isEmpty());21152116}21172118/***************************************2119* isPartialInliningCandidate()2120* Checks any call blocks as being unsanitizeable and if they can reach a restart2121* Generates the list of TR_InlineBlocks that are to be inlined.2122* ***************************************/21232124bool2125TR_J9EstimateCodeSize::isPartialInliningCandidate(TR_CallTarget *calltarget,2126TR_Queue<TR::Block> *callBlocks)2127{2128if (!calltarget->_isPartialInliningCandidate)2129return false;21302131while (!callBlocks->isEmpty())2132{2133TR::Block *callBlock = callBlocks->dequeue();21342135if (callBlock->isUnsanitizeable() && !callBlock->isRestartBlock())2136{2137calltarget->_originatingBlock->setIsUnsanitizeable();2138bool result = graphSearch(calltarget->_cfg, callBlock,2139TR::Block::_restartBlock,2140(TR::Block::partialFlags) (TR::Block::_partialInlineBlock2141| TR::Block::_restartBlock));2142if (result) // unsanitizeable block can reach a restart block2143{2144calltarget->_isPartialInliningCandidate = false;2145return false;2146}2147}21482149}21502151// we have a partial inlining candidate at this point. Now walk the graph and all P blocks to TR_InlineBlocks21522153processGraph(calltarget);21542155return true;2156}215721582159