GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/optimizer/J9EstimateCodeSize.cpp
1
/*******************************************************************************
2
* Copyright (c) 2000, 2021 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include <algorithm>
24
#include "codegen/CodeGenerator.hpp"
25
#include "compile/InlineBlock.hpp"
26
#include "compile/Method.hpp"
27
#include "compile/ResolvedMethod.hpp"
28
#if defined(J9VM_OPT_JITSERVER)
29
#include "env/j9methodServer.hpp"
30
#endif /* defined(J9VM_OPT_JITSERVER) */
31
#include "env/VMJ9.h"
32
#include "il/Node.hpp"
33
#include "il/Node_inlines.hpp"
34
#include "il/ParameterSymbol.hpp"
35
#include "il/TreeTop.hpp"
36
#include "il/TreeTop_inlines.hpp"
37
#include "optimizer/PreExistence.hpp"
38
#include "optimizer/J9CallGraph.hpp"
39
#include "optimizer/J9EstimateCodeSize.hpp"
40
#include "optimizer/InterpreterEmulator.hpp"
41
#include "ras/LogTracer.hpp"
42
#include "runtime/J9Profiler.hpp"
43
44
// Empirically determined value
45
const float TR_J9EstimateCodeSize::STRING_COMPRESSION_ADJUSTMENT_FACTOR = 0.75f;
46
47
// There was no analysis done to determine this factor. It was chosen by intuition.
48
const float TR_J9EstimateCodeSize::METHOD_INVOKE_ADJUSTMENT_FACTOR = 0.20f;
49
50
51
/*
   Macros (#define) are ugly in general, but putting
      if (tracer)
         heuristicTrace(...)
   everywhere in this class seems to be a much worse idea.
   Unfortunately, C++98 doesn't have a good way to forward varargs
   except for using a macro.
*/
59
60
#define heuristicTraceIfTracerIsNotNull(r, ...) \
61
if (r) { \
62
if ((r)->heuristicLevel()) { (r)->alwaysTraceM(__VA_ARGS__); } \
63
}
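// Illustrative sketch (not part of the original source, and not used anywhere in
// this file): if C++11 were available here, the macro above could be replaced by a
// variadic function template that forwards its arguments to the tracer. The helper
// name below is hypothetical.
#if defined(__cplusplus) && (__cplusplus >= 201103L)
template <typename... Args>
static void traceIfTracerIsNotNull(TR_InlinerTracer *tracer, Args... args)
   {
   if (tracer && tracer->heuristicLevel())
      tracer->alwaysTraceM(args...);
   }
#endif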
64
class NeedsPeekingHeuristic
65
{
66
public:
67
68
static const int default_distance = 25;
69
static const int NUM_LOADS = 4;
70
71
NeedsPeekingHeuristic(TR_CallTarget* calltarget, TR_J9ByteCodeIterator& bci, TR::ResolvedMethodSymbol* methodSymbol, TR::Compilation* comp, int d = default_distance) :
72
_hasArgumentsInfo(false),
73
_size(0),
74
_bci(bci),
75
_distance(d),
76
_numOfArgs(0),
77
_needsPeeking(false),
78
_tracer(0)
79
{
80
TR_PrexArgInfo* argInfo = calltarget->_ecsPrexArgInfo;
81
//no argInfo available for this caller
82
if (!argInfo)
83
return;
84
85
int i = 0;
86
int32_t numParms = methodSymbol->getParameterList().getSize();
87
_numOfArgs = numParms;
88
ListIterator<TR::ParameterSymbol> parmIt(&methodSymbol->getParameterList());
89
for (TR::ParameterSymbol *p = parmIt.getFirst(); p; p = parmIt.getNext(), i++)
90
{
91
int32_t len;
92
const char *sig = p->getTypeSignature(len);
93
if (i >= argInfo->getNumArgs() || //not enough slots in argInfo
94
(*sig != 'L' && *sig != 'Q') || //primitive arg
95
!argInfo->get(i) || //no arg at the i-th slot
96
!argInfo->get(i)->getClass() //no classInfo at the i-th slot
97
)
98
{
99
continue;
100
}
101
102
TR_OpaqueClassBlock *clazz = comp->fej9()->getClassFromSignature(sig, len, methodSymbol->getResolvedMethod());
103
if (!clazz)
104
{
105
continue;
106
}
107
108
TR_OpaqueClassBlock* argClass = argInfo->get(i)->getClass();
109
//findCallSiteTarget and validateAndPropagateArgsFromCalleeSymbol
110
//should take care of incompatible receivers
111
//this assertion only checks if the receiver is of the right type
112
//there's no harm in propagating other incompatible arguments
113
//as soon as one of those becomes a receiver this very same assertion
114
//should fire
115
TR_ASSERT(comp->fej9()->isInstanceOf(argClass, clazz, true, true, true) == TR_yes || i != 0 || !calltarget->_myCallSite->_isIndirectCall, "Incompatible receiver should have been handled by findCallSiteTarget");
116
117
// Even if the arg type propagated from the caller is no more specific
// than the type from the callee signature, we should still try to
// do peeking. If we don't peek here, we will lose the chance to propagate
// the type info to the call sites of this calltarget.
121
static const bool keepBogusPeekingCondition = feGetEnv("TR_DisableBogusPeekingCondition") ? false: true;
122
if ( !keepBogusPeekingCondition || clazz != argClass ) //if two classes aren't equal it follows that argClass is more specific
123
//argClass can either be equal to or a subclass of clazz
124
//see validateAndPropagateArgsFromCalleeSymbol
125
{
126
_hasArgumentsInfo = true;
127
_argInfo = argInfo;
128
}
129
130
/*
131
if (comp->fej9()->isInstanceOf (argClass, clazz, true, true, true) == TR_yes)
132
{
133
if (clazz != argClass)
134
_hasArgumentsInfo = true;
135
}
136
else
137
{
138
_hasArgumentsInfo = false;
139
return; // _hasArgumentsInfo will be equal to false and no propagation is going to happen
140
// because the incoming type information is not compatible
141
}
142
*/
143
}
144
};
145
146
void setTracer(TR_InlinerTracer *trc)
147
{
148
_tracer = trc;
149
heuristicTraceIfTracerIsNotNull(_tracer, "NeedsPeekingHeuristic is initialized with the following values: _hasArgumentsInfo = %d, NUM_LOADS = %d, _distance =%d, _needsPeeking = %d", _hasArgumentsInfo, NUM_LOADS, _distance, _needsPeeking);
150
}
151
152
void checkIfThereIsAParmLoadWithinDistance()
153
{
154
for (int i = 0; i < _size; i++)
155
{
156
if (_bci.bcIndex() - _loadIndices[i] <= _distance)
157
{
158
_needsPeeking = true;
159
heuristicTraceIfTracerIsNotNull(_tracer, "there is a parm load at %d which is within %d of a call at %d", _loadIndices[i], _distance, _bci.bcIndex());
160
}
161
}
162
};
163
164
void processByteCode()
165
{
166
if (!_hasArgumentsInfo)
167
return;
168
TR_J9ByteCode bc = _bci.current();
169
int slotIndex = -1;
170
switch (bc)
171
{
172
case J9BCaload0:
173
slotIndex = 0;
174
break;
175
case J9BCaload1:
176
slotIndex = 1;
177
break;
178
case J9BCaload2:
179
slotIndex = 2;
180
break;
181
case J9BCaload3:
182
slotIndex = 3;
183
break;
184
case J9BCaload:
185
slotIndex = _bci.nextByte();
186
TR_ASSERT(slotIndex >= 0 , "a slot shouldn't be negative");
187
break;
188
case J9BCaloadw:
189
slotIndex = _bci.next2Bytes();
190
TR_ASSERT(slotIndex >= 0 , "a slot shouldn't be negative");
191
break;
192
case J9BCinvokevirtual:
193
case J9BCinvokespecial:
194
case J9BCinvokestatic:
195
case J9BCinvokeinterface:
196
case J9BCinvokedynamic:
197
case J9BCinvokehandle:
198
case J9BCinvokehandlegeneric:
199
checkIfThereIsAParmLoadWithinDistance ();
200
default :
201
break;
202
203
}
204
205
if (slotIndex >=0)
206
{
207
processParameterLoad(slotIndex);
208
}
209
210
};
211
212
213
void processParameterLoad (int slotIndex)
214
{
215
//This heuristic simply checks whether we indeed hit a parameter load (as opposed to an auto)
//and whether we have argInfo for this slot that we would want to propagate.
//Note, _hasArgumentsInfo is checked in processByteCode;
//we should not even reach this code unless we have some PrexInfo.
219
if (slotIndex < _numOfArgs && _argInfo->get(slotIndex))
220
{
221
heuristicTraceIfTracerIsNotNull(_tracer,"came across of a load of slot %d at %d", slotIndex, _bci.bcIndex());
222
_loadIndices[_size] = _bci.bcIndex();
223
_size = (_size + 1) % NUM_LOADS;
224
}
225
}
226
bool doPeeking () { return _needsPeeking; };
227
228
protected:
229
int32_t _loadIndices [NUM_LOADS];
230
int _size;
231
int _numOfArgs;
232
int _distance;
233
TR_J9ByteCodeIterator& _bci;
234
bool _hasArgumentsInfo;
235
TR_PrexArgInfo * _argInfo;
236
bool _needsPeeking;
237
TR_InlinerTracer * _tracer;
238
239
};
240
#undef heuristicTraceIfTracerIsNotNull
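// Illustrative sketch (not part of the original source): how the estimator below
// drives NeedsPeekingHeuristic -- construct it once per call target, feed it every
// bytecode while iterating, then ask doPeeking() when deciding whether to generate
// IL for peeking. The helper name is hypothetical and nothing in this file calls it.
static bool exampleDriveNeedsPeekingHeuristic(TR_CallTarget *calltarget,
                                              TR_J9ByteCodeIterator &bci,
                                              TR::ResolvedMethodSymbol *methodSymbol,
                                              TR::Compilation *comp)
   {
   NeedsPeekingHeuristic nph(calltarget, bci, methodSymbol, comp);
   for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown; bc = bci.next())
      nph.processByteCode();
   return nph.doPeeking();
   }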
241
242
void
243
TR_J9EstimateCodeSize::setupNode(TR::Node *node, uint32_t bcIndex,
244
TR_ResolvedMethod *feMethod, TR::Compilation *comp)
245
{
246
node->getByteCodeInfo().setDoNotProfile(0);
247
node->setByteCodeIndex(bcIndex);
248
node->setInlinedSiteIndex(-10);
249
node->setMethod(feMethod->getPersistentIdentifier());
250
}
251
252
253
TR::Block *
254
TR_J9EstimateCodeSize::getBlock(TR::Compilation *comp, TR::Block * * blocks,
255
TR_ResolvedMethod *feMethod, int32_t i, TR::CFG & cfg)
256
{
257
if (!blocks[i])
258
{
259
260
TR::TreeTop *startTree = TR::TreeTop::create(comp, TR::Node::create(
261
NULL, TR::BBStart, 0));
262
TR::TreeTop *endTree = TR::TreeTop::create(comp, TR::Node::create(
263
NULL, TR::BBEnd, 0));
264
265
startTree->join(endTree);
266
blocks[i] = TR::Block::createBlock(startTree, endTree, cfg);
267
268
blocks[i]->setBlockBCIndex(i);
269
blocks[i]->setNumber(cfg.getNextNodeNumber());
270
271
setupNode(startTree->getNode(), i, feMethod, comp);
272
setupNode(endTree->getNode(), i, feMethod, comp);
273
cfg.addNode(blocks[i]);
274
}
275
276
return blocks[i];
277
}
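// Illustrative usage note (not part of the original source): blocks[] is indexed by
// bytecode index, so a forward branch may request its target block before iteration
// reaches that index, e.g.
//    TR::Block *target = getBlock(comp, blocks, feMethod, i + bci.relativeBranch(), cfg);
// getBlock() lazily creates the placeholder block on first request and returns the
// same block again when the target index is finally visited.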
278
279
static TR::ILOpCodes convertBytecodeToIL (TR_J9ByteCode bc)
280
{
281
switch (bc)
282
{
283
case J9BCifeq: return TR::ificmpeq;
284
case J9BCifne: return TR::ificmpne;
285
case J9BCiflt: return TR::ificmplt;
286
case J9BCifge: return TR::ificmpge;
287
case J9BCifgt: return TR::ificmpgt;
288
case J9BCifle: return TR::ificmple;
289
case J9BCifnull: return TR::ifacmpeq;
290
case J9BCifnonnull: return TR::ifacmpne;
291
case J9BCificmpeq: return TR::ificmpeq;
292
case J9BCificmpne: return TR::ificmpne;
293
case J9BCificmplt: return TR::ificmplt;
294
case J9BCificmpge: return TR::ificmpge;
295
case J9BCificmpgt: return TR::ificmpgt;
296
case J9BCificmple: return TR::ificmple;
297
case J9BCifacmpeq: return TR::ifacmpeq;
298
case J9BCifacmpne: return TR::ifacmpne;
299
case J9BCtableswitch: return TR::table;
300
case J9BClookupswitch: return TR::lookup;
301
case J9BCgoto:
302
case J9BCgotow: return TR::Goto;
303
case J9BCReturnC: /* fall-through */
304
case J9BCReturnS: /* fall-through */
305
case J9BCReturnB: /* fall-through */
306
case J9BCReturnZ: /* fall-through */
307
case J9BCgenericReturn: return TR::Return;
308
case J9BCathrow: return TR::athrow;
309
default:
310
TR_ASSERT(0,"Unsupported conversion for now.");
311
return TR::BadILOp;
312
}
313
return TR::BadILOp;
314
}
315
316
void
317
TR_J9EstimateCodeSize::setupLastTreeTop(TR::Block *currentBlock, TR_J9ByteCode bc,
318
uint32_t bcIndex, TR::Block *destinationBlock, TR_ResolvedMethod *feMethod,
319
TR::Compilation *comp)
320
{
321
TR::Node *node = TR::Node::createOnStack(NULL, convertBytecodeToIL(bc), 0);
322
TR::TreeTop *tree = TR::TreeTop::create(comp, node);
323
setupNode(node, bcIndex, feMethod, comp);
324
if (node->getOpCode().isBranch())
325
node->setBranchDestination(destinationBlock->getEntry());
326
currentBlock->append(tree);
327
}
328
329
330
//Partial Inlining
331
bool
332
TR_J9EstimateCodeSize::isInExceptionRange(TR_ResolvedMethod * feMethod,
333
int32_t bcIndex)
334
{
335
int32_t numExceptionRanges = feMethod->numberOfExceptionHandlers();
336
337
if (numExceptionRanges == 0)
338
return false;
339
340
int32_t start, end, catchtype;
341
342
for (int32_t i = 0; i < numExceptionRanges; i++)
343
{
344
feMethod->exceptionData(i, &start, &end, &catchtype);
345
if (bcIndex > start && bcIndex < end)
346
return true;
347
}
348
return false;
349
}
350
351
352
static bool cameFromArchetypeSpecimen(TR_ResolvedMethod *method)
353
{
354
if (!method)
355
return false; // end of recursion
356
else if (method->convertToMethod()->isArchetypeSpecimen())
357
return true; // Archetypes often call methods that are never called until the archetype is compiled
358
else
359
return cameFromArchetypeSpecimen(method->owningMethod());
360
}
361
362
bool
363
TR_J9EstimateCodeSize::adjustEstimateForStringCompression(TR_ResolvedMethod* method, int32_t& value, float factor)
364
{
365
const uint16_t classNameLength = method->classNameLength();
366
367
if ((classNameLength == 16 && !strncmp(method->classNameChars(), "java/lang/String", classNameLength)) ||
368
(classNameLength == 22 && !strncmp(method->classNameChars(), "java/lang/StringBuffer", classNameLength)) ||
369
(classNameLength == 23 && !strncmp(method->classNameChars(), "java/lang/StringBuilder", classNameLength)))
370
{
371
// A statistical analysis of the number of places certain methods got inlined yielded results which suggest that the
// following recognized methods incur several percent worth of compile-time increase at no benefit to throughput.
// As such we can save additional compile-time by not making adjustments to these methods.
374
375
if (method->getRecognizedMethod() != TR::java_lang_String_regionMatches &&
376
method->getRecognizedMethod() != TR::java_lang_String_regionMatches_bool &&
377
method->getRecognizedMethod() != TR::java_lang_String_equals)
378
{
379
value *= factor;
380
381
return true;
382
}
383
}
384
385
return false;
386
}
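// Worked example (illustrative, not part of the original source): for a
// java/lang/String callee that is not one of the excluded regionMatches/equals
// variants, a size estimate of 100 becomes (int32_t)(100 * 0.75f) = 75, making
// compressed-string methods look cheaper to the inliner.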
387
388
/** \details
 * The `Method.invoke` API contains a call to the `Reflect.getCallerClass()` API which, when executed, triggers a
 * stack walking operation. Performance-wise this is quite expensive. The `Reflect.getCallerClass()` API returns
 * the class of the method which called `Method.invoke`, so if we can promote inlining of `Method.invoke` we can
 * replace the `Reflect.getCallerClass()` call with a simple load, thus avoiding the expensive stack walk.
 */
394
bool
395
TR_J9EstimateCodeSize::adjustEstimateForMethodInvoke(TR_ResolvedMethod* method, int32_t& value, float factor)
396
{
397
if (method->getRecognizedMethod() == TR::java_lang_reflect_Method_invoke)
398
{
399
static const char *factorOverrideChars = feGetEnv("TR_MethodInvokeInlinerFactor");
400
static const int32_t factorOverride = (factorOverrideChars != NULL) ? atoi(factorOverrideChars) : 0;
401
if (factorOverride != 0)
402
{
403
factor = 1.0f / static_cast<float>(factorOverride);
404
}
405
406
value *= factor;
407
408
return true;
409
}
410
411
return false;
412
}
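// Worked example (illustrative, not part of the original source): with the default
// METHOD_INVOKE_ADJUSTMENT_FACTOR of 0.20f, a Method.invoke estimate of 500 becomes
// (int32_t)(500 * 0.20f) = 100. Setting the environment variable
// TR_MethodInvokeInlinerFactor=10 overrides the factor to 1.0f / 10 = 0.1f, so the
// same estimate becomes 50.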
413
414
bool
415
TR_J9EstimateCodeSize::estimateCodeSize(TR_CallTarget *calltarget, TR_CallStack *prevCallStack, bool recurseDown)
416
{
417
if (realEstimateCodeSize(calltarget, prevCallStack, recurseDown, comp()->trMemory()->currentStackRegion()))
418
{
419
if (_isLeaf && _realSize > 1)
420
{
421
heuristicTrace(tracer(),"Subtracting 1 from sizes because _isLeaf is true");
422
--_realSize;
423
--_optimisticSize;
424
}
425
return true;
426
}
427
428
return false;
429
}
430
431
TR::CFG&
432
TR_J9EstimateCodeSize::processBytecodeAndGenerateCFG(TR_CallTarget *calltarget, TR::Region &cfgRegion, TR_J9ByteCodeIterator& bci, NeedsPeekingHeuristic &nph, TR::Block** blocks, flags8_t * flags)
433
{
434
435
char nameBuffer[1024];
436
const char *callerName = NULL;
437
if (tracer()->heuristicLevel())
438
callerName = comp()->fej9()->sampleSignature(
439
calltarget->_calleeMethod->getPersistentIdentifier(), nameBuffer,
440
1024, comp()->trMemory());
441
442
int size = calltarget->_myCallSite->_isIndirectCall ? 5 : 0;
443
444
int32_t maxIndex = bci.maxByteCodeIndex() + 5;
445
446
int32_t *bcSizes = (int32_t *) comp()->trMemory()->allocateStackMemory(
447
maxIndex * sizeof(int32_t));
448
memset(bcSizes, 0, maxIndex * sizeof(int32_t));
449
450
bool blockStart = true;
451
452
bool thisOnStack = false;
453
bool hasThisCalls = false;
454
bool foundNewAllocation = false;
455
456
bool unresolvedSymbolsAreCold = comp()->notYetRunMeansCold();
457
458
TR_ByteCodeInfo newBCInfo;
459
newBCInfo.setDoNotProfile(0);
460
if (_mayHaveVirtualCallProfileInfo)
461
newBCInfo.setCallerIndex(comp()->getCurrentInlinedSiteIndex());
462
463
// PHASE 1: Bytecode Iteration
464
465
bool callExists = false;
466
size = calltarget->_myCallSite->_isIndirectCall ? 5 : 0;
467
TR_J9ByteCode bc = bci.first(), nextBC;
468
469
#if defined(J9VM_OPT_JITSERVER)
470
if (comp()->isOutOfProcessCompilation())
471
{
472
// JITServer optimization:
473
// request this resolved method to create all of its callee resolved methods
474
// in a single query.
475
//
476
// If the method is unresolved, return NULL for 2 requests without asking the client,
477
// since they are called almost immediately after this request and are unlikely to
478
// become resolved.
479
//
480
// NOTE: first request occurs in the for loop over bytecodes, immediately after this request,
481
// second request occurs in InterpreterEmulator::findAndCreateCallsitesFromBytecodes
482
auto calleeMethod = static_cast<TR_ResolvedJ9JITServerMethod *>(calltarget->_calleeMethod);
483
calleeMethod->cacheResolvedMethodsCallees(2);
484
}
485
#endif /* defined(J9VM_OPT_JITSERVER) */
486
487
for (; bc != J9BCunknown; bc = bci.next())
488
{
489
nph.processByteCode();
490
TR_ResolvedMethod * resolvedMethod;
491
int32_t cpIndex;
492
bool isVolatile, isPrivate, isUnresolvedInCP, resolved;
493
TR::DataType type = TR::NoType;
494
void * staticAddress;
495
uint32_t fieldOffset;
496
497
newBCInfo.setByteCodeIndex(bci.bcIndex());
498
int32_t i = bci.bcIndex();
499
500
if (blockStart) //&& calltarget->_calleeSymbol)
501
{
502
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
503
blockStart = false;
504
foundNewAllocation = false;
505
}
506
507
if (bc == J9BCgenericReturn ||
508
bc == J9BCReturnC ||
509
bc == J9BCReturnS ||
510
bc == J9BCReturnB ||
511
bc == J9BCReturnZ)
512
{
513
if (!calltarget->_calleeMethod->isSynchronized())
514
size += 1;
515
else
516
size += bci.estimatedCodeSize();
517
}
518
else
519
size += bci.estimatedCodeSize();
520
521
switch (bc)
522
{
523
case J9BCificmpeq:
524
case J9BCificmpne:
525
case J9BCificmplt:
526
case J9BCificmpge:
527
case J9BCificmpgt:
528
case J9BCificmple:
529
case J9BCifacmpeq:
530
case J9BCifacmpne:
531
case J9BCifnull:
532
case J9BCifnonnull:
533
case J9BCifeq:
534
case J9BCifne:
535
case J9BCiflt:
536
case J9BCifge:
537
case J9BCifgt:
538
case J9BCifle:
539
case J9BCgoto:
540
case J9BCgotow:
541
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);
542
flags[i + bci.relativeBranch()].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
543
blockStart = true;
544
break;
545
case J9BCReturnC:
546
case J9BCReturnS:
547
case J9BCReturnB:
548
case J9BCReturnZ:
549
case J9BCgenericReturn:
550
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);
551
blockStart = true;
552
break;
553
case J9BCnew:
554
case J9BCnewarray:
555
case J9BCanewarray:
556
case J9BCmultianewarray:
557
if (calltarget->_calleeSymbol)
558
foundNewAllocation = true;
559
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
560
break;
561
case J9BCathrow:
562
_foundThrow = true;
563
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);
564
blockStart = true;
565
if (!_aggressivelyInlineThrows)
566
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
567
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
568
break;
569
case J9BCtableswitch:
570
{
571
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);
572
int32_t index = bci.defaultTargetIndex();
573
flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
574
int32_t low = bci.nextSwitchValue(index);
575
int32_t high = bci.nextSwitchValue(index) - low + 1;
576
for (int32_t j = 0; j < high; ++j)
577
flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
578
blockStart = true;
579
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
580
break;
581
}
582
case J9BClookupswitch:
583
{
584
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isBranch);
585
int32_t index = bci.defaultTargetIndex();
586
flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
587
int32_t tableSize = bci.nextSwitchValue(index);
588
for (int32_t j = 0; j < tableSize; ++j)
589
{
590
index += 4; // match value
591
flags[i + bci.nextSwitchValue(index)].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
592
}
593
blockStart = true;
594
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
595
break;
596
}
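// Layout sketch (illustrative, not part of the original source): the lookupswitch
// operand area is <default offset> <npairs> followed by npairs of <match, offset>
// pairs, each value 4 bytes wide; hence the index += 4 above skips a match value so
// that the following nextSwitchValue(index) reads the corresponding branch offset.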
597
case J9BCinvokevirtual:
598
{
599
if (thisOnStack)
600
hasThisCalls = true;
601
cpIndex = bci.next2Bytes();
602
auto calleeMethod = (TR_ResolvedJ9Method*)calltarget->_calleeMethod;
603
resolvedMethod = calleeMethod->getResolvedPossiblyPrivateVirtualMethod(comp(), cpIndex, true, &isUnresolvedInCP);
604
605
///if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), true))
606
if ((isUnresolvedInCP && !resolvedMethod) || (resolvedMethod
607
&& resolvedMethod->isCold(comp(), true)))
608
{
609
610
if(tracer()->heuristicLevel())
611
{
612
if(resolvedMethod)
613
{
614
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));
615
}
616
else
617
{
618
TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);
619
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));
620
}
621
}
622
if (unresolvedSymbolsAreCold)
623
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
624
_isLeaf = false;
625
}
626
}
627
628
callExists = true;
629
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
630
break;
631
case J9BCinvokespecial:
632
case J9BCinvokespecialsplit:
633
{
634
if (thisOnStack)
635
hasThisCalls = true;
636
cpIndex = bci.next2Bytes();
637
resolvedMethod = calltarget->_calleeMethod->getResolvedSpecialMethod(comp(), (bc == J9BCinvokespecialsplit)?cpIndex |= J9_SPECIAL_SPLIT_TABLE_INDEX_FLAG:cpIndex, &isUnresolvedInCP);
638
bool isIndirectCall = false;
639
bool isInterface = false;
640
TR::Method *interfaceMethod = 0;
641
TR::TreeTop *callNodeTreeTop = 0;
642
TR::Node *parent = 0;
643
TR::Node *callNode = 0;
644
TR::ResolvedMethodSymbol *resolvedSymbol = 0;
645
if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), false))
646
{
647
if(tracer()->heuristicLevel())
648
{
649
if(resolvedMethod)
650
{
651
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));
652
}
653
else
654
{
655
if (bc == J9BCinvokespecialsplit)
656
cpIndex |= J9_SPECIAL_SPLIT_TABLE_INDEX_FLAG;
657
TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);
658
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));
659
}
660
}
661
if (unresolvedSymbolsAreCold)
662
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
663
_isLeaf = false;
664
}
665
}
666
callExists = true;
667
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
668
break;
669
case J9BCinvokestatic:
670
case J9BCinvokestaticsplit:
671
{
672
cpIndex = bci.next2Bytes();
673
resolvedMethod = calltarget->_calleeMethod->getResolvedStaticMethod(comp(), (bc == J9BCinvokestaticsplit)?cpIndex |= J9_STATIC_SPLIT_TABLE_INDEX_FLAG:cpIndex, &isUnresolvedInCP);
674
bool isIndirectCall = false;
675
bool isInterface = false;
676
TR::Method *interfaceMethod = 0;
677
TR::TreeTop *callNodeTreeTop = 0;
678
TR::Node *parent = 0;
679
TR::Node *callNode = 0;
680
TR::ResolvedMethodSymbol *resolvedSymbol = 0;
681
if (!resolvedMethod || isUnresolvedInCP || resolvedMethod->isCold(comp(), false))
682
{
683
if (unresolvedSymbolsAreCold)
684
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
685
if(tracer()->heuristicLevel())
686
{
687
if(resolvedMethod)
688
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(resolvedMethod));
689
else
690
{
691
if (bc == J9BCinvokestaticsplit)
692
cpIndex |= J9_STATIC_SPLIT_TABLE_INDEX_FLAG;
693
TR::Method *meth = comp()->fej9()->createMethod(comp()->trMemory(), calltarget->_calleeMethod->containingClass(), cpIndex);
694
heuristicTrace(tracer(), "Depth %d: Call at bc index %d is Cold. Not searching for targets. Signature %s",_recursionDepth,i,tracer()->traceSignature(meth));
695
}
696
}
697
}
698
}
699
callExists = true;
700
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
701
break;
702
case J9BCinvokeinterface:
703
cpIndex = bci.next2Bytes();
704
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
705
break;
706
case J9BCgetfield:
707
resolved = calltarget->_calleeMethod->fieldAttributes(comp(), bci.next2Bytes(), &fieldOffset, &type, &isVolatile, 0, &isPrivate, false, &isUnresolvedInCP, false);
708
if (!resolved || isUnresolvedInCP)
709
{
710
if (unresolvedSymbolsAreCold)
711
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
712
if (!resolved)
713
_isLeaf = false;
714
}
715
if (isInExceptionRange(calltarget->_calleeMethod, i))
716
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
717
break;
718
case J9BCputfield:
719
resolved = calltarget->_calleeMethod->fieldAttributes(comp(), bci.next2Bytes(), &fieldOffset, &type, &isVolatile, 0, &isPrivate, true, &isUnresolvedInCP, false);
720
if (!resolved || isUnresolvedInCP)
721
{
722
if (unresolvedSymbolsAreCold)
723
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
724
if (!resolved)
725
_isLeaf = false;
726
}
727
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
728
break;
729
case J9BCgetstatic:
730
resolved = calltarget->_calleeMethod->staticAttributes(comp(), bci.next2Bytes(), &staticAddress, &type, &isVolatile, 0, &isPrivate, false, &isUnresolvedInCP, false);
731
if (!resolved || isUnresolvedInCP)
732
{
733
if (unresolvedSymbolsAreCold)
734
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
735
if (!resolved)
736
_isLeaf = false;
737
}
738
if (isInExceptionRange(calltarget->_calleeMethod, i))
739
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
740
break;
741
case J9BCputstatic:
742
resolved = calltarget->_calleeMethod->staticAttributes(comp(), bci.next2Bytes(), &staticAddress, &type, &isVolatile, 0, &isPrivate, true, &isUnresolvedInCP, false);
743
if (!resolved || isUnresolvedInCP)
744
{
745
if (unresolvedSymbolsAreCold)
746
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isCold);
747
if (!resolved)
748
_isLeaf = false;
749
}
750
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
751
break;
752
case J9BCaload0:
753
if (calltarget->_myCallSite->_isIndirectCall)
754
thisOnStack = true;
755
break;
756
case J9BCiastore:
757
case J9BClastore:
758
case J9BCfastore:
759
case J9BCdastore:
760
case J9BCaastore:
761
case J9BCbastore:
762
case J9BCcastore:
763
case J9BCsastore: //array stores can change the global state - hence unsanitizeable
764
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
765
break;
766
case J9BCiaload:
767
case J9BClaload:
768
case J9BCfaload:
769
case J9BCdaload:
770
case J9BCaaload:
771
case J9BCbaload:
772
case J9BCcaload:
773
case J9BCsaload:
774
case J9BCarraylength: //array accesses are ok as long as we don't catch exceptions
775
case J9BCidiv:
776
case J9BCldiv:
777
case J9BCfdiv:
778
case J9BCddiv:
779
case J9BCirem:
780
case J9BClrem:
781
case J9BCfrem:
782
case J9BCdrem:
783
case J9BCcheckcast:
784
case J9BCinstanceof:
785
case J9BCasyncCheck:
786
if (isInExceptionRange(calltarget->_calleeMethod, i))
787
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
788
break;
789
case J9BCinvokedynamic:
790
case J9BCinvokehandle:
791
case J9BCinvokehandlegeneric:
792
// TODO:JSR292: Use getResolvedHandleMethod
793
case J9BCmonitorenter:
794
case J9BCmonitorexit:
795
case J9BCunknown:
796
flags[i].set(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable);
797
break;
798
default:
799
break;
800
}
801
802
if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable))
803
debugTrace(tracer(),"BC at index %d is unsanitizeable.", i);
804
else if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isCold))
805
debugTrace(tracer(),"BC at index %d is cold.", i);
806
else
807
debugTrace(tracer(),"BC iteration at index %d.", i); //only print this index if we are debugging
808
809
bcSizes[i] = size;
810
}
811
812
auto sizeBeforeAdjustment = size;
813
814
if (adjustEstimateForStringCompression(calltarget->_calleeMethod, size, STRING_COMPRESSION_ADJUSTMENT_FACTOR))
815
{
816
heuristicTrace(tracer(), "*** Depth %d: Adjusting size for %s because of string compression from %d to %d", _recursionDepth, callerName, sizeBeforeAdjustment, size);
817
}
818
819
if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, size, METHOD_INVOKE_ADJUSTMENT_FACTOR))
820
{
821
heuristicTrace(tracer(), "*** Depth %d: Adjusting size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, sizeBeforeAdjustment, size);
822
}
823
824
calltarget->_fullSize = size;
825
826
if (calltarget->_calleeSymbol)
827
{
828
TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "inliner/%s/estimatedBytecodeSize/%d", calltarget->_calleeSymbol->signature(comp()->trMemory()), calltarget->_fullSize));
829
}
830
831
/********* PHASE 2: Generate CFG **********/
832
833
heuristicTrace(tracer(),"--- Done Iterating over Bytecodes in call to %s. size = %d _recursionDepth = %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",callerName, size, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);
834
835
if (hasThisCalls && calltarget->_calleeSymbol)
836
calltarget->_calleeSymbol->setHasThisCalls(true);
837
838
839
TR_Array<TR_J9ByteCodeIterator::TryCatchInfo> tryCatchInfo(
840
comp()->trMemory(),
841
calltarget->_calleeMethod->numberOfExceptionHandlers(), true,
842
stackAlloc);
843
844
int32_t i;
845
for (i = calltarget->_calleeMethod->numberOfExceptionHandlers() - 1; i
846
>= 0; --i)
847
{
848
int32_t start, end, type;
849
int32_t handler = calltarget->_calleeMethod->exceptionData(i, &start,
850
&end, &type);
851
852
flags[start].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
853
flags[end + 1].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
854
flags[handler].set(InterpreterEmulator::BytecodePropertyFlag::bbStart);
855
856
tryCatchInfo[i].initialize((uint16_t) start, (uint16_t) end,
857
(uint16_t) handler, (uint32_t) type);
858
}
859
860
calltarget->_cfg = new (cfgRegion) TR::CFG(comp(), calltarget->_calleeSymbol, cfgRegion);
861
TR::CFG &cfg = *(calltarget->_cfg);
862
cfg.setStartAndEnd(TR::Block::createBlock(
863
TR::TreeTop::create(comp(), TR::Node::create(NULL,
864
TR::BBStart, 0)), TR::TreeTop::create(comp(),
865
TR::Node::create(NULL, TR::BBEnd, 0)),
866
cfg), TR::Block::createBlock(
867
TR::TreeTop::create(comp(), TR::Node::create(NULL,
868
TR::BBStart, 0)), TR::TreeTop::create(comp(),
869
TR::Node::create(NULL, TR::BBEnd, 0)),
870
cfg));
871
872
cfg.getStart()->asBlock()->getEntry()->join(
873
cfg.getStart()->asBlock()->getExit());
874
cfg.getEnd()->asBlock()->getEntry()->join(
875
cfg.getEnd()->asBlock()->getExit());
876
cfg.getStart()->setNumber(cfg.getNextNodeNumber());
877
cfg.allocateNodeNumber();
878
cfg.getEnd()->setNumber(cfg.getNextNodeNumber());
879
cfg.allocateNodeNumber();
880
881
cfg.getEnd()->asBlock()->setIsEndBlock();
882
883
TR::Block * currentBlock = cfg.getStart()->asBlock();
884
currentBlock->setBlockBCIndex(0);
885
886
int32_t endNodeIndex = bci.maxByteCodeIndex() - 1;
887
if (endNodeIndex < 0)
888
{
889
debugTrace(tracer(), "MaxByteCodeIndex <= 0, setting BC index for end node to 0.");
890
endNodeIndex = 0;
891
}
892
893
setupNode(cfg.getStart()->asBlock()->getEntry()->getNode(), 0,
894
calltarget->_calleeMethod, comp());
895
setupNode(cfg.getStart()->asBlock()->getExit()->getNode(), 0,
896
calltarget->_calleeMethod, comp());
897
setupNode(cfg.getEnd()->asBlock()->getEntry()->getNode(),
898
endNodeIndex, calltarget->_calleeMethod, comp());
899
setupNode(cfg.getEnd()->asBlock()->getExit()->getNode(),
900
endNodeIndex, calltarget->_calleeMethod, comp());
901
902
903
debugTrace(tracer(),"PECS: startblock %p %d endblock %p %d",cfg.getStart()->asBlock(), cfg.getStart()->getNumber(), cfg.getEnd()->asBlock(), cfg.getEnd()->getNumber());
904
905
bool addFallThruEdge = true;
906
907
debugTrace(tracer(),"PECS: iterating over bc indexes in CFG creation. maxIndex =%d", maxIndex);
908
int32_t blockStartSize = 0;
909
int32_t startIndex = 0;
910
for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown; bc = bci.next())
911
{
912
int32_t i = bci.bcIndex();
913
if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::bbStart))
914
{
915
debugTrace(tracer(),"Calling getBlock. blocks[%d] = %p", i, blocks[i]);
916
TR::Block * newBlock = getBlock(comp(), blocks,
917
calltarget->_calleeMethod, i, cfg);
918
919
if (i != startIndex)
920
{
921
currentBlock->setBlockSize(bcSizes[i] - blockStartSize);
922
if (cfg.getMethodSymbol())
923
cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());
924
}
925
926
if (addFallThruEdge)
927
{
928
debugTrace(tracer(),"adding a fallthrough edge between block %p %d and %p %d", currentBlock, currentBlock->getNumber(), newBlock, newBlock->getNumber());
929
debugTrace(tracer(),"joining nodes between blocks %p %d and %p %d", currentBlock, currentBlock->getNumber(), newBlock, newBlock->getNumber());
930
currentBlock->getExit()->join(newBlock->getEntry());
931
cfg.addEdge(currentBlock, newBlock);
932
}
933
else
934
{
935
addFallThruEdge = true;
936
}
937
currentBlock = newBlock;
938
939
startIndex = i;
940
blockStartSize = bcSizes[i];
941
}
942
943
if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isCold))
944
{
945
partialTrace(tracer(), "Setting block %p[%d] blocks[%d]=%p as cold because bytecode %d was identified as cold",currentBlock, currentBlock->getNumber(), i, blocks[i], i);
946
currentBlock->setIsCold();
947
currentBlock->setFrequency(0);
948
}
949
if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isUnsanitizeable))
950
{
951
partialTrace(tracer(), "Setting unsanitizeable flag on block %p[%d] blocks[%d]=%p",currentBlock, currentBlock->getNumber(), i, blocks[i]);
952
currentBlock->setIsUnsanitizeable();
953
}
954
955
if (flags[i].testAny(InterpreterEmulator::BytecodePropertyFlag::isBranch))
956
{
957
if (startIndex != i)
958
{
959
currentBlock->setBlockSize(bcSizes[i] - blockStartSize);
960
if (cfg.getMethodSymbol())
961
cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());
962
}
963
else
964
{
965
currentBlock->setBlockSize(1); // if startIndex is the same as the current index then the block consists only of a branch
966
if (cfg.getMethodSymbol())
967
cfg.getMethodSymbol()->addProfilingOffsetInfo(currentBlock->getEntry()->getNode()->getByteCodeIndex(), currentBlock->getEntry()->getNode()->getByteCodeIndex() + currentBlock->getBlockSize());
968
}
969
970
switch (bc)
971
{
972
case J9BCificmpeq:
973
case J9BCificmpne:
974
case J9BCificmplt:
975
case J9BCificmpge:
976
case J9BCificmpgt:
977
case J9BCificmple:
978
case J9BCifacmpeq:
979
case J9BCifacmpne:
980
case J9BCifeq:
981
case J9BCifne:
982
case J9BCiflt:
983
case J9BCifge:
984
case J9BCifgt:
985
case J9BCifle:
986
case J9BCifnull:
987
case J9BCifnonnull:
988
{
989
debugTrace(tracer(),"if branch.i = %d adding edge between blocks %p %d and %p %d",
990
i, currentBlock, currentBlock->getNumber(), getBlock(comp(), blocks, calltarget->_calleeMethod, i+ bci.relativeBranch(), cfg),
991
getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg)->getNumber());
992
993
setupLastTreeTop(currentBlock, bc, i, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg), calltarget->_calleeMethod, comp());
994
cfg.addEdge(currentBlock, getBlock(comp(), blocks,
995
calltarget->_calleeMethod, i + bci.relativeBranch(),
996
cfg));
997
addFallThruEdge = true;
998
break;
999
}
1000
case J9BCgoto:
1001
case J9BCgotow:
1002
setupLastTreeTop(currentBlock, bc, i, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg), calltarget->_calleeMethod, comp());
1003
cfg.addEdge(currentBlock, getBlock(comp(), blocks, calltarget->_calleeMethod, i + bci.relativeBranch(), cfg));
1004
addFallThruEdge = false;
1005
break;
1006
case J9BCReturnC:
1007
case J9BCReturnS:
1008
case J9BCReturnB:
1009
case J9BCReturnZ:
1010
case J9BCgenericReturn:
1011
case J9BCathrow:
1012
setupLastTreeTop(currentBlock, bc, i, cfg.getEnd()->asBlock(), calltarget->_calleeMethod, comp());
1013
cfg.addEdge(currentBlock, cfg.getEnd());
1014
addFallThruEdge = false;
1015
break;
1016
case J9BCtableswitch:
1017
{
1018
int32_t index = bci.defaultTargetIndex();
1019
TR::Block *defaultBlock = getBlock(comp(), blocks,
1020
calltarget->_calleeMethod, i + bci.nextSwitchValue(
1021
index), cfg);
1022
setupLastTreeTop(currentBlock, bc, i, defaultBlock,
1023
calltarget->_calleeMethod, comp());
1024
cfg.addEdge(currentBlock, defaultBlock);
1025
int32_t low = bci.nextSwitchValue(index);
1026
int32_t high = bci.nextSwitchValue(index) - low + 1;
1027
for (int32_t j = 0; j < high; ++j)
1028
cfg.addEdge(currentBlock, getBlock(comp(), blocks,
1029
calltarget->_calleeMethod, i + bci.nextSwitchValue(
1030
index), cfg));
1031
addFallThruEdge = false;
1032
break;
1033
}
1034
case J9BClookupswitch:
1035
{
1036
int32_t index = bci.defaultTargetIndex();
1037
TR::Block *defaultBlock = getBlock(comp(), blocks,
1038
calltarget->_calleeMethod, i + bci.nextSwitchValue(
1039
index), cfg);
1040
setupLastTreeTop(currentBlock, bc, i, defaultBlock,
1041
calltarget->_calleeMethod, comp());
1042
cfg.addEdge(currentBlock, defaultBlock);
1043
int32_t tableSize = bci.nextSwitchValue(index);
1044
for (int32_t j = 0; j < tableSize; ++j)
1045
{
1046
index += 4; // match value
1047
cfg.addEdge(currentBlock, getBlock(comp(), blocks,
1048
calltarget->_calleeMethod, i + bci.nextSwitchValue(
1049
index), cfg));
1050
}
1051
addFallThruEdge = false;
1052
break;
1053
}
1054
default:
1055
break;
1056
}
1057
}
1058
// printf("Iterating through sizes array. bcSizes[%d] = %d maxIndex = %d\n",i,bcSizes[i],maxIndex);
1059
}
1060
1061
for (i = 0; i < (int32_t) tryCatchInfo.size(); ++i)
1062
{
1063
TR_J9ByteCodeIterator::TryCatchInfo * handlerInfo = &tryCatchInfo[i];
1064
1065
blocks[handlerInfo->_handlerIndex]->setHandlerInfoWithOutBCInfo(
1066
handlerInfo->_catchType, 0, handlerInfo->_handlerIndex,
1067
calltarget->_calleeMethod, comp());
1068
1069
for (int32_t j = handlerInfo->_startIndex; j <= handlerInfo->_endIndex; ++j)
1070
if (blocks[j])
1071
cfg.addExceptionEdge(blocks[j], blocks[handlerInfo->_handlerIndex]);
1072
}
1073
1074
1075
1076
1077
return cfg;
1078
}
1079
1080
bool
1081
TR_J9EstimateCodeSize::realEstimateCodeSize(TR_CallTarget *calltarget, TR_CallStack *prevCallStack, bool recurseDown, TR::Region &cfgRegion)
1082
{
1083
TR_ASSERT(calltarget->_calleeMethod, "assertion failure");
1084
1085
heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- calltarget = %p , _ecsPrexArgInfo = %p",
1086
_recursionDepth, calltarget, calltarget->_ecsPrexArgInfo);
1087
1088
1089
1090
if (tracer()->heuristicLevel() && calltarget->_ecsPrexArgInfo)
1091
{
1092
heuristicTrace(tracer(), "ECS CSI -- ArgInfo :");
1093
calltarget->_ecsPrexArgInfo->dumpTrace();
1094
}
1095
1096
TR_InlinerDelimiter delimiter(tracer(), "realEstimateCodeSize");
1097
1098
if (calltarget->_calleeMethod->numberOfExceptionHandlers() > 0)
1099
_hasExceptionHandlers = true;
1100
1101
if (_aggressivelyInlineThrows)
1102
{
1103
TR_CatchBlockProfileInfo * catchInfo = TR_CatchBlockProfileInfo::get(comp(), calltarget->_calleeMethod);
1104
if (catchInfo)
1105
_throwCount += catchInfo->getThrowCounter();
1106
}
1107
1108
//TR::Compilation * comp = _inliner->comp();
1109
1110
char nameBuffer[1024];
1111
const char *callerName = NULL;
1112
if (tracer()->heuristicLevel())
1113
callerName = comp()->fej9()->sampleSignature(
1114
calltarget->_calleeMethod->getPersistentIdentifier(), nameBuffer,
1115
1024, comp()->trMemory());
1116
1117
heuristicTrace(tracer(),
1118
"*** Depth %d: ECS to begin for target %p signature %s size assuming we can partially inline (optimistic size) = %d total real size so far = %d sizeThreshold %d",
1119
_recursionDepth, calltarget, callerName, _optimisticSize, _realSize,
1120
_sizeThreshold);
1121
1122
TR_ByteCodeInfo newBCInfo;
1123
newBCInfo.setDoNotProfile(0);
1124
TR::ResolvedMethodSymbol* methodSymbol = TR::ResolvedMethodSymbol::create(comp()->trHeapMemory(), calltarget->_calleeMethod, comp());
1125
if (_mayHaveVirtualCallProfileInfo)
1126
{
1127
if (!comp()->incInlineDepth(methodSymbol, calltarget->_myCallSite->_bcInfo, 0, NULL, !calltarget->_myCallSite->_isIndirectCall))
1128
{
1129
return false; //this is intentional:
//calling returnCleanup here would result in an assertion
//because incInlineDepth didn't do anything
1132
}
1133
1134
1135
newBCInfo.setCallerIndex(comp()->getCurrentInlinedSiteIndex());
1136
}
1137
1138
if( comp()->getVisitCount() > HIGH_VISIT_COUNT )
1139
{
1140
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());
1141
return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);
1142
}
1143
1144
if (_recursionDepth > MAX_ECS_RECURSION_DEPTH)
1145
{
1146
calltarget->_isPartialInliningCandidate = false;
1147
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. Exceeded Recursion Depth", _recursionDepth, calltarget, callerName);
1148
return returnCleanup(ECS_RECURSION_DEPTH_THRESHOLD_EXCEEDED);
1149
}
1150
1151
InterpreterEmulator bci(calltarget, methodSymbol, static_cast<TR_J9VMBase *> (comp()->fej9()), comp(), tracer(), this);
1152
1153
int32_t maxIndex = bci.maxByteCodeIndex() + 5;
1154
1155
flags8_t * flags = (flags8_t *) comp()->trMemory()->allocateStackMemory(
1156
maxIndex * sizeof(flags8_t));
1157
memset(flags, 0, maxIndex * sizeof(flags8_t));
1158
1159
TR_CallSite * * callSites =
1160
(TR_CallSite * *) comp()->trMemory()->allocateStackMemory(maxIndex
1161
* sizeof(TR_CallSite *));
1162
memset(callSites, 0, maxIndex * sizeof(TR_CallSite *));
1163
1164
bool unresolvedSymbolsAreCold = comp()->notYetRunMeansCold();
1165
1166
TR_CallStack callStack(comp(), 0, calltarget->_calleeMethod, prevCallStack, 0);
1167
1168
TR_PrexArgInfo* argsFromSymbol = TR_PrexArgInfo::buildPrexArgInfoForMethodSymbol(methodSymbol, tracer());
1169
1170
if (!TR_PrexArgInfo::validateAndPropagateArgsFromCalleeSymbol(argsFromSymbol, calltarget->_ecsPrexArgInfo, tracer()))
1171
{
1172
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. Incompatible arguments", _recursionDepth, calltarget, callerName);
1173
return returnCleanup(ECS_ARGUMENTS_INCOMPATIBLE);
1174
}
1175
1176
NeedsPeekingHeuristic nph(calltarget, bci, methodSymbol, comp());
1177
//this might be a little bit too verbose, so let's hide the heuristic's output behind this env var
1178
static char *traceNeedsPeeking = feGetEnv("TR_traceNeedsPeekingHeuristic");
1179
if (traceNeedsPeeking)
1180
{
1181
nph.setTracer(tracer());
1182
}
1183
1184
bool wasPeekingSuccessfull = false;
1185
1186
const static bool debugMHInlineWithOutPeeking = feGetEnv("TR_DebugMHInlineWithOutPeeking") ? true: false;
1187
bool mhInlineWithPeeking = comp()->getOption(TR_DisableMHInlineWithoutPeeking);
1188
const static bool disableMethodHandleInliningAfterFirstPass = feGetEnv("TR_DisableMethodHandleInliningAfterFirstPass") ? true: false;
1189
bool inlineArchetypeSpecimen = calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen() &&
1190
(!disableMethodHandleInliningAfterFirstPass || _inliner->firstPass());
1191
bool inlineLambdaFormGeneratedMethod = comp()->fej9()->isLambdaFormGeneratedMethod(calltarget->_calleeMethod) &&
1192
(!disableMethodHandleInliningAfterFirstPass || _inliner->firstPass());
1193
1194
// No need to peek LF methods, as we'll always interpret the method with state in order to propagate object info
// through bytecodes to find call targets
1196
if (!inlineLambdaFormGeneratedMethod &&
1197
((nph.doPeeking() && recurseDown) ||
1198
(inlineArchetypeSpecimen && mhInlineWithPeeking)))
1199
{
1200
1201
heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- needsPeeking is true for calltarget %p",
1202
_recursionDepth, calltarget);
1203
1204
bool ilgenSuccess = (NULL != methodSymbol->getResolvedMethod()->genMethodILForPeekingEvenUnderMethodRedefinition(methodSymbol, comp(), false, NULL));
1205
if (ilgenSuccess)
1206
{
1207
heuristicTrace(tracer(), "*** Depth %d: ECS CSI -- peeking was successfull for calltarget %p", _recursionDepth, calltarget);
1208
_inliner->getUtil()->clearArgInfoForNonInvariantArguments(calltarget->_ecsPrexArgInfo, methodSymbol, tracer());
1209
wasPeekingSuccessfull = true;
1210
}
1211
}
1212
else if (inlineArchetypeSpecimen && !mhInlineWithPeeking && debugMHInlineWithOutPeeking)
1213
{
1214
traceMsg(comp(), "printing out trees and bytecodes through peeking because DebugMHInlineWithOutPeeking is on\n");
1215
methodSymbol->getResolvedMethod()->genMethodILForPeekingEvenUnderMethodRedefinition(methodSymbol, comp(), false, NULL);
1216
}
1217
1218
TR::Block * * blocks =
1219
(TR::Block * *) comp()->trMemory()->allocateStackMemory(maxIndex
1220
* sizeof(TR::Block *));
1221
memset(blocks, 0, maxIndex * sizeof(TR::Block *));
1222
1223
TR::CFG &cfg = processBytecodeAndGenerateCFG(calltarget, cfgRegion, bci, nph, blocks, flags);
1224
int size = calltarget->_fullSize;
1225
1226
// Adjust call frequency for unknown or direct calls, for which we don't get profiling information
1227
//
1228
TR_ValueProfileInfoManager * profileManager = TR_ValueProfileInfoManager::get(comp());
1229
bool callGraphEnabled = !comp()->getOption(TR_DisableCallGraphInlining);//profileManager->isCallGraphProfilingEnabled(comp());
1230
if (!_inliner->firstPass() || inlineArchetypeSpecimen || inlineLambdaFormGeneratedMethod)
1231
callGraphEnabled = false; // TODO: Work out why this doesn't function properly on subsequent passes
1232
if (callGraphEnabled && recurseDown)
1233
{
1234
TR_OpaqueMethodBlock *method = calltarget->_myCallSite->_callerResolvedMethod->getPersistentIdentifier();
1235
uint32_t bcIndex = calltarget->_myCallSite->_bcInfo.getByteCodeIndex();
1236
int32_t callCount = profileManager->getCallGraphProfilingCount(method,
1237
bcIndex, comp());
1238
cfg._calledFrequency = callCount;
1239
1240
if (callCount <= 0 && _lastCallBlockFrequency > 0)
1241
cfg._calledFrequency = _lastCallBlockFrequency;
1242
1243
heuristicTrace(tracer(),
1244
"Depth %d: Setting called count for caller index %d, bytecode index %d of %d", _recursionDepth,
1245
calltarget->_myCallSite->_bcInfo.getCallerIndex(),
1246
calltarget->_myCallSite->_bcInfo.getByteCodeIndex(), callCount);
1247
}
1248
else if (callGraphEnabled)
1249
{
1250
cfg._calledFrequency = 10000;
1251
}
1252
1253
cfg.propagateColdInfo(callGraphEnabled); // propagate coldness but also generate frequency information
1254
// for blocks if call graph profiling is enabled
1255
1256
if (tracer()->heuristicLevel())
1257
{
1258
heuristicTrace(tracer(), "After propagating the coldness info\n");
1259
heuristicTrace(tracer(), "<cfg>");
1260
for (TR::CFGNode* node = cfg.getFirstNode(); node; node = node->getNext())
1261
{
1262
comp()->findOrCreateDebug()->print(comp()->getOutFile(), node, 6);
1263
}
1264
heuristicTrace(tracer(), "</cfg>");
1265
}
1266
1267
bool callsitesAreCreatedFromTrees = false;
1268
if (wasPeekingSuccessfull
1269
&& comp()->getOrCreateKnownObjectTable()
1270
&& calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen())
1271
{
1272
TR::Block *currentInlinedBlock = NULL;
1273
// call sites in method handle thunks are created from trees so skip bci.findAndCreateCallsitesFromBytecodes below
1274
callsitesAreCreatedFromTrees = true;
1275
TR::NodeChecklist visited(comp());
1276
for (TR::TreeTop* tt = methodSymbol->getFirstTreeTop(); tt; tt = tt->getNextTreeTop())
1277
{
1278
if (tt->getNode()->getOpCodeValue() == TR::BBStart)
1279
/*
1280
* TODO: we should use the proper block with correct block frequency info
1281
* but profiling for method handle thunks doesn't work yet
1282
*/
1283
currentInlinedBlock = tt->getEnclosingBlock();
1284
1285
if (tt->getNode()->getNumChildren()>0 &&
1286
tt->getNode()->getFirstChild()->getOpCode().isCall())
1287
{
1288
TR::Node* parent = tt->getNode();
1289
TR::Node* callNode = tt->getNode()->getFirstChild();
1290
TR::SymbolReference* symRef = callNode->getSymbolReference();
1291
if (!callNode->getSymbolReference()->isUnresolved() && !visited.contains(callNode) &&
1292
!callSites[callNode->getByteCodeIndex()]) // skip if the callsite has already been created for this byte code index
1293
{
1294
int i = callNode->getByteCodeIndex();
1295
visited.add(callNode);
1296
TR_ResolvedMethod* resolvedMethod = callNode->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod();
1297
TR::RecognizedMethod rm = resolvedMethod->getRecognizedMethod();
1298
1299
TR_CallSite *callsite = TR_CallSite::create(tt, parent, callNode,
1300
resolvedMethod->classOfMethod(), symRef, resolvedMethod,
1301
comp(), comp()->trMemory() , heapAlloc, calltarget->_calleeMethod, _recursionDepth, false);
1302
1303
TR_PrexArgInfo *argInfo = calltarget->_ecsPrexArgInfo;
1304
1305
callsite->_callerBlock = currentInlinedBlock;
1306
if (isInlineable(&callStack, callsite))
1307
{
1308
callSites[i] = callsite;
1309
bci._inlineableCallExists = true;
1310
1311
if (!currentInlinedBlock->isCold())
1312
_hasNonColdCalls = true;
1313
for (int j = 0; j < callSites[i]->numTargets(); j++)
1314
callSites[i]->getTarget(j)->_originatingBlock = currentInlinedBlock;
1315
}
1316
else
1317
{
1318
//support counters
1319
calltarget->addDeadCallee(callsite);
1320
}
1321
1322
// clearing the node generated by peeking ilgen
1323
// _callNode will be filled with node generated by actual ilgen @see TR_InlinerBase::findAndUpdateCallSiteInGraph
1324
callsite->_callNode = NULL;
1325
}
1326
}
1327
}
1328
}
1329
1330
if (!callsitesAreCreatedFromTrees)
1331
{
1332
bci.prepareToFindAndCreateCallsites(blocks, flags, callSites, &cfg, &newBCInfo, _recursionDepth, &callStack);
1333
bool iteratorWithState = (inlineArchetypeSpecimen && !mhInlineWithPeeking) || inlineLambdaFormGeneratedMethod;
1334
1335
if (!bci.findAndCreateCallsitesFromBytecodes(wasPeekingSuccessfull, iteratorWithState))
1336
{
1337
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. bci.findAndCreateCallsitesFromBytecode failed", _recursionDepth, calltarget, callerName);
1338
return returnCleanup(ECS_CALLSITES_CREATION_FAILED);
1339
}
1340
_hasNonColdCalls = bci._nonColdCallExists;
1341
}
1342
1343
if (comp()->isServerInlining())
1344
{
1345
int coldCode = 0;
1346
int executedCode = 0;
1347
bool isCold = false;
1348
int coldBorderFrequency = 20;
1349
1350
for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown; bc = bci.next())
1351
{
1352
int32_t i = bci.bcIndex();
1353
if (blocks[i])
1354
{
1355
if (!blocks[i]->isCold() && blocks[i]->getFrequency() > coldBorderFrequency)
1356
isCold = false;
1357
else
1358
isCold = true;
1359
}
1360
1361
if (isCold)
1362
coldCode++;
1363
else
1364
executedCode++;
1365
}
1366
1367
if (executedCode != 0)
1368
{
1369
float ratio = ((float) executedCode) / ((float) (coldCode
1370
+ executedCode));
1371
1372
if (recurseDown)
1373
{
1374
if (ratio < 0.7f)
1375
{
1376
ratio = 0.7f;
1377
}
1378
}
1379
else
1380
{
1381
if (ratio < 0.1f)
1382
{
1383
ratio = 0.1f;
1384
}
1385
}
1386
1387
calltarget->_fullSize = (int) ((float) calltarget->_fullSize * ratio);
1388
heuristicTrace(tracer(),"Depth %d: Opt Server is reducing size of call to %d",_recursionDepth,calltarget->_fullSize);
1389
}
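// Worked example (illustrative, not part of the original source): if 40 of 100
// bytecode indexes land in cold or low-frequency blocks, ratio = 60/100 = 0.6;
// when recursing down it is clamped up to 0.7, so a _fullSize of 200 is reduced
// to (int)(200 * 0.7f) = 140 rather than 120.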
1390
}
1391
else if (_inliner->getPolicy()->aggressiveSmallAppOpts())
1392
{
1393
TR_J9InlinerPolicy *j9inlinerPolicy = (TR_J9InlinerPolicy *) _inliner->getPolicy();
1394
if (j9inlinerPolicy->aggressivelyInlineInLoops() && calltarget && calltarget->_calleeMethod && strncmp(calltarget->_calleeMethod->classNameChars(),"java/math/BigDecimal",calltarget->_calleeMethod->classNameLength())!=0)
1395
{
1396
if ((callStack._inALoop) &&
1397
(calltarget->_fullSize > 10))
1398
{
1399
calltarget->_fullSize = 10;
1400
heuristicTrace(tracer(),"Opt Server is reducing size of call to %d",calltarget->_fullSize);
1401
}
1402
}
1403
else
1404
heuristicTrace(tracer(),"Omitting Big Decimal method from size readjustment, calltarget = %p calleemethod = %p",calltarget,calltarget ? calltarget->_calleeMethod : 0);
1405
}
1406
1407
if (_inliner->forceInline(calltarget))
1408
{
1409
calltarget->_fullSize = 0;
1410
calltarget->_partialSize = 0;
1411
}
1412
1413
1414
/*************** PHASE 3: Optimistically Assume we can partially inline calltarget and add to an optimisticSize ******************/
1415
1416
TR_Queue<TR::Block> callBlocks(comp()->trMemory());
1417
bool isCandidate = trimBlocksForPartialInlining(calltarget, &callBlocks);
1418
1419
switch (calltarget->_calleeMethod->getRecognizedMethod())
1420
{
1421
case TR::java_util_HashMap_get:
1422
case TR::java_util_HashMap_findNonNullKeyEntry:
1423
calltarget->_isPartialInliningCandidate = false;
1424
isCandidate = false;
1425
break;
1426
default:
1427
break;
1428
}
1429
1430
if (isCandidate)
1431
_optimisticSize += calltarget->_partialSize;
1432
else
1433
_optimisticSize += calltarget->_fullSize;
1434
1435
int32_t sizeThreshold = _sizeThreshold;
1436
if (isCandidate)
1437
sizeThreshold = std::max(4096, sizeThreshold);
1438
///if(_optimisticSize > _sizeThreshold) // even optimistically we've blown our budget
1439
heuristicTrace(tracer(),"--- Depth %d: Checking Optimistic size vs Size Threshold: _optimisticSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _optimisticSize, _sizeThreshold, sizeThreshold);
1440
1441
if (_optimisticSize > sizeThreshold) // even optimistically we've blown our budget
1442
{
1443
calltarget->_isPartialInliningCandidate = false;
1444
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. optimisticSize exceeds Size Threshold", _recursionDepth, calltarget, callerName);
1445
return returnCleanup(ECS_OPTIMISTIC_SIZE_THRESHOLD_EXCEEDED);
1446
}
1447
1448
if (!recurseDown)
1449
{
1450
heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. recurseDown set to false. size = %d _fullSize = %d", _recursionDepth, calltarget, callerName, size, calltarget->_fullSize);
1451
return returnCleanup(ECS_NORMAL);
1452
}
1453
1454
/****************** Phase 4: Deal with Inlineable Calls **************************/
1455
TR::Block *currentBlock = NULL;
1456
for (TR_J9ByteCode bc = bci.first(); bc != J9BCunknown && bci._inlineableCallExists; bc = bci.next())
1457
{
1458
int32_t i = bci.bcIndex();
1459
//heuristicTrace(tracer(),"--- Depth %d: Checking _real size vs Size Threshold: _realSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _realSize, _sizeThreshold, sizeThreshold);
1460
1461
if (_realSize > sizeThreshold)
1462
{
1463
heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. real size %d exceeds sizeThreshold %d", _recursionDepth,calltarget, callerName,_realSize,sizeThreshold);
1464
return returnCleanup(ECS_REAL_SIZE_THRESHOLD_EXCEEDED);
1465
}
1466
1467
if (blocks[i])
1468
currentBlock = blocks[i];
1469
1470
newBCInfo.setByteCodeIndex(i);
1471
if (callSites[i])
1472
{
1473
callSites[i]->setDepth(_recursionDepth);
1474
debugTrace(tracer(),"Found a call at bytecode %d, depth = %d", i, _recursionDepth);
1475
1476
// TODO: Investigate if we should add BigAppOpts opts here
1477
for (int32_t j = 0; j < callSites[i]->numTargets(); j++)
1478
{
1479
TR_CallTarget *targetCallee = callSites[i]->getTarget(j);
1480
1481
char nameBuffer[1024];
1482
const char *calleeName = NULL;
1483
if (tracer()->heuristicLevel())
1484
calleeName = comp()->fej9()->sampleSignature(targetCallee->_calleeMethod->getPersistentIdentifier(), nameBuffer, 1024, comp()->trMemory());
1485
1486
if (callGraphEnabled && !currentBlock->isCold())
1487
{
1488
// if call-graph profiling is enabled and the call is special or static (!indirect)
1489
// then update the block frequency information because we don't profile predictable calls
1490
if (!callSites[i]->isIndirectCall())
1491
{
1492
profileManager->updateCallGraphProfilingCount( currentBlock, calltarget->_calleeMethod->getPersistentIdentifier(), i, comp());
1493
heuristicTrace(tracer(),"Depth %d: Updating Call Graph Profiling Count for calltarget %p count = %d",_recursionDepth, calltarget,profileManager->getCallGraphProfilingCount(calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()));
1494
}
1495
1496
// TODO: This coldCallInfoIsReliable logic should be in a more
1497
// central place so everyone agrees on it. It shouldn't just be
1498
// for the inliner.
1499
//
1500
bool coldCallInfoIsReliable = !cameFromArchetypeSpecimen(calltarget->_calleeMethod);
1501
1502
if (_inliner->getPolicy()->tryToInline(targetCallee, &callStack, true))
1503
{
1504
heuristicTrace(tracer(),"tryToInline filter matched %s", targetCallee->_calleeMethod->signature(comp()->trMemory()));
1505
}
1506
else
1507
{
1508
int32_t freqCutoff = 40;
1509
bool isColdCall = (((comp()->getMethodHotness() <= warm) && profileManager->isColdCall(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp())) || (currentBlock->getFrequency() < freqCutoff)) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL);
1510
1511
if (coldCallInfoIsReliable && isColdCall)
1512
{
1513
heuristicTrace(tracer(),"Depth %d: Skipping estimate on call %s, with count=%d and block frequency %d, because it's cold.",_recursionDepth,calleeName,profileManager->getCallGraphProfilingCount(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()), currentBlock->getFrequency());
1514
callSites[i]->removecalltarget(j, tracer(), Cold_Call);
1515
j--;
1516
continue;
1517
}
1518
1519
if (comp()->getMethodHotness() <= warm && comp()->isServerInlining() && calltarget->_calleeMethod->isWarmCallGraphTooBig(i, comp()) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))
1520
{
1521
heuristicTrace(tracer(), "Depth %d: Skipping estimate on call %s, with count=%d, because its warm call graph is too big.",
1522
_recursionDepth, calleeName,
1523
profileManager->getCallGraphProfilingCount(calltarget->_calleeMethod->getPersistentIdentifier(),i, comp())
1524
);
1525
callSites[i]->removecalltarget(j, tracer(), Cold_Call);
1526
j--;
1527
continue;
1528
}
1529
}
1530
}
1531
1532
// Inline a native method even if it is cold, since natives
1533
// are usually very small and inlining them would not hurt
1534
if (currentBlock->isCold() && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, callSites[i]->_callNode))
1535
{
1536
heuristicTrace(tracer(),"Depth %d: Skipping estimate on call %s, because it's in a cold block.",_recursionDepth, calleeName);
1537
callSites[i]->removecalltarget(j, tracer(), Cold_Block);
1538
j--;
1539
continue;
1540
}
1541
1542
if (_optimisticSize <= sizeThreshold) // for multiple calltargets, is this the desired behaviour?
1543
{
1544
_recursionDepth++;
1545
_numOfEstimatedCalls++;
1546
1547
_lastCallBlockFrequency = currentBlock->getFrequency();
1548
1549
debugTrace(tracer(),"About to call ecs on call target %p at depth %d _optimisticSize = %d _realSize = %d _sizeThreshold = %d",
1550
targetCallee, _recursionDepth, _optimisticSize, _realSize, _sizeThreshold);
1551
heuristicTrace(tracer(),"--- Depth %d: EstimateCodeSize to recursively estimate call from %s to %s",_recursionDepth, callerName, calleeName);
1552
1553
int32_t origOptimisticSize = _optimisticSize;
1554
int32_t origRealSize = _realSize;
1555
bool prevNonColdCalls = _hasNonColdCalls;
1556
bool estimateSuccess = estimateCodeSize(targetCallee, &callStack); //recurseDown = true
1557
bool calltargetSetTooBig = false;
1558
bool calleeHasNonColdCalls = _hasNonColdCalls;
1559
_hasNonColdCalls = prevNonColdCalls; // reset the bool for the parent
1560
1561
// update optimisticSize and cull candidates
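// Overview of the threshold selection below (server inlining at warm or above): a
// "big callee" threshold is picked from the options based on the caller's hotness
// and on whether the call looks cold (profiling or low block frequency). At warm,
// cold calls use getBigCalleeThresholdForColdCallsAtWarm() and the rest
// getBigCalleeThreshold(); above warm, cold calls use
// getBigCalleeThresholdForColdCallsAtHot(), scorching (or veryHot profiling)
// compiles use getBigCalleeScorchingOptThreshold(), and the rest
// getBigCalleeHotOptThreshold(). If the callee's estimated growth exceeds the chosen
// threshold, the bytecode index is marked warmCallGraphTooBig and the target is
// treated as too big further down.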
1562
1563
if ((comp()->getMethodHotness() >= warm) && comp()->isServerInlining())
1564
{
1565
int32_t bigCalleeThreshold;
1566
int32_t freqCutoff = comp()->getMethodHotness() <= warm ?
1567
comp()->getOptions()->getBigCalleeFrequencyCutoffAtWarm() :
1568
comp()->getOptions()->getBigCalleeFrequencyCutoffAtHot();
1569
bool isColdCall = ((profileManager->isColdCall(targetCallee->_calleeMethod->getPersistentIdentifier(), calltarget->_calleeMethod->getPersistentIdentifier(), i, comp()) ||
1570
(currentBlock->getFrequency() <= freqCutoff)) && !_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL));
1571
1572
if (comp()->getMethodHotness() <= warm)
1573
{
1574
bigCalleeThreshold = isColdCall ?
1575
comp()->getOptions()->getBigCalleeThresholdForColdCallsAtWarm():
1576
comp()->getOptions()->getBigCalleeThreshold();
1577
}
1578
else // above warm
1579
{
1580
1581
if(isColdCall)
1582
{
1583
bigCalleeThreshold = comp()->getOptions()->getBigCalleeThresholdForColdCallsAtHot();
1584
}
1585
else
1586
{
1587
if (comp()->getMethodHotness() == scorching ||
1588
(comp()->getMethodHotness() == veryHot && comp()->isProfilingCompilation()))
1589
{
1590
bigCalleeThreshold = comp()->getOptions()->getBigCalleeScorchingOptThreshold();
1591
}
1592
else
1593
{
1594
bigCalleeThreshold = comp()->getOptions()->getBigCalleeHotOptThreshold();
1595
}
1596
}
1597
}
1598
1599
1600
if (_optimisticSize - origOptimisticSize > bigCalleeThreshold)
1601
{
1602
///printf("set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo._byteCodeIndex);fflush(stdout);
1603
calltarget->_calleeMethod->setWarmCallGraphTooBig( newBCInfo.getByteCodeIndex(), comp());
1604
heuristicTrace(tracer(), "set warmcallgraphtoobig for method %s at index %d\n", calleeName, newBCInfo.getByteCodeIndex());
1605
//_optimisticSize = origOptimisticSize;
1606
//_realSize = origRealSize;
1607
calltargetSetTooBig = true;
1608
1609
}
1610
}
1611
1612
if (!estimateSuccess && !calltargetSetTooBig)
1613
{
1614
int32_t estimatedSize = (_optimisticSize - origOptimisticSize);
1615
int32_t bytecodeSize = targetCallee->_calleeMethod->maxBytecodeIndex();
1616
bool inlineAnyway = false;
1617
1618
if ((_optimisticSize - origOptimisticSize) < 40)
1619
inlineAnyway = true;
1620
else if (estimatedSize < 100)
1621
{
1622
if ((estimatedSize < bytecodeSize) || ((bytecodeSize - estimatedSize) < 20))
1623
inlineAnyway = true;
1624
}
1625
1626
if (inlineAnyway && !calleeHasNonColdCalls)
1627
{
1628
_optimisticSize = origOptimisticSize;
1629
_realSize = origRealSize;
1630
}
1631
else if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))
1632
{
1633
calltarget->_isPartialInliningCandidate = false;
1634
callSites[i]->removecalltarget(j, tracer(),
1635
Callee_Too_Many_Bytecodes);
1636
_optimisticSize = origOptimisticSize;
1637
_realSize = origRealSize;
1638
calltarget->addDeadCallee(callSites[i]);
1639
j--;
1640
_numOfEstimatedCalls--;
1641
}
1642
1643
if(comp()->getVisitCount() > HIGH_VISIT_COUNT)
1644
{
1645
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());
1646
return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);
1647
}
1648
}
1649
else if (calltargetSetTooBig)
1650
{
1651
_optimisticSize = origOptimisticSize;
1652
_realSize = origRealSize;
1653
1654
if (!_inliner->alwaysWorthInlining(targetCallee->_calleeMethod, NULL))
1655
{
1656
calltarget->_isPartialInliningCandidate = false;
1657
callSites[i]->removecalltarget(j, tracer(),
1658
Callee_Too_Many_Bytecodes);
1659
calltarget->addDeadCallee(callSites[i]);
1660
j--;
1661
_numOfEstimatedCalls--;
1662
}
1663
1664
if(comp()->getVisitCount() > HIGH_VISIT_COUNT)
1665
{
1666
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to high comp()->getVisitCount() of %d",_recursionDepth,comp()->getVisitCount());
1667
return returnCleanup(ECS_VISITED_COUNT_THRESHOLD_EXCEEDED);
1668
}
1669
}
1670
1671
_recursionDepth--;
1672
}
1673
else
1674
{
1675
heuristicTrace(tracer(),"Depth %d: estimateCodeSize aborting due to _optimisticSize: %d > sizeThreshold: %d",_optimisticSize,sizeThreshold);
1676
break;
1677
}
1678
}
1679
1680
if (callSites[i]->numTargets()) //only add a callSite once, even though it may have more than one call target.
1681
{
1682
calltarget->addCallee(callSites[i]);
1683
heuristicTrace(tracer(), "Depth %d: Subtracting %d from optimistic and real size to account for eliminating call", _recursionDepth, bci.estimatedCodeSize());
1684
if (_optimisticSize > bci.estimatedCodeSize())
1685
_optimisticSize -= bci.estimatedCodeSize(); // subtract what we added before for the size of the call instruction
1686
if (_realSize > bci.estimatedCodeSize())
1687
_realSize -= bci.estimatedCodeSize();
1688
}
1689
}
1690
}
1691
1692
auto partialSizeBeforeAdjustment = calltarget->_partialSize;
1693
1694
if (adjustEstimateForStringCompression(calltarget->_calleeMethod, calltarget->_partialSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))
1695
{
1696
heuristicTrace(tracer(), "*** Depth %d: Adjusting partial size for %s because of string compression from %d to %d", _recursionDepth, callerName, partialSizeBeforeAdjustment, calltarget->_partialSize);
1697
}
1698
1699
if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, calltarget->_partialSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))
1700
{
1701
heuristicTrace(tracer(), "*** Depth %d: Adjusting partial size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, partialSizeBeforeAdjustment, calltarget->_partialSize);
1702
}
1703
1704
auto fullSizeBeforeAdjustment = calltarget->_fullSize;
1705
1706
if (adjustEstimateForStringCompression(calltarget->_calleeMethod, calltarget->_fullSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))
1707
{
1708
heuristicTrace(tracer(), "*** Depth %d: Adjusting full size for %s because of string compression from %d to %d", _recursionDepth, callerName, fullSizeBeforeAdjustment, calltarget->_fullSize);
1709
}
1710
1711
if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, calltarget->_fullSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))
1712
{
1713
heuristicTrace(tracer(), "*** Depth %d: Adjusting full size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, fullSizeBeforeAdjustment, calltarget->_fullSize);
1714
}
1715
1716
auto realSizeBeforeAdjustment = _realSize;
1717
1718
if (adjustEstimateForStringCompression(calltarget->_calleeMethod, _realSize, STRING_COMPRESSION_ADJUSTMENT_FACTOR))
1719
{
1720
heuristicTrace(tracer(), "*** Depth %d: Adjusting real size for %s because of string compression from %d to %d", _recursionDepth, callerName, realSizeBeforeAdjustment, _realSize);
1721
}
1722
1723
if (adjustEstimateForMethodInvoke(calltarget->_calleeMethod, _realSize, METHOD_INVOKE_ADJUSTMENT_FACTOR))
1724
{
1725
heuristicTrace(tracer(), "*** Depth %d: Adjusting real size for %s because of java/lang/reflect/Method.invoke from %d to %d", _recursionDepth, callerName, realSizeBeforeAdjustment, _realSize);
1726
}
1727
1728
reduceDAAWrapperCodeSize(calltarget);
1729
1730
/****************** PHASE 5: Figure out if We're really going to do a partial Inline and add whatever we do to the realSize. *******************/
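// Overview of the phase below: isPartialInliningCandidate() makes the final decision
// on partial inlining and builds the list of blocks to inline; _realSize is then
// charged with either the partial or the full size and checked against the size
// threshold one last time.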
1731
if (isPartialInliningCandidate(calltarget, &callBlocks))
1732
{
1733
if (comp()->getOption(TR_TraceBFGeneration))
1734
traceMsg(comp(), "Call Target %s is a partial inline Candidate with a partial size of %d",callerName,calltarget->_partialSize);
1735
1736
heuristicTrace(tracer(), "*** Depth %d: ECS end for target %p signature %s. It is a partial inline Candidate with a partial size of %d", _recursionDepth, calltarget, callerName, calltarget->_partialSize);
1737
_realSize += calltarget->_partialSize;
1738
}
1739
else
1740
{
1741
heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. It is a full inline Candidate with a full size of %d", _recursionDepth, calltarget, callerName, calltarget->_fullSize);
1742
_realSize += calltarget->_fullSize;
1743
}
1744
1745
1746
heuristicTrace(tracer(),"--- Depth %d: Checking _real size vs Size Threshold A second Time: _realSize %d _sizeThreshold %d sizeThreshold %d ",_recursionDepth, _realSize, _sizeThreshold, sizeThreshold);
1747
1748
if (_realSize > sizeThreshold)
1749
{
1750
heuristicTrace(tracer(),"*** Depth %d: ECS end for target %p signature %s. real size exceeds Size Threshold", _recursionDepth,calltarget, callerName);
1751
return returnCleanup(ECS_REAL_SIZE_THRESHOLD_EXCEEDED);
1752
}
1753
1754
return returnCleanup(ECS_NORMAL);
1755
}
1756
1757
bool TR_J9EstimateCodeSize::reduceDAAWrapperCodeSize(TR_CallTarget* target)
1758
{
1759
if (target == NULL)
1760
return false;
1761
1762
// DAA Wrappers are basically free if intrinsics are on since all they consist of is the slow and fast paths
1763
if (target->_calleeMethod)
1764
{
1765
bool reduceMarshallingWrapper = target->_calleeMethod->isDAAMarshallingWrapperMethod() &&
1766
!comp()->getOption(TR_DisableMarshallingIntrinsics);
1767
1768
bool reducePackedDecimalWrapper = target->_calleeMethod->isDAAPackedDecimalWrapperMethod() &&
1769
!comp()->getOption(TR_DisableMarshallingIntrinsics);
1770
1771
if (reduceMarshallingWrapper || reducePackedDecimalWrapper)
1772
{
1773
target->_fullSize /= 5;
1774
target->_partialSize /= 5;
1775
1776
heuristicTrace(tracer(),"DAA: Reducing target %p fullSize to %d and partialSize to %d to increase likelyhood of successful inlining\n", target, target->_fullSize, target->_partialSize);
1777
return true;
1778
}
1779
}
1780
1781
return false;
1782
}
1783
1784
/******************
1785
* A breadth-first graph search. searchItem is the block flag we are looking for; searchPath is the block flag a successor must carry for the search to continue through it. Returns true if a block flagged with searchItem is reachable from startBlock through blocks flagged with searchPath.
1786
*
1787
* ***************/
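// A sketch mirroring the call in trimBlocksForPartialInlining() below: ask whether an
// unsanitizeable block can reach a restart block while travelling only through
// partial-inline or restart blocks.
//
//    bool reachesRestart = graphSearch(calltarget->_cfg, aBlock,
//          TR::Block::_restartBlock,
//          (TR::Block::partialFlags) (TR::Block::_partialInlineBlock
//                                     | TR::Block::_restartBlock));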
1788
1789
bool
1790
TR_J9EstimateCodeSize::graphSearch(TR::CFG *cfg, TR::Block *startBlock,
1791
TR::Block::partialFlags searchItem, TR::Block::partialFlags searchPath)
1792
{
1793
TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(
1794
cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);
1795
blocksVisited->empty();
1796
1797
TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());
1798
nodesToBeEvaluated.enqueue(startBlock);
1799
1800
do
1801
{
1802
TR::Block *currentBlock = nodesToBeEvaluated.dequeue();
1803
1804
if (blocksVisited->get(currentBlock->getNumber()))
1805
continue;
1806
blocksVisited->set(currentBlock->getNumber());
1807
1808
if (currentBlock->getPartialFlags().testAny(searchItem))
1809
return true;
1810
1811
for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)
1812
{
1813
TR::Block *dest = (*e)->getTo()->asBlock();
1814
if (dest->getPartialFlags().testAny(searchPath))
1815
nodesToBeEvaluated.enqueue(dest);
1816
}
1817
for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)
1818
{
1819
TR::Block *dest = (*e)->getTo()->asBlock();
1820
if (dest->getPartialFlags().testAny(searchPath))
1821
nodesToBeEvaluated.enqueue(dest);
1822
}
1823
}
1824
while (!nodesToBeEvaluated.isEmpty());
1825
1826
return false; //did not find the search item
1827
}
1828
1829
/*************************
1830
* A graph labelling algorithm
1831
* TODO: you can add size information in here
1832
* ***********************/
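// Overview of the code below: starting from the CFG exit and walking predecessors,
// each block is labelled either a restart block (all of its successors restart, or
// its frequency is below MIN_PARTIAL_FREQUENCY, or it is cold) or a partial-inline
// block whose size is added to the running total. Unsanitizeable blocks and blocks
// containing calls are collected into the queues supplied by the caller. Returns the
// accumulated partial size, or -1 if no restart block was found, in which case a full
// inline is the only option.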
1833
#define MIN_PARTIAL_FREQUENCY 15
1834
int32_t
1835
TR_J9EstimateCodeSize::labelGraph(TR::CFG *cfg,
1836
TR_Queue<TR::Block> *unsanitizeableBlocks, TR_Queue<TR::Block> *callBlocks)
1837
{
1838
TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(
1839
cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);
1840
blocksVisited->empty();
1841
1842
int32_t size = 0;
1843
bool hasAtLeastOneRestartBlock = false;
1844
TR::Block *startBlock = cfg->getStart()->asBlock();
1845
TR::Block *endBlock = cfg->getEnd()->asBlock();
1846
TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());
1847
TR_Queue<TR::Block> difficultNodesToBeEvaluated(comp()->trMemory());
1848
nodesToBeEvaluated.enqueue(endBlock);
1849
1850
TR::Block *currentBlock = NULL;
1851
1852
do
1853
{
1854
if (!nodesToBeEvaluated.isEmpty())
1855
currentBlock = nodesToBeEvaluated.dequeue();
1856
else if (!difficultNodesToBeEvaluated.isEmpty())
1857
currentBlock = difficultNodesToBeEvaluated.dequeue();
1858
else
1859
TR_ASSERT(0, "Neither Queue has a node left!\n");
1860
1861
if (blocksVisited->get(currentBlock->getNumber()))
1862
continue;
1863
// blocksVisited->set(currentBlock->getNumber()); // moving this downward a little!
1864
1865
if (currentBlock->getBlockSize() == -1 && (currentBlock != startBlock
1866
&& currentBlock != endBlock))
1867
TR_ASSERT(0, "labelGraph: a block does not have a valid size!\n");
1868
1869
//Part 1: Successor Test: ensure all my successors have been evaluated first and that they are not all restart blocks.
1870
1871
bool allRestarts = true;
1872
bool allVisited = true;
1873
for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)
1874
{
1875
TR::Block *dest = (*e)->getTo()->asBlock();
1876
1877
if (!blocksVisited->get(dest->getNumber()))
1878
{
1879
allVisited = false;
1880
break;
1881
}
1882
1883
if (!dest->isRestartBlock())
1884
{
1885
allRestarts = false;
1886
break;
1887
}
1888
}
1889
for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)
1890
{
1891
TR::Block *dest = (*e)->getTo()->asBlock();
1892
1893
if (!blocksVisited->get(dest->getNumber()))
1894
{
1895
allVisited = false;
1896
// break;
1897
}
1898
1899
if (dest->isPartialInlineBlock()) //(!dest->isRestartBlock())
1900
{
1901
// allRestarts=false;
1902
// break;
1903
}
1904
}
1905
1906
if (!allVisited && !currentBlock->isDifficultBlock())
1907
{
1908
1909
partialTrace(tracer(), "Requeueing block into difficult Nodes List %p %d because its successors have not been all visited \n", currentBlock, currentBlock->getNumber());
1910
currentBlock->setIsDifficultBlock();
1911
difficultNodesToBeEvaluated.enqueue(currentBlock);
1912
continue;
1913
}
1914
else if (currentBlock->isDifficultBlock())
1915
{
1916
//assuming all unvisited blocks are restarts.
1917
//which actually means doing nothing here, since I only mark allRestarts = false if I found a partial inline block.
1918
1919
blocksVisited->set(currentBlock->getNumber());
1920
1921
}
1922
else
1923
blocksVisited->set(currentBlock->getNumber());
1924
1925
//Part 2: Setting Flags on the Current Block
1926
int16_t minpartialfreq = MIN_PARTIAL_FREQUENCY;
1927
1928
1929
if (allRestarts && currentBlock != cfg->getEnd()->asBlock())
1930
{
1931
currentBlock->setRestartBlock();
1932
hasAtLeastOneRestartBlock = true;
1933
if (currentBlock->isPartialInlineBlock())
1934
{
1935
currentBlock->setPartialInlineBlock(false);
1936
if (currentBlock != startBlock && currentBlock != endBlock)
1937
{
1938
if (size > currentBlock->getBlockSize())
1939
size -= currentBlock->getBlockSize();
1940
}
1941
}
1942
}
1943
else if ((currentBlock->getFrequency() < minpartialfreq || currentBlock->isCold()) && currentBlock != startBlock && currentBlock != endBlock)
1944
{
1945
currentBlock->setRestartBlock();
1946
hasAtLeastOneRestartBlock = true;
1947
}
1948
else
1949
{
1950
currentBlock->setPartialInlineBlock();
1951
if (currentBlock != startBlock && currentBlock != endBlock)
1952
size += currentBlock->getBlockSize();
1953
}
1954
1955
if (currentBlock->isUnsanitizeable())
1956
unsanitizeableBlocks->enqueue(currentBlock);
1957
else if (currentBlock->containsCall()) // only need to enqueue it if it's not unsanitizeable already
1958
callBlocks->enqueue(currentBlock);
1959
1960
// Part 3: Enqueue all Predecessors
1961
1962
for (auto e = currentBlock->getPredecessors().begin(); e != currentBlock->getPredecessors().end(); ++e)
1963
{
1964
TR::Block *dest = (*e)->getFrom()->asBlock();
1965
nodesToBeEvaluated.enqueue(dest);
1966
}
1967
for (auto e = currentBlock->getExceptionPredecessors().begin(); e != currentBlock->getExceptionPredecessors().end();
1968
++e)
1969
{
1970
TR::Block *dest = (*e)->getFrom()->asBlock();
1971
nodesToBeEvaluated.enqueue(dest);
1972
}
1973
1974
if (currentBlock->isRestartBlock()
1975
&& currentBlock->isPartialInlineBlock())
1976
TR_ASSERT(0, "currentBlock is both a restart block AND a partial inline block!\n");
1977
1978
}
1979
while (!nodesToBeEvaluated.isEmpty()
1980
|| !difficultNodesToBeEvaluated.isEmpty());
1981
1982
if (!hasAtLeastOneRestartBlock)
1983
return -1; // this means I should just do a full inline anyway
1984
return size;
1985
}
1986
#define MIN_PARTIAL_SIZE 100
1987
1988
bool
1989
TR_J9EstimateCodeSize::trimBlocksForPartialInlining(TR_CallTarget *calltarget, TR_Queue<TR::Block> *callBlocks)
1990
{
1991
TR_ASSERT(calltarget->_originatingBlock, "trimBlocksForPartialInlining: call target does not have an _originatingBlock set yet!\n");
1992
1993
if (comp()->getOption(TR_DisablePartialInlining) || calltarget->_calleeMethod->isSynchronized())
1994
{
1995
calltarget->_isPartialInliningCandidate = false;
1996
return false;
1997
}
1998
1999
TR_Queue<TR::Block> unsanitizeableBlocks(comp()->trMemory());
2000
2001
int32_t size = labelGraph(calltarget->_cfg, &unsanitizeableBlocks,
2002
callBlocks);
2003
2004
if (tracer()->partialLevel())
2005
{
2006
partialTrace(tracer(),"Dumping CFG for calltarget %p", calltarget);
2007
comp()->dumpFlowGraph(calltarget->_cfg);
2008
}
2009
2010
int32_t minpartialsize = MIN_PARTIAL_SIZE;
2011
2012
if (size > -1 && size + minpartialsize >= calltarget->_fullSize)
2013
{
2014
partialTrace(tracer()," Candidate partial size of %d is too close to full Size of %d to be of any benefit. Doing a full inline.",size, calltarget->_fullSize);
2015
}
2016
else if (size > -1) // a size of -1 means we didn't have any restart blocks - so no sense in doing a 'partial' inline
2017
{
2018
bool gs = true;
2019
while (!unsanitizeableBlocks.isEmpty())
2020
{
2021
TR::Block *aBlock = unsanitizeableBlocks.dequeue();
2022
if (!aBlock->isRestartBlock()) // if the unsanitizeable block is also a restart block, I don't care who it reaches.
2023
{
2024
calltarget->_originatingBlock->setIsUnsanitizeable(); // An unsanitizeable block remains in the inline
2025
2026
gs = !(graphSearch(calltarget->_cfg, aBlock,
2027
TR::Block::_restartBlock,
2028
(TR::Block::partialFlags) (TR::Block::_partialInlineBlock
2029
| TR::Block::_restartBlock)));
2030
if (!gs)
2031
{
2032
partialTrace(tracer(),"TrimBlocksForPartialInlining: Unsanitizeable block %p %d can reach a restart block.",aBlock, aBlock->getNumber());
2033
break;
2034
}
2035
}
2036
else
2037
partialTrace(tracer(),"TrimBlocksForPartialinlining: Unsanitizeable block %p %d is a restart block.",aBlock, aBlock->getNumber());
2038
}
2039
2040
if (gs)
2041
{
2042
gs = graphSearch(calltarget->_cfg,
2043
calltarget->_cfg->getStart()->asBlock(), TR::Block::_endBlock,
2044
TR::Block::_partialInlineBlock);
2045
if (!gs)
2046
{
2047
partialTrace(tracer(),"TrimBlocksForPartialInlining: No Complete Path from Start to End");
2048
}
2049
}
2050
2051
if (!gs)
2052
{
2053
calltarget->_isPartialInliningCandidate = false;
2054
return false;
2055
}
2056
2057
partialTrace(tracer(), "TrimBlocksForPartialInlining Found a Candidate. Setting PartialSize to %d. full size = %d",size, calltarget->_fullSize);
2058
calltarget->_partialSize = size;
2059
2060
return true;
2061
}
2062
else
2063
{
2064
if (!unsanitizeableBlocks.isEmpty())
2065
calltarget->_originatingBlock->setIsUnsanitizeable(); // A Full Inline with unsanitizeable blocks
2066
partialTrace(tracer(),"TrimBlocksForPartialInlining: No restart blocks found in candidate. Doing a full inline");
2067
}
2068
2069
calltarget->_isPartialInliningCandidate = false;
2070
return false;
2071
}
2072
2073
void
2074
TR_J9EstimateCodeSize::processGraph(TR_CallTarget *calltarget)
2075
{
2076
TR::CFG *cfg = calltarget->_cfg;
2077
calltarget->_partialInline = new (comp()->trHeapMemory()) TR_InlineBlocks(
2078
_inliner->fe(), _inliner->comp());
2079
TR_BitVector *blocksVisited = new (comp()->trStackMemory()) TR_BitVector(
2080
cfg->getNextNodeNumber(), comp()->trMemory(), stackAlloc);
2081
blocksVisited->empty();
2082
2083
TR::Block *startBlock = cfg->getStart()->asBlock();
2084
TR::Block *endBlock = cfg->getEnd()->asBlock();
2085
TR_Queue<TR::Block> nodesToBeEvaluated(comp()->trMemory());
2086
nodesToBeEvaluated.enqueue(startBlock);
2087
2088
do
2089
{
2090
TR::Block *currentBlock = nodesToBeEvaluated.dequeue();
2091
2092
if (blocksVisited->get(currentBlock->getNumber()))
2093
continue;
2094
blocksVisited->set(currentBlock->getNumber());
2095
2096
if (currentBlock != startBlock && currentBlock != endBlock)
2097
calltarget->_partialInline->addBlock(currentBlock);
2098
2099
for (auto e = currentBlock->getSuccessors().begin(); e != currentBlock->getSuccessors().end(); ++e)
2100
{
2101
TR::Block *dest = (*e)->getTo()->asBlock();
2102
if (dest->isPartialInlineBlock())
2103
nodesToBeEvaluated.enqueue(dest);
2104
}
2105
for (auto e = currentBlock->getExceptionSuccessors().begin(); e != currentBlock->getExceptionSuccessors().end(); ++e)
2106
{
2107
TR::Block *dest = (*e)->getTo()->asBlock();
2108
if (dest->isPartialInlineBlock())
2109
nodesToBeEvaluated.enqueue(dest);
2110
2111
calltarget->_partialInline->addExceptionBlock(dest); //only partial blocks will be processed. any exception block reachable from a partial block needs to be dealt with.
2112
}
2113
2114
}
2115
while (!nodesToBeEvaluated.isEmpty());
2116
2117
}
2118
2119
/***************************************
2120
* isPartialInliningCandidate()
2121
* Checks whether any of the call blocks are unsanitizeable and, if so, whether they can reach a restart block.
2122
* Generates the list of TR_InlineBlocks that are to be inlined.
2123
* ***************************************/
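// Note: the callBlocks queue examined here is the one populated by labelGraph() during
// trimBlocksForPartialInlining(). A candidate is rejected only when an unsanitizeable,
// non-restart call block can still reach a restart block through partial-inline or
// restart blocks.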
2124
2125
bool
2126
TR_J9EstimateCodeSize::isPartialInliningCandidate(TR_CallTarget *calltarget,
2127
TR_Queue<TR::Block> *callBlocks)
2128
{
2129
if (!calltarget->_isPartialInliningCandidate)
2130
return false;
2131
2132
while (!callBlocks->isEmpty())
2133
{
2134
TR::Block *callBlock = callBlocks->dequeue();
2135
2136
if (callBlock->isUnsanitizeable() && !callBlock->isRestartBlock())
2137
{
2138
calltarget->_originatingBlock->setIsUnsanitizeable();
2139
bool result = graphSearch(calltarget->_cfg, callBlock,
2140
TR::Block::_restartBlock,
2141
(TR::Block::partialFlags) (TR::Block::_partialInlineBlock
2142
| TR::Block::_restartBlock));
2143
if (result) // unsanitizeable block can reach a restart block
2144
{
2145
calltarget->_isPartialInliningCandidate = false;
2146
return false;
2147
}
2148
}
2149
2150
}
2151
2152
// we have a partial inlining candidate at this point. Now walk the graph and add all partial-inline (P) blocks to TR_InlineBlocks
2153
2154
processGraph(calltarget);
2155
2156
return true;
2157
}
2158
2159