Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/optimizer/InlinerTempForJ9.cpp
6000 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2021 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
#include <algorithm>
23
#include "j9cfg.h"
24
#include "optimizer/Inliner.hpp"
25
#include "optimizer/J9Inliner.hpp"
26
#include "optimizer/J9EstimateCodeSize.hpp"
27
#include "optimizer/VectorAPIExpansion.hpp"
28
29
#include "env/KnownObjectTable.hpp"
30
#include "compile/InlineBlock.hpp"
31
#include "compile/Method.hpp"
32
#include "compile/OSRData.hpp"
33
#include "compile/ResolvedMethod.hpp"
34
#include "env/CompilerEnv.hpp"
35
#include "env/CHTable.hpp"
36
#include "env/PersistentCHTable.hpp"
37
#include "env/VMJ9.h"
38
#include "env/jittypes.h"
39
#include "il/Block.hpp"
40
#include "il/Node.hpp"
41
#include "il/Node_inlines.hpp"
42
#include "il/ParameterSymbol.hpp"
43
#include "il/StaticSymbol.hpp"
44
#include "il/TreeTop.hpp"
45
#include "il/TreeTop_inlines.hpp"
46
#include "optimizer/CallInfo.hpp"
47
#include "optimizer/J9CallGraph.hpp"
48
#include "optimizer/PreExistence.hpp"
49
#include "optimizer/Structure.hpp"
50
#include "codegen/CodeGenerator.hpp"
51
#include "codegen/CodeGenerator_inlines.hpp"
52
#include "il/ILOpCodes.hpp"
53
#include "il/ILOps.hpp"
54
#include "ilgen/IlGenRequest.hpp"
55
#include "ilgen/IlGeneratorMethodDetails.hpp"
56
#include "ilgen/IlGeneratorMethodDetails_inlines.hpp"
57
#include "optimizer/Structure.hpp" // TR_RegionAnalysis
58
#include "optimizer/StructuralAnalysis.hpp"
59
#include "control/Recompilation.hpp" //TR_PersistentJittedBodyInfo
60
#include "control/RecompilationInfo.hpp" //TR_PersistentJittedBodyInfo
61
#include "optimizer/EstimateCodeSize.hpp"
62
#include "env/VMJ9.h"
63
#include "runtime/J9Profiler.hpp"
64
#include "ras/DebugCounter.hpp"
65
#include "j9consts.h"
66
#include "optimizer/TransformUtil.hpp"
67
68
namespace TR { class SimpleRegex; }
69
70
// Prefix used in performTransformation/trace messages emitted by this file.
#define OPT_DETAILS "O^O INLINER: "

// == Hack markers ==

// To conserve owning method indexes, we share TR_ResolvedMethods even where
// the owning method differs. Usually that is done only for methods that the
// inliner never sees, but if we get aggressive and share TR_ResolvedMethods
// that are exposed to the inliner, then the inliner needs to make sure it
// doesn't rely on the "owning method" accurately representing the calling method.
//
// This is a fragile design that needs some more thought. We should either
// eliminate the limitations that motivate sharing in the first place, or else
// take the time to modify inliner (and everyone else) so it doesn't rely on
// owning method information to indicate the caller.
//
// For now, we mark such code with this macro.
//
#define OWNING_METHOD_MAY_NOT_BE_THE_CALLER (1)

// Thresholds for the fan-in based inlining heuristics below.
// NOTE(review): exact semantics of each threshold are defined by their use
// sites (not all visible in this chunk) — confirm before tuning.
#define MIN_NUM_CALLERS 20
#define MIN_FAN_IN_SIZE 50
#define SIZE_MULTIPLIER 4
#define FANIN_OTHER_BUCKET_THRESHOLD 0.5
#define DEFAULT_CONST_CLASS_WEIGHT 10

#undef TRACE_CSI_IN_INLINER

// Printable labels for TR_PrexArgument prior-knowledge kinds, indexed by the
// prior-knowledge enumeration (empty string for "no knowledge").
const char* TR_PrexArgument::priorKnowledgeStrings[] = { "", "(preexistent) ", "(fixed-class) ", "(known-object) " };
98
99
// True when this compilation is at the warm optimization level or higher.
static bool isWarm(TR::Compilation *comp)
   {
   auto level = comp->getMethodHotness();
   return level >= warm;
   }
103
// True when this compilation is at the hot optimization level or higher.
static bool isHot(TR::Compilation *comp)
   {
   auto level = comp->getMethodHotness();
   return level >= hot;
   }
107
// Scorching-level treatment applies either at scorching proper, or at
// veryHot when this is a profiling compilation.
static bool isScorching(TR::Compilation *comp)
   {
   auto level = comp->getMethodHotness();
   if (level >= scorching)
      return true;
   return (level >= veryHot) && comp->isProfilingCompilation();
   }
111
112
// Produce the initial "size" estimate the inliner uses for a callee: start
// from the raw bytecode size and scale it down for methods we particularly
// want considered (a smaller estimate makes inlining more likely).
static int32_t getJ9InitialBytecodeSize(TR_ResolvedMethod * feMethod, TR::ResolvedMethodSymbol * methodSymbol, TR::Compilation *comp)
   {
   int32_t size = feMethod->maxBytecodeIndex();

   if (methodSymbol && methodSymbol->getRecognizedMethod() == TR::java_util_ArrayList_remove)
      {
      size >>= 1;
      }

   TR::RecognizedMethod rm = feMethod->getRecognizedMethod();
   switch (rm)
      {
      // Recognized methods whose estimate is halved.
      case TR::java_lang_String_indexOf_String_int:
      case TR::java_lang_String_init_String:
      case TR::java_lang_String_indexOf_fast:
      case TR::java_math_BigDecimal_subMulSetScale:
      case TR::java_math_BigDecimal_addAddMulSetScale:
      case TR::java_math_BigDecimal_mulSetScale:
      case TR::java_math_BigDecimal_noLLOverflowAdd:
      case TR::java_math_BigDecimal_noLLOverflowMul:
      case TR::java_math_BigDecimal_subMulAddAddMulSetScale:
      case TR::com_ibm_ws_webcontainer_channel_WCCByteBufferOutputStream_printUnencoded:
      case TR::java_lang_String_equals:
         size >>= 1;
         break;

      default:
         if (feMethod->isDAAWrapperMethod())
            {
            size = 1;
            }
         else if (feMethod->isDAAIntrinsicMethod())
            {
            size >>= 3;
            }
         else if (rm == TR::java_math_BigDecimal_valueOf)
            {
            size >>= 2;
            }
         else if (rm == TR::java_math_BigDecimal_add ||
                  rm == TR::java_lang_String_init_int_String_int_String_String ||
                  rm == TR::com_ibm_jit_DecimalFormatHelper_formatAsDouble ||
                  rm == TR::com_ibm_jit_DecimalFormatHelper_formatAsFloat)
            {
            size >>= 3;
            }
         else if (strncmp(feMethod->nameChars(), "toString", 8) == 0 ||
                  strncmp(feMethod->nameChars(), "multiLeafArrayCopy", 18) == 0)
            {
            size >>= 1;
            }
         else if (!comp->getOption(TR_DisableAdaptiveDumbInliner))
            {
            // favor the inlining of methods that are very small
            if (methodSymbol && !methodSymbol->mayHaveInlineableCall() && size <= 5)
               size = 0;
            }
         break;
      }

   TR_J9EstimateCodeSize::adjustEstimateForStringCompression(feMethod, size, TR_J9EstimateCodeSize::STRING_COMPRESSION_ADJUSTMENT_FACTOR);

   return size;
   }
173
174
// Decide whether 'method' sits inside an IntPipeline.forEach chain that we
// want fully inlined for JIT GPU.
//
// Walks up the owning-method chain looking for IntPipeline.forEach. Returns
// true when:
//  - the method itself is IntPipeline$Head.forEach, or
//  - the method immediately below IntPipeline.forEach in the chain is named
//    "accept" (so accept and everything inside it gets inlined), or
//  - the first method inspected is IntPipeline.forEach itself (the tracking
//    flag starts out true).
static bool insideIntPipelineForEach(TR_ResolvedMethod *method, TR::Compilation *comp)
   {
   if (!method || !comp->getOptions()->getEnableGPU(TR_EnableGPU) || !comp->hasIntStreamForEach())
      return false;

   if (method->getRecognizedMethod() == TR::java_util_stream_IntPipelineHead_forEach)
      return true;

   const char *acceptName = "accept";
   bool priorWasAccept = true; // true so IntPipeline.forEach as the first method reports true

   for (TR_ResolvedMethod *cursor = method; cursor; cursor = cursor->owningMethod())
      {
      if (cursor->getRecognizedMethod() == TR::java_util_stream_IntPipeline_forEach)
         return priorWasAccept;

      // Remember whether this level was "accept" for when we reach forEach.
      priorWasAccept = (strncmp(cursor->nameChars(), acceptName, strlen(acceptName)) == 0);
      }

   return false;
   }
209
210
bool
211
TR_J9InlinerPolicy::inlineRecognizedMethod(TR::RecognizedMethod method)
212
{
213
// if (method ==
214
// TR::java_lang_String_init_String_char)
215
// return false;
216
if (comp()->cg()->suppressInliningOfRecognizedMethod(method))
217
return false;
218
219
if (comp()->isConverterMethod(method) &&
220
comp()->canTransformConverterMethod(method))
221
return false;
222
223
// Check for memoizing methods
224
if (comp()->getOption(TR_DisableDememoization) || (comp()->getMethodHotness() < hot)) // TODO: This should actually check whether EA will run, but that's not crucial for correctness
225
{
226
switch (method)
227
{
228
case TR::java_lang_Integer_valueOf:
229
comp()->getMethodSymbol()->setHasNews(true);
230
return true;
231
default:
232
break;
233
}
234
}
235
else if (method == TR::java_lang_Integer_valueOf)
236
return false;
237
238
if (willBeInlinedInCodeGen(method))
239
return false;
240
241
return true;
242
243
}
244
245
int32_t
246
TR_J9InlinerPolicy::getInitialBytecodeSize(TR_ResolvedMethod *feMethod, TR::ResolvedMethodSymbol * methodSymbol, TR::Compilation *comp)
247
{
248
return getJ9InitialBytecodeSize(feMethod, methodSymbol, comp);
249
}
250
251
bool
252
TR_J9InlinerPolicy::aggressivelyInlineInLoops()
253
{
254
return _aggressivelyInlineInLoops;
255
}
256
257
void
258
TR_J9InlinerPolicy::determineAggressionInLoops(TR::ResolvedMethodSymbol *callerSymbol)
259
{
260
if (isHot(comp()) && OMR_InlinerPolicy::getInitialBytecodeSize(callerSymbol, comp()) < 100)
261
_aggressivelyInlineInLoops = true;
262
}
263
264
void
265
TR_J9InlinerPolicy::determineInliningHeuristic(TR::ResolvedMethodSymbol *callerSymbol)
266
{
267
determineAggressionInLoops(callerSymbol);
268
return;
269
}
270
271
// Accumulate the size estimate for one call target into the running total.
void TR_MultipleCallTargetInliner::generateNodeEstimate::operator ()(TR_CallTarget *ct, TR::Compilation *comp)
   {
   int32_t targetSize = getJ9InitialBytecodeSize(ct->_calleeMethod, 0, comp);

   // For a partial-inlining candidate, scale the estimate by the fraction of
   // the method actually inlined. Only scale non-empty methods: conversion of
   // NaN/Inf to int is undefined, and an empty method partially inlined is
   // still empty.
   if (ct->_isPartialInliningCandidate && ct->_fullSize != 0)
      targetSize = targetSize * ((float)(ct->_partialSize) / (float)(ct->_fullSize));

   _nodeEstimate += targetSize;
   }
284
285
bool
286
TR_J9InlinerPolicy::mustBeInlinedEvenInDebug(TR_ResolvedMethod * calleeMethod, TR::TreeTop *callNodeTreeTop)
287
{
288
if (calleeMethod)
289
{
290
switch (calleeMethod->convertToMethod()->getMandatoryRecognizedMethod())
291
{
292
// call to invokeExactTargetAddress are generated out of thin air by our JSR292
293
// implementation, but we never want the VM or anyone else to know this so we must
294
// always inline the implementation
295
case TR::java_lang_invoke_MethodHandle_invokeExactTargetAddress:
296
{
297
TR::TreeTop *scanTT = callNodeTreeTop->getNextTreeTop();
298
TR::Node *nextCall = NULL;
299
300
while (scanTT &&
301
scanTT->getNode()->getByteCodeInfo().getByteCodeIndex() == callNodeTreeTop->getNode()->getByteCodeInfo().getByteCodeIndex() &&
302
scanTT->getNode()->getByteCodeInfo().getCallerIndex() == callNodeTreeTop->getNode()->getByteCodeInfo().getCallerIndex())
303
{
304
TR::Node *scanNode = scanTT->getNode();
305
if (scanNode && (scanNode->getOpCode().isCheck() || scanNode->getOpCodeValue() == TR::treetop))
306
scanNode = scanNode->getFirstChild();
307
308
if (scanNode->getOpCode().isCall())
309
{
310
nextCall = scanNode;
311
break;
312
}
313
scanTT = scanTT->getNextTreeTop();
314
}
315
316
debugTrace(tracer(), "considering nextOperation node n%dn", nextCall->getGlobalIndex());
317
if (nextCall && nextCall->getOpCode().hasSymbolReference() &&
318
nextCall->getSymbolReference()->getSymbol()->getMethodSymbol()->isComputedVirtual())
319
return true;
320
}
321
default:
322
break;
323
}
324
}
325
return false;
326
}
327
328
/** Test for methods that we wish to inline whenever possible.
329
330
Identify methods for which the benefits of inlining them into the caller
331
are particularly significant and which might not otherwise be chosen by
332
the inliner.
333
*/
334
bool
TR_J9InlinerPolicy::alwaysWorthInlining(TR_ResolvedMethod * calleeMethod, TR::Node *callNode)
   {
   if (!calleeMethod)
      return false;

   // JNI methods we know how to inline directly.
   if (isInlineableJNI(calleeMethod, callNode))
      return true;

   // DAA wrapper methods (see also getJ9InitialBytecodeSize, which gives them
   // an estimate of 1).
   if (calleeMethod->isDAAWrapperMethod())
      return true;

   // JSR292 machinery the downstream passes rely on seeing inlined.
   if (isJSR292AlwaysWorthInlining(calleeMethod))
      return true;

   switch (calleeMethod->getRecognizedMethod())
      {
      // On 32-bit targets only, these 64-bit atomics are worth inlining;
      // on 64-bit the answer falls through to the checks below the switch.
      case TR::sun_misc_Unsafe_getAndAddLong:
      case TR::sun_misc_Unsafe_getAndSetLong:
         return comp()->target().is32Bit();

      case TR::java_lang_J9VMInternals_fastIdentityHashCode:
      case TR::java_lang_Class_getSuperclass:
      case TR::java_lang_String_regionMatchesInternal:
      case TR::java_lang_String_regionMatches:
      case TR::java_lang_Class_newInstance:
      // we rely on inlining compareAndSwap so we see the inner native call and can special case it
      case TR::com_ibm_jit_JITHelpers_compareAndSwapIntInObject:
      case TR::com_ibm_jit_JITHelpers_compareAndSwapLongInObject:
      case TR::com_ibm_jit_JITHelpers_compareAndSwapObjectInObject:
      case TR::com_ibm_jit_JITHelpers_compareAndSwapIntInArray:
      case TR::com_ibm_jit_JITHelpers_compareAndSwapLongInArray:
      case TR::com_ibm_jit_JITHelpers_compareAndSwapObjectInArray:
      case TR::com_ibm_jit_JITHelpers_jitHelpers:
      case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
      case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
      case TR::java_lang_String_charAt:
      case TR::java_lang_String_charAtInternal_I:
      case TR::java_lang_String_charAtInternal_IB:
      case TR::java_lang_String_checkIndex:
      case TR::java_lang_String_coder:
      case TR::java_lang_String_isLatin1:
      case TR::java_lang_String_length:
      case TR::java_lang_String_lengthInternal:
      case TR::java_lang_String_isCompressed:
      case TR::java_lang_StringBuffer_capacityInternal:
      case TR::java_lang_StringBuffer_lengthInternalUnsynchronized:
      case TR::java_lang_StringBuilder_capacityInternal:
      case TR::java_lang_StringBuilder_lengthInternal:
      case TR::java_lang_StringUTF16_charAt:
      case TR::java_lang_StringUTF16_checkIndex:
      case TR::java_lang_StringUTF16_length:
      case TR::java_lang_StringUTF16_newBytesFor:
      case TR::java_util_HashMap_get:
      case TR::java_util_HashMap_getNode:
      case TR::java_lang_String_getChars_charArray:
      case TR::java_lang_String_getChars_byteArray:
      case TR::java_lang_Integer_toUnsignedLong:
      case TR::java_nio_Bits_byteOrder:
      case TR::java_nio_ByteOrder_nativeOrder:
         return true;

      // In Java9 the following enum values match both sun.misc.Unsafe and
      // jdk.internal.misc.Unsafe The sun.misc.Unsafe methods are simple
      // wrappers to call jdk.internal impls, and we want to inline them. Since
      // the same code can run with Java8 classes where sun.misc.Unsafe has the
      // JNI impl, we need to differentiate by testing with isNative(). If it is
      // native, then we don't need to inline it as it will be handled
      // elsewhere.
      case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
      case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
      case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
      case TR::sun_misc_Unsafe_copyMemory:
         return !calleeMethod->isNative();
      default:
         break;
      }

   // Everything under java/util/concurrent/atomic/ is always worth inlining.
   if (!strncmp(calleeMethod->classNameChars(), "java/util/concurrent/atomic/", strlen("java/util/concurrent/atomic/")))
      {
      return true;
      }

   // Both Unsafe implementation classes as a whole.
   int32_t length = calleeMethod->classNameLength();
   char* className = calleeMethod->classNameChars();

   if (length == 24 && !strncmp(className, "jdk/internal/misc/Unsafe", 24))
      return true;
   else if (length == 15 && !strncmp(className, "sun/misc/Unsafe", 15))
      return true;

   // Honour @ForceInline unless disabled by option.
   if (!comp()->getOption(TR_DisableForceInlineAnnotations) &&
       comp()->fej9()->isForceInline(calleeMethod))
      {
      if (comp()->trace(OMR::inlining))
         traceMsg(comp(), "@ForceInline was specified for %s, in alwaysWorthInlining\n", calleeMethod->signature(comp()->trMemory()));
      return true;
      }

   return false;
   }
434
435
/*
436
* Check if there is any method handle invoke left in the method body
437
*/
438
// Scan the method body for any remaining method-handle invoke that the
// targeted JSR292 inlining pass could still process. Only the first child of
// each treetop is inspected, and virtual-call nodes belonging to a guarded
// inlined call are ignored.
static bool checkForRemainingInlineableJSR292(TR::Compilation *comp, TR::ResolvedMethodSymbol *methodSymbol)
   {
   TR::NodeChecklist seen(comp);
   for (TR::TreeTop *tt = methodSymbol->getFirstTreeTop(); tt; tt = tt->getNextTreeTop())
      {
      TR::Node *ttNode = tt->getNode();
      if (ttNode->getNumChildren() == 0)
         continue;

      TR::Node *callNode = ttNode->getFirstChild();
      if (!callNode->getOpCode().isCall() || seen.contains(callNode))
         continue;
      seen.add(callNode);

      TR::ResolvedMethodSymbol *calleeSymbol = callNode->getSymbolReference()->getSymbol()->getResolvedMethodSymbol();
      if (!calleeSymbol)
         continue;

      if (callNode->isTheVirtualCallNodeForAGuardedInlinedCall())
         continue;

      TR_ResolvedMethod *callee = calleeSymbol->getResolvedMethod();
      if (comp->fej9()->isLambdaFormGeneratedMethod(callee) ||
          callee->getRecognizedMethod() == TR::java_lang_invoke_MethodHandle_invokeBasic ||
          callee->convertToMethod()->isArchetypeSpecimen() ||
          callee->getRecognizedMethod() == TR::java_lang_invoke_MethodHandle_invokeExact)
         {
         return true;
         }
      }
   return false;
   }
467
468
void
469
TR_J9InlinerUtil::requestAdditionalOptimizations(TR_CallTarget *calltarget)
470
{
471
if (calltarget->_myCallSite->getDepth() == -1 // only do this for top level callee to prevent exponential walk of inlined trees
472
&& checkForRemainingInlineableJSR292(comp(), calltarget->_calleeSymbol))
473
{
474
_inliner->getOptimizer()->setRequestOptimization(OMR::methodHandleInvokeInliningGroup);
475
if (comp()->trace(OMR::inlining))
476
heuristicTrace(tracer(),"Requesting one more pass of targeted inlining due to method handle invoke in %s\n", tracer()->traceSignature(calltarget->_calleeSymbol));
477
}
478
}
479
480
void
481
TR_J9InlinerUtil::adjustByteCodeSize(TR_ResolvedMethod *calleeResolvedMethod, bool isInLoop, TR::Block *block, int &bytecodeSize)
482
{
483
traceMsg(comp(), "Reached new code \n");
484
int32_t blockNestingDepth = 1;
485
if (isInLoop)
486
{
487
char *tmptmp=0;
488
if (calleeResolvedMethod)
489
tmptmp = TR::Compiler->cls.classSignature(comp(), calleeResolvedMethod->containingClass(),trMemory());
490
491
bool doit = false;
492
493
if (((TR_J9InlinerPolicy *)inliner()->getPolicy())->aggressivelyInlineInLoops())
494
{
495
doit = true;
496
}
497
498
if (doit && calleeResolvedMethod && !strcmp(tmptmp,"Ljava/math/BigDecimal;"))
499
{
500
traceMsg(comp(), "Reached code for block nesting depth %d\n", blockNestingDepth);
501
if ((isInLoop || (blockNestingDepth > 1)) &&
502
(bytecodeSize > 10))
503
{
504
if (comp()->trace(OMR::inlining))
505
heuristicTrace(tracer(),"Exceeds Size Threshold: Scaled down size for call block %d from %d to %d\n", block->getNumber(), bytecodeSize, 10);
506
bytecodeSize = 15;
507
}
508
}
509
else
510
heuristicTrace(tracer(),"Omitting Big Decimal method from size readjustment, calleeResolvedMethod = %p, tmptmp =%s",calleeResolvedMethod, tmptmp);
511
}
512
}
513
514
// Generate a compressedRefs anchor for 'address'.
//
// Returns the anchor node when the TR_UseTranslateInTrees env var is set or
// genTT is false; otherwise returns NULL. NOTE(review): on the (!pEnv && genTT)
// path the anchor is wrapped in a treetop node and then DISCARDED (control
// falls through to 'return NULL') — callers appear to rely on the NULL return;
// confirm before "fixing".
TR::Node *
TR_J9InlinerPolicy::genCompressedRefs(TR::Node * address, bool genTT, int32_t isLoad)
   {
   static char *pEnv = feGetEnv("TR_UseTranslateInTrees");

   if (performTransformation(comp(), "O^O Inliner: Generating compressedRefs anchor for node [%p]\n", address))
      {
      TR::Node *value = address;
      if (pEnv && (isLoad < 0)) // store
         value = address->getSecondChild();
      TR::Node *newAddress = TR::Node::createCompressedRefsAnchor(value);
      //traceMsg(comp(), "compressedRefs anchor %p generated\n", newAddress);
      if (!pEnv && genTT)
         {
         if (!newAddress->getOpCode().isTreeTop())
            newAddress = TR::Node::create(TR::treetop, 1, newAddress);
         }
      else
         return newAddress;
      }
   return NULL;
   }
536
537
538
// Build the effective address for an Unsafe access: base object (child 1)
// plus the offset (child 2) with the low tag bits masked off via
// J9_SUN_FIELD_OFFSET_MASK. On 32-bit targets the 64-bit offset is first
// truncated with l2i.
TR::Node *
TR_J9InlinerPolicy::createUnsafeAddressWithOffset(TR::Node * unsafeCall)
   {
   TR::Node *baseObject = unsafeCall->getChild(1);
   TR::Node *rawOffset = unsafeCall->getChild(2);

   if (comp()->target().is64Bit())
      {
      TR::Node *mask = TR::Node::lconst(unsafeCall, ~(J9_SUN_FIELD_OFFSET_MASK));
      TR::Node *maskedOffset = TR::Node::create(TR::land, 2, rawOffset, mask);
      return TR::Node::create(TR::aladd, 2, baseObject, maskedOffset);
      }

   TR::Node *narrowedOffset = TR::Node::create(TR::l2i, 1, rawOffset);
   TR::Node *mask = TR::Node::iconst(unsafeCall, ~(J9_SUN_FIELD_OFFSET_MASK));
   TR::Node *maskedOffset = TR::Node::create(TR::iand, 2, narrowedOffset, mask);
   return TR::Node::create(TR::aiadd, 2, baseObject, maskedOffset);
   }
549
550
void
551
TR_J9InlinerPolicy::createTempsForUnsafeCall( TR::TreeTop *callNodeTreeTop, TR::Node * unsafeCallNode )
552
{
553
//This method is intended to replace and generalize createTempsForUnsafePutGet
554
//Will go through all children of the unsafeCallNode and create a store tree that child, insert it before the call, and then replace the call's child with a load of the symref.
555
556
for (int32_t i = 0 ; i < unsafeCallNode->getNumChildren() ; ++i)
557
{
558
TR::Node *child = unsafeCallNode->getChild(i);
559
560
561
//create a store of the correct type and insert before call.
562
563
TR::DataType dataType = child->getDataType();
564
TR::SymbolReference *newSymbolReference = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
565
566
TR::Node *storeNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, child, newSymbolReference);
567
TR::TreeTop *storeTree = TR::TreeTop::create(comp(), storeNode);
568
569
debugTrace(tracer(),"Creating store node %p with child %p",storeNode,child);
570
571
callNodeTreeTop->insertBefore(storeTree);
572
573
// Replace the old child with a load of the new sym ref
574
TR::Node *value = TR::Node::createWithSymRef(child, comp()->il.opCodeForDirectLoad(dataType), 0, newSymbolReference);
575
576
debugTrace(tracer(),"Replacing callnode %p child %p with %p",unsafeCallNode,unsafeCallNode->getChild(i),value);
577
578
unsafeCallNode->setAndIncChild(i, value);
579
child->recursivelyDecReferenceCount();
580
581
}
582
}
583
// Spill the operands of an Unsafe get/put call into temps so they can be
// re-loaded in the expanded access trees that the inliner generates later.
//
// For child 1 (the address/object) and child 2 (the offset) — and, for puts,
// child 3 (the value) — this: creates a temp of the operand's type, stores
// the operand into it before the call, and replaces the call's child with a
// load of that temp. The reference parameters 'unsafeAddress' and 'offset'
// are updated in place to point at the new load nodes, and the address temp's
// symref is returned via 'newSymbolReferenceForAddress' for use by the
// null-check generation.
void
TR_J9InlinerPolicy::createTempsForUnsafePutGet(TR::Node*& unsafeAddress,
                                               TR::Node* unsafeCall,
                                               TR::TreeTop* callNodeTreeTop,
                                               TR::Node*& offset,
                                               TR::SymbolReference*& newSymbolReferenceForAddress,
                                               bool isUnsafeGet)
   {
   // --- child 1: the address/object operand ---
   TR::Node *oldUnsafeAddress = unsafeAddress;
   TR::DataType dataType = unsafeAddress->getDataType();
   TR::SymbolReference *newSymbolReference =
      comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
   newSymbolReferenceForAddress = newSymbolReference;
   TR::Node *storeNode =
      TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(unsafeAddress->getDataType()),
                       1, 1, unsafeAddress, newSymbolReference);
   TR::TreeTop *storeTree = TR::TreeTop::create(comp(), storeNode);

   if (tracer()->debugLevel())
      {
      debugTrace(tracer(), "\tIn createTempsForUnsafePutGet. inserting store Tree before callNodeTT:\n");
      comp()->getDebug()->print(comp()->getOutFile(), storeTree);
      }

   callNodeTreeTop->insertTreeTopsBeforeMe(storeTree);

   // Replace the old child with a load of the new sym ref
   unsafeAddress =
      TR::Node::createWithSymRef(unsafeAddress,
                       comp()->il.opCodeForDirectLoad(unsafeAddress->getDataType()),
                       0, newSymbolReference);

   debugTrace(tracer(), "\tIn createTempsForUnsafePutGet. replacing unsafeCall ( %p) child %p with %p\n", unsafeCall, unsafeCall->getChild(1), unsafeAddress);

   unsafeCall->setAndIncChild(1, unsafeAddress);

   // --- child 2: the offset operand ---
   TR::Node *oldOffset = offset;
   dataType = offset->getDataType();
   newSymbolReference = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(),
                                                                dataType);
   storeNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(offset->getDataType()),
                                          1, 1, offset, newSymbolReference);
   storeTree = TR::TreeTop::create(comp(), storeNode);

   if (tracer()->debugLevel())
      {
      traceMsg(comp(), "\tIn createTempsForUnsafePutGet. inserting store Tree before callNodeTT 2:\n");
      comp()->getDebug()->print(comp()->getOutFile(), storeTree);
      }

   callNodeTreeTop->insertTreeTopsBeforeMe(storeTree);

   // Replace the old child with a load of the new sym ref
   offset = TR::Node::createWithSymRef(offset,
                             comp()->il.opCodeForDirectLoad(offset->getDataType()),
                             0, newSymbolReference);

   debugTrace(tracer(), "\tIn createTempsForUnsafePutGet. replacing unsafeCall ( %p) child %p with %p\n", unsafeCall, unsafeCall->getChild(2), offset);

   unsafeCall->setAndIncChild(2, offset);

   // --- child 3: the value operand (puts only) ---
   if (!isUnsafeGet)
      {
      TR::Node *value = unsafeCall->getChild(3);
      TR::Node *oldValue = value;
      dataType = value->getDataType();
      newSymbolReference =
         comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
      storeNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(value->getDataType()), 1, 1, value, newSymbolReference);
      storeTree = TR::TreeTop::create(comp(), storeNode);
      callNodeTreeTop->insertTreeTopsBeforeMe(storeTree);

      // Replace the old child with a load of the new sym ref
      value = TR::Node::createWithSymRef(value,
                               comp()->il.opCodeForDirectLoad(value->getDataType()),
                               0, newSymbolReference);
      unsafeCall->setAndIncChild(3, value);
      oldValue->recursivelyDecReferenceCount();
      }

   // Release the references the call used to hold on the original operands.
   oldUnsafeAddress->recursivelyDecReferenceCount();
   oldOffset->recursivelyDecReferenceCount();
   }
665
// Extra boolean is for AOT case when we can not simplify the Class Test so we need Array Case in that scenario, But we can put indirect in cold instead of fallthrough for the tests
666
// Build the branch that tests the low tag bit of an Unsafe offset.
//
// The low bit is tagged when the object being dereferenced is a
// java/lang/Class object, in which case an extra level of indirection is
// needed. 'isNotLowTagged' selects the branch polarity (branch when NOT
// tagged vs. when tagged).
TR::TreeTop*
TR_J9InlinerPolicy::genClassCheckForUnsafeGetPut(TR::Node* offset, bool isNotLowTagged)
   {
   const bool is32BitOffset = (offset->getOpCodeValue() == TR::iload);

   // Mask out everything but the tag bit.
   TR::Node *maskedBit = is32BitOffset
      ? TR::Node::create(TR::iand, 2, offset, TR::Node::iconst(1))
      : TR::Node::create(TR::land, 2, offset, TR::Node::lconst(1));

   TR::ILOpCodes branchOp;
   if (isNotLowTagged)
      branchOp = is32BitOffset ? TR::ificmpne : TR::iflcmpne;
   else
      branchOp = is32BitOffset ? TR::ificmpeq : TR::iflcmpeq;

   // Compare the masked bit against the constant 1 (second child of the AND).
   TR::Node *branch = TR::Node::createif(branchOp, maskedBit, maskedBit->getSecondChild(), NULL);
   return TR::TreeTop::create(comp(), branch);
   }
686
687
688
// Build the branch that tests the low tag bit of an Unsafe offset, branching
// when the bit is NOT set.
//
// The low bit is tagged when the object being dereferenced is a
// java/lang/Class object, in which case an extra level of indirection is
// needed.
TR::TreeTop*
TR_J9InlinerPolicy::genClassCheckForUnsafeGetPut(TR::Node* offset)
   {
   const bool is32BitOffset = (offset->getOpCodeValue() == TR::iload);

   // Constant 1 of the matching width.
   TR::Node *tagConst = TR::Node::create(offset, is32BitOffset ? TR::iconst : TR::lconst, 0, 0);
   if (is32BitOffset)
      tagConst->setInt(1);
   else
      tagConst->setLongInt(1);

   // Mask out everything but the tag bit.
   TR::Node *maskedBit = TR::Node::create(is32BitOffset ? TR::iand : TR::land, 2, offset, tagConst);

   // Branch when the tag bit is not set (compare against the constant child).
   TR::Node *branch = TR::Node::createif(is32BitOffset ? TR::ificmpne : TR::iflcmpne,
                                         maskedBit, maskedBit->getSecondChild(), NULL);
   return TR::TreeTop::create(comp(), branch);
   }
709
710
711
// Duplicate the expanded Unsafe access tree and rewrite it into the "direct
// access" form: the aiadd/aladd address computation's second child (the
// masked offset expression) is replaced by that expression's first child,
// i.e. the unmasked offset load. Returns the new treetop; the caller wires it
// into the diamond alongside the indirect/array variants.
TR::TreeTop*
TR_J9InlinerPolicy::genDirectAccessCodeForUnsafeGetPut(TR::Node* callNode,
                                                       bool conversionNeeded, bool isUnsafeGet)
   {
   //Generate the code for the direct access
   TR::Node *directAccessNode = callNode->duplicateTree();
   TR::TreeTop *directAccessTreeTop = TR::TreeTop::create(comp(), directAccessNode, NULL, NULL);
   TR::Node* firstChild = directAccessNode->getFirstChild();

   // Walk down to the address computation (aiadd/aladd) under the access.
   if (isUnsafeGet) {
      firstChild = firstChild->getFirstChild();
      //if there is a conversion node we need to go one level deeper
      if (conversionNeeded)
         firstChild = firstChild->getFirstChild();
      }
   else {
      // if there is an anchor, the store is one level below
      if (directAccessNode->getOpCodeValue() == TR::compressedRefs)
         firstChild = firstChild->getFirstChild();
      }

   TR_ASSERT(((firstChild->getOpCodeValue() == TR::aiadd) ||
              (firstChild->getOpCodeValue() == TR::aladd)), "Unexpected opcode in unsafe access\n");
   // Replace the masked-offset expression with its first child (the raw
   // offset) and drop the reference to the old subtree.
   TR::Node *grandChild = firstChild->getSecondChild();
   firstChild->setAndIncChild(1, grandChild->getFirstChild());
   grandChild->recursivelyDecReferenceCount();

   // If a conversion is needed, the 'callNode' was constructed earlier on in createUnsafe(get/put)WithOffset
   // While some of the children end up in the final trees, this constructed callNode does not.
   // We need to dec the reference count otherwise a child of the callNode that ends up in the final trees will have an extra refcount and will cause
   // an assert
   if(conversionNeeded)
      {
      for(int32_t i=0 ; i< callNode->getNumChildren(); i++)
         {
         debugTrace(tracer(), "\t In genDirectAccessCodeForUnsafeGetPut, recursively dec'ing refcount of %p:\n", callNode->getChild(i));

         callNode->getChild(i)->recursivelyDecReferenceCount();
         }
      }

   return directAccessTreeTop;
   }
754
755
756
// Build the indirect-access (static field, java/lang/Class) variant of an
// Unsafe get/put: rewrite 'unsafeAddress' to load the RAM statics base via
// the j9class, duplicate the (possibly temp-store-wrapped) access tree, and
// retarget the duplicate's access node at a static-variant unsafe symref.
// Returns the treetop holding the duplicated tree.
TR::TreeTop*
TR_J9InlinerPolicy::genIndirectAccessCodeForUnsafeGetPut(TR::Node* directAccessOrTempStoreNode, TR::Node* unsafeAddress)
   {
   // Generate the indirect access code in the java/lang/Class case. First modify unsafeAddress which is a descendant
   // of directAccessOrTempStoreNode and then make a copy of directAccessOrTempStoreNode.
   TR::Node *oldAddressFirstChild = unsafeAddress->getFirstChild();
   // java/lang/Class object -> j9class -> RAM statics base.
   TR::Node *addressFirstChild =
      TR::Node::createWithSymRef(TR::aloadi, 1, 1, oldAddressFirstChild,
                       comp()->getSymRefTab()->findOrCreateClassFromJavaLangClassSymbolRef());
   addressFirstChild =
      TR::Node::createWithSymRef(TR::aloadi, 1, 1, addressFirstChild,
                       comp()->getSymRefTab()->findOrCreateRamStaticsFromClassSymbolRef());
   unsafeAddress->setAndIncChild(0, addressFirstChild);
   oldAddressFirstChild->recursivelyDecReferenceCount();

   TR::Node* indirectAccessOrTempStoreNode = directAccessOrTempStoreNode->duplicateTree();

   // The directAccessNode has been replaced with the corresponding get (load)/put (store) node at this point. The
   // purpose of this function is to create the indirect access (static access) for an unsafe get/put operation
   // since in general we do not know whether the target of our get/put operation is to a static or instance field.
   // As such for the indirect access (static access) case we need to reassign the symbol reference after duplicating
   // the direct access node.

   TR::Symbol* directSymbol = directAccessOrTempStoreNode->getSymbolReference()->getSymbol();

   if (!directSymbol->isUnsafeShadowSymbol())
      {
      // We may have generated a store to a temp in case of a get operation
      directSymbol = directAccessOrTempStoreNode->getFirstChild()->getSymbolReference()->getSymbol();
      }

   // Sanity check. Note this is fatal because under concurrent scavenge we could potentially miss a read barrier here.
   TR_ASSERT_FATAL(directSymbol->isUnsafeShadowSymbol(), "Expected to find an unsafe symbol for the get/put operation.");

   TR::Node* indirectAccessNode = indirectAccessOrTempStoreNode;

   TR::Symbol* indirectSymbol = indirectAccessOrTempStoreNode->getSymbolReference()->getSymbol();

   if (!indirectSymbol->isUnsafeShadowSymbol())
      {
      // We may have generated a store to a temp in case of a get operation
      indirectAccessNode = indirectAccessOrTempStoreNode->getFirstChild();
      }

   // Static-variant unsafe symref with the same data type and volatility.
   TR::SymbolReference* indirectSymRef = comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(directSymbol->getDataType(), true, true, directSymbol->isVolatile());

   indirectAccessNode->setSymbolReference(indirectSymRef);

   return TR::TreeTop::create(comp(), indirectAccessOrTempStoreNode, NULL, NULL);
   }
806
807
// Build the NULL-comparison diamond around an inlined Unsafe.get/put call.
// The comparison reloads the unsafe object from its temp, branches when it is
// NULL, and splits the call's block into if/else/join blocks. Returns the
// join (merge) block; the CFG is completed later by genCodeForUnsafeGetPut.
TR::Block *
TR_J9InlinerPolicy::addNullCheckForUnsafeGetPut(TR::Node* unsafeAddress,
                                                TR::SymbolReference* newSymbolReferenceForAddress,
                                                TR::TreeTop* callNodeTreeTop,
                                                TR::TreeTop* directAccessTreeTop,
                                                TR::TreeTop* arrayDirectAccessTreeTop,
                                                TR::TreeTop* indirectAccessTreeTop)
   {
   // Reload the unsafe object out of its temp slot and compare against aconst NULL.
   TR::Node *objectLoad =
      TR::Node::createWithSymRef(unsafeAddress,
                                 comp()->il.opCodeForDirectLoad(unsafeAddress->getDataType()),
                                 0, newSymbolReferenceForAddress);
   TR::Node *nullConstant = TR::Node::create(objectLoad, TR::aconst, 0, 0);
   TR::Node *branchNode = TR::Node::createif(TR::ifacmpeq, objectLoad, nullConstant, NULL);
   TR::TreeTop *comparisonTree = TR::TreeTop::create(comp(), branchNode, NULL, NULL);

   // When a "direct access with conversion" tree exists (sub-int element sizes),
   // the taken side of the branch runs it and the fall-through is the indirect
   // access; otherwise taken = indirect access, fall-through = direct access.
   TR::TreeTop *takenSide;
   TR::TreeTop *fallThroughSide;
   if (arrayDirectAccessTreeTop)
      {
      takenSide = arrayDirectAccessTreeTop;
      fallThroughSide = indirectAccessTreeTop;
      }
   else
      {
      takenSide = indirectAccessTreeTop;
      fallThroughSide = directAccessTreeTop;
      }

   // Split the enclosing block into a diamond and hand back the merge point.
   TR::Block *enclosingBlock = callNodeTreeTop->getEnclosingBlock();
   return enclosingBlock->createConditionalBlocksBeforeTree(callNodeTreeTop,
                                                            comparisonTree,
                                                            takenSide,
                                                            fallThroughSide,
                                                            comp()->getFlowGraph(),
                                                            false, false);
   }
836
837
void
838
TR_J9InlinerPolicy::createAnchorNodesForUnsafeGetPut(TR::TreeTop* treeTop,
839
TR::DataType type, bool isUnsafeGet)
840
{
841
if (comp()->useCompressedPointers() && (type == TR::Address))
842
{
843
// create the anchor node only for the non-tagged case
844
845
TR::Node* node = treeTop->getNode();
846
TR::TreeTop *compRefTT =
847
TR::TreeTop::create(comp(), genCompressedRefs(isUnsafeGet?node->getFirstChild():node,
848
false));
849
if (compRefTT)
850
{
851
TR::TreeTop *prevTT = treeTop->getPrevTreeTop();
852
prevTT->join(compRefTT);
853
compRefTT->join(isUnsafeGet?treeTop:treeTop->getNextTreeTop());
854
}
855
}
856
}
857
858
// Complete the CFG for an inlined Unsafe.get/put: starting from the NULL-check
// diamond built by addNullCheckForUnsafeGetPut, insert the low-tag (offset)
// test and either an isArray test or an is-java/lang/Class test, and wire the
// direct / indirect / array-direct-access blocks together. Optionally adds the
// NULLCHK tree and, for ordered puts, prepends the ordered call in the join block.
void
TR_J9InlinerPolicy::genCodeForUnsafeGetPut(TR::Node* unsafeAddress,
                                           TR::TreeTop* callNodeTreeTop,
                                           TR::TreeTop* prevTreeTop,
                                           TR::SymbolReference* newSymbolReferenceForAddress,
                                           TR::TreeTop* directAccessTreeTop,
                                           TR::TreeTop* lowTagCmpTree,
                                           bool needNullCheck, bool isUnsafeGet,
                                           bool conversionNeeded,
                                           TR::Block * joinBlock,
                                           TR_OpaqueClassBlock *javaLangClass,
                                           TR::Node* orderedCallNode = NULL)
   {
   TR::CFG *cfg = comp()->getFlowGraph();
   TR::Block *nullComparisonBlock = prevTreeTop->getEnclosingBlock();
   TR::TreeTop* nullComparisonTree = nullComparisonBlock->getLastRealTreeTop();
   // Note: the two trees below are captured but not read later in this function.
   TR::TreeTop *nullComparisonEntryTree = nullComparisonBlock->getEntry();
   TR::TreeTop *nullComparisonExitTree = nullComparisonBlock->getExit();
   // The arrayDirectAccessBlock only exists when conversionNeeded is true
   // (addNullCheckForUnsafeGetPut made it the branch target in that case).
   TR::Block *arrayDirectAccessBlock = conversionNeeded ? nullComparisonTree->getNode()->getBranchDestination()->getNode()->getBlock() : NULL;
   TR::Block *indirectAccessBlock;
   TR::Block * directAccessBlock;
   if (conversionNeeded)
      {
      // Generating block for direct access: the diamond so far has only the
      // array-direct-access and indirect-access paths, so build a fresh block
      // for the plain direct access and jump to the join afterwards.
      indirectAccessBlock = nullComparisonBlock->getNextBlock();
      directAccessBlock = TR::Block::createEmptyBlock(lowTagCmpTree->getNode(), comp(),
                                                      indirectAccessBlock->getFrequency());
      directAccessBlock->append(directAccessTreeTop);
      directAccessBlock->append(TR::TreeTop::create(comp(),
                                                    TR::Node::create(directAccessTreeTop->getNode(),
                                                                     TR::Goto, 0, joinBlock->getEntry())));
      arrayDirectAccessBlock->getExit()->insertTreeTopsAfterMe(directAccessBlock->getEntry(),
                                                               directAccessBlock->getExit());
      cfg->addNode(directAccessBlock);
      cfg->addEdge(TR::CFGEdge::createEdge(directAccessBlock, joinBlock, trMemory()));
      }
   else
      {
      // No conversion: the fall-through of the null comparison is the direct
      // access and its branch target is the indirect access, which is expected
      // to be rarely taken — mark it cold and retarget the branch.
      directAccessBlock = nullComparisonBlock->getNextBlock();
      indirectAccessBlock = nullComparisonTree->getNode()->getBranchDestination()->getNode()->getBlock();
      indirectAccessBlock->setFrequency(VERSIONED_COLD_BLOCK_COUNT);
      indirectAccessBlock->setIsCold();
      nullComparisonTree->getNode()->setBranchDestination(directAccessBlock->getEntry());
      }

   debugTrace(tracer(), "\t In genCodeForUnsafeGetPut, Block %d created for direct Access\n", directAccessBlock->getNumber());

   //Generating block for lowTagCmpTree
   TR::Block *lowTagCmpBlock =
      TR::Block::createEmptyBlock(unsafeAddress, comp(), conversionNeeded ? indirectAccessBlock->getFrequency() : directAccessBlock->getFrequency());
   lowTagCmpBlock->append(lowTagCmpTree);
   cfg->addNode(lowTagCmpBlock);
   debugTrace(tracer(), "\t In genCodeForUnsafeGetPut, Block %d created for low tag comparison\n", lowTagCmpBlock->getNumber());

   // Load the VFT (class) pointer of the unsafe object; used by both test shapes below.
   TR::Node *vftLoad = TR::Node::createWithSymRef(TR::aloadi, 1, 1, TR::Node::createWithSymRef(unsafeAddress, comp()->il.opCodeForDirectLoad(unsafeAddress->getDataType()), 0, newSymbolReferenceForAddress), comp()->getSymRefTab()->findOrCreateVftSymbolRef());
   TR::TreeTop *isArrayTreeTop;
   TR::Block *isArrayBlock;
   TR::TreeTop *isClassTreeTop;
   TR::Block *isClassBlock;
   // If we need conversion or java/lang/Class is not loaded yet, we generate old sequence of tests
   if (conversionNeeded || javaLangClass == NULL)
      {
      // isArray test: load the class flags, mask with the array-check flag, and
      // compare equal (i.e. branch when the object IS an array).
      TR::Node *isArrayField = NULL;
      if (comp()->target().is32Bit())
         {
         isArrayField = TR::Node::createWithSymRef(TR::iloadi, 1, 1, vftLoad, comp()->getSymRefTab()->findOrCreateClassAndDepthFlagsSymbolRef());
         }
      else
         {
         // On 64-bit the flags field is a long; truncate to int for the mask test.
         isArrayField = TR::Node::createWithSymRef(TR::lloadi, 1, 1, vftLoad, comp()->getSymRefTab()->findOrCreateClassAndDepthFlagsSymbolRef());
         isArrayField = TR::Node::create(TR::l2i, 1, isArrayField);
         }
      TR::Node *andConstNode = TR::Node::create(isArrayField, TR::iconst, 0, TR::Compiler->cls.flagValueForArrayCheck(comp()));
      TR::Node * andNode = TR::Node::create(TR::iand, 2, isArrayField, andConstNode);
      TR::Node *isArrayNode = TR::Node::createif(TR::ificmpeq, andNode, andConstNode, NULL);
      isArrayTreeTop = TR::TreeTop::create(comp(), isArrayNode, NULL, NULL);
      isArrayBlock = TR::Block::createEmptyBlock(vftLoad, comp(), indirectAccessBlock->getFrequency());
      isArrayBlock->append(isArrayTreeTop);
      cfg->addNode(isArrayBlock);
      // Arrays take the conversion path when one exists; otherwise plain direct access.
      isArrayNode->setBranchDestination(conversionNeeded ? arrayDirectAccessBlock->getEntry() : directAccessBlock->getEntry());
      if (conversionNeeded)
         {
         // Low-tag test falls through to indirect access, branches to direct access.
         indirectAccessBlock->getEntry()->insertTreeTopsBeforeMe(lowTagCmpBlock->getEntry(), lowTagCmpBlock->getExit());
         lowTagCmpTree->getNode()->setBranchDestination(directAccessBlock->getEntry());
         }
      else
         {
         // Low-tag test falls through to direct access, branches to indirect access.
         traceMsg(comp(),"\t\t Generating an isArray test as j9class of java/lang/Class is NULL");
         directAccessBlock->getEntry()->insertTreeTopsBeforeMe(lowTagCmpBlock->getEntry(), lowTagCmpBlock->getExit());
         lowTagCmpTree->getNode()->setBranchDestination(indirectAccessBlock->getEntry());
         }
      lowTagCmpBlock->getEntry()->insertTreeTopsBeforeMe(isArrayBlock->getEntry(),
                                                         isArrayBlock->getExit());
      cfg->addEdge(TR::CFGEdge::createEdge(isArrayBlock, lowTagCmpBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(lowTagCmpBlock, indirectAccessBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(isArrayBlock, conversionNeeded ? arrayDirectAccessBlock : directAccessBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(nullComparisonBlock, isArrayBlock, trMemory()));

      debugTrace(tracer(), "\t In genCodeForUnsafeGetPut, Block %d created for array check\n", isArrayBlock->getNumber());
      }
   else
      {
      // Following sequence of code generate isClassTest.
      // ifacmpeq goto indirectAccess
      //    aload vft-symbol
      //    aconst J9Class of java/lang/Class

      // Note for loadJavaLangClass node:
      // AOT Relocation relies on guards to locate correct method for a call site, while inlined Unsafe calls do not have a guard.
      // Therefore J9Class of java/lang/Class cannot be relocated correctly. Inserting any guard for an inlined Unsafe call is potentially
      // expensive, especially for applications that intensively use Unsafe, such as Apache Spark.
      // As a compromise, the node is given such BCI as if it is generated from the out-most call, so that J9Class can be correctly
      // relocated without any guard.
      TR::Node *loadJavaLangClass = TR::Node::createAddressNode(vftLoad, TR::aconst,(uintptr_t) javaLangClass);
      loadJavaLangClass->getByteCodeInfo().setInvalidCallerIndex();
      loadJavaLangClass->getByteCodeInfo().setZeroByteCodeIndex();
      loadJavaLangClass->setIsClassPointerConstant(true);

      TR::Node *isClassNode = TR::Node::createif(TR::ifacmpeq, vftLoad, loadJavaLangClass, NULL);
      isClassTreeTop = TR::TreeTop::create(comp(), isClassNode, NULL, NULL);
      isClassBlock = TR::Block::createEmptyBlock(vftLoad, comp(), directAccessBlock->getFrequency());
      isClassBlock->append(isClassTreeTop);
      cfg->addNode(isClassBlock);
      // Order: lowTag test -> isClass test -> directAccess fall-through;
      // both the lowTag branch and the isClass fall-through reach direct access,
      // only a low-tagged java/lang/Class object reaches indirect access.
      directAccessBlock->getEntry()->insertTreeTopsBeforeMe(isClassBlock->getEntry(), isClassBlock->getExit());
      lowTagCmpTree->getNode()->setBranchDestination(directAccessBlock->getEntry());
      isClassNode->setBranchDestination(indirectAccessBlock->getEntry());
      isClassBlock->getEntry()->insertTreeTopsBeforeMe(lowTagCmpBlock->getEntry(), lowTagCmpBlock->getExit());
      cfg->addEdge(TR::CFGEdge::createEdge(isClassBlock,directAccessBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(isClassBlock,indirectAccessBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(nullComparisonBlock, lowTagCmpBlock, trMemory()));
      cfg->addEdge(TR::CFGEdge::createEdge(lowTagCmpBlock, isClassBlock, trMemory()));

      debugTrace(tracer(), "\t In genCodeForUnsafeGetPut, Block %d created for isClass Test\n", isClassBlock->getNumber());
      }
   cfg->addEdge(TR::CFGEdge::createEdge(lowTagCmpBlock, directAccessBlock, trMemory()));
   // The null-comparison branch no longer targets the indirect access block.
   cfg->removeEdge(nullComparisonBlock, indirectAccessBlock);
   if (needNullCheck)
      {
      // Re-insert an explicit NULLCHK on the unsafe object ahead of the comparison.
      TR::TreeTop *treeBeforeCmp = nullComparisonTree->getPrevTreeTop();
      TR::TreeTop *nullchkTree =
         TR::TreeTop::create(comp(), treeBeforeCmp,
                             TR::Node::createWithSymRef(TR::NULLCHK, 1, 1,
                                                        TR::Node::create(TR::PassThrough, 1,
                                                                         TR::Node::createWithSymRef(unsafeAddress,
                                                                                                    comp()->il.opCodeForDirectLoad(unsafeAddress->getDataType()),
                                                                                                    0, newSymbolReferenceForAddress)),
                                                        comp()->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp()->getMethodSymbol())
                                                        )
                             );
      nullchkTree->getNode()->getByteCodeInfo().setCallerIndex(comp()->getCurrentInlinedSiteIndex());
      }

   // For putOrdered variants, keep the (non-inlined) ordered call at the merge point.
   if (!isUnsafeGet && joinBlock && orderedCallNode)
      {
      TR::TreeTop *orderedCallTree = TR::TreeTop::create(comp(), orderedCallNode);
      joinBlock->prepend(orderedCallTree);
      }
   }
1018
1019
/*
1020
Converting Unsafe.get/put* routines into inlined code involves two cases:
1021
1) if the size of the element to put/get is 4 bytes or more
1022
2) if the size of the element to put/get is less than 4 bytes (boolean, byte, char, short)
1023
1024
In (1), there are two alternatives on how to read from/write to the object: direct and
1025
indirect write/read. The selection of alternatives is done by looking at three conditions:
1026
a) whether the object is NULL
1027
b) whether the object is array
1028
c) whether the object is of type java.lang.Class
1029
The pseudocode of the generated inline code for case (1) under normal compilation is :
1030
if (object == NULL)
1031
use direct access
1032
else if (offset is not low tagged)
1033
use direct access
1034
else if (object is type of java/lang/Class)
1035
use indirect access
1036
else
1037
use direct access
1038
1039
If we can not get the J9Class of java/lang/Class, we generate following sequence of tests
1040
if (object == NULL)
1041
use direct access
1042
else if (object is Array type)
1043
use direct access
1044
else if (offset is low tagged)
1045
use indirect access
1046
else
1047
use direct access
1048
1049
1050
In (2), there are three alternatives on how to read from/write the object. direct,
1051
direct with conversion, indirect. The same three conditions are used to decide which one
1052
to use based on the following pseudocode:
1053
if (object is NULL)
1054
use direct access with conversion
1055
else if (object is array)
1056
use direct access with conversion
1057
else if (object is of type Class)
1058
use indirect access
1059
else
1060
use direct access
1061
1062
- genClassCheckForUnsafeGetPut builds the treetop for condition (c) above.
1063
- genDirectAccessCodeForUnsafeGetPut completes the building of treetop for both "direct access" and
1064
"direct access with conversion"
1065
- genIndirectAccessCodeForUnsafeGetPut builds the treetop for indirect access
1066
- addNullCheckForUnsafeGetPut builds node for NULLness check (condition (a) above) and
1067
builds a diamond CFG based on that. The CFG will be completed in later stages.
1068
- createAnchorNodesForUnsafeGetPut creates compressed references in case they are needed
1069
- genCodeForUnsafeGetPut completes the CFG/code by adding the array check, Class check,
1070
and the direct access code.
1071
1072
Note that in case (2), i.e., when the conversion is needed, we generate code like the
1073
following for the "direct access with conversion" for Unsafe.getByte
1074
b2i
1075
ibload
1076
aiadd
1077
while the direct access code looks like
1078
iiload
1079
aiadd
1080
We will replace b2i and ibload by c2iu and icload for Unsafe.getChar, by
1081
s2i and isload for Unsafe.getShort, and by bu2i and ibload for Unsafe.getBoolean
1082
1083
For Unsafe.putByte and Unsafe.putBoolean, we generate
1084
ibstore
1085
i2b
1086
<some load node>
1087
We replace i2b and ibstore by i2c and icstore for Unsafe.getChar, and by i2s and isstore for
1088
Unsafe.getShort.
1089
*/
1090
1091
bool
1092
TR_J9InlinerPolicy::createUnsafePutWithOffset(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * unsafeCall, TR::DataType type, bool isVolatile, bool needNullCheck, bool isOrdered)
1093
{
1094
if (isVolatile && type == TR::Int64 && comp()->target().is32Bit() && !comp()->cg()->getSupportsInlinedAtomicLongVolatiles())
1095
return false;
1096
if (debug("traceUnsafe"))
1097
printf("createUnsafePutWithOffset %d in %s\n", type.getDataType(), comp()->signature());
1098
1099
debugTrace(tracer(), "\tcreateUnsafePutWithOffset. call tree %p offset(datatype) %d isvolatile %d needNullCheck %d isOrdered %d\n", callNodeTreeTop, type.getDataType(), isVolatile, needNullCheck, isOrdered);
1100
1101
// Truncate the value before inlining the call
1102
if (TR_J9MethodBase::isUnsafeGetPutBoolean(calleeSymbol->getRecognizedMethod()))
1103
{
1104
TR::TransformUtil::truncateBooleanForUnsafeGetPut(comp(), callNodeTreeTop);
1105
}
1106
1107
// Preserve null check on the unsafe object
1108
TR::TransformUtil::separateNullCheck(comp(), callNodeTreeTop, tracer()->debugLevel());
1109
1110
// Since the block has to be split, we need to create temps for the arguments to the call
1111
for (int i = 0; i < unsafeCall->getNumChildren(); i++)
1112
{
1113
TR::Node* child = unsafeCall->getChild(i);
1114
TR::Node* newChild = TR::TransformUtil::saveNodeToTempSlot(comp(), child, callNodeTreeTop);
1115
unsafeCall->setAndIncChild(i, newChild);
1116
child->recursivelyDecReferenceCount();
1117
}
1118
1119
TR::Node *offset = unsafeCall->getChild(2);
1120
TR::TreeTop *prevTreeTop = callNodeTreeTop->getPrevTreeTop();
1121
TR::SymbolReference *newSymbolReferenceForAddress = unsafeCall->getChild(1)->getSymbolReference();
1122
TR::SymbolReference * symRef = comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(type, true, false, isVolatile);
1123
TR::Node *orderedCallNode = NULL;
1124
1125
if (isOrdered)
1126
{
1127
symRef->getSymbol()->setOrdered();
1128
orderedCallNode = callNodeTreeTop->getNode()->duplicateTree();
1129
orderedCallNode->getFirstChild()->setDontInlinePutOrderedCall();
1130
1131
debugTrace(tracer(), "\t Duplicate Tree for ordered call, orderedCallNode = %p\n", orderedCallNode);
1132
}
1133
1134
static char *disableIllegalWriteReport = feGetEnv("TR_DisableIllegalWriteReport");
1135
TR::TreeTop* reportFinalFieldModification = NULL;
1136
if (!disableIllegalWriteReport && !comp()->getOption(TR_DisableGuardedStaticFinalFieldFolding))
1137
{
1138
reportFinalFieldModification = TR::TransformUtil::generateReportFinalFieldModificationCallTree(comp(), unsafeCall->getArgument(1)->duplicateTree());
1139
}
1140
1141
TR::Node * unsafeAddress = createUnsafeAddressWithOffset(unsafeCall);
1142
if (tracer()->debugLevel())
1143
{
1144
debugTrace(tracer(), "\t After createUnsafeAddressWithOffset, unsafeAddress = %p : \n", unsafeAddress);
1145
TR::TreeTop *tmpUnsafeAddressTT = TR::TreeTop::create(comp(), unsafeAddress);
1146
comp()->getDebug()->print(comp()->getOutFile(), tmpUnsafeAddressTT);
1147
}
1148
1149
TR::Node* valueWithoutConversion = unsafeCall->getChild(3);
1150
TR::Node* valueWithConversion = NULL;
1151
TR::Node* unsafeNodeWithConversion = NULL;
1152
1153
debugTrace(tracer(), "\tvalueWithouTConversion = %p\n", valueWithoutConversion);
1154
1155
1156
bool conversionNeeded = comp()->fe()->dataTypeForLoadOrStore(type) != type;
1157
if (conversionNeeded)
1158
{
1159
TR::ILOpCodes conversionOpCode =
1160
TR::ILOpCode::getProperConversion(comp()->fe()->dataTypeForLoadOrStore(type), type, true);
1161
TR::Node* conversionNode = TR::Node::create(conversionOpCode,
1162
1, valueWithoutConversion);
1163
valueWithConversion = conversionNode;
1164
unsafeNodeWithConversion = type == TR::Address && (TR::Compiler->om.writeBarrierType() != gc_modron_wrtbar_none)
1165
? TR::Node::createWithSymRef(TR::awrtbari, 3, 3, unsafeAddress, valueWithConversion, unsafeCall->getChild(1), symRef)
1166
: TR::Node::createWithSymRef(comp()->il.opCodeForIndirectArrayStore(type), 2, 2, unsafeAddress, valueWithConversion, symRef);
1167
1168
debugTrace(tracer(), "\tConversion is Needed, conversionNode = %p unsafeNodeWithConversion = %p valueWithConversion = %p\n", conversionNode, unsafeNodeWithConversion, valueWithConversion);
1169
1170
}
1171
TR::Node * unsafeNode = type == TR::Address && (TR::Compiler->om.writeBarrierType() != gc_modron_wrtbar_none)
1172
? TR::Node::createWithSymRef(TR::awrtbari, 3, 3, unsafeAddress, valueWithoutConversion, unsafeCall->getChild(1), symRef)
1173
: TR::Node::createWithSymRef(comp()->il.opCodeForIndirectStore(type), 2, 2, unsafeAddress, valueWithoutConversion, symRef);
1174
1175
1176
TR::TreeTop *oldCallNodeTreeTop = 0; // For Tracing Purposes Only
1177
if (tracer()->debugLevel())
1178
oldCallNodeTreeTop = TR::TreeTop::create(comp(),unsafeCall);
1179
1180
callNodeTreeTop->setNode(unsafeNode);
1181
1182
if (tracer()->debugLevel())
1183
{
1184
debugTrace(tracer(), "\t After callNodeTreeTop setNode callNodeTreeTop dump:\n");
1185
comp()->getDebug()->print(comp()->getOutFile(), callNodeTreeTop);
1186
debugTrace(tracer(), "\t After callNodeTreeTop setNode oldCallNodeTreeTop dump oldCallNodeTreeTop->getNode->getChild = %p:\n", oldCallNodeTreeTop->getNode() ? oldCallNodeTreeTop->getNode()->getFirstChild() : 0);
1187
comp()->getDebug()->print(comp()->getOutFile(), oldCallNodeTreeTop);
1188
}
1189
1190
1191
TR::TreeTop* directAccessTreeTop = genDirectAccessCodeForUnsafeGetPut(unsafeNode, false, false);
1192
1193
if (tracer()->debugLevel())
1194
{
1195
debugTrace(tracer(), "\t After genDirectAccessCodeForUnsafeGetPut, directAccessTreeTop dump:\n");
1196
comp()->getDebug()->print(comp()->getOutFile(), directAccessTreeTop);
1197
}
1198
1199
TR::TreeTop* arrayDirectAccessTreeTop = conversionNeeded
1200
? genDirectAccessCodeForUnsafeGetPut(unsafeNodeWithConversion, conversionNeeded, false)
1201
: NULL;
1202
1203
if (tracer()->debugLevel() && conversionNeeded)
1204
{
1205
debugTrace(tracer(), "\t After genDirectAccessCodeForUnsafeGetPut, arrayDirectAccessTreeTop dump:\n");
1206
comp()->getDebug()->print(comp()->getOutFile(), arrayDirectAccessTreeTop);
1207
}
1208
1209
TR::TreeTop* indirectAccessTreeTop = genIndirectAccessCodeForUnsafeGetPut(callNodeTreeTop->getNode(), unsafeAddress);
1210
1211
if (tracer()->debugLevel())
1212
{
1213
debugTrace(tracer(), "\t After genIndirectAccessCodeForUnsafeGetPut, indirectAccessTreeTop dump:\n");
1214
comp()->getDebug()->print(comp()->getOutFile(), indirectAccessTreeTop);
1215
}
1216
1217
if (indirectAccessTreeTop && indirectAccessTreeTop->getNode() && indirectAccessTreeTop->getNode()->getOpCode().isWrtBar())
1218
{
1219
debugTrace(tracer(), "Setting node %p as an unsafe static wrtbar\n", indirectAccessTreeTop->getNode());
1220
indirectAccessTreeTop->getNode()->setIsUnsafeStaticWrtBar(true);
1221
}
1222
1223
TR_OpaqueClassBlock *javaLangClass = comp()->getClassClassPointer(/* isVettedForAOT = */ true);
1224
// If we are not able to get javaLangClass it is still inefficient to put direct Access far
1225
// So in that case we will generate lowTagCmpTest to branch to indirect access if true
1226
bool needNotLowTagged = javaLangClass != NULL || conversionNeeded ;
1227
TR::TreeTop *lowTagCmpTree = genClassCheckForUnsafeGetPut(offset, needNotLowTagged);
1228
1229
if (tracer()->debugLevel())
1230
{
1231
debugTrace(tracer(), "\t After genClassCheckForUnsafeGetPut, lowTagCmpTree dump:\n");
1232
comp()->getDebug()->print(comp()->getOutFile(), lowTagCmpTree);
1233
}
1234
1235
TR::Block * joinBlock =
1236
addNullCheckForUnsafeGetPut(unsafeAddress, newSymbolReferenceForAddress,
1237
callNodeTreeTop, directAccessTreeTop,
1238
arrayDirectAccessTreeTop,
1239
indirectAccessTreeTop);
1240
1241
debugTrace(tracer(), "\t After addNullCHeckForUnsafeGetPut, joinBlock is %d\n", joinBlock->getNumber());
1242
1243
createAnchorNodesForUnsafeGetPut(directAccessTreeTop, type, false);
1244
if (arrayDirectAccessTreeTop)
1245
createAnchorNodesForUnsafeGetPut(arrayDirectAccessTreeTop, type, false);
1246
genCodeForUnsafeGetPut(unsafeAddress, callNodeTreeTop,
1247
prevTreeTop, newSymbolReferenceForAddress,
1248
directAccessTreeTop,
1249
lowTagCmpTree, needNullCheck, false, conversionNeeded,
1250
joinBlock, javaLangClass, orderedCallNode);
1251
1252
1253
// Test for static final field
1254
if (reportFinalFieldModification)
1255
{
1256
TR::Block* storeToStaticFieldBlock = indirectAccessTreeTop->getEnclosingBlock();
1257
auto isFinalStaticNode = TR::Node::createif(TR::iflcmpeq,
1258
TR::Node::create(TR::land, 2, offset->duplicateTree(), TR::Node::lconst(J9_SUN_FINAL_FIELD_OFFSET_TAG)),
1259
TR::Node::lconst(J9_SUN_FINAL_FIELD_OFFSET_TAG),
1260
NULL /*branchTarget*/);
1261
auto isFinalStaticTreeTop = TR::TreeTop::create(comp(), isFinalStaticNode);
1262
1263
TR::TransformUtil::createConditionalAlternatePath(comp(), isFinalStaticTreeTop, reportFinalFieldModification, storeToStaticFieldBlock, storeToStaticFieldBlock, comp()->getMethodSymbol()->getFlowGraph(), true /*markCold*/);
1264
1265
debugTrace(tracer(), "Created isFinal test node n%dn whose branch target is Block_%d to report illegal write to static final field\n",
1266
isFinalStaticNode->getGlobalIndex(), reportFinalFieldModification->getEnclosingBlock()->getNumber());
1267
1268
TR::DebugCounter::prependDebugCounter(comp(),
1269
TR::DebugCounter::debugCounterName(comp(),
1270
"illegalWriteReport/put/(%s %s)",
1271
comp()->signature(),
1272
comp()->getHotnessName(comp()->getMethodHotness())),
1273
reportFinalFieldModification->getNextTreeTop());
1274
1275
}
1276
1277
unsafeCall->recursivelyDecReferenceCount();
1278
return true;
1279
}
1280
1281
1282
// Transform an inlined Unsafe.monitorEnter/monitorExit call into a monent/monexit
// node under a NULLCHK treetop. Returns the (possibly untransformed) call node;
// bails out untouched when a direct-JNI call does not have the expected shape.
TR::Node *
TR_J9InlinerPolicy::createUnsafeMonitorOp(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * unsafeCall, bool isEnter)
   {
   bool isDirectJNI = unsafeCall->isPreparedForDirectJNI();
   // Expecting directToJNI to have loadaddr children, if not then we had better bail out
   if (isDirectJNI && unsafeCall->getChild(1)->getOpCodeValue() != TR::loadaddr)
      {
      traceMsg(comp(),"Unsafe Inlining: The Unsafe.monitorEnter/Exit() children are not loadaddr's as expected. Not inlining.\n");
      return unsafeCall;
      }

   // Turn the anchoring treetop into a NULLCHK on the monitor object.
   TR::Node::recreate(callNodeTreeTop->getNode(), TR::NULLCHK);
   callNodeTreeTop->getNode()->setSymbolReference(comp()->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp()->getMethodSymbol()));

   // Recreate the call itself as monent or monexit with the matching symref.
   if (isEnter)
      {
      TR::Node::recreate(unsafeCall, TR::monent);
      TR::SymbolReference * monitorEnterSymbolRef = comp()->getSymRefTab()->findOrCreateMonitorEntrySymbolRef(comp()->getMethodSymbol());
      unsafeCall->setSymbolReference(monitorEnterSymbolRef);
      }
   else
      {
      TR::Node::recreate(unsafeCall, TR::monexit);
      TR::SymbolReference * monitorExitSymbolRef = comp()->getSymRefTab()->findOrCreateMonitorExitSymbolRef(comp()->getMethodSymbol());
      unsafeCall->setSymbolReference(monitorExitSymbolRef);
      }

   // Drop the Unsafe receiver (child 0) and keep only the monitor object (child 1).
   TR::Node *oldChild = unsafeCall->getChild(0);
   // Anchor the unused oldChild so its side effects/refcounts stay valid.
   callNodeTreeTop->insertBefore(TR::TreeTop::create(comp(), TR::Node::create(oldChild, TR::treetop, 1, oldChild)));
   if (isDirectJNI)
      {
      // Direct-JNI shape has loadaddr children; turn the object child into a plain aload.
      TR::Node::recreate(unsafeCall->getChild(1), TR::aload);
      }
   unsafeCall->setChild(0, unsafeCall->getChild(1));
   oldChild->recursivelyDecReferenceCount();
   unsafeCall->setChild(1, NULL);
   unsafeCall->setNumChildren(1);

   // Record live-monitor metadata unless disabled: a monitor auto temp on enter,
   // a monexitfence on exit.
   if (!comp()->getOption(TR_DisableLiveMonitorMetadata))
      {
      TR::Node *storeNode = NULL;
      if (isEnter)
         {
         TR::SymbolReference * tempSymRef = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), TR::Address);
         comp()->addAsMonitorAuto(tempSymRef, false);
         storeNode = TR::Node::createStore(tempSymRef, unsafeCall->getChild(0));
         }
      else
         {
         storeNode = TR::Node::create(unsafeCall,TR::monexitfence,0);
         }

      TR::TreeTop *storeTree = TR::TreeTop::create(comp(), storeNode);
      callNodeTreeTop->insertTreeTopsBeforeMe(storeTree);
      }

   // The caller method now contains monitor operations.
   comp()->getMethodSymbol()->setMayContainMonitors(true);
   return unsafeCall;
   }
1343
1344
bool
TR_J9InlinerPolicy::createUnsafeCASCallDiamond( TR::TreeTop *callNodeTreeTop, TR::Node *callNode)
   {
   // This method is used to create an if diamond around a call to any of the unsafe compare and swap methods
   // Codegens have a fast path for the compare and swaps, but cannot deal with the case where the offset value passed in to a the CAS is low tagged
   // (A low tagged offset value means the object being passed in is a java/lang/Class object, and we want a static field)

   // This method assumes the offset node is of type long, and is the second child of the unsafe call.
   TR_InlinerDelimiter delimiter(tracer(),"createUnsafeCASCallDiamond");
   debugTrace(tracer(),"Transforming unsafe callNode = %p",callNode);

   // Spill the call arguments to temps so both sides of the diamond see the same values.
   createTempsForUnsafeCall(callNodeTreeTop, callNode);

   TR::Node *offsetNode = callNode->getChild(2);

   TR::TreeTop *compareTree = genClassCheckForUnsafeGetPut(offsetNode);

   // genClassCheck generates a ifcmpne offset&mask 1, meaning if it IS lowtagged (ie offset&mask == 1), the branch will be taken
   // Taken ("if") side: duplicate of the call, marked safe for the codegen fast path.
   TR::TreeTop *ifTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree());
   ifTree->getNode()->getFirstChild()->setIsSafeForCGToFastPathUnsafeCall(true);

   // Fall-through ("else") side: plain duplicate of the call.
   TR::TreeTop *elseTree = TR::TreeTop::create(comp(),callNodeTreeTop->getNode()->duplicateTree());

   // Mark both duplicated calls as already visited by this inlining pass.
   ifTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount());
   elseTree->getNode()->getFirstChild()->setVisitCount(_inliner->getVisitCount());

   debugTrace(tracer(),"ifTree = %p elseTree = %p",ifTree->getNode(),elseTree->getNode());

   // the call itself may be commoned, so we need to create a temp for the callnode itself
   TR::SymbolReference *newSymbolReference = 0;
   TR::DataType dataType = callNode->getDataType();
   if(callNode->getReferenceCount() > 1)
      {
      // Replace the commoned call node with a load of a fresh temp; the stores
      // into that temp are inserted after each duplicated call below.
      newSymbolReference = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
      TR::Node::recreate(callNode, comp()->il.opCodeForDirectLoad(dataType));
      callNode->setSymbolReference(newSymbolReference);
      callNode->removeAllChildren();

      debugTrace(tracer(),"Unsafe call has refcount > 1. Replacing callnode with a load of symref %d",newSymbolReference->getReferenceNumber());
      }

   TR::Block *callBlock = callNodeTreeTop->getEnclosingBlock();

   callBlock->createConditionalBlocksBeforeTree(callNodeTreeTop,compareTree, ifTree, elseTree, comp()->getFlowGraph(),false,false);

   // the original call will be deleted by createConditionalBlocksBeforeTree, but if the refcount was > 1, we need to insert stores.

   if (newSymbolReference)
      {
      TR::Node *ifStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, ifTree->getNode()->getFirstChild(), newSymbolReference);
      TR::TreeTop *ifStoreTree = TR::TreeTop::create(comp(), ifStoreNode);

      ifTree->insertAfter(ifStoreTree);

      debugTrace(tracer(),"Inserted store tree %p for if side of the diamond",ifStoreNode);

      TR::Node *elseStoreNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(dataType), 1, 1, elseTree->getNode()->getFirstChild(), newSymbolReference);
      TR::TreeTop *elseStoreTree = TR::TreeTop::create(comp(), elseStoreNode);

      elseTree->insertAfter(elseStoreTree);

      debugTrace(tracer(),"Inserted store tree %p for else side of the diamond",elseStoreNode);
      }

   return true;
   }
1421
1422
1423
1424
/**
 * Inline a recognized Unsafe get*(Object, long) call by expanding it into a
 * guarded diamond of IL trees: a direct access path (field/array element at
 * object+offset), an optional array-direct-access path when the loaded type
 * needs a widening conversion, and an indirect (static/low-tagged offset)
 * path.  The call node itself is recycled into a direct load of a fresh temp
 * that each path stores into.
 *
 * @param calleeSymbol     recognized Unsafe get* method being inlined
 * @param callerSymbol     caller's method symbol (unused here directly)
 * @param callNodeTreeTop  treetop anchoring the Unsafe call
 * @param unsafeCall       the call node (child 1 = object, child 2 = offset)
 * @param type             data type being loaded
 * @param isVolatile       whether the access has volatile semantics
 * @param needNullCheck    whether a null check must guard the generated code
 * @return true when the call was transformed, false to leave it untouched
 */
bool
TR_J9InlinerPolicy::createUnsafeGetWithOffset(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * unsafeCall, TR::DataType type, bool isVolatile, bool needNullCheck)
   {
   // A volatile 64-bit load on 32-bit hardware requires codegen support for
   // inlined atomic long volatiles; without it, leave the call alone.
   if (isVolatile && type == TR::Int64 && comp()->target().is32Bit() && !comp()->cg()->getSupportsInlinedAtomicLongVolatiles())
      return false;

   if (debug("traceUnsafe"))
      printf("createUnsafeGetWithOffset %s in %s\n", type.toString(), comp()->signature());

   // Truncate the return before inlining the call
   if (TR_J9MethodBase::isUnsafeGetPutBoolean(calleeSymbol->getRecognizedMethod()))
      {
      TR::TransformUtil::truncateBooleanForUnsafeGetPut(comp(), callNodeTreeTop);
      }

   // Preserve null check on the unsafe object
   TR::TransformUtil::separateNullCheck(comp(), callNodeTreeTop, tracer()->debugLevel());

   TR::Node *unsafeAddress = unsafeCall->getChild(1);
   TR::Node *offset = unsafeCall->getChild(2);

   TR::TreeTop *prevTreeTop = callNodeTreeTop->getPrevTreeTop();
   TR::SymbolReference *newSymbolReferenceForAddress = NULL;

   // Since the block has to be split, we need to create temps for the arguments to the call
   // so that the right values are picked up in the 2 blocks that are targets of the if block
   // created for the inlining of the unsafe method
   //
   createTempsForUnsafePutGet(unsafeAddress, unsafeCall, callNodeTreeTop,
                              offset, newSymbolReferenceForAddress, true);
   unsafeAddress = createUnsafeAddressWithOffset(unsafeCall);

   // Detach the original arguments; the call node will be recycled below.
   for (int32_t j=0; j<unsafeCall->getNumChildren(); j++)
      unsafeCall->getChild(j)->recursivelyDecReferenceCount();
   unsafeCall->setNumChildren(1);

   TR::SymbolReference* symRef = comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(type, true, false, isVolatile);
   // conversionNeeded is true for sub-int types (byte/boolean/char/short) that
   // are widened to the platform load/store width.
   bool conversionNeeded = comp()->fe()->dataTypeForLoadOrStore(type) != type;
   TR_ASSERT(unsafeCall == callNodeTreeTop->getNode()->getFirstChild(), "assumption not valid\n");
   TR::Node* unsafeCallWithConversion = NULL;
   TR::Node* callNodeWithConversion = NULL;
   if (conversionNeeded)
      {
      TR::Node* loadNode = TR::Node::createWithSymRef(comp()->il.opCodeForIndirectArrayLoad(type),
                                                      1, 1, unsafeAddress, symRef);

      bool unsignedType;

      switch (calleeSymbol->getRecognizedMethod()) {
         //boolean and char are unsigned so we need an unsigned conversion
         case TR::sun_misc_Unsafe_getBoolean_jlObjectJ_Z:
         case TR::sun_misc_Unsafe_putBooleanVolatile_jlObjectJZ_V:
         case TR::sun_misc_Unsafe_getBooleanVolatile_jlObjectJ_Z:

         case TR::sun_misc_Unsafe_getChar_jlObjectJ_C:
         case TR::sun_misc_Unsafe_getCharVolatile_jlObjectJ_C:
         case TR::sun_misc_Unsafe_getChar_J_C:
            unsignedType = true;
            break;
         //byte and short are signed so we need a signed conversion
         case TR::sun_misc_Unsafe_getByte_jlObjectJ_B:
         case TR::sun_misc_Unsafe_getByte_J_B:
         case TR::sun_misc_Unsafe_getByteVolatile_jlObjectJ_B:

         case TR::sun_misc_Unsafe_getShort_jlObjectJ_S:
         case TR::sun_misc_Unsafe_getShortVolatile_jlObjectJ_S:
         case TR::sun_misc_Unsafe_getShort_J_S:
            unsignedType = false;
            break;
         default:
            TR_ASSERT(false, "all TR::sun_misc_Unsafe.get* methods must be handled.");
         }

      TR::ILOpCodes conversionOpCode =
         TR::ILOpCode::getProperConversion(type,
                                           comp()->fe()->dataTypeForLoadOrStore(type),
                                           unsignedType);
      unsafeCallWithConversion = TR::Node::create(conversionOpCode, 1, loadNode);
      }

   // Recycle the call node into an indirect load through the computed address.
   unsafeAddress->incReferenceCount();
   TR::Node::recreate(unsafeCall, comp()->il.opCodeForIndirectLoad(type));
   unsafeCall->setChild(0, unsafeAddress);
   unsafeCall->setSymbolReference(symRef);

   // Turn the anchoring treetop into a store of the loaded value into a new
   // temp; every path of the diamond stores into this temp.
   TR::Node::recreate(callNodeTreeTop->getNode(), TR::treetop);
   TR::DataType dataType = unsafeCall->getDataType();
   TR::SymbolReference *newTemp = comp()->getSymRefTab()->createTemporary(comp()->getMethodSymbol(), dataType);
   TR::ILOpCodes storeOpCode = comp()->il.opCodeForDirectStore(unsafeCall->getDataType());
   TR::Node::recreate(callNodeTreeTop->getNode(), storeOpCode);
   callNodeTreeTop->getNode()->setSymbolReference(newTemp);

   if (conversionNeeded)
      {
      callNodeWithConversion =
         TR::Node::createWithSymRef(storeOpCode, 1, 1, unsafeCallWithConversion, newTemp);
      }

   TR::TreeTop* directAccessTreeTop =
      genDirectAccessCodeForUnsafeGetPut(callNodeTreeTop->getNode(), false, true);
   TR::TreeTop* arrayDirectAccessTreeTop = conversionNeeded
      ? genDirectAccessCodeForUnsafeGetPut(callNodeWithConversion, conversionNeeded, true)
      : NULL;
   TR::TreeTop* indirectAccessTreeTop =
      genIndirectAccessCodeForUnsafeGetPut(callNodeTreeTop->getNode(), unsafeAddress);
   // If we are not able to get javaLangClass it is still inefficient to put direct Access far
   // So in that case we will generate lowTagCmpTest to branch to indirect access if true
   TR_OpaqueClassBlock *javaLangClass = comp()->fe()->getClassFromSignature("Ljava/lang/Class;",17, comp()->getCurrentMethod(),true);
   bool needNotLowTagged = javaLangClass != NULL || conversionNeeded ;
   // If we can get a J9Class or we need conversion we generate test to branch to direct access if low bit is not tagged
   // Else in case we get NULL instead of j9Class we generate test to branch to indirect access if low bit is tagged
   TR::TreeTop *lowTagCmpTree = genClassCheckForUnsafeGetPut(offset, needNotLowTagged);

   TR::Block * joinBlock =
      addNullCheckForUnsafeGetPut(unsafeAddress, newSymbolReferenceForAddress,
                                  callNodeTreeTop, directAccessTreeTop,
                                  arrayDirectAccessTreeTop,
                                  indirectAccessTreeTop);

   createAnchorNodesForUnsafeGetPut(directAccessTreeTop, type, true);
   if (arrayDirectAccessTreeTop)
      createAnchorNodesForUnsafeGetPut(arrayDirectAccessTreeTop, type, true);
   genCodeForUnsafeGetPut(unsafeAddress, callNodeTreeTop, prevTreeTop,
                          newSymbolReferenceForAddress, directAccessTreeTop,
                          lowTagCmpTree, needNullCheck, true, conversionNeeded,
                          joinBlock, javaLangClass);

   // Finally, repoint remaining uses of the original call node at the temp
   // by recycling it into a direct load of newTemp.
   for (int32_t j=0; j<unsafeCall->getNumChildren(); j++)
      unsafeCall->getChild(j)->recursivelyDecReferenceCount();
   unsafeCall->setNumChildren(0);
   TR::Node::recreate(unsafeCall, comp()->il.opCodeForDirectLoad(unsafeCall->getDataType()));
   unsafeCall->setSymbolReference(newTemp);
   return true;
   }
1559
1560
/**
 * Produce the raw-memory address operand for a non-offset Unsafe access.
 * Child 1 of the call carries the address as a Java long; on 32-bit targets
 * it is narrowed with l2i first.
 *
 * @param unsafeCall the recognized Unsafe call node
 * @return a node usable as the memory address of the access
 */
TR::Node *
TR_J9InlinerPolicy::createUnsafeAddress(TR::Node * unsafeCall)
   {
   TR::Node *rawAddress = unsafeCall->getChild(1);

   // 64-bit: the long already matches pointer width (should use l2a if we ever have one).
   // 32-bit: truncate to the platform word (should use i2a if we ever have one).
   if (!comp()->target().is64Bit())
      rawAddress = TR::Node::create(TR::l2i, 1, rawAddress);

   return rawAddress;
   }
1568
1569
/**
 * Inline a recognized Unsafe put*(long, value) raw-memory store by replacing
 * the call treetop with an indirect store through the address in child 1.
 * Address-typed values are stored as their integer representation (lstorei on
 * 64-bit, l2i + istorei on 32-bit); sub-int values are narrowed (i2b/i2s)
 * before the store.
 *
 * @param calleeSymbol     recognized Unsafe put* method (unused here directly)
 * @param callerSymbol     caller's method symbol (unused here directly)
 * @param callNodeTreeTop  treetop anchoring the Unsafe call; its node is replaced
 * @param unsafeCall       the call node (child 1 = address, child 2 = value)
 * @param type             data type being stored
 * @param compress         whether compressed-reference sequences may be generated
 * @return always true (the call is unconditionally transformed)
 */
bool
TR_J9InlinerPolicy::createUnsafePut(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * unsafeCall, TR::DataType type, bool compress)
   {
   if (debug("traceUnsafe"))
      printf("createUnsafePut %s in %s\n", type.toString(), comp()->signature());

   // Preserve null check on the unsafe object
   TR::TransformUtil::separateNullCheck(comp(), callNodeTreeTop, tracer()->debugLevel());

   TR::Node * address = createUnsafeAddress(unsafeCall);

   TR::Node * value = unsafeCall->getChild(2);

   TR::Node * unsafeNode;
   if (type == TR::Address)
      {
      // Addresses are stored via their integer representation: a 64-bit long
      // store, or a truncating 32-bit int store.
      if (comp()->target().is64Bit())
         {
         unsafeNode = TR::Node::createWithSymRef(TR::lstorei, 2, 2, address, value, comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(TR::Int64));
         }
      else
         {
         unsafeNode = TR::Node::create(TR::l2i, 1, value);
         unsafeNode = TR::Node::createWithSymRef(TR::istorei, 2, 2, address, unsafeNode, comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(TR::Int32));
         }
      }
   else
      {
      // Sub-int values arrive widened to int; narrow them to the store width.
      switch (type)
         {
         case TR::Int8:
            value = TR::Node::create(TR::i2b, 1, value);
            break;
         case TR::Int16:
            value = TR::Node::create(TR::i2s, 1, value);
            break;
         default:
            break;
         }
      unsafeNode = TR::Node::createWithSymRef(comp()->il.opCodeForIndirectArrayStore(type), 2, 2, address, value, comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(type));
      }

   // NOTE(review): with type == TR::Address the store above was built as an
   // integer store, so this compressed-refs wrapping appears to target the
   // address-typed case only when requested by the caller — confirm intent.
   if (compress && comp()->useCompressedPointers() &&
       (type == TR::Address))
      {
      unsafeNode = genCompressedRefs(unsafeNode, false, -1);
      }

   // Replace the call treetop's node with the store and release the old call tree.
   callNodeTreeTop->setNode(unsafeNode);
   unsafeCall->recursivelyDecReferenceCount();

   return true;
   }
1623
1624
/**
 * Inline a recognized Unsafe get*(long) raw-memory load by recycling the call
 * node into an indirect load through the address in child 1 (plus a widening
 * conversion for sub-int and 32-bit address cases).
 *
 * @param calleeSymbol     recognized Unsafe get* method (used to pick signedness)
 * @param callerSymbol     caller's method symbol (unused here directly)
 * @param callNodeTreeTop  treetop anchoring the Unsafe call; becomes a plain treetop
 * @param unsafeCall       the call node (child 1 = address); recycled in place
 * @param type             data type being loaded
 * @param compress         whether a compressed-reference anchor may be generated
 * @return always true (the call is unconditionally transformed)
 */
bool
TR_J9InlinerPolicy::createUnsafeGet(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * unsafeCall, TR::DataType type, bool compress)
   {
   if (debug("traceUnsafe"))
      printf("createUnsafeGet %s in %s\n", type.toString(), comp()->signature());

   // Preserve null check on the unsafe object
   TR::TransformUtil::separateNullCheck(comp(), callNodeTreeTop, tracer()->debugLevel());

   TR::Node * unsafeAddress = createUnsafeAddress(unsafeCall);

   TR::Node * unsafeNode;
   if (type == TR::Address)
      {
      if (comp()->target().is64Bit())
         {
         // 64-bit: the call node becomes an lloadi through the address.
         unsafeAddress->incReferenceCount();

         int32_t j;
         for (j=0; j<unsafeCall->getNumChildren(); j++)
            unsafeCall->getChild(j)->recursivelyDecReferenceCount();

         unsafeCall->setNumChildren(1);
         TR::Node::recreate(unsafeCall, TR::lloadi);
         unsafeCall->setSymbolReference(comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(TR::Int64));
         unsafeCall->setChild(0, unsafeAddress);
         unsafeNode = unsafeCall;
         }
      else
         {
         // 32-bit: load an int and zero-extend it to the long the caller expects.
         unsafeNode = TR::Node::createWithSymRef(TR::iloadi, 1, 1, unsafeAddress, comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(TR::Int32));

         unsafeNode->incReferenceCount();

         int32_t j;
         for (j=0; j<unsafeCall->getNumChildren(); j++)
            unsafeCall->getChild(j)->recursivelyDecReferenceCount();

         unsafeCall->setNumChildren(1);
         TR::Node::recreate(unsafeCall, TR::iu2l);
         unsafeCall->setChild(0, unsafeNode);
         }
      }
   else
      {
      // Sub-int loads need widening back to int; getChar is the unsigned case.
      bool unsignedType = false;
      bool needConversion = false;
      switch (type)
         {
         case TR::Int8:
         case TR::Int16:
            needConversion = true;
            break;
         default:
            break;
         }

      switch (calleeSymbol->getRecognizedMethod())
         {
         case TR::sun_misc_Unsafe_getChar_J_C:
            unsignedType = true;
            break;
         default:
            break;
         }

      if (needConversion)
         unsafeNode = TR::Node::createWithSymRef(comp()->il.opCodeForIndirectArrayLoad(type), 1, 1, unsafeAddress, comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(type));
      else
         unsafeNode = unsafeAddress;

      unsafeNode->incReferenceCount();

      int32_t j;
      for (j=0; j<unsafeCall->getNumChildren(); j++)
         unsafeCall->getChild(j)->recursivelyDecReferenceCount();

      unsafeCall->setNumChildren(1);

      // With conversion: the call node becomes the widening op over the load.
      switch (type)
         {
         case TR::Int8:
            TR::Node::recreate(unsafeCall, TR::b2i);
            break;
         case TR::Int16:
            if(unsignedType)
               TR::Node::recreate(unsafeCall, TR::su2i);
            else
               TR::Node::recreate(unsafeCall, TR::s2i);
            break;
         default:
            break;
         }

      // Without conversion: the call node becomes the indirect load itself.
      if (!needConversion)
         {
         TR::Node::recreate(unsafeCall, comp()->il.opCodeForIndirectArrayLoad(type));
         unsafeCall->setSymbolReference(comp()->getSymRefTab()->findOrCreateUnsafeSymbolRef(type));
         }

      unsafeCall->setChild(0, unsafeNode);
      }

   TR::TreeTop *compRefTT = NULL;
   if (compress && comp()->useCompressedPointers() &&
       (type == TR::Address))
      {
      // create the anchor node
      compRefTT = TR::TreeTop::create(comp(), genCompressedRefs(unsafeCall, false));
      }

   // Anchor the compressed-refs tree immediately before the (now plain) call treetop.
   if (compRefTT)
      {
      TR::TreeTop *prevTT = callNodeTreeTop->getPrevTreeTop();
      prevTT->join(compRefTT);
      }

   TR::Node::recreate(callNodeTreeTop->getNode(), TR::treetop);

   return true;
   }
1746
1747
bool
1748
TR_J9InlinerPolicy::createUnsafeFence(TR::TreeTop *callNodeTreeTop, TR::Node *callNode, TR::ILOpCodes opCode)
1749
{
1750
TR::Node *fenceNode = TR::Node::createWithSymRef(callNode, opCode, 0, callNode->getSymbolReference());
1751
TR::Node::recreate(callNode, TR::PassThrough);
1752
TR::TreeTop *fenceTop = TR::TreeTop::create(comp(), fenceNode);
1753
callNodeTreeTop->insertAfter(fenceTop);
1754
return true;
1755
}
1756
1757
/**
 * Inline sun.reflect.Reflection.getClassAccessFlags(Class) by reading the
 * modifiers out of the J9Class/ROM class directly and building a small
 * if/else diamond that normalizes them (primitive classes report
 * abstract|final|public; others are masked to the low 12 bits).
 *
 * @param calleeSymbol     must be the recognized getClassAccessFlags method
 * @param callerSymbol     caller's method symbol (temps/CFG belong to it)
 * @param callNodeTreeTop  treetop anchoring the call
 * @param callNode         the call node; recycled into an iload of the result temp
 * @return the (recycled) result node, or 0 when inlining is declined
 */
TR::Node *
TR_J9InlinerPolicy::inlineGetClassAccessFlags(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * callNode)
   {
   if (
       comp()->getOption(TR_DisableInliningOfNatives) ||
       calleeSymbol->castToMethodSymbol()->getRecognizedMethod() != TR::sun_reflect_Reflection_getClassAccessFlags)
      return 0;

   TR::Block * block = callNodeTreeTop->getEnclosingBlock();

   TR::SymbolReference * modifiersSymRef = comp()->getSymRefTab()->createTemporary(callerSymbol, callNode->getDataType());

   // generating "modifiers = J9VM_J9CLASS_FROM_HEAPCLASS(vmThread, *(j9object_t*)clazzRef)->romClass->modifiers;"

   TR::Node *j9cNode;

   // Direct-JNI calls carry the receiver as the second child (first is the JNI env).
   if(callNode->isPreparedForDirectJNI())
      j9cNode = callNode->getSecondChild();
   else
      j9cNode = callNode->getFirstChild();

   TR::Node::recreate(j9cNode, TR::aload);

   // java/lang/Class object -> J9Class pointer
   j9cNode = TR::Node::createWithSymRef(TR::aloadi, 1, 1, j9cNode, comp()->getSymRefTab()->findOrCreateClassFromJavaLangClassSymbolRef());

   TR::Node *nullCheckNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, j9cNode, comp()->getSymRefTab()->findOrCreateNullCheckSymbolRef(callerSymbol));
   TR::TreeTop *nullCheckTree = TR::TreeTop::create(comp(), nullCheckNode);
   // J9Class -> romClass -> modifiers word.
   // NOTE(review): the modifiers load reuses findOrCreateClassIsArraySymbolRef,
   // presumably because that symref describes the same romClass field offset —
   // confirm against the symref table definitions.
   TR::Node *romclassNode = TR::Node::createWithSymRef(TR::aloadi, 1, 1, j9cNode, comp()->getSymRefTab()->findOrCreateClassRomPtrSymbolRef());
   TR::Node *classAccessFlagsNode = TR::Node::createWithSymRef(TR::iloadi, 1, 1, romclassNode, comp()->getSymRefTab()->findOrCreateClassIsArraySymbolRef());
   TR::Node *modifiersNode = TR::Node::createStore(modifiersSymRef, classAccessFlagsNode);
   TR::TreeTop *modifiersTree = TR::TreeTop::create(comp(), modifiersNode);
   callNodeTreeTop->insertBefore(modifiersTree);
   modifiersTree->insertBefore(nullCheckTree);

   /*** need to generate this:
    *  if (modifiers & J9AccClassInternalPrimitiveType) {
    *      modifiers = J9AccAbstract | J9AccFinal | J9AccPublic;
    *  } else {
    *      modifiers &= 0xFFF;
    *  }
    *  return modifiers;
    */
   // generating "if (modifiers & J9AccClassInternalPrimitiveType)"
   TR::Node *iAndNode = TR::Node::create(TR::iand, 2,
                                         TR::Node::createLoad(callNode, modifiersSymRef),
                                         TR::Node::iconst(callNode, (int32_t)comp()->fej9()->constClassFlagsPrimitive()));
   TR::Node *compareNode = TR::Node::createif(TR::ificmpne,
                                              iAndNode,
                                              TR::Node::iconst(callNode, 0),
                                              0);
   TR::TreeTop *compareTree = TR::TreeTop::create(comp(), compareNode);

   // generating if-then part "  modifiers = J9AccAbstract | J9AccFinal | J9AccPublic;"
   TR::Node *modifiersIfStrNode = TR::Node::createStore(modifiersSymRef,
                                                        TR::Node::iconst(callNode, (int32_t)(comp()->fej9()->constClassFlagsAbstract() | comp()->fej9()->constClassFlagsFinal() | comp()->fej9()->constClassFlagsPublic()))
                                                        );
   TR::TreeTop *ifTree = TR::TreeTop::create(comp(), modifiersIfStrNode);


   // generating else part "  modifiers &= 0xFFF;"
   TR::Node *modifiersIAndNode = TR::Node::create(TR::iand, 2,
                                                  TR::Node::createLoad(callNode, modifiersSymRef),
                                                  TR::Node::iconst(callNode, 0xFFF));
   TR::Node *modifiersElseStrNode = TR::Node::createStore(modifiersSymRef, modifiersIAndNode);
   TR::TreeTop *elseTree = TR::TreeTop::create(comp(), modifiersElseStrNode);

   // generating "  return modifiers;"
   // - simply convert the original call node to an iload of the modifiers
   TR::Node *resultNode = callNode;
   TR::Node::recreate(callNode, TR::iload);
   callNode->removeAllChildren();
   callNode->setSymbolReference(modifiersSymRef);

   block->createConditionalBlocksBeforeTree(callNodeTreeTop, compareTree, ifTree, elseTree, callerSymbol->getFlowGraph(), false);

   return resultNode;
   }
1835
1836
bool
1837
TR_J9InlinerPolicy::inlineUnsafeCall(TR::ResolvedMethodSymbol *calleeSymbol, TR::ResolvedMethodSymbol *callerSymbol, TR::TreeTop * callNodeTreeTop, TR::Node * callNode)
1838
{
1839
debugTrace(tracer(), "Unsafe Inlining: Trying to inline Unsafe Call at Node %p\n", callNode);
1840
1841
if (comp()->getOption(TR_DisableUnsafe))
1842
return false;
1843
1844
if (!callNode->getSymbol()->isResolvedMethod())
1845
return false;
1846
1847
if (comp()->fej9()->isAnyMethodTracingEnabled(calleeSymbol->getResolvedMethod()->getPersistentIdentifier()) &&
1848
!comp()->fej9()->traceableMethodsCanBeInlined())
1849
return false;
1850
1851
if ((TR::Compiler->vm.canAnyMethodEventsBeHooked(comp()) && !comp()->fej9()->methodsCanBeInlinedEvenIfEventHooksEnabled(comp())) ||
1852
(comp()->fej9()->isAnyMethodTracingEnabled(calleeSymbol->getResolvedMethod()->getPersistentIdentifier()) &&
1853
!comp()->fej9()->traceableMethodsCanBeInlined()))
1854
return false;
1855
1856
// I am not sure if having the same type between C/S and B/Z matters here.. ie. if the type is being used as the only distinguishing factor
1857
switch (callNode->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod())
1858
{
1859
case TR::sun_misc_Unsafe_putByte_jlObjectJB_V:
1860
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false);
1861
case TR::sun_misc_Unsafe_putBoolean_jlObjectJZ_V:
1862
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false);
1863
case TR::sun_misc_Unsafe_putChar_jlObjectJC_V:
1864
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false);
1865
case TR::sun_misc_Unsafe_putShort_jlObjectJS_V:
1866
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false);
1867
case TR::sun_misc_Unsafe_putInt_jlObjectJI_V:
1868
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32, false);
1869
case TR::sun_misc_Unsafe_putLong_jlObjectJJ_V:
1870
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64, false);
1871
case TR::sun_misc_Unsafe_putFloat_jlObjectJF_V:
1872
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float, false);
1873
case TR::sun_misc_Unsafe_putDouble_jlObjectJD_V:
1874
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double, false);
1875
case TR::sun_misc_Unsafe_putObject_jlObjectJjlObject_V:
1876
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, false, true);
1877
1878
case TR::sun_misc_Unsafe_getBoolean_jlObjectJ_Z:
1879
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false);
1880
case TR::sun_misc_Unsafe_getByte_jlObjectJ_B:
1881
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false);
1882
case TR::sun_misc_Unsafe_getChar_jlObjectJ_C:
1883
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false);
1884
case TR::sun_misc_Unsafe_getShort_jlObjectJ_S:
1885
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false);
1886
case TR::sun_misc_Unsafe_getInt_jlObjectJ_I:
1887
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32, false);
1888
case TR::sun_misc_Unsafe_getLong_jlObjectJ_J:
1889
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64, false);
1890
case TR::sun_misc_Unsafe_getFloat_jlObjectJ_F:
1891
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float, false);
1892
case TR::sun_misc_Unsafe_getDouble_jlObjectJ_D:
1893
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double, false);
1894
case TR::sun_misc_Unsafe_getObject_jlObjectJ_jlObject:
1895
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, false, true);
1896
1897
case TR::sun_misc_Unsafe_putByteVolatile_jlObjectJB_V:
1898
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, true);
1899
case TR::sun_misc_Unsafe_putBooleanVolatile_jlObjectJZ_V:
1900
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, true);
1901
case TR::sun_misc_Unsafe_putCharVolatile_jlObjectJC_V:
1902
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, true);
1903
case TR::sun_misc_Unsafe_putShortVolatile_jlObjectJS_V:
1904
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, true);
1905
case TR::sun_misc_Unsafe_putIntVolatile_jlObjectJI_V:
1906
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32, true);
1907
case TR::sun_misc_Unsafe_putLongVolatile_jlObjectJJ_V:
1908
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64, true);
1909
case TR::sun_misc_Unsafe_putFloatVolatile_jlObjectJF_V:
1910
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float, true);
1911
case TR::sun_misc_Unsafe_putDoubleVolatile_jlObjectJD_V:
1912
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double, true);
1913
case TR::sun_misc_Unsafe_putObjectVolatile_jlObjectJjlObject_V:
1914
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, true, true);
1915
1916
case TR::sun_misc_Unsafe_monitorEnter_jlObject_V:
1917
return createUnsafeMonitorOp(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, true);
1918
case TR::sun_misc_Unsafe_monitorExit_jlObject_V:
1919
return createUnsafeMonitorOp(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, false);
1920
1921
case TR::sun_misc_Unsafe_putByteOrdered_jlObjectJB_V:
1922
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false, false, true);
1923
case TR::sun_misc_Unsafe_putBooleanOrdered_jlObjectJZ_V:
1924
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, false, false, true);
1925
case TR::sun_misc_Unsafe_putCharOrdered_jlObjectJC_V:
1926
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false, false, true);
1927
case TR::sun_misc_Unsafe_putShortOrdered_jlObjectJS_V:
1928
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, false, false, true);
1929
case TR::sun_misc_Unsafe_putIntOrdered_jlObjectJI_V:
1930
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32, false, false, true);
1931
case TR::sun_misc_Unsafe_putLongOrdered_jlObjectJJ_V:
1932
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64, false, false, true);
1933
case TR::sun_misc_Unsafe_putFloatOrdered_jlObjectJF_V:
1934
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float, false, false, true);
1935
case TR::sun_misc_Unsafe_putDoubleOrdered_jlObjectJD_V:
1936
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double, false, false, true);
1937
case TR::sun_misc_Unsafe_putObjectOrdered_jlObjectJjlObject_V:
1938
return createUnsafePutWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, false, true, true);
1939
1940
case TR::sun_misc_Unsafe_getBooleanVolatile_jlObjectJ_Z:
1941
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, true);
1942
case TR::sun_misc_Unsafe_getByteVolatile_jlObjectJ_B:
1943
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8, true);
1944
case TR::sun_misc_Unsafe_getCharVolatile_jlObjectJ_C:
1945
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, true);
1946
case TR::sun_misc_Unsafe_getShortVolatile_jlObjectJ_S:
1947
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16, true);
1948
case TR::sun_misc_Unsafe_getIntVolatile_jlObjectJ_I:
1949
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32, true);
1950
case TR::sun_misc_Unsafe_getLongVolatile_jlObjectJ_J:
1951
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64, true);
1952
case TR::sun_misc_Unsafe_getFloatVolatile_jlObjectJ_F:
1953
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float, true);
1954
case TR::sun_misc_Unsafe_getDoubleVolatile_jlObjectJ_D:
1955
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double, true);
1956
case TR::sun_misc_Unsafe_getObjectVolatile_jlObjectJ_jlObject:
1957
return createUnsafeGetWithOffset(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, true, true);
1958
1959
case TR::sun_misc_Unsafe_putByte_JB_V:
1960
case TR::org_apache_harmony_luni_platform_OSMemory_putByte_JB_V:
1961
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8);
1962
case TR::sun_misc_Unsafe_putChar_JC_V:
1963
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16);
1964
case TR::sun_misc_Unsafe_putShort_JS_V:
1965
case TR::org_apache_harmony_luni_platform_OSMemory_putShort_JS_V:
1966
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16);
1967
case TR::sun_misc_Unsafe_putInt_JI_V:
1968
case TR::org_apache_harmony_luni_platform_OSMemory_putInt_JI_V:
1969
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32);
1970
case TR::sun_misc_Unsafe_putLong_JJ_V:
1971
case TR::org_apache_harmony_luni_platform_OSMemory_putLong_JJ_V:
1972
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64);
1973
case TR::sun_misc_Unsafe_putFloat_JF_V:
1974
case TR::org_apache_harmony_luni_platform_OSMemory_putFloat_JF_V:
1975
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float);
1976
case TR::sun_misc_Unsafe_putDouble_JD_V:
1977
case TR::org_apache_harmony_luni_platform_OSMemory_putDouble_JD_V:
1978
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double);
1979
case TR::sun_misc_Unsafe_putAddress_JJ_V:
1980
case TR::org_apache_harmony_luni_platform_OSMemory_putAddress_JJ_V:
1981
return createUnsafePut(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, false);
1982
1983
case TR::sun_misc_Unsafe_getByte_J_B:
1984
case TR::org_apache_harmony_luni_platform_OSMemory_getByte_J_B:
1985
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int8);
1986
case TR::sun_misc_Unsafe_getChar_J_C:
1987
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16);
1988
case TR::sun_misc_Unsafe_getShort_J_S:
1989
case TR::org_apache_harmony_luni_platform_OSMemory_getShort_J_S:
1990
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int16);
1991
case TR::sun_misc_Unsafe_getInt_J_I:
1992
case TR::org_apache_harmony_luni_platform_OSMemory_getInt_J_I:
1993
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int32);
1994
case TR::sun_misc_Unsafe_getLong_J_J:
1995
case TR::org_apache_harmony_luni_platform_OSMemory_getLong_J_J:
1996
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Int64);
1997
case TR::sun_misc_Unsafe_getFloat_J_F:
1998
case TR::org_apache_harmony_luni_platform_OSMemory_getFloat_J_F:
1999
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Float);
2000
case TR::sun_misc_Unsafe_getDouble_J_D:
2001
case TR::org_apache_harmony_luni_platform_OSMemory_getDouble_J_D:
2002
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Double);
2003
case TR::sun_misc_Unsafe_getAddress_J_J:
2004
case TR::org_apache_harmony_luni_platform_OSMemory_getAddress_J_J:
2005
return createUnsafeGet(calleeSymbol, callerSymbol, callNodeTreeTop, callNode, TR::Address, false);
2006
2007
case TR::sun_misc_Unsafe_loadFence:
2008
return createUnsafeFence(callNodeTreeTop, callNode, TR::loadFence);
2009
case TR::sun_misc_Unsafe_storeFence:
2010
return createUnsafeFence(callNodeTreeTop, callNode, TR::storeFence);
2011
case TR::sun_misc_Unsafe_fullFence:
2012
return createUnsafeFence(callNodeTreeTop, callNode, TR::fullFence);
2013
2014
case TR::sun_misc_Unsafe_staticFieldBase:
2015
return false; // todo
2016
case TR::sun_misc_Unsafe_staticFieldOffset:
2017
return false; // todo
2018
case TR::sun_misc_Unsafe_objectFieldOffset:
2019
return false; // todo
2020
2021
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
2022
case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
2023
case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
2024
if (callNode->isSafeForCGToFastPathUnsafeCall())
2025
return false;
2026
switch (callerSymbol->castToMethodSymbol()->getRecognizedMethod())
2027
{
2028
case TR::java_util_concurrent_ConcurrentHashMap_addCount:
2029
case TR::java_util_concurrent_ConcurrentHashMap_casTabAt:
2030
case TR::java_util_concurrent_ConcurrentHashMap_fullAddCount:
2031
case TR::java_util_concurrent_ConcurrentHashMap_helpTransfer:
2032
case TR::java_util_concurrent_ConcurrentHashMap_initTable:
2033
case TR::java_util_concurrent_ConcurrentHashMap_transfer:
2034
case TR::java_util_concurrent_ConcurrentHashMap_tryPresize:
2035
case TR::java_util_concurrent_ConcurrentHashMap_TreeBin_contendedLock:
2036
case TR::java_util_concurrent_ConcurrentHashMap_TreeBin_find:
2037
case TR::java_util_concurrent_ConcurrentHashMap_TreeBin_lockRoot:
2038
case TR::com_ibm_jit_JITHelpers_compareAndSwapIntInObject:
2039
case TR::com_ibm_jit_JITHelpers_compareAndSwapLongInObject:
2040
case TR::com_ibm_jit_JITHelpers_compareAndSwapObjectInObject:
2041
case TR::com_ibm_jit_JITHelpers_compareAndSwapIntInArray:
2042
case TR::com_ibm_jit_JITHelpers_compareAndSwapLongInArray:
2043
case TR::com_ibm_jit_JITHelpers_compareAndSwapObjectInArray:
2044
callNode->setIsSafeForCGToFastPathUnsafeCall(true);
2045
return callNode;
2046
default:
2047
return createUnsafeCASCallDiamond(callNodeTreeTop, callNode);
2048
}
2049
default:
2050
break;
2051
}
2052
2053
return false;
2054
}
2055
2056
2057
bool
2058
TR_J9InlinerPolicy::isInlineableJNI(TR_ResolvedMethod *method,TR::Node *callNode)
2059
{
2060
TR::Compilation* comp = TR::comp();
2061
TR::RecognizedMethod recognizedMethod = method->getRecognizedMethod();
2062
// Reflection's JNI
2063
//
2064
if (!comp->getOption(TR_DisableInliningOfNatives) &&
2065
recognizedMethod == TR::sun_reflect_Reflection_getClassAccessFlags)
2066
//return false;
2067
return true;
2068
2069
// Unsafe's JNIs
2070
//
2071
if (comp->getOption(TR_DisableUnsafe))
2072
return false;
2073
2074
// If this put ordered call node has already been inlined, do not inline it again (JTC-JAT 71313)
2075
if (callNode && callNode->isUnsafePutOrderedCall() && callNode->isDontInlinePutOrderedCall())
2076
{
2077
debugTrace(tracer(), "Unsafe Inlining: Unsafe Call %p already inlined\n", callNode);
2078
2079
return false;
2080
}
2081
2082
if ((TR::Compiler->vm.canAnyMethodEventsBeHooked(comp) && !comp->fej9()->methodsCanBeInlinedEvenIfEventHooksEnabled(comp)) ||
2083
(comp->fej9()->isAnyMethodTracingEnabled(method->getPersistentIdentifier()) &&
2084
!comp->fej9()->traceableMethodsCanBeInlined()))
2085
return false;
2086
2087
if (method->convertToMethod()->isUnsafeWithObjectArg(comp) || method->convertToMethod()->isUnsafeCAS(comp))
2088
{
2089
// In Java9 sun/misc/Unsafe methods are simple Java wrappers to JNI
2090
// methods in jdk.internal, and the enum values above match both. Only
2091
// return true for the methods that are native.
2092
if (!TR::Compiler->om.canGenerateArraylets() || (callNode && callNode->isUnsafeGetPutCASCallOnNonArray()))
2093
return method->isNative();
2094
else
2095
return false;
2096
}
2097
2098
switch (recognizedMethod)
2099
{
2100
2101
case TR::sun_misc_Unsafe_monitorEnter_jlObject_V:
2102
case TR::sun_misc_Unsafe_monitorExit_jlObject_V:
2103
2104
case TR::sun_misc_Unsafe_putByte_JB_V:
2105
case TR::org_apache_harmony_luni_platform_OSMemory_putByte_JB_V:
2106
case TR::sun_misc_Unsafe_putChar_JC_V:
2107
case TR::sun_misc_Unsafe_putShort_JS_V:
2108
case TR::org_apache_harmony_luni_platform_OSMemory_putShort_JS_V:
2109
case TR::sun_misc_Unsafe_putInt_JI_V:
2110
case TR::org_apache_harmony_luni_platform_OSMemory_putInt_JI_V:
2111
case TR::sun_misc_Unsafe_putLong_JJ_V:
2112
case TR::org_apache_harmony_luni_platform_OSMemory_putLong_JJ_V:
2113
case TR::sun_misc_Unsafe_putFloat_JF_V:
2114
case TR::org_apache_harmony_luni_platform_OSMemory_putFloat_JF_V:
2115
case TR::sun_misc_Unsafe_putDouble_JD_V:
2116
case TR::org_apache_harmony_luni_platform_OSMemory_putDouble_JD_V:
2117
case TR::sun_misc_Unsafe_putAddress_JJ_V:
2118
case TR::org_apache_harmony_luni_platform_OSMemory_putAddress_JJ_V:
2119
2120
case TR::sun_misc_Unsafe_getByte_J_B:
2121
case TR::org_apache_harmony_luni_platform_OSMemory_getByte_J_B:
2122
case TR::sun_misc_Unsafe_getChar_J_C:
2123
case TR::sun_misc_Unsafe_getShort_J_S:
2124
case TR::org_apache_harmony_luni_platform_OSMemory_getShort_J_S:
2125
case TR::sun_misc_Unsafe_getInt_J_I:
2126
case TR::org_apache_harmony_luni_platform_OSMemory_getInt_J_I:
2127
case TR::sun_misc_Unsafe_getLong_J_J:
2128
case TR::org_apache_harmony_luni_platform_OSMemory_getLong_J_J:
2129
case TR::sun_misc_Unsafe_getFloat_J_F:
2130
case TR::org_apache_harmony_luni_platform_OSMemory_getFloat_J_F:
2131
case TR::sun_misc_Unsafe_getDouble_J_D:
2132
case TR::org_apache_harmony_luni_platform_OSMemory_getDouble_J_D:
2133
case TR::sun_misc_Unsafe_getAddress_J_J:
2134
case TR::org_apache_harmony_luni_platform_OSMemory_getAddress_J_J:
2135
2136
case TR::sun_misc_Unsafe_loadFence:
2137
case TR::sun_misc_Unsafe_storeFence:
2138
case TR::sun_misc_Unsafe_fullFence:
2139
return true;
2140
2141
case TR::sun_misc_Unsafe_staticFieldBase:
2142
return false; // todo
2143
case TR::sun_misc_Unsafe_staticFieldOffset:
2144
return false; // todo
2145
case TR::sun_misc_Unsafe_objectFieldOffset:
2146
return false; // todo
2147
2148
default:
2149
break;
2150
}
2151
2152
return false;
2153
}
2154
2155
//first check J9 specific tryToInline methods and then general tryToInline methods
2156
bool
2157
TR_J9InlinerPolicy::tryToInline(TR_CallTarget * calltarget, TR_CallStack * callStack, bool toInline)
2158
{
2159
TR_ResolvedMethod *method = calltarget->_calleeMethod;
2160
2161
if (toInline && insideIntPipelineForEach(method, comp()))
2162
{
2163
if (comp()->trace(OMR::inlining))
2164
traceMsg(comp(), "forcing inlining of IntPipelineForEach or method inside it: %s\n", method->signature(comp()->trMemory()));
2165
2166
return true;
2167
}
2168
2169
if (toInline)
2170
{
2171
if (!comp()->getOption(TR_DisableForceInlineAnnotations) &&
2172
comp()->fej9()->isForceInline(method))
2173
{
2174
if (comp()->trace(OMR::inlining))
2175
traceMsg(comp(), "@ForceInline was specified for %s, in tryToInline\n", method->signature(comp()->trMemory()));
2176
return true;
2177
}
2178
}
2179
2180
if (OMR_InlinerPolicy::tryToInlineGeneral(calltarget, callStack, toInline))
2181
return true;
2182
2183
return false;
2184
}
2185
2186
bool
2187
TR_J9InlinerPolicy::inlineMethodEvenForColdBlocks(TR_ResolvedMethod *method)
2188
{
2189
bool insideForEach = insideIntPipelineForEach(method, comp());
2190
return insideForEach;
2191
}
2192
2193
void
2194
TR_J9InlinerPolicy::adjustFanInSizeInWeighCallSite(int32_t& weight,
2195
int32_t size,
2196
TR_ResolvedMethod* callee,
2197
TR_ResolvedMethod* caller,
2198
int32_t bcIndex)
2199
{
2200
/*
2201
Our goal is to use the ratio of the weight of a particular caller to the total weight to penalize the callers whose weights are relatively small.
2202
To reach that goal, we have to introduce two magic numbers: defaultWeight and TR::Options::INLINE_fanInCallGraphFactor.
2203
*defaultWeight is used when our caller belongs in the other bucket, so we don't have a meaningful weight to represent it.
2204
*INLINE_fanInCallGraphFactor is simply hand-tuned number by which we multiply our ratio.
2205
2206
INLINE_fanInCallGraphFactor is an integer number divided by 100. This allows us to avoid using float numbers for specifying the factor.
2207
*/
2208
2209
2210
2211
if (comp()->getMethodHotness() > warm)
2212
return;
2213
2214
static const char *qq = feGetEnv("TR_Min_FanIn_Size");
2215
static const uint32_t min_size = ( qq ) ? atoi(qq) : MIN_FAN_IN_SIZE;
2216
2217
uint32_t thresholdSize = (!comp()->getOption(TR_InlinerFanInUseCalculatedSize)) ? getJ9InitialBytecodeSize(callee, 0, comp()) : size;
2218
if (thresholdSize <= min_size) // if we are less than min_fan_in size, we don't want to apply fan-in heuristic
2219
{
2220
return;
2221
}
2222
2223
static const char *qqq = feGetEnv("TR_OtherBucketThreshold");
2224
static const float otherBucketThreshold = (qqq) ? (float) (atoi (qqq) /100.0) : FANIN_OTHER_BUCKET_THRESHOLD ;
2225
2226
//convenience
2227
TR_ResolvedJ9Method *resolvedJ9Callee = (TR_ResolvedJ9Method *) callee;
2228
TR_ResolvedJ9Method *resolvedJ9Caller = (TR_ResolvedJ9Method *) caller;
2229
2230
2231
uint32_t numCallers = 0, totalWeight = 0, fanInWeight = 0, otherBucketWeight = 0;
2232
resolvedJ9Callee->getFaninInfo(&numCallers, &totalWeight, &otherBucketWeight);
2233
2234
if (numCallers < MIN_NUM_CALLERS || (totalWeight > 0 && otherBucketWeight * 1.0 / totalWeight < otherBucketThreshold))
2235
return;
2236
2237
bool hasCaller = resolvedJ9Callee->getCallerWeight(resolvedJ9Caller, &fanInWeight, bcIndex);
2238
2239
if (size >= 0 && totalWeight && fanInWeight)
2240
{
2241
static const char *q4 = feGetEnv("TR_MagicNumber");
2242
static const int32_t magicNumber = q4 ? atoi (q4) : 1 ;
2243
2244
float dynamicFanInRatio = hasCaller ? ((float)totalWeight - (float)fanInWeight) / (float) totalWeight : (float) fanInWeight / (float) totalWeight;
2245
2246
int32_t oldWeight = weight;
2247
weight += weight*dynamicFanInRatio*magicNumber;
2248
2249
heuristicTrace (tracer(), "FANIN: callee %s in caller %s @ %d oldWeight %d weight %d",
2250
callee->signature(comp()->trMemory()),
2251
caller->signature(comp()->trMemory()),
2252
bcIndex, oldWeight, weight
2253
);
2254
2255
}
2256
}
2257
2258
/**
 * Generate IL for the callee, choosing a partial-inlining ILGen request when
 * partial inlining is enabled and this target carries partial-inline data,
 * otherwise an ordinary inlining request.
 *
 * @return true if ILGen succeeded
 */
bool TR_J9InlinerPolicy::_tryToGenerateILForMethod (TR::ResolvedMethodSymbol* calleeSymbol, TR::ResolvedMethodSymbol* callerSymbol, TR_CallTarget* calltarget)
   {
   bool success = false;
   TR::Node * callNode = calltarget->_myCallSite->_callNode;

   TR::IlGeneratorMethodDetails storage;
   TR::IlGeneratorMethodDetails & ilGenMethodDetails = TR::IlGeneratorMethodDetails::create(storage, calleeSymbol->getResolvedMethod());

   if (!comp()->getOption(TR_DisablePartialInlining) && calltarget->_partialInline)
      {
      heuristicTrace(tracer(),"Doing a partialInline for method %s\n",tracer()->traceSignature(calleeSymbol));
      TR::PartialInliningIlGenRequest ilGenRequest(ilGenMethodDetails, callerSymbol, calltarget->_partialInline);

      if (comp()->trace(OMR::inlining))
         {
         traceMsg(comp(), "ILGen of [%p] using request: ", callNode);
         ilGenRequest.print(comp()->fe(), comp()->getOutFile(), "\n");
         }
      success = calleeSymbol->genIL(comp()->fe(), comp(), comp()->getSymRefTab(), ilGenRequest);
      }
   else
      {
      TR::InliningIlGenRequest ilGenRequest(ilGenMethodDetails, callerSymbol);
      if (comp()->trace(OMR::inlining))
         {
         ilGenRequest.print(comp()->fe(), comp()->getOutFile(), "\n");
         }
      success = calleeSymbol->genIL(comp()->fe(), comp(), comp()->getSymRefTab(), ilGenRequest);
      }

   return success;
   }
/**
 * Handle "trivial" inlining of JNI intrinsics. If the callee is an inlineable
 * JNI (see isInlineableJNI) the call is either expanded in place (direct
 * calls; the guard is cleared on success) or left for later handling
 * (indirect calls). Returns true when the callee was a JNI candidate and no
 * further generic inlining should be attempted for it.
 */
bool TR_J9InlinerPolicy::tryToInlineTrivialMethod (TR_CallStack* callStack, TR_CallTarget* calltarget)
   {
   TR::Node * callNode = calltarget->_myCallSite->_callNode;
   TR::ResolvedMethodSymbol * calleeSymbol = calltarget->_calleeSymbol;
   TR::TreeTop * callNodeTreeTop = calltarget->_myCallSite->_callNodeTreeTop;
   TR_VirtualGuardSelection *guard = calltarget->_guard;
   TR::ResolvedMethodSymbol * callerSymbol = callStack->_methodSymbol;

   if (isInlineableJNI(calleeSymbol->getResolvedMethod(),callNode))
      {
      if (performTransformation(comp(), "%sInlining jni %s into %s\n", OPT_DETAILS, calleeSymbol->signature(comp()->trMemory()), callerSymbol->signature(comp()->trMemory())))
         {
         // Indirect calls are recognized but not expanded here.
         if (calltarget->_myCallSite->isIndirectCall())
            return true;

         // Direct call: expand it; a successful expansion needs no guard.
         if (inlineGetClassAccessFlags(calleeSymbol, callerSymbol, callNodeTreeTop, callNode))
            guard->_kind = TR_NoGuard;
         else if (inlineUnsafeCall(calleeSymbol, callerSymbol, callNodeTreeTop, callNode))
            guard->_kind = TR_NoGuard;
         }
      return true;
      }

   return false;
   }
bool
2317
TR_J9InlinerPolicy::adjustFanInSizeInExceedsSizeThreshold(int bytecodeSize,
2318
uint32_t& calculatedSize,
2319
TR_ResolvedMethod* callee,
2320
TR_ResolvedMethod* caller,
2321
int32_t bcIndex)
2322
{
2323
if (comp()->getMethodHotness() > warm)
2324
return false;
2325
2326
static const char *q = feGetEnv("TR_SizeMultiplier");
2327
static const uint32_t multiplier = ( q ) ? atoi (q) : SIZE_MULTIPLIER;
2328
2329
static const char *qq = feGetEnv("TR_Min_FanIn_Size");
2330
static const uint32_t min_size = ( qq ) ? atoi(qq) : MIN_FAN_IN_SIZE;
2331
2332
static const char *qqq = feGetEnv("TR_OtherBucketThreshold");
2333
static const float otherBucketThreshold = (qqq) ? (float) (atoi (qqq) /100.0) : FANIN_OTHER_BUCKET_THRESHOLD;
2334
2335
2336
uint32_t thresholdSize = (!comp()->getOption(TR_InlinerFanInUseCalculatedSize)) ? getJ9InitialBytecodeSize(callee, 0, comp()) : calculatedSize;
2337
if (thresholdSize <= min_size) // if we are less than min_fan_in size, we don't want to apply fan-in heuristic
2338
{
2339
return false;
2340
}
2341
2342
TR_ResolvedJ9Method *resolvedJ9Callee = (TR_ResolvedJ9Method *) callee;
2343
TR_ResolvedJ9Method *resolvedJ9Caller = (TR_ResolvedJ9Method *) caller;
2344
2345
uint32_t numCallers = 0, totalWeight = 0, otherBucketWeight = 0;
2346
float dynamicFanInRatio = 0.0;
2347
resolvedJ9Callee->getFaninInfo(&numCallers, &totalWeight, &otherBucketWeight);
2348
2349
if (numCallers < MIN_NUM_CALLERS || (totalWeight > 0 && otherBucketWeight * 1.0 / totalWeight < otherBucketThreshold))
2350
return false;
2351
2352
2353
2354
uint32_t weight = 0;
2355
bool hasCaller = resolvedJ9Callee->getCallerWeight(resolvedJ9Caller, &weight, bcIndex);
2356
2357
/*
2358
* We assume that if the caller lands in the other bucket it is not worth trouble inlining
2359
* There seem to be an empirical evidence to that.
2360
* If we increase the number of callers we remember up to 40
2361
* Still a considerable share of calls lands in the other bucket
2362
* This indirectly suggests that the other bucket typically consists of
2363
* a lot of infrequent caller-bcIndex pairs
2364
*/
2365
2366
if (!hasCaller && weight != ~0) //the caller is in the other bucket
2367
{
2368
heuristicTrace (tracer(), "FANIN: callee %s in caller %s @ %d exceeds thresholds due to the caller being in the other bucket",
2369
callee->signature(comp()->trMemory()),
2370
caller->signature(comp()->trMemory()),
2371
bcIndex
2372
);
2373
2374
return true;
2375
}
2376
2377
if (weight != ~0) //there is an entry for this particular caller
2378
dynamicFanInRatio = (float)weight / (float)totalWeight ;
2379
2380
int32_t oldCalculatedSize = calculatedSize;
2381
if (dynamicFanInRatio == 0.0)
2382
calculatedSize = bytecodeSize * multiplier; //weight == ~0 we don't know anything about the caller
2383
else
2384
calculatedSize = (uint32_t) ((float)bytecodeSize/dynamicFanInRatio);
2385
2386
heuristicTrace (tracer(), "FANIN: callee %s in caller %s @ %d oldCalculatedSize %d calculatedSize %d",
2387
callee->signature(comp()->trMemory()),
2388
caller->signature(comp()->trMemory()),
2389
bcIndex, oldCalculatedSize, calculatedSize
2390
);
2391
2392
return false;
2393
}
2394
2395
bool
2396
TR_J9InlinerPolicy::callMustBeInlined(TR_CallTarget *calltarget)
2397
{
2398
TR_ResolvedMethod *method = calltarget->_calleeMethod;
2399
2400
if (method->convertToMethod()->isArchetypeSpecimen())
2401
return true;
2402
2403
if (comp()->fej9()->isLambdaFormGeneratedMethod(method))
2404
return true;
2405
2406
if (insideIntPipelineForEach(method, comp()))
2407
{
2408
if (comp()->trace(OMR::inlining))
2409
traceMsg(comp(), "forcing inlining of IntPipelineForEach or method inside it: %s\n", method->signature(comp()->trMemory()));
2410
2411
return true;
2412
}
2413
2414
2415
if (comp()->getOption(TR_EnableSIMDLibrary) &&
2416
strncmp(calltarget->_calleeMethod->classNameChars(), "com/ibm/dataaccess/SIMD", 23) == 0)
2417
return true;
2418
2419
#ifdef ENABLE_GPU
2420
if (strncmp(calltarget->_calleeMethod->classNameChars(), "com/ibm/gpu/Kernel", 18) == 0)
2421
return true;
2422
#endif
2423
2424
2425
if (!comp()->getOption(TR_DisableForceInlineAnnotations) &&
2426
comp()->fej9()->isForceInline(method))
2427
{
2428
int32_t length = method->classNameLength();
2429
char* className = method->classNameChars();
2430
2431
bool vectorMethod = false;
2432
if (length >= 23 && !strncmp(className, "jdk/internal/vm/vector/", 23))
2433
vectorMethod = true;
2434
if (length >= 21 && !strncmp(className, "jdk/incubator/vector/", 21))
2435
vectorMethod = true;
2436
2437
if (vectorMethod)
2438
{
2439
if (comp()->trace(OMR::inlining))
2440
traceMsg(comp(), "@ForceInline was specified for %s, in callMustBeInlined\n", method->signature(comp()->trMemory()));
2441
return true;
2442
}
2443
}
2444
2445
return false;
2446
}
2447
2448
void
2449
TR_J9InlinerUtil::adjustCallerWeightLimit(TR::ResolvedMethodSymbol *callerSymbol, int &callerWeightLimit)
2450
{
2451
if (inliner()->getPolicy()->aggressiveSmallAppOpts() && (callerSymbol->getRecognizedMethod() == TR::java_util_GregorianCalendar_computeFields) && isHot(comp()))
2452
callerWeightLimit = 2600;
2453
}
2454
2455
2456
void
2457
TR_J9InlinerUtil::adjustMethodByteCodeSizeThreshold(TR::ResolvedMethodSymbol *callerSymbol, int &methodByteCodeSizeThreshold)
2458
{
2459
if (inliner()->getPolicy()->aggressiveSmallAppOpts() && (callerSymbol->getRecognizedMethod() == TR::java_util_GregorianCalendar_computeFields))
2460
methodByteCodeSizeThreshold = 400;
2461
}
2462
2463
2464
bool
2465
TR_J9InlinerPolicy::willBeInlinedInCodeGen(TR::RecognizedMethod method)
2466
{
2467
#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
2468
if (willInlineCryptoMethodInCodeGen(method))
2469
{
2470
return true;
2471
}
2472
#endif
2473
2474
return false;
2475
}
2476
2477
bool
2478
TR_J9InlinerPolicy::skipHCRGuardForCallee(TR_ResolvedMethod *callee)
2479
{
2480
// TODO: This is a very hacky way of avoiding HCR guards on sensitive String Compression methods which allows idiom
2481
// recognition to work. It also avoids unnecessary block splitting in performance sensitive methods for String
2482
// operations that are quite common. Can we do something better?
2483
TR::RecognizedMethod rm = callee->getRecognizedMethod();
2484
switch (rm)
2485
{
2486
case TR::java_lang_String_charAtInternal_I:
2487
case TR::java_lang_String_charAtInternal_IB:
2488
case TR::java_lang_String_lengthInternal:
2489
case TR::java_lang_String_isCompressed:
2490
case TR::java_lang_StringUTF16_length:
2491
case TR::java_lang_StringBuffer_capacityInternal:
2492
case TR::java_lang_StringBuffer_lengthInternalUnsynchronized:
2493
case TR::java_lang_StringBuilder_capacityInternal:
2494
case TR::java_lang_StringBuilder_lengthInternal:
2495
return true;
2496
default:
2497
break;
2498
}
2499
2500
// VectorSupport intrinsic candidates should not be redefined by the user
2501
if (rm >= TR::FirstVectorMethod &&
2502
rm <= TR::LastVectorIntrinsicMethod)
2503
return true;
2504
2505
// Skip HCR guard for non-public methods in java/lang/invoke package. These methods
2506
// are related to implementation details of MethodHandle and VarHandle
2507
int32_t length = callee->classNameLength();
2508
char* className = callee->classNameChars();
2509
if (length > 17
2510
&& !strncmp("java/lang/invoke/", className, 17)
2511
&& !callee->isPublic())
2512
return true;
2513
2514
return false;
2515
}
2516
2517
/**
 * Construct the J9 inliner policy.
 *
 * The base class is listed first in the member-initializer list: base
 * subobjects are always initialized before members regardless of list order,
 * and listing them out of order is misleading and triggers -Wreorder.
 */
TR_J9InlinerPolicy::TR_J9InlinerPolicy(TR::Compilation *comp)
   : OMR_InlinerPolicy(comp), _aggressivelyInlineInLoops(false)
   {

   }
/**
 * Construct the JSR292 (MethodHandle) variant of the J9 inliner policy;
 * all behavior is inherited from TR_J9InlinerPolicy.
 */
TR_J9JSR292InlinerPolicy::TR_J9JSR292InlinerPolicy(TR::Compilation *comp)
   : TR_J9InlinerPolicy(comp)
   {
   }
/**
 * Construct the J9 inliner utility; all state lives in the OMR base class.
 */
TR_J9InlinerUtil::TR_J9InlinerUtil(TR::Compilation *comp)
   : OMR_InlinerUtil(comp)
   {
   }
/**
 * Construct the inliner optimization pass wrapper.
 */
TR_Inliner::TR_Inliner(TR::OptimizationManager *manager)
   : TR::Optimization(manager)
   {
   }
/**
 * Run the inliner pass: perform multiple-call-target inlining on the method
 * being compiled (when it may contain inlineable calls and inlining is
 * enabled), then scan for Vector API methods so later expansion is not missed.
 *
 * @return the pass cost (currently a placeholder of 1)
 */
int32_t TR_Inliner::perform()
   {
   // Multi-target inlining is disabled unless explicitly enabled for testing
   // via TR_EnableMT4Testing. This is a temporary placement for the override;
   // other spots didn't work out.
   static const char* enableMT4Testing = feGetEnv("TR_EnableMT4Testing");

   if (!enableMT4Testing)
      comp()->setOption(TR_DisableMultiTargetInlining);

   TR::ResolvedMethodSymbol * sym = comp()->getMethodSymbol();
   if (sym->mayHaveInlineableCall() && optimizer()->isEnabled(OMR::inlining))
      {
      // Structure info is invalidated by inlining; drop it up front.
      comp()->getFlowGraph()->setStructure(NULL);

      TR_MultipleCallTargetInliner inliner(optimizer(),this);
      if (manager()->numPassesCompleted() == 0)
         inliner.setFirstPass();
      inliner.performInlining(sym);
      manager()->incNumPassesCompleted();
      comp()->getFlowGraph()->resetFrequencies();
      comp()->getFlowGraph()->setFrequencies();
      }

   // this should run after all inlining is done in order not to
   // miss any VectorAPI methods
   if (TR_VectorAPIExpansion::findVectorMethods(comp()))
      comp()->getMethodSymbol()->setHasVectorAPI(true);

   return 1; // cost??
   }
const char *
2575
TR_Inliner::optDetailString() const throw()
2576
{
2577
return "O^O INLINER: ";
2578
}
2579
2580
template <typename FunctObj>
2581
void TR_MultipleCallTargetInliner::recursivelyWalkCallTargetAndPerformAction(TR_CallTarget *ct, FunctObj &action)
2582
{
2583
2584
debugTrace(tracer(),"recursivelyWalkingCallTargetAndPerformAction: Considering Target %p. node estimate before = %d maxbcindex = %d",ct,action.getNodeEstimate(),getPolicy()->getInitialBytecodeSize(ct->_calleeMethod, 0, comp()));
2585
2586
action(ct,comp());
2587
2588
TR_CallSite *callsite = 0;
2589
for(callsite = ct->_myCallees.getFirst() ; callsite ; callsite = callsite->getNext() )
2590
{
2591
for (int32_t i = 0 ; i < callsite->numTargets() ; i++)
2592
{
2593
recursivelyWalkCallTargetAndPerformAction(callsite->getTarget(i),action);
2594
}
2595
}
2596
2597
2598
}
2599
2600
int32_t
2601
TR_MultipleCallTargetInliner::applyArgumentHeuristics(TR_LinkHead<TR_ParameterMapping> &map, int32_t originalWeight, TR_CallTarget *target)
2602
{
2603
int32_t weight = originalWeight;
2604
TR_PrexArgInfo *argInfo = target->_ecsPrexArgInfo;
2605
2606
static char *disableCCI=feGetEnv("TR_DisableConstClassInlining");
2607
static char *pEnvconstClassWeight=feGetEnv("TR_constClassWeight");
2608
static int constClassWeight = pEnvconstClassWeight ? atoi(pEnvconstClassWeight) : DEFAULT_CONST_CLASS_WEIGHT;
2609
2610
int32_t fraction = comp()->getOptions()->getInlinerArgumentHeuristicFraction();
2611
for(TR_ParameterMapping * parm = map.getFirst(); parm ; parm = parm->getNext())
2612
{
2613
if(parm->_parameterNode->getOpCode().isLoadConst())
2614
{
2615
weight = weight * (fraction-1) / fraction;
2616
heuristicTrace(tracer(),"Setting weight to %d because arg is load const.",weight);
2617
}
2618
else if (parm->_parameterNode->getOpCodeValue() == TR::aload && parm->_parameterNode->getSymbolReference()->getSymbol()->isConstObjectRef())
2619
{
2620
weight = weight * (fraction-1) / fraction;
2621
heuristicTrace(tracer(),"Setting weight to %d because arg is const object reference.",weight);
2622
}
2623
else if (!disableCCI &&
2624
(parm->_parameterNode->getOpCodeValue() == TR::aloadi) &&
2625
(parm->_parameterNode->getSymbolReference() == comp()->getSymRefTab()->findJavaLangClassFromClassSymbolRef()))
2626
{
2627
weight = constClassWeight;
2628
heuristicTrace(tracer(),"Setting weight to %d because arg is const Class reference.",weight);
2629
}
2630
else if( parm->_parameterNode->getDataType() == TR::Address)
2631
{
2632
weight = comp()->fej9()->adjustedInliningWeightBasedOnArgument(weight,parm->_parameterNode, parm->_parmSymbol,comp());
2633
heuristicTrace(tracer(),"Setting weight to %d after frontend adjusted weight for address parm %p\n",weight,parm->_parameterNode);
2634
}
2635
2636
if (!disableCCI && argInfo)
2637
{
2638
TR_PrexArgument *argPrexInfo = argInfo->get(parm->_parmSymbol->getOrdinal());
2639
if (argPrexInfo && argPrexInfo->hasKnownObjectIndex())
2640
{
2641
weight = constClassWeight;
2642
heuristicTrace(tracer(),"Setting weight to %d because arg is known object parm %p\n",weight,parm->_parameterNode);
2643
break;
2644
}
2645
}
2646
}
2647
2648
weight -= (map.getSize() * 4);
2649
heuristicTrace(tracer(),"Setting weight to %d (subtracting numArgs*4)", weight);
2650
2651
return weight;
2652
}
2653
2654
2655
//---------------------------------------------------------------------
2656
// TR_InlinerBase eliminateTailRecursion
2657
//---------------------------------------------------------------------
2658
2659
bool
2660
TR_MultipleCallTargetInliner::eliminateTailRecursion(
2661
TR::ResolvedMethodSymbol * calleeSymbol, TR_CallStack * callStack,
2662
TR::TreeTop * callNodeTreeTop, TR::Node * parent, TR::Node * callNode, TR_VirtualGuardSelection *guard)
2663
{
2664
if (comp()->getOption(TR_DisableTailRecursion))
2665
return false;
2666
2667
if (_disableTailRecursion)
2668
return false;
2669
2670
TR::TreeTop * nextTT = callNodeTreeTop->getNextRealTreeTop();
2671
for (;;)
2672
{
2673
if (nextTT->getNode()->getOpCodeValue() == TR::Goto)
2674
nextTT = nextTT->getNode()->getBranchDestination()->getNextRealTreeTop();
2675
else if (nextTT->getNode()->getOpCodeValue() == TR::BBEnd)
2676
nextTT = nextTT->getNextTreeTop()->getNextRealTreeTop();
2677
else break;
2678
}
2679
2680
if (!nextTT->getNode()->getOpCode().isReturn())
2681
return false;
2682
2683
TR_ResolvedMethod * calleeResolvedMethod = calleeSymbol->getResolvedMethod();
2684
if (comp()->isDLT() && comp()->getCurrentMethod()->isSameMethod(calleeResolvedMethod))
2685
return false;
2686
if (calleeResolvedMethod->numberOfExceptionHandlers() > 0)
2687
{
2688
// todo check that none of the parameters are referenced in a catch block...that may not be enough
2689
if (debug("traceETR"))
2690
printf("potential to eliminate an eh aware tail recursion to %s\n", tracer()->traceSignature(calleeResolvedMethod));
2691
return false;
2692
}
2693
2694
if (guard->_kind != TR_NoGuard && calleeResolvedMethod->virtualMethodIsOverridden())
2695
return false; // we can't generate the correct virtual guard
2696
2697
for (TR_CallStack * cs = callStack; cs->_methodSymbol != calleeSymbol; cs = cs->getNext())
2698
if (cs->_method->numberOfExceptionHandlers() > 0) // || cs->_method->isSynchronized())
2699
return false;
2700
2701
TR::ResolvedMethodSymbol * callerSymbol = callStack->_methodSymbol;
2702
2703
if (!callerSymbol->getResolvedMethod()->isSameMethod(calleeResolvedMethod))
2704
return false; // todo ... handle this case
2705
2706
// check for any parms being marked pre-existent or fixed
2707
//
2708
bool parmIsFixedOrFinal = false;
2709
ListIterator<TR::ParameterSymbol> parms(&calleeSymbol->getParameterList());
2710
for (TR::ParameterSymbol *p = parms.getFirst(); p; p = parms.getNext())
2711
{
2712
if (p->getIsPreexistent() || p->getFixedType())
2713
parmIsFixedOrFinal = true;
2714
}
2715
2716
if (parmIsFixedOrFinal)
2717
return false;
2718
2719
TR::Block * branchDestination = calleeSymbol->getFirstTreeTop()->getNode()->getBlock();
2720
2721
TR::Block * block = callNodeTreeTop->getEnclosingBlock();
2722
if (nextTT->getNode()->getOpCodeValue() != TR::Return && nextTT->getNode()->getFirstChild() != callNode)
2723
{
2724
if (nextTT->getNode()->getFirstChild()->getOpCodeValue() != TR::iadd)
2725
return false;
2726
2727
if (nextTT->getNode()->getFirstChild()->getSecondChild() != callNode)
2728
return false;
2729
2730
TR::Node * arithmeticNode = nextTT->getNode()->getFirstChild()->getFirstChild();
2731
if (arithmeticNode->getReferenceCount() > 1)
2732
return false;
2733
2734
if (block->getPredecessors().empty() || (block->getPredecessors().size() > 1))
2735
return false;
2736
2737
TR::Block * conditionBlock = toBlock(block->getPredecessors().front()->getFrom());
2738
if (conditionBlock->getSuccessors().size() != 2)
2739
return false;
2740
2741
TR::Block * otherBranch = toBlock(conditionBlock->getSuccessors().front()->getTo() == block ? (*(++conditionBlock->getSuccessors().begin()))->getTo() : conditionBlock->getSuccessors().front()->getTo());
2742
TR::Node * returnNode = otherBranch->getFirstRealTreeTop()->getNode();
2743
if (returnNode->getOpCodeValue() != TR::ireturn)
2744
return false;
2745
2746
TR::Node * returnValue = returnNode->getFirstChild();
2747
if (returnValue->getOpCodeValue() != TR::iconst || returnValue->getInt() != 0)
2748
return false;
2749
2750
if (debug("arithmeticSeries"))
2751
{
2752
// idiv
2753
// imul
2754
// iload #101[0x1
2755
// iadd
2756
// iload #101[0
2757
// iconst 1
2758
// iconst 2
2759
TR::TreeTop * ifTreeTop = conditionBlock->getLastRealTreeTop();
2760
2761
TR::TreeTop::create(comp(), ifTreeTop->getPrevTreeTop(),
2762
TR::Node::create(TR::ireturn, 1,
2763
TR::Node::create(TR::idiv, 2,
2764
TR::Node::create(TR::imul, 2,
2765
arithmeticNode,
2766
TR::Node::create(TR::iadd, 2,
2767
arithmeticNode,
2768
TR::Node::create(returnNode, TR::iconst, 0, 1))),
2769
TR::Node::create(returnNode, TR::iconst, 0, 2))));
2770
2771
2772
callerSymbol->removeTree(ifTreeTop);
2773
TR::CFG * cfg = callerSymbol->getFlowGraph();
2774
cfg->removeEdge(conditionBlock->getSuccessors().front());
2775
cfg->removeEdge(*(++conditionBlock->getSuccessors().begin()));
2776
cfg->addEdge(conditionBlock, cfg->getEnd());
2777
return true;
2778
}
2779
2780
TR::DataType dt = TR::Int32;
2781
TR::SymbolReference * temp = comp()->getSymRefTab()->createTemporary(calleeSymbol, dt);
2782
returnNode->setAndIncChild(0, TR::Node::createLoad(returnNode, temp));
2783
returnValue->decReferenceCount();
2784
TR::Block * generatedFirstBlock = calleeSymbol->prependEmptyFirstBlock();
2785
generatedFirstBlock->setFrequency(conditionBlock->getFrequency());
2786
generatedFirstBlock->append(TR::TreeTop::create(comp(), TR::Node::createStore(temp, returnValue)));
2787
arithmeticNode = TR::Node::copy(arithmeticNode);
2788
arithmeticNode->decReferenceCount();
2789
TR::TreeTop::create(comp(), callNodeTreeTop->getPrevTreeTop(),
2790
TR::Node::createStore(temp, TR::Node::create(TR::iadd, 2, TR::Node::createLoad(returnNode, temp), arithmeticNode)));
2791
}
2792
2793
if (!performTransformation(comp(), "%sEliminating tail recursion to %s\n", OPT_DETAILS, tracer()->traceSignature(calleeResolvedMethod)))
2794
return false;
2795
2796
//please don't move this if. It needs to be done after all early exits but exactly before
2797
//we do any transformations
2798
if (!comp()->incInlineDepth(calleeSymbol, callNode, !callNode->getOpCode().isCallIndirect(), guard, calleeResolvedMethod->classOfMethod(), 0))
2799
{
2800
return false;
2801
}
2802
2803
2804
_disableInnerPrex = true;
2805
2806
TR::CFG * callerCFG = callerSymbol->getFlowGraph();
2807
TR::TreeTop * prevTreeTop = callNodeTreeTop->getPrevTreeTop();
2808
2809
if (parent->getOpCode().isNullCheck())
2810
prevTreeTop = parent->extractTheNullCheck(prevTreeTop);
2811
2812
assignArgumentsToParameters(calleeSymbol, prevTreeTop, callNode);
2813
2814
TR::CFGEdge * backEdge;
2815
if (guard->_kind != TR_NoGuard)
2816
{
2817
// TR::Block *block2 = block->split(callNodeTreeTop, callerCFG);
2818
// block->append(TR::TreeTop::create(comp(), createVirtualGuard(callNode, callNode->getSymbol()->castToResolvedMethodSymbol(), branchDestination->getEntry(), false, (void *)calleeResolvedMethod->classOfMethod(), false)));
2819
// // branchDestination->setIsCold(); <--- branch destination is NOT cold
2820
// block2->setIsCold();
2821
// backEdge = TR::CFGEdge::createEdge(block, branchDestination, trMemory());
2822
// callerCFG->addEdge(backEdge);
2823
2824
TR::Block *gotoBlock = block->split(callNodeTreeTop, callerCFG);
2825
TR::Block *block2 = gotoBlock->split(callNodeTreeTop, callerCFG);
2826
2827
TR::Node *gotoNode = TR::Node::create(callNode, TR::Goto);
2828
gotoNode->setBranchDestination(branchDestination->getEntry());
2829
gotoBlock->append(TR::TreeTop::create(comp(), gotoNode));
2830
2831
// calleeResolvedMethod will be inlined with a virtual guard v.
2832
// At this point we need to create another virtual guard v' for the
2833
// recursive call. v' needs a calleeIndex that is different from the one
2834
// for v (otherwise we cannot distinguish between the two virtual guards)
2835
// We achieve this by artificially incrementing the inlining depth as if
2836
// we inlined calleeResolvedMethod again.
2837
2838
TR::Node *vguardNode = createVirtualGuard(callNode,
2839
callNode->getSymbol()->castToResolvedMethodSymbol(),
2840
block2->getEntry(),
2841
comp()->getCurrentInlinedSiteIndex(),//branchDestination->getEntry()->getNode()->getInlinedSiteIndex(),
2842
calleeResolvedMethod->classOfMethod(), false, guard);
2843
block->append(TR::TreeTop::create(comp(), vguardNode));
2844
callerCFG->addEdge(block, block2);
2845
2846
TR::CFGEdge *origEdge = gotoBlock->getSuccessors().front();
2847
backEdge = TR::CFGEdge::createEdge(gotoBlock, branchDestination, trMemory());
2848
callerCFG->addEdge(backEdge);
2849
callerCFG->removeEdge(origEdge);
2850
if (guard->_kind == TR_ProfiledGuard)
2851
{
2852
if (block->getFrequency() < 0)
2853
block2->setFrequency(block->getFrequency());
2854
else
2855
{
2856
if (guard->isHighProbablityProfiledGuard())
2857
block2->setFrequency(MAX_COLD_BLOCK_COUNT+1);
2858
else
2859
block2->setFrequency(TR::Block::getScaledSpecializedFrequency(block->getFrequency()));
2860
}
2861
}
2862
else
2863
{
2864
block2->setFrequency(VERSIONED_COLD_BLOCK_COUNT);
2865
block2->setIsCold();
2866
}
2867
}
2868
else
2869
{
2870
callNodeTreeTop->setNode(TR::Node::create(callNode, TR::Goto, 0, branchDestination->getEntry()));
2871
TR_ASSERT((block->getSuccessors().size() == 1), "eliminateTailRecursion, block with call does not have exactly 1 successor");
2872
TR::CFGEdge * existingSuccessorEdge = block->getSuccessors().front();
2873
backEdge = TR::CFGEdge::createEdge(block, branchDestination, trMemory());
2874
callerCFG->addEdge(backEdge);
2875
callerCFG->removeEdge(existingSuccessorEdge);
2876
if (block->getLastRealTreeTop() != callNodeTreeTop)
2877
callerSymbol->removeTree(block->getLastRealTreeTop());
2878
TR_ASSERT(block->getLastRealTreeTop() == callNodeTreeTop, "eliminateTailRecursion call isn't last or second last tree in block");
2879
}
2880
2881
if (comp()->getProfilingMode() == JitProfiling)
2882
{
2883
TR::Node *asyncNode = TR::Node::createWithSymRef(callNode, TR::asynccheck, 0, comp()->getSymRefTab()->findOrCreateAsyncCheckSymbolRef(comp()->getMethodSymbol()));
2884
block->prepend(TR::TreeTop::create(comp(), asyncNode));
2885
}
2886
2887
backEdge->setCreatedByTailRecursionElimination(true);
2888
calleeSymbol->setMayHaveLoops(true);
2889
comp()->decInlineDepth(); // undo what we artificially did before
2890
return true;
2891
}
2892
2893
/**
 * Assign each argument of a recursive call to the corresponding parameter
 * slot of the callee. Used when the call itself is being removed (see the
 * tail-recursion elimination code above, which replaces the call with a
 * branch back to the method entry), so the parameter slots must be reloaded
 * with the new argument values before control transfers.
 *
 * @param calleeSymbol the method whose parameter slots receive the arguments
 * @param prevTreeTop  the tree top after which the evaluation/store trees
 *                     are inserted (advanced as trees are created)
 * @param callNode     the recursive call node supplying the argument values
 */
void
TR_MultipleCallTargetInliner::assignArgumentsToParameters(TR::ResolvedMethodSymbol * calleeSymbol, TR::TreeTop * prevTreeTop, TR::Node * callNode)
   {
   int32_t i = callNode->getFirstArgumentIndex();
   ListIterator<TR::ParameterSymbol> parms(&calleeSymbol->getParameterList());
   for (TR::ParameterSymbol * p = parms.getFirst(); p; ++i, p = parms.getNext())
      {
      TR::SymbolReference * sr = comp()->getSymRefTab()->findOrCreateAutoSymbol(calleeSymbol, p->getSlot(), p->getDataType(), true);
      TR::Node * arg = callNode->getChild(i);
      // No store is needed when the argument is already an un-commoned load
      // of the very parameter slot it would be stored into.
      if (arg->getReferenceCount() != 1 || !arg->getOpCode().hasSymbolReference() || arg->getSymbolReference() != sr)
         {
         arg->decReferenceCount(); // logically remove it from the call node

         // Consider,
         // void foo(int a, int b) { .... return foo(b, a); }
         // We're going to create 'a = b; b = a;' which will assign the modified value of 'a' to 'b'.
         // To get the original value of 'a' assigned to 'b' we
         // create a treetop before the assignments so that 'a' is evaluated before it is modified.
         //
         prevTreeTop = TR::TreeTop::create(comp(), prevTreeTop, TR::Node::create(TR::treetop, 1, arg));

         TR::Node *storeNode = TR::Node::createStore(sr, arg);

         TR::TreeTop::create(comp(), prevTreeTop, storeNode);
         TR::Node * newArg = TR::Node::createLoad(arg, sr);

         // Binary-coded-decimal values carry an explicit precision that the
         // generic store/load constructors do not copy; propagate it.
         if (arg->getType().isBCD())
            {
            storeNode->setDecimalPrecision(arg->getDecimalPrecision());
            newArg->setDecimalPrecision(arg->getDecimalPrecision());
            }

         // If child 0 of the call commons the first argument node
         // (presumably the indirect-call vft/receiver expression — confirm),
         // repoint it at the fresh load as well so it does not see the
         // clobbered parameter slot.
         if (i == 1 && i == callNode->getFirstArgumentIndex() && callNode->getChild(0)->getChild(0) == arg)
            {
            arg->decReferenceCount();
            callNode->getChild(0)->setAndIncChild(0, newArg);
            }
         callNode->setAndIncChild(i, newArg);
         }
      }
   }
// Construct a multiple-call-target inliner. All state beyond what the
// TR_InlinerBase constructor sets up (weight limits, target lists, walk
// flags) is initialized lazily by the callers of inlineCallTargets.
TR_MultipleCallTargetInliner::TR_MultipleCallTargetInliner(TR::Optimizer *optimizer, TR::Optimization *optimization)
   : TR_InlinerBase(optimizer, optimization)
   {

   }
/**
 * Examine a single call target discovered by walkCallSites: if the callee
 * has no trees yet, generate IL for it in "peeking" mode (heuristic-only;
 * real ILGen would recreate the trees), then recurse one level deeper into
 * walkCallSites when the callee may itself contain inlineable calls.
 *
 * @param calleeSymbol        the target method being examined
 * @param callStack           the current inliner call stack
 * @param callNodeTreeTop     tree top holding the call
 * @param parent              parent node of the call node
 * @param callNode            the call node itself
 * @param guard               virtual-guard selection for this target (its
 *                            _kind seeds the inner-preexistence info)
 * @param thisClass           receiver class for the target (unused here)
 * @param inlineNonRecursively when true, suppresses the recursive walk
 * @param walkDepth           current recursion depth of the walk
 */
void
TR_MultipleCallTargetInliner::walkCallSite(
   TR::ResolvedMethodSymbol * calleeSymbol, TR_CallStack * callStack,
   TR::TreeTop * callNodeTreeTop, TR::Node * parent, TR::Node * callNode, TR_VirtualGuardSelection *guard,
   TR_OpaqueClassBlock * thisClass, bool inlineNonRecursively, int32_t walkDepth)
   {
   TR::ResolvedMethodSymbol * callerSymbol = callStack->_methodSymbol;

   // NOTE(review): bytecodeSize is computed but never used below — looks
   // like leftover from an earlier heuristic; confirm before removing.
   int32_t bytecodeSize = getPolicy()->getInitialBytecodeSize(calleeSymbol->getResolvedMethod(), calleeSymbol, comp());

   ///comp()->getFlowGraph()->setMaxFrequency(-1);
   ///comp()->getFlowGraph()->setMaxEdgeFrequency(-1);

   TR_J9InnerPreexistenceInfo innerPrexInfo(comp(), calleeSymbol, callStack, callNodeTreeTop, callNode, guard->_kind);

   bool genILSucceeded = false;
   // visitCount is only referenced by the commented-out save/restore below.
   vcount_t visitCount = comp()->getVisitCount();
   if (!calleeSymbol->getFirstTreeTop())
      {
      //if (comp()->trace(inlining))
      dumpOptDetails(comp(), "O^O INLINER: Peeking into the IL from walkCallSites as part of the inlining heuristic for [%p]\n", calleeSymbol);

      //comp()->setVisitCount(1);
      genILSucceeded = (NULL != calleeSymbol->getResolvedMethod()->genMethodILForPeekingEvenUnderMethodRedefinition(calleeSymbol, comp()));
      //comp()->setVisitCount(visitCount);
      }

   dumpOptDetails(comp(), " -- %s\n", genILSucceeded? "succeeded" : "failed");

   ///if (!inlineNonRecursively && calleeSymbol->mayHaveInlineableCall())

   // NOTE(review): when the callee already has trees, genILSucceeded stays
   // false and the recursive walk is skipped — confirm this is intended
   // rather than an oversight.
   if (!inlineNonRecursively && genILSucceeded && calleeSymbol->mayHaveInlineableCall())
      {
      walkCallSites(calleeSymbol, callStack, &innerPrexInfo, walkDepth+1);
      }
   //calleeSymbol->setFirstTreeTop(NULL); // We can reuse the peeked trees. If we're doing real ILGen, the trees will be re-created anyway.
   }
/**
 * Recursively walk the call sites of callerSymbol (without transforming
 * them, except for tail-recursion elimination) to pre-weigh call sites and
 * mark persistent method info as "scanned for inlining". Recursion into
 * callees happens through walkCallSite; depth is capped at
 * MAX_ECS_RECURSION_DEPTH / 4.
 *
 * Note: the tree-top loop condition includes (walkDepth == 0), so the
 * caller's trees are only iterated at the top level; deeper levels do their
 * work via the walkCallSite recursion alone.
 *
 * @param callerSymbol  method whose trees are walked
 * @param prevCallStack enclosing call stack (0 at the top level)
 * @param innerPrexInfo inner-preexistence info to attach to the new frame
 * @param walkDepth     current recursion depth
 */
void
TR_MultipleCallTargetInliner::walkCallSites(TR::ResolvedMethodSymbol * callerSymbol, TR_CallStack * prevCallStack, TR_InnerPreexistenceInfo *innerPrexInfo, int32_t walkDepth)
   {
   heuristicTrace(tracer(),"**WalkCallSites: depth %d\n",walkDepth);
   if (walkDepth > MAX_ECS_RECURSION_DEPTH / 4 )
      return;

   TR_InlinerDelimiter delimiter(tracer(),"walkCallSites");

   TR_CallStack callStack(comp(), callerSymbol, callerSymbol->getResolvedMethod(), prevCallStack, 0);

   if (innerPrexInfo)
      callStack._innerPrexInfo = innerPrexInfo;

   if (prevCallStack == 0)
      callStack.initializeControlFlowInfo(callerSymbol);

   bool currentBlockHasExceptionSuccessors = false;
   // Save and clear the walk-global flags; restored before returning so a
   // nested walk cannot leak state into the caller's walk.
   bool prevDisableTailRecursion = _disableTailRecursion;
   bool prevDisableInnerPrex = _disableInnerPrex;
   _disableTailRecursion = false;
   _disableInnerPrex = false;

   bool isCold = false;
   for (TR::TreeTop * tt = callerSymbol->getFirstTreeTop(); tt && (walkDepth==0); tt = tt->getNextTreeTop())
      {
      TR::Node * parent = tt->getNode();

      if (parent->getOpCodeValue() == TR::BBStart)
         {
         isCold = false;
         TR::Block *block = parent->getBlock();

         if (prevCallStack == 0 && !block->isExtensionOfPreviousBlock())
            callStack.makeBasicBlockTempsAvailable(_availableBasicBlockTemps);

         // dont inline into cold blocks (catch blocks are treated as cold too)
         //
         if (block->isCold() ||
             !block->getExceptionPredecessors().empty())
            {
            isCold = true;
            }

         currentBlockHasExceptionSuccessors = !block->getExceptionSuccessors().empty();

         if (prevCallStack == 0)
            callStack.updateState(block);
         _isInLoop = callStack._inALoop;
         }
      else if (parent->getNumChildren())
         {
         TR::Node * node = parent->getChild(0);
         // Visit each call node at most once per walk (_visitCount guard).
         if (node->getOpCode().isCall() && node->getVisitCount() != _visitCount)
            {
            TR::Symbol *sym = node->getSymbol();
            if (!isCold)
               {

               ///TR::ResolvedMethodSymbol * calleeSymbol = isInlineable(&callStack, node, guard, thisClass,tt);
               TR::SymbolReference *symRef = node->getSymbolReference();
               TR::MethodSymbol *calleeSymbol = symRef->getSymbol()->castToMethodSymbol();

               //TR_CallSite *callsite = new (trStackMemory()) TR_CallSite (symRef->getOwningMethod(comp()), tt, parent, node, calleeSymbol->getMethod(), 0, (int32_t)symRef->getOffset(), symRef->getCPIndex(), 0, calleeSymbol->getResolvedMethodSymbol(), node->getOpCode().isCallIndirect(), calleeSymbol->isInterface(), node->getByteCodeInfo(), comp());


               TR_CallSite *callsite = TR_CallSite::create(tt, parent, node,
                                                      0, symRef, (TR_ResolvedMethod*) 0,
                                                      comp(), trMemory() , stackAlloc);


               debugTrace(tracer(),"**WalkCallSites: Analysing Call at call node %p . Creating callsite %p to encapsulate call.",node,callsite);
               getSymbolAndFindInlineTargets(&callStack, callsite);

               heuristicTrace(tracer(),"**WalkCallSites:Searching for Targets returned %d targets for call at node %p. ",callsite->numTargets(),node);

               ///if (calleeSymbol)
               if (callsite->numTargets())
                  {
                  bool flag=false;
                  for(int32_t i=0 ; i < callsite->numTargets() && flag==false; i++)
                     {

                     // TR::ResolvedMethodSymbol *calleeResolvedSymbol = calleeSymbol->getResolvedMethodSymbol();
                     // if (!calleeResolvedSymbol)
                     // continue;

                     // Decide whether this target is worth walking into:
                     // skip natives/helpers/abstract methods, and skip
                     // callees already compiled at warm-or-better (their
                     // inlining opportunities have been exploited).
                     bool walkCall = false;

                     if (! (
                        callsite->getTarget(i)->_calleeSymbol->isVMInternalNative() ||
                        callsite->getTarget(i)->_calleeSymbol->isHelper() ||
                        callsite->getTarget(i)->_calleeSymbol->isNative() ||
                        callsite->getTarget(i)->_calleeSymbol->isSystemLinkageDispatch() ||
                        callsite->getTarget(i)->_calleeSymbol->isJITInternalNative() ||
                        callsite->getTarget(i)->_calleeSymbol->getResolvedMethod()->isAbstract()
                        ))
                        {
                        if (TR::Compiler->mtd.isCompiledMethod(callsite->getTarget(i)->_calleeSymbol->getResolvedMethod()->getPersistentIdentifier()))
                           {
                           TR_PersistentJittedBodyInfo * bodyInfo = ((TR_ResolvedJ9Method*) callsite->getTarget(i)->_calleeSymbol->getResolvedMethodSymbol()->getResolvedMethod())->getExistingJittedBodyInfo();
                           if (bodyInfo &&
                               bodyInfo->getHotness() < warm &&
                               !bodyInfo->getIsProfilingBody())
                              walkCall = true;
                           }
                        else
                           walkCall = true;
                        }
                     // Only walk calls that belong to the caller currently
                     // being scanned.
                     if (symRef->getOwningMethodSymbol(comp()) != callerSymbol)
                        {
                        walkCall = false;
                        }

                     if (walkCall)
                        {
                        // TR_ResolvedMethod * calleeResolvedMethod = calleeResolvedSymbol->getResolvedMethod();
                        TR_CallStack * cs = callStack.isCurrentlyOnTheStack(callsite->getTarget(i)->_calleeMethod, 1);
                        TR_PersistentMethodInfo * methodInfo = TR_PersistentMethodInfo::get(callsite->getTarget(i)->_calleeMethod); //calleeResolvedMethod);
                        bool alreadyVisited = false;

                        if (methodInfo && methodInfo->wasScannedForInlining())
                           {
                           //printf("Already visited\n");
                           debugTrace(tracer(),"Walk call sites for scanning: methodInfo %p already visited\n", methodInfo);

                           alreadyVisited = true;
                           }

                        // Try tail-recursion elimination first; only when it
                        // does not apply (or the method was already scanned)
                        // do we recurse and weigh the site.
                        if (!(!alreadyVisited &&
                              cs &&
                              callsite->getTarget(i)->_calleeSymbol == callsite->_callNode->getSymbol() &&
                              eliminateTailRecursion(cs->_methodSymbol, &callStack, callsite->_callNodeTreeTop, callsite->_parent, callsite->_callNode, callsite->getTarget(i)->_guard)))
                           {
                           // walkCallSite(calleeResolvedSymbol, &callStack, tt, parent, node, guard, thisClass, false, walkDepth);
                           walkCallSite(callsite->getTarget(i)->_calleeSymbol, &callStack,callsite->_callNodeTreeTop,callsite->_parent,callsite->_callNode,callsite->getTarget(i)->_guard,callsite->getTarget(i)->_receiverClass,false,walkDepth);
                           debugTrace(tracer(),"Walk call sites for scanning: at call site: %s\n", tracer()->traceSignature(callsite->getTarget(i)->_calleeSymbol));

                           // TR::SymbolReference * symRef = node->getSymbolReference();
                           // TR_CallSite *callsite = new (trStackMemory()) TR_CallSite (symRef->getOwningMethod(comp()),
                           // tt,
                           // parent,
                           // node,
                           // calleeResolvedSymbol->getMethod(),
                           // 0,
                           // (int32_t)symRef->getOffset(),
                           // symRef->getCPIndex(),
                           // 0,
                           // calleeResolvedSymbol->getResolvedMethodSymbol(),
                           // node->getOpCode().isCallIndirect(),
                           // calleeResolvedSymbol->isInterface(),
                           // node->getByteCodeInfo(),
                           // comp());

                           weighCallSite(&callStack, callsite, currentBlockHasExceptionSuccessors, true);

                           if(tracer()->debugLevel())
                              {
                              tracer()->dumpCallSite(callsite, "Dumping Call Site after Weighing");
                              }

                           // Remember persistently that this method has been
                           // scanned so later compilations skip the rescan.
                           if (methodInfo)
                              {
                              methodInfo->setWasScannedForInlining(true);
                              debugTrace(tracer(),"Walk call sites for scanning: set scaneed for methodInfo %p\n", methodInfo);
                              }
                           //printf("Walk %s, method info %p\n", calleeResolvedSymbol->signature(trMemory()), methodInfo);
                           }
                        }
                     }
                  } //end for loop over call targets
               }
            node->setVisitCount(_visitCount);
            }
         }
      }

   _disableTailRecursion = prevDisableTailRecursion;
   _disableInnerPrex = prevDisableInnerPrex;
   }
/**
 * Main driver of the multiple-call-target inliner for callerSymbol.
 *
 * Runs in phases (the last three only at the top level, prevCallStack == 0):
 *  1. "collectTargets"  — walk the trees, create a TR_CallSite per call,
 *     find its targets, attempt tail-recursion elimination, and either weigh
 *     the site (top level) or inline it immediately (nested levels).
 *  2. "refineCallgraph" — chop the collected target list down to the caller
 *     weight budget and the estimated node-count threshold.
 *  3. Mark/sweep pass   — remove from each call site any target that did not
 *     survive into _callTargets.
 *  4. "inlineTransformation" — second walk over the trees, inlining the
 *     surviving targets in tree order.
 *
 * @param callerSymbol  method being compiled / inlined into
 * @param prevCallStack enclosing call stack (0 at the top level)
 * @param innerPrexInfo inner-preexistence info for the new stack frame
 * @return true if any target was actually inlined by the transformation
 *         phase (tail-recursion eliminations and nested successes are
 *         tracked in anySuccess2 but do not affect the return value —
 *         NOTE(review): confirm that is intended)
 */
bool TR_MultipleCallTargetInliner::inlineCallTargets(TR::ResolvedMethodSymbol *callerSymbol, TR_CallStack *prevCallStack, TR_InnerPreexistenceInfo *innerPrexInfo)
   {
   TR_InlinerDelimiter delimiter(tracer(),"TR_MultipleCallTargetInliner::inlineCallTargets");

   TR_CallStack callStack(comp(), callerSymbol, callerSymbol->getResolvedMethod(), prevCallStack, 0, true);

   if (innerPrexInfo)
      callStack._innerPrexInfo = innerPrexInfo;

   if (prevCallStack == 0)
      callStack.initializeControlFlowInfo(callerSymbol);

   bool anySuccess = false;
   bool anySuccess2 = false;

   bool currentBlockHasExceptionSuccessors = false;

   // Save walk-global flags; restored at the end so nested invocations do
   // not leak state.
   bool prevDisableTailRecursion = _disableTailRecursion;
   bool prevDisableInnerPrex = _disableInnerPrex;
   bool prevInliningAsWeWalk = _inliningAsWeWalk;

   _disableTailRecursion = false;
   _disableInnerPrex = false;
   bool isCold = false;

   {
   TR_InlinerDelimiter delimiter(tracer(),"collectTargets");

   int32_t thisCallSite = callerSymbol->getFirstTreeTop()->getNode()->getInlinedSiteIndex();

   TR::TreeTop *nextTree = NULL;
   for (TR::TreeTop * tt = callerSymbol->getFirstTreeTop(); tt; tt = nextTree)
      {
      // Inlining can add code downstream of our traversal. We need to skip that code.
      //
      nextTree = tt->getNextTreeTop();

      TR::Node * parent = tt->getNode();

      if (prevCallStack)
         _inliningAsWeWalk = true;

      if (parent->getOpCodeValue() == TR::BBStart)
         {
         isCold = false;
         TR::Block *block = parent->getBlock();

         if (prevCallStack == 0 && !block->isExtensionOfPreviousBlock())
            callStack.makeBasicBlockTempsAvailable(_availableBasicBlockTemps);

         // dont inline into cold blocks
         if (block->isCold() || !block->getExceptionPredecessors().empty())
            {
            isCold = true;
            }

         // FIXME: the following assumes that catch blocks are at the end of the method
         // which may not generally be true. Correct fix is to do either dom-pdom or
         // structural analysis before this opt, and mark the cold-paths, and skipping
         // cold blocks would automagically do the trick
         //
         //if (!block->getExceptionPredecessors().empty())
         // break; // dont inline into catch blocks

         currentBlockHasExceptionSuccessors = !block->getExceptionSuccessors().empty();

         if (prevCallStack == 0)
            callStack.updateState(block);
         }
      else if (parent->getNumChildren())
         {
         TR::Node * node = parent->getChild(0);
         // Consider each call node at most once (_visitCount guard).
         if (node->getOpCode().isFunctionCall() && node->getVisitCount() != _visitCount)
            {
            TR_CallStack::SetCurrentCallNode sccn(callStack, node);

            TR::Symbol *sym = node->getSymbol();
            if (!isCold && !node->isTheVirtualCallNodeForAGuardedInlinedCall())
               {
               TR::SymbolReference * symRef = node->getSymbolReference();
               TR::MethodSymbol * calleeSymbol = symRef->getSymbol()->castToMethodSymbol();

               TR_CallSite *callsite = TR_CallSite::create(tt, parent, node,
                                                            0, symRef, (TR_ResolvedMethod*) 0,
                                                            comp(), trMemory() , stackAlloc);

               if (prevCallStack==0)
                  {
                  heuristicTrace(tracer(),"\n");
                  heuristicTrace(tracer(),"^^^ Top Level: Analysing Call at call node %p . Creating callsite %p to encapsulate call.",node,callsite);
                  }

               getSymbolAndFindInlineTargets(&callStack, callsite);

               if (!prevCallStack && callsite->numTargets() > 0)
                  {
                  // buildPrexArgInfo and propagateArgs use the caller symbol to look up the invoke bytecode.
                  // In a pass of inliner after the first (which happens in JSR292), the invoke bytecode won't
                  // be in the top-level method's bytecode, and the caller can vary with the call node.
                  TR::ResolvedMethodSymbol *thisCallSiteCallerSymbol = node->getSymbolReference()->getOwningMethodSymbol(comp());
                  TR_PrexArgInfo* compArgInfo = TR_PrexArgInfo::buildPrexArgInfoForMethodSymbol(thisCallSiteCallerSymbol, tracer());

                  if (tracer()->heuristicLevel())
                     {
                     alwaysTrace(tracer(), "compArgInfo :");
                     compArgInfo->dumpTrace();
                     }
                  compArgInfo->clearArgInfoForNonInvariantArguments(thisCallSiteCallerSymbol, tracer());
                  TR_PrexArgInfo::propagateArgsFromCaller(thisCallSiteCallerSymbol, callsite, compArgInfo, tracer());
                  if (tracer()->heuristicLevel())
                     {
                     alwaysTrace(tracer(), "callsite->getTarget(0)->_ecsPrexArgInfo :");
                     callsite->getTarget(0)->_ecsPrexArgInfo->dumpTrace();
                     }
                  }

               heuristicTrace(tracer(),"Searching for Targets returned %d targets for call at node %p. ",callsite->numTargets(),node);

               if (callsite->numTargets())
                  {
                  // First give tail-recursion elimination a chance; if it
                  // fires, the site needs no further processing (flag).
                  bool flag = false;
                  for (int32_t i = 0; i < callsite->numTargets() && flag == false; i++)
                     {
                     TR_CallStack *cs = callStack.isCurrentlyOnTheStack(callsite->getTarget(i)->_calleeMethod,1);
                     if (cs && callsite->getTarget(i)->_calleeSymbol == node->getSymbol() &&
                         eliminateTailRecursion( cs->_methodSymbol, &callStack, callsite->_callNodeTreeTop, callsite->_parent,callsite->_callNode,callsite->getTarget(i)->_guard) )
                        {
                        anySuccess2 = true;
                        flag = true;
                        }
                     }

                  if (!flag)
                     {
                     if (prevCallStack == 0)//we only weigh base level calls.. All other calls we proceed right to inlining
                        {
                        weighCallSite(&callStack, callsite, currentBlockHasExceptionSuccessors);

                        if (tracer()->debugLevel())
                           {
                           tracer()->dumpCallSite(callsite, "Dumping Call Site after Weighing");
                           }
                        }
                     else
                        {
                        // with !comp()->getOption(TR_DisableNewInliningInfrastructure)
                        // prevCallStack == 0 will always be true?

                        for (int32_t i=0; i<callsite->numTargets(); i++)
                           {
                           heuristicTrace(tracer(),"call depth > 0 . Inlining call at node %p",node);
                           anySuccess2 |= inlineCallTarget(&callStack, callsite->getTarget(i), true);
                           }
                        }
                     }
                  else
                     {
                     // when flag is true do nothing
                     }
                  }
               else
                  {
                  heuristicTrace(tracer(),"Found No Inlineable targets for call at node %p\n",node);
                  debugTrace(tracer(),"Adding callsite %p to list of deadCallSites",callsite);
                  _deadCallSites.add(callsite);
                  }
               }
            else
               {
               TR::SymbolReference * symRef = node->getSymbolReference();
               TR::MethodSymbol * calleeSymbol = symRef->getSymbol()->castToMethodSymbol();
               heuristicTrace(tracer(),"Block containing call node %p %s. Skipping call.", node, node->isTheVirtualCallNodeForAGuardedInlinedCall() ? "is on the cold side of a guard" : "is cold");
               tracer()->insertCounter(Cold_Block,tt);
               }

            node->setVisitCount(_visitCount);
            }
         else if (node->getOpCode().isCall())
            {
            debugTrace(tracer(),"Failing for an unknown reason. TreeTop = %p, node = %p nodevisitCount = %d _visitCount = %d getInlinedSiteIndex() = %d thisCallSite = %d. ",
                        tt, node, node->getVisitCount(), _visitCount,node->getInlinedSiteIndex(),thisCallSite);
            }
         }
      }
   }

   for (TR_CallTarget *target = _callTargets.getFirst(); target; target = target->getNext())
      target->_prexArgInfo = getUtil()->computePrexInfo(target);

   if (prevCallStack == 0)
      {
      TR_InlinerDelimiter delimiter(tracer(),"refineCallgraph");

      int32_t size = getPolicy()->getInitialBytecodeSize(callerSymbol, comp());
      int32_t limit = _callerWeightLimit;
      int32_t totalWeight=0;

      TR_CallTarget * calltarget = NULL;

      if (comp()->getOption(TR_TraceAll))
         {
         traceMsg(comp(), "\n\n~~~ Call site weights for %s\n", comp()->signature());
         traceMsg(comp(), "original size: %d\n", size);
         traceMsg(comp(), "Inlining weight limit: %d\n", limit);
         totalWeight = 0;
         for (calltarget = _callTargets.getFirst(); calltarget; calltarget = calltarget->getNext())
            {
            totalWeight += calltarget->_weight;
            traceMsg(comp(), "Calltarget %p callnode %p %s\n", calltarget, &calltarget->_myCallSite->_callNode, tracer()->traceSignature(calltarget->_calleeSymbol));
            traceMsg(comp(), "Site size: %d site weight %d call-graph adjusted weight %lf, total weight %d\n", calltarget->_size, calltarget->_weight, calltarget->_callGraphAdjustedWeight, totalWeight);
            }
         }

      // Targets whose weight is at or below this value are "trivial" and
      // never trigger the budget cut-off; overridable via environment.
      static const char * p = feGetEnv("TR_TrivialWeightForLimit");
      int32_t trivialWeightForLimit = 30;

      if (p)
         {
         trivialWeightForLimit = atoi(p);
         printf("Using trivial weight limit of %d\n", trivialWeightForLimit);
         }

      // Pass 1: find the first non-trivial target that pushes the running
      // weight past the caller budget; everything from there on is chopped.
      TR_CallTarget* callTargetToChop = NULL;
         {
         bool doneInlining = false;
         int32_t totalWeight = 0;
         TR_CallTarget * prev = 0;
         for (calltarget = _callTargets.getFirst(); calltarget; prev = calltarget, calltarget = calltarget->getNext())
            {
            totalWeight += calltarget->_weight;
            if (doneInlining)
               tracer()->insertCounter(Exceeded_Caller_Budget,calltarget->_myCallSite->_callNodeTreeTop);
            else if (totalWeight > limit && calltarget->_weight > trivialWeightForLimit)
               {
               callTargetToChop = calltarget;
               doneInlining = true;
               }
            }
         }

      // Pass 2: further tighten the cut-off using an estimate of the total
      // number of IL nodes the surviving targets would generate.
      TR_CallTarget * prev = 0;
      int32_t estimatedNumberOfNodes = getCurrentNumberOfNodes();
      debugTrace(tracer(), "Initially, estimatedNumberOfNodes = %d\n", estimatedNumberOfNodes);
      for (calltarget = _callTargets.getFirst(); calltarget != callTargetToChop; prev = calltarget, calltarget = calltarget->getNext())
         {
         generateNodeEstimate myEstimate;
         recursivelyWalkCallTargetAndPerformAction(calltarget, myEstimate);
         estimatedNumberOfNodes += myEstimate.getNodeEstimate();

         debugTrace(tracer(),"Estimated Number of Nodes is %d after calltarget %p",estimatedNumberOfNodes,calltarget);

         float factor = 1.1F; // this factor was chosen based on a study of a large WAS app that showed that getMaxBytecodeindex was 92% accurate compared to nodes generated

         if ((uint32_t)(estimatedNumberOfNodes*factor) > _nodeCountThreshold)
            {
            callTargetToChop = calltarget;
            debugTrace(tracer(),"estimate nodes exceeds _nodeCountThreshold, chopped off targets staring from %p, lastTargetToInline %p\n", callTargetToChop, prev);
            break;
            }
         }

      processChoppedOffCallTargets(prev, callTargetToChop, estimatedNumberOfNodes);
      if (comp()->getOption(TR_TraceAll) || tracer()->heuristicLevel())
         {
         tracer()->dumpCallGraphs(&_callTargets);
         tracer()->dumpDeadCalls(&_deadCallSites);
         }
      }

   if (prevCallStack == 0)
      {
      // Mark/sweep: tag every surviving target, then drop from each call
      // site any target that was not tagged (i.e. was chopped above).
      for (TR_CallTarget* calltarget = _callTargets.getFirst(); calltarget; calltarget = calltarget->getNext())
         {
         debugTrace(tracer(), "marking calltarget %p of %p as MT_Marked", calltarget, calltarget->_myCallSite);
         calltarget->_failureReason = MT_Marked;
         }

      for (TR_CallTarget* calltarget = _callTargets.getFirst(); calltarget; calltarget = calltarget->getNext())
         {
         TR_CallSite* clSite = calltarget->_myCallSite;
         for (int i = 0 ; i < clSite->numTargets(); i++)
            {
            if (clSite->getTarget(i)->_failureReason != MT_Marked)
               {
               debugTrace(tracer(), "removing calltarget %p of %p as it isn't in _callTargets", clSite->getTarget(i), calltarget->_myCallSite);
               clSite->removecalltarget(i, tracer(), Not_Sane);
               i--;
               }
            }
         }
      }

   if (prevCallStack == 0)
      {
      TR_InlinerDelimiter delimiter(tracer(),"inlineTransformation");

      TR_CallTarget * calltarget = NULL;

      heuristicTrace(tracer(),"Starting Transformation Phase\n");
      //static int si, sj;
      //printf("graph exceeded %d times out of %d", (calltarget ? ++si : si) , ++sj);

      // We must inline in tree order because of the way temp sharing is done.
      // there are two types of temporaries inliner generates - availablebasicblocktemps, which are used when inlining breaks a block, and commoning must be broken and
      // availableTemps, which are generally used for when a parameter needs a temporary created for it.
      // all the methods that deal with temps (parametertoargumentmapper, handleinjectedbasicblock,transforminlinedfunction) will consult these lists
      // usually, it will search a list, and if it doesn't find a temp, search the second list.
      // the problem is when inlining out of order and with the fact that both temp lists can be consulted, it is possible that a temp will get misused.
      // an example will be a call lower down was inlined first and created a temp t1, for a parameter (the block doesn't get split). It gets added to availableTemps after inlining.
      // After, higher up (in the same block) another call now gets inlined, and splits the block. handleinjectedbasicblock now goes and breaks commoning around this higher up call.
      // when this happens, it can grab the temp t1 from the availableTemps list and reuse it for breaking commoning. Now there are two stores to t1 in the same block. If there was any
      // commoning that existed after the second store to t1 that was supposed to get broken, it will now load a bad value of t1.


      for (TR::TreeTop * tt = callerSymbol->getFirstTreeTop(); tt; tt = tt->getNextTreeTop())
         {
         TR::Node * parent = tt->getNode();
         if (tt->getNode()->getNumChildren() && tt->getNode()->getChild(0)->getOpCode().isCall())
            {
            debugTrace(tracer()," (Second Iteration) Found a call at tt %p node %p",tt,tt->getNode());
            TR_CallStack::SetCurrentCallNode sccn(callStack, tt->getNode()->getChild(0));
            for (calltarget = _callTargets.getFirst(); calltarget; calltarget = calltarget->getNext())
               {
               if(tracer()->debugLevel())
                  debugTrace(tracer()," (Second Iteration) Considering call target %p ByteCodeIndex = %d calltarget->_myCallSite->_callNodeTreeTop = %p"
                              " alreadyInlined = %d signature = %s",
                              calltarget,calltarget->_myCallSite->_callNode->getByteCodeIndex(),calltarget->_myCallSite->_callNodeTreeTop,
                              calltarget->_alreadyInlined,tracer()->traceSignature(calltarget->_calleeSymbol));

               if (calltarget->_myCallSite->_callNodeTreeTop == tt && !calltarget->_alreadyInlined)
                  {
                  TR::TreeTop* oldTt = tt;
                  // inlineCallTarget may advance tt past the trees it adds.
                  bool success = inlineCallTarget(&callStack, calltarget, true, NULL, &tt);
                  anySuccess |= success;
                  debugTrace(tracer(), "(Second Iteration) call target %p node %p. success = %d anySuccess = %d",calltarget, oldTt->getNode(),success,anySuccess);
                  }
               }
            }
         if (parent->getOpCodeValue() == TR::BBStart &&
             !parent->getBlock()->isExtensionOfPreviousBlock())
            callStack.makeBasicBlockTempsAvailable(_availableBasicBlockTemps);
         }
      }

   _disableTailRecursion = prevDisableTailRecursion;
   _disableInnerPrex = prevDisableInnerPrex;
   _inliningAsWeWalk = prevInliningAsWeWalk;

   callStack.commit();
   return anySuccess;
   }
void TR_MultipleCallTargetInliner::weighCallSite( TR_CallStack * callStack , TR_CallSite *callsite, bool currentBlockHasExceptionSuccessors, bool dontAddCalls)
3514
{
3515
TR_J9InlinerPolicy *j9inlinerPolicy = (TR_J9InlinerPolicy *) getPolicy();
3516
TR_InlinerDelimiter delimiter(tracer(), "weighCallSite");
3517
3518
for (int32_t k = 0; k < callsite->numTargets(); k++)
3519
{
3520
uint32_t size = 0;
3521
3522
TR_EstimateCodeSize::raiiWrapper ecsWrapper(this, tracer(), _maxRecursiveCallByteCodeSizeEstimate);
3523
TR_EstimateCodeSize *ecs = ecsWrapper.getCodeEstimator();
3524
3525
bool possiblyVeryHotLargeCallee = false;
3526
bool wouldBenefitFromInlining = false;
3527
3528
TR_CallTarget *calltarget = callsite->getTarget(k);
3529
3530
//for partial inlining:
3531
calltarget->_originatingBlock = callsite->_callNodeTreeTop->getEnclosingBlock();
3532
3533
3534
heuristicTrace(tracer(),"222 Weighing Call Target %p (node = %p)",calltarget,callsite->_callNode);
3535
3536
if (calltarget->_calleeSymbol && calltarget->_calleeSymbol->getResolvedMethod() &&
3537
comp()->isGeneratedReflectionMethod(calltarget->_calleeSymbol->getResolvedMethod()) &&
3538
!comp()->isGeneratedReflectionMethod(comp()->getCurrentMethod()))
3539
return;
3540
3541
if (calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_lang_Class_newInstance ||
3542
calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_util_Arrays_fill ||
3543
calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_util_Arrays_equals ||
3544
calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_lang_String_equals ||
3545
calltarget->_calleeSymbol->getRecognizedMethod() == TR::sun_io_ByteToCharSingleByte_convert ||
3546
calltarget->_calleeSymbol->getRecognizedMethod() == TR::sun_io_ByteToCharDBCS_EBCDIC_convert ||
3547
calltarget->_calleeSymbol->getRecognizedMethod() == TR::sun_io_CharToByteSingleByte_convert ||
3548
calltarget->_calleeSymbol->getRecognizedMethod() == TR::sun_io_ByteToCharSingleByte_JITintrinsicConvert ||
3549
calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_math_BigDecimal_subMulAddAddMulSetScale)
3550
{
3551
//This resetting of visit count is safe to do because all nodes and blocks in Estimate Code Size die once ecs returns
3552
vcount_t origVisitCount = comp()->getVisitCount();
3553
3554
ecs->calculateCodeSize(calltarget, callStack);
3555
//This resetting of visit count is safe to do because all nodes and blocks in Estimate Code Size die once ecs returns
3556
comp()->setVisitCount(origVisitCount);
3557
3558
if(calltarget->_isPartialInliningCandidate && calltarget->_partialInline)
3559
calltarget->_partialInline->setCallNodeTreeTop(callsite->_callNodeTreeTop);
3560
heuristicTrace(tracer(),"Setting size to 10 for recognized call target at node %p",calltarget->_myCallSite->_callNode);
3561
size = 10;
3562
}
3563
else
3564
{
3565
if (currentBlockHasExceptionSuccessors && ecs->aggressivelyInlineThrows())
3566
{
3567
_maxRecursiveCallByteCodeSizeEstimate <<= 3;
3568
heuristicTrace(tracer(),"Setting _maxRecursiveCallByteCodeSizeEstimate to %d because current block has exception successors and want to aggressively inline throws 1.",_maxRecursiveCallByteCodeSizeEstimate);
3569
}
3570
if (ecs->aggressivelyInlineThrows())
3571
_EDODisableInlinedProfilingInfo = true;
3572
3573
//This resetting of visit count is safe to do because all nodes and blocks in Estimate Code Size die once ecs returns
3574
vcount_t origVisitCount = comp()->getVisitCount();
3575
3576
bool inlineit = ecs->calculateCodeSize(calltarget, callStack);
3577
//This resetting of visit count is safe to do because all nodes and blocks in Estimate Code Size die once ecs returns
3578
comp()->setVisitCount(origVisitCount);
3579
3580
3581
debugTrace(tracer()," Original ecs size = %d, _maxRecursiveCallByteCodeSizeEstimate = %d ecs _realSize = %d optimisticSize = %d inlineit = %d error = %s ecs.sizeThreshold = %d",
3582
size,_maxRecursiveCallByteCodeSizeEstimate,ecs->getSize(),ecs->getOptimisticSize(),inlineit,ecs->getError(),ecs->getSizeThreshold());
3583
3584
size = ecs->getSize();
3585
3586
if (!inlineit && !callMustBeInlinedRegardlessOfSize(callsite))
3587
{
3588
if (isWarm(comp()))
3589
{
3590
if (comp()->isServerInlining())
3591
{
3592
if (callsite->_callNode->getInlinedSiteIndex() < 0) //Ensures setWarmCallGraphTooBig is only called for methods with inline index -1 (indexes >=0 can happen when inliner is called after inlining has already occured
3593
comp()->getCurrentMethod()->setWarmCallGraphTooBig (callsite->_callNode->getByteCodeInfo().getByteCodeIndex(), comp());
3594
else
3595
heuristicTrace(tracer(),"Not calling setWarmCallGraphTooBig on callNode %p because it is not from method being compiled bc index %d inlinedsiteindex %d",callsite->_callNode,callsite->_callNode->getByteCodeInfo().getByteCodeIndex(),callsite->_callNode->getInlinedSiteIndex());
3596
if (comp()->trace(OMR::inlining))
3597
heuristicTrace(tracer(),"inliner: Marked call as warm callee too big: %d > %d: %s\n", size, ecs->getSizeThreshold(), tracer()->traceSignature(calltarget->_calleeSymbol));
3598
//printf("inliner: Marked call as warm callee too big: %d > %d: %s\n", nonRecursiveSize, sizeThreshold, calleeSymbol->signature(trMemory()));
3599
}
3600
}
3601
tracer()->insertCounter(ECS_Failed,calltarget->_myCallSite->_callNodeTreeTop);
3602
heuristicTrace(tracer(),"Not Adding Call Target %p to list of targets to be inlined");
3603
if (comp()->cg()->traceBCDCodeGen())
3604
{
3605
traceMsg(comp(), "q^q : failing to inline %s into %s (callNode %p on line_no=%d) due to code size\n",
3606
tracer()->traceSignature(calltarget->_calleeSymbol),tracer()->traceSignature(callStack->_methodSymbol),
3607
callsite->_callNode,comp()->getLineNumber(callsite->_callNode));
3608
}
3609
continue;
3610
}
3611
3612
if (calltarget->_isPartialInliningCandidate && calltarget->_partialInline)
3613
calltarget->_partialInline->setCallNodeTreeTop(callsite->_callNodeTreeTop);
3614
3615
heuristicTrace(tracer(),"WeighCallSite: For Target %p node %p signature %s, estimation returned a size of %d",
3616
calltarget,calltarget->_myCallSite->_callNode,tracer()->traceSignature(calltarget),size);
3617
3618
if (currentBlockHasExceptionSuccessors && ecs->aggressivelyInlineThrows())
3619
{
3620
_maxRecursiveCallByteCodeSizeEstimate >>= 3;
3621
size >>= 3;
3622
size = std::max<uint32_t>(1, size);
3623
3624
heuristicTrace(tracer(),"Setting size to %d because current block has exception successors and want to aggressively inline throws 2",size);
3625
}
3626
if (callMustBeInlinedRegardlessOfSize(calltarget->_myCallSite))
3627
{
3628
heuristicTrace(tracer(), "calltarget->_fullSize: %d size: %d", calltarget->_fullSize, size);
3629
size = 0;
3630
heuristicTrace(tracer(), "Setting size to %d because call is dominate hot based on PDF", size);
3631
}
3632
3633
3634
wouldBenefitFromInlining = false;
3635
possiblyVeryHotLargeCallee = false;
3636
if ((((comp()->getMethodHotness() == veryHot) &&
3637
comp()->isProfilingCompilation()) ||
3638
(comp()->getMethodHotness() == scorching)) &&
3639
(size > _maxRecursiveCallByteCodeSizeEstimate/2))
3640
possiblyVeryHotLargeCallee = true;
3641
3642
if (calltarget->_calleeSymbol->isSynchronised())
3643
{
3644
size >>= 1; // could help gvp
3645
heuristicTrace(tracer(),"Setting size to %d because call is Synchronized",size);
3646
if (comp()->getMethodHotness() >= hot)
3647
{
3648
size >>= 1; // could help escape analysis as well
3649
heuristicTrace(tracer(),"Setting size to %d because call is Synchronized and also hot",size);
3650
}
3651
wouldBenefitFromInlining = true;
3652
}
3653
3654
if (strstr(calltarget->_calleeSymbol->signature(trMemory()),"BigDecimal.add("))
3655
{
3656
size >>=2;
3657
heuristicTrace(tracer(),"Setting size to %d because call is BigDecimal.add",size);
3658
}
3659
3660
if (isHot(comp()))
3661
{
3662
TR_ResolvedMethod *m = calltarget->_calleeSymbol->getResolvedMethod();
3663
char *sig = "toString";
3664
if (strncmp(m->nameChars(), sig, strlen(sig)) == 0)
3665
{
3666
size >>= 1;
3667
heuristicTrace(tracer(),"Setting size to %d because call is toString and compile is hot",size);
3668
}
3669
else
3670
{
3671
sig = "multiLeafArrayCopy";
3672
if (strncmp(m->nameChars(), sig, strlen(sig)) == 0)
3673
{
3674
size >>= 1;
3675
heuristicTrace(tracer(),"Setting size to %d because call is multiLeafArrayCopy and compile is hot",size);
3676
}
3677
}
3678
3679
if (calltarget->_calleeSymbol->getRecognizedMethod() == TR::java_math_BigDecimal_valueOf)
3680
{
3681
size >>= 2;
3682
heuristicTrace(tracer(),"Setting size to %d because call is BigDecimal_valueOf and compile is hot",size);
3683
}
3684
}
3685
3686
int32_t frequency1 = 0, frequency2 = 0;
3687
int32_t origSize = size;
3688
bool isCold;
3689
TR::TreeTop *callNodeTreeTop = calltarget->_myCallSite->_callNodeTreeTop;
3690
if (callNodeTreeTop)
3691
{
3692
// HACK: Get frequency from both sources, and use both. You're
3693
// only cold if you're cold according to both.
3694
3695
frequency1 = comp()->convertNonDeterministicInput(comp()->fej9()->getIProfilerCallCount(callsite->_callNode->getByteCodeInfo(), comp()), MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT, randomGenerator(), 0);
3696
TR::Block * block = callNodeTreeTop->getEnclosingBlock();
3697
frequency2 = comp()->convertNonDeterministicInput(block->getFrequency(), MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT, randomGenerator(), 0);
3698
3699
TR::TreeTop *tt = callNodeTreeTop;
3700
while (tt && (frequency2 == -1))
3701
{
3702
while (tt->getNode()->getOpCodeValue() != TR::BBStart) tt = tt->getPrevTreeTop();
3703
3704
TR::Block *block = NULL;
3705
if (tt) block = tt->getNode()->getBlock();
3706
if (block && tt->getNode()->getInlinedSiteIndex()<0)
3707
{
3708
frequency2 = comp()->convertNonDeterministicInput(block->getFrequency(), MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT, randomGenerator(), 0);
3709
}
3710
3711
tt = tt->getPrevTreeTop();
3712
}
3713
3714
if ((frequency1 <= 0) && ((0 <= frequency2) && (frequency2 <= MAX_COLD_BLOCK_COUNT)))
3715
{
3716
isCold = true;
3717
}
3718
// For optServer in hot/scorching I want the old thresholds 1000 0 0 (high degree of inlining)
3719
// For optSever in warm I want the new thresholds 9000 5000 1500
3720
// For noServer I want no change for high frequency but inhibit inlining in cold blocks ==> 10000 5000 1500
3721
if (TR::isJ9() && !comp()->getMethodSymbol()->doJSR292PerfTweaks() && calltarget->_calleeMethod &&
3722
!alwaysWorthInlining(calltarget->_calleeMethod, callsite->_callNode))
3723
{
3724
int32_t maxFrequency = MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT;
3725
int32_t borderFrequency = 9000;
3726
int32_t coldBorderFrequency = 5000;
3727
int32_t veryColdBorderFrequency = 1500;
3728
if (comp()->isServerInlining())
3729
{
3730
if (comp()->getOption(TR_DisableConservativeInlining) ||
3731
(comp()->getOptLevel() >= hot) ||
3732
getJ9InitialBytecodeSize(calltarget->_calleeMethod, 0, comp()) < comp()->getOptions()->getAlwaysWorthInliningThreshold())// use old thresholds
3733
{
3734
borderFrequency = 1000;
3735
coldBorderFrequency = 0;
3736
veryColdBorderFrequency = 0;
3737
}
3738
}
3739
else
3740
{
3741
borderFrequency = 10000;
3742
if (comp()->getOptLevel() >= hot)
3743
{
3744
coldBorderFrequency = 0;
3745
veryColdBorderFrequency = 0;
3746
}
3747
}
3748
3749
// Did the user specify specific values? If so, use those
3750
if (comp()->getOptions()->getInlinerCGBorderFrequency() >= 0)
3751
borderFrequency = comp()->getOptions()->getInlinerCGBorderFrequency();
3752
if (comp()->getOptions()->getInlinerCGColdBorderFrequency() >= 0)
3753
coldBorderFrequency = comp()->getOptions()->getInlinerCGColdBorderFrequency();
3754
if (comp()->getOptions()->getInlinerCGVeryColdBorderFrequency() >= 0)
3755
veryColdBorderFrequency = comp()->getOptions()->getInlinerCGVeryColdBorderFrequency();
3756
3757
if (comp()->trace(OMR::inlining))
3758
heuristicTrace(tracer(),"WeighCallSite: Considering shrinking call %p with frequency %d\n", callsite->_callNode, frequency2);
3759
3760
bool largeCompiledCallee = !comp()->getOption(TR_InlineVeryLargeCompiledMethods) &&
3761
isLargeCompiledMethod(calltarget->_calleeMethod, size, frequency2);
3762
if (largeCompiledCallee)
3763
{
3764
size = size*TR::Options::_inlinerVeryLargeCompiledMethodAdjustFactor;
3765
}
3766
else if (frequency2 > borderFrequency)
3767
{
3768
float factor = (float)(maxFrequency-frequency2)/(float)maxFrequency;
3769
factor = std::max(factor, 0.4f);
3770
3771
float avgMethodSize = (float)size/(float)ecs->getNumOfEstimatedCalls();
3772
float numCallsFactor = (float)(avgMethodSize)/110.0f;
3773
3774
numCallsFactor = std::max(factor, 0.1f);
3775
3776
if (size > 100)
3777
{
3778
size = (int)((float)size * factor * numCallsFactor);
3779
if (size < 100) size = 100;
3780
}
3781
else
3782
{
3783
size = (int)((float)size * factor * numCallsFactor);
3784
}
3785
if (comp()->trace(OMR::inlining))
3786
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
3787
}
3788
else if ((frequency2 > 0) && (frequency2 < veryColdBorderFrequency)) // luke-warm block
3789
{
3790
float factor = (float)frequency2 / (float)maxFrequency;
3791
//factor = std::max(factor, 0.1f);
3792
size = (int)((float)size / (factor*factor)); // make the size look bigger to inline less
3793
if (comp()->trace(OMR::inlining))
3794
heuristicTrace(tracer(), "WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
3795
}
3796
else if ((frequency2 >= 0) && (frequency2 < coldBorderFrequency)) // very cold block
3797
{
3798
//to avoid division by zero crash. Semantically freqs of 0 and 1 should be pretty close given maxFrequency of 10K
3799
int adjFrequency2 = frequency2 ? frequency2 : 1;
3800
float factor = (float)adjFrequency2 / (float)maxFrequency;
3801
//factor = std::max(factor, 0.1f);
3802
size = (int)((float)size / factor);
3803
3804
3805
if (comp()->trace(OMR::inlining))
3806
heuristicTrace(tracer(),"WeighCallSite: Adjusted call-graph size for call node %p, from %d to %d\n", callsite->_callNode, origSize, size);
3807
}
3808
else
3809
{
3810
if (comp()->trace(OMR::inlining))
3811
heuristicTrace(tracer(),"WeighCallSite: Not adjusted call-graph size for call node %p, size %d\n", callsite->_callNode, origSize);
3812
}
3813
}
3814
}
3815
3816
bool toInline = getPolicy()->tryToInline(calltarget, callStack, true);
3817
heuristicTrace(tracer(),"WeighCallSite: For Target %p node %p, size after size mangling %d",calltarget,calltarget->_myCallSite->_callNode,size);
3818
3819
if (!toInline && !forceInline(calltarget) && (size > _maxRecursiveCallByteCodeSizeEstimate || ecs->recursedTooDeep() == true))
3820
{
3821
if (isWarm(comp()))
3822
{
3823
if (comp()->isServerInlining())
3824
{
3825
if (callsite->_callNode->getInlinedSiteIndex() < 0) //Ensures setWarmCallGraphTooBig is only called for methods with inline index -1 (indexes >=0 can happen when inliner is called after inlining has already occured
3826
comp()->getCurrentMethod()->setWarmCallGraphTooBig (callsite->_callNode->getByteCodeInfo().getByteCodeIndex(), comp());
3827
else
3828
heuristicTrace(tracer(),"Not calling setWarmCallGraphTooBig on callNode %p because it is not from method being compiled bc index %d inlinedsiteindex %d",callsite->_callNode,callsite->_callNode->getByteCodeInfo().getByteCodeIndex(),callsite->_callNode->getInlinedSiteIndex());
3829
if (comp()->trace(OMR::inlining))
3830
heuristicTrace(tracer(),"inliner: Marked call as warm callee too big: %d > %d: %s\n", size, ecs->getSizeThreshold(), tracer()->traceSignature(calltarget->_calleeSymbol));
3831
}
3832
//printf("inliner: Marked call as warm callee too big: %d > %d: %s\n", nonRecursiveSize, sizeThreshold, calleeSymbol->signature(trMemory()));
3833
}
3834
tracer()->insertCounter(Callee_Too_Many_Bytecodes,calltarget->_myCallSite->_callNodeTreeTop);
3835
TR::Options::INLINE_calleeToDeep ++;
3836
if (comp()->trace(OMR::inlining))
3837
traceMsg(comp(), "inliner: size exceeds call graph size threshold: %d > %d: %s\n", size, _maxRecursiveCallByteCodeSizeEstimate, tracer()->traceSignature(calltarget->_calleeSymbol));
3838
3839
callsite->removecalltarget(k,tracer(),Exceeds_Size_Threshold);
3840
k--;
3841
continue;
3842
}
3843
}
3844
3845
if (comp()->getOption(TR_DisableNewInliningInfrastructure))
3846
calltarget->_isPartialInliningCandidate = false;
3847
3848
int32_t weight = size;
3849
int32_t origWeight = weight;
3850
3851
heuristicTrace(tracer(),"Beginning of Weight Calculation. Setting weight to %d",weight);
3852
3853
3854
if (!comp()->getOption(TR_DisableInlinerFanIn))
3855
{
3856
j9inlinerPolicy->adjustFanInSizeInWeighCallSite (weight,
3857
size,
3858
calltarget->_calleeSymbol->getResolvedMethod(),
3859
callsite->_callerResolvedMethod,
3860
callsite->_callNode->getByteCodeIndex());
3861
}
3862
3863
if (callStack->_inALoop)
3864
{
3865
weight >>= 2; // divide by 4
3866
if(getPolicy()->aggressiveSmallAppOpts())
3867
weight >>=3;
3868
heuristicTrace(tracer(),"Setting weight to %d because call is in a loop.",weight);
3869
}
3870
else if (!callStack->_alwaysCalled)
3871
{
3872
if ( getPolicy()->aggressiveSmallAppOpts() && comp()->getMethodSymbol()->getRecognizedMethod() == TR::java_util_GregorianCalendar_computeFields)
3873
{
3874
TR::TreeTop *callNodeTreeTop = calltarget->_myCallSite->_callNodeTreeTop;
3875
int32_t adjustment = 5;
3876
if (callNodeTreeTop)
3877
{
3878
TR::Block * block = callNodeTreeTop->getEnclosingBlock();
3879
int32_t frequency = block->getFrequency();
3880
if(frequency > 1000)
3881
adjustment = 10;
3882
else
3883
adjustment = (frequency*10)/10000;
3884
}
3885
3886
adjustment = std::max(5, adjustment);
3887
if (adjustment == 0)
3888
adjustment = 5;
3889
weight = (weight * 10) / adjustment;
3890
}
3891
heuristicTrace(tracer(),"Setting weight to %d because call is not always called.",weight);
3892
}
3893
3894
int32_t weightBeforeLookingForBenefits = weight;
3895
3896
bool isLambdaFormGeneratedMethod = comp()->fej9()->isLambdaFormGeneratedMethod(calltarget->_calleeMethod);
3897
if ((calltarget->_calleeMethod->convertToMethod()->isArchetypeSpecimen() && calltarget->_calleeMethod->getMethodHandleLocation()) || isLambdaFormGeneratedMethod)
3898
{
3899
static char *methodHandleThunkWeightFactorStr = feGetEnv("TR_methodHandleThunkWeightFactor");
3900
static int32_t methodHandleThunkWeightFactor = methodHandleThunkWeightFactorStr? atoi(methodHandleThunkWeightFactorStr) : 10;
3901
// MethodHandle thunks benefit a great deal from inlining so let's encourage them.
3902
weight /= methodHandleThunkWeightFactor;
3903
heuristicTrace(tracer(),"Setting weight to %d because callee is MethodHandle thunk.",weight);
3904
}
3905
3906
3907
TR_LinkHead<TR_ParameterMapping> map;
3908
if(!((TR_J9InlinerPolicy *)getPolicy())->validateArguments(calltarget,map)) //passing map by reference
3909
{
3910
continue; // arguments are not valid for this calltarget
3911
}
3912
3913
weight = applyArgumentHeuristics(map,weight, calltarget);
3914
3915
if (calltarget->_calleeMethod->isDAAWrapperMethod())
3916
{
3917
weight = 1;
3918
heuristicTrace(tracer(),"Setting DAA wrapper methods weights to minimum(%d).", weight);
3919
}
3920
3921
#ifdef ENABLE_SPMD_SIMD
3922
if (/*calltarget->_calleeSymbol->getRecognizedMethod() == TR::com_ibm_simt_SPMDKernel_execute ||*/
3923
calltarget->_calleeSymbol->getRecognizedMethod() == TR::com_ibm_simt_SPMDKernel_kernel
3924
)
3925
{
3926
weight = 1;
3927
traceMsg(comp(), "Setting SIMD kernel methods weights to minimum(%d) node %p.", weight, callsite->_callNode);
3928
}
3929
#endif
3930
3931
if (weightBeforeLookingForBenefits != weight)
3932
wouldBenefitFromInlining = true;
3933
3934
if (possiblyVeryHotLargeCallee && !wouldBenefitFromInlining)
3935
{
3936
// Increase the possibility of callee reaching scorching with profiling done in it since
3937
// it is possible we collect more useful profile data if it is not inlined
3938
// (since the profiling budget for the caller might get exhausted on some
3939
// other long running loop earlier than the place where the callee is inlined)
3940
// Only do this if the current method is very hot (so callee will likely be very hot if it
3941
// is large) and there are no obvious benefits from inlining the large callee.
3942
// Important in _228_jack since we do not want to inline the Move method into getNextTokenFromStream
3943
//
3944
3945
if ( getPolicy()->aggressiveSmallAppOpts() && comp()->getMethodSymbol()->getRecognizedMethod() == TR::java_util_GregorianCalendar_computeFields)
3946
{
3947
3948
TR::TreeTop *callNodeTreeTop = calltarget->_myCallSite->_callNodeTreeTop;
3949
int32_t adjustment = 5;
3950
if (callNodeTreeTop)
3951
{
3952
TR::Block * block = callNodeTreeTop->getEnclosingBlock();
3953
int32_t frequency = block->getFrequency();
3954
if(frequency > 1000)
3955
adjustment = 10;
3956
else
3957
adjustment = (frequency*10)/10000;
3958
}
3959
3960
adjustment = std::max(5, adjustment);
3961
if (adjustment == 0)
3962
adjustment = 5;
3963
weight = (weight * 10) / adjustment;
3964
3965
}
3966
else
3967
weight <<= 1;
3968
heuristicTrace(tracer(),"Setting weight to %d because method is possibly a very hot and large callee", weight);
3969
}
3970
3971
if (ecs->isLeaf())
3972
{
3973
weight -= 4;
3974
heuristicTrace(tracer(),"Setting weight to %d because method is a leaf method", weight);
3975
}
3976
3977
static char *xyz = feGetEnv("TR_MaxWeightReduction");
3978
uint32_t maxWeightReduction = xyz ? atoi(xyz) : 8;
3979
if (weight < (origWeight/maxWeightReduction))
3980
{
3981
weight = origWeight/8;
3982
heuristicTrace(tracer(),"Setting weight to %d because weight is less than originalWeight/8", weight);
3983
}
3984
3985
3986
float callGraphAdjustedWeight = 0.0f;
3987
int32_t callGraphWeight = -1;
3988
TR_ValueProfileInfoManager * profileManager = TR_ValueProfileInfoManager::get(comp());
3989
bool callGraphEnabled = profileManager->isCallGraphProfilingEnabled(comp()) && !_EDODisableInlinedProfilingInfo;
3990
3991
if (callGraphEnabled)
3992
callGraphAdjustedWeight = profileManager->getAdjustedInliningWeight(calltarget->_myCallSite->_callNode, weight, comp());
3993
3994
//There's (almost) no way to get out of adding the call site to the list of call sites.
3995
//Exceptions: 1) you blow your budget
3996
// 2) reflection
3997
// We only WeighCallSite() for the original method. So we will only have a list of call sites of top level methods.
3998
3999
calltarget->_size = size;
4000
calltarget->_weight = (int32_t)(weight/calltarget->_frequencyAdjustment);
4001
calltarget->_callGraphAdjustedWeight = callGraphAdjustedWeight/calltarget->_frequencyAdjustment;
4002
4003
heuristicTrace(tracer(),"WeighCallSite: Adding call target %p node %p with calleeSymbol = %p to list of call sites with guard %p kind = %d"
4004
"type = %d weight = %d (adjusted by frequencyAdjustment of %d) callGraphAdjustedWeight = %d",
4005
calltarget,calltarget->_myCallSite->_callNode,calltarget->_calleeSymbol,calltarget->_guard,calltarget->_guard->_kind,
4006
calltarget->_guard->_type,calltarget->_weight,calltarget->_frequencyAdjustment, calltarget->_callGraphAdjustedWeight);
4007
4008
TR_CallTarget *calltargetiterator = _callTargets.getFirst(), * prevTarget = 0;
4009
bool dontinsert = false;
4010
if (dontAddCalls == true)
4011
{
4012
// printf("Would have inserted bogus call to list from walkcallsites\n");fflush(stdout);
4013
dontinsert=true;
4014
}
4015
for (; calltargetiterator; prevTarget = calltargetiterator, calltargetiterator=calltargetiterator->getNext())
4016
if (callGraphEnabled)
4017
{
4018
if (callGraphAdjustedWeight < calltargetiterator->_callGraphAdjustedWeight)
4019
{
4020
_callTargets.insertAfter(prevTarget, calltarget);
4021
dontinsert=true;
4022
break;
4023
}
4024
else if ((callGraphAdjustedWeight == calltargetiterator->_callGraphAdjustedWeight) &&
4025
(weight < calltargetiterator->_weight))
4026
{
4027
_callTargets.insertAfter(prevTarget, calltarget);
4028
//return;
4029
dontinsert=true;
4030
break;
4031
}
4032
}
4033
else if (weight < calltargetiterator->_weight)
4034
{
4035
_callTargets.insertAfter(prevTarget, calltarget);
4036
//return;
4037
dontinsert=true;
4038
break;
4039
}
4040
if(!dontinsert)
4041
_callTargets.insertAfter(prevTarget, calltarget);
4042
} //end for loop over call targets
4043
4044
4045
heuristicTrace(tracer(),"^^^ Done Weighing of all targets in CallSite %p callnode %p\n",callsite, callsite->_callNode);
4046
}
4047
4048
bool TR_MultipleCallTargetInliner::inlineSubCallGraph(TR_CallTarget* calltarget)
   {
   TR_J9InlinerPolicy *j9inlinerPolicy = (TR_J9InlinerPolicy *) getPolicy();

   /*
    * Keep the target if it meets any of the following conditions:
    * 1. It's a JSR292 related method. This condition allows inlining a method handle thunk chain
    *    without inlining the leaf java method.
    * 2. It's a force-inline target.
    * 3. It's a method deemed always worth inlining, e.g. an Unsafe method
    *    which would otherwise generate a j2i transition.
    */
   TR::Node *callNode = NULL; // no call node has been generated yet
   bool keepTarget = j9inlinerPolicy->isJSR292Method(calltarget->_calleeMethod)
                     || forceInline(calltarget)
                     || j9inlinerPolicy->alwaysWorthInlining(calltarget->_calleeMethod, callNode);

   if (!keepTarget)
      {
      // Trim this target out of its call site's target list.
      calltarget->_myCallSite->removecalltarget(calltarget, tracer(), Trimmed_List_of_Callees);
      return false;
      }

   // Recursively filter every target of every callee site underneath this target.
   for (TR_CallSite *site = calltarget->_myCallees.getFirst(); site; site = site->getNext())
      {
      for (int32_t idx = 0; idx < site->numTargets(); idx++)
         inlineSubCallGraph(site->getTarget(idx));
      }

   return true;
   }
4074
4075
void TR_MultipleCallTargetInliner::processChoppedOffCallTargets(TR_CallTarget *lastTargetToInline, TR_CallTarget* firstChoppedOffcalltarget, int estimatedNumberOfNodes)
   {
   // Walk the targets that were chopped off the inlining list and re-attach any
   // whose sub call graph must be inlined regardless of budget
   // (JSR292 methods, force-inline and always-worth-inlining targets).
   for (TR_CallTarget *calltarget = firstChoppedOffcalltarget; calltarget; calltarget = calltarget->getNext())
      {
      if (!inlineSubCallGraph(calltarget))
         continue;

      generateNodeEstimate myEstimate;
      recursivelyWalkCallTargetAndPerformAction(calltarget, myEstimate);
      estimatedNumberOfNodes += myEstimate.getNodeEstimate();

      /*
       * ForceInline targets and JSR292 methods should always be inlined regardless of budget. However, with
       * inlining of a methodhandle chain in normal inlining, the number of nodes can be tremendous, resulting in
       * compilations (especially those with higher opt level) eating up too much CPU time. The heuristic here
       * is added to prevent compilations consuming too much CPU.
       */
      static bool dontAbortCompilationEvenWithLargeInliningNodesEstimation = feGetEnv("TR_DontAbortCompilationEvenWithLargeInliningNodesEstimation") ? true : false;
      if (!dontAbortCompilationEvenWithLargeInliningNodesEstimation && estimatedNumberOfNodes > 50000 && comp()->getMethodHotness() >= hot)
         comp()->failCompilation<TR::ExcessiveComplexity>("too many nodes if forced inlining targets are included");

      // Splice the kept target onto the end of the list of targets to inline.
      if (lastTargetToInline)
         lastTargetToInline->setNext(calltarget);
      else
         _callTargets.setFirst(calltarget);
      lastTargetToInline = calltarget;
      }

   // Terminate the rebuilt list, or empty it if nothing was kept.
   if (lastTargetToInline)
      lastTargetToInline->setNext(NULL);
   else
      _callTargets.setFirst(NULL);
   }
4110
4111
//Note, this function is shared by all FE's. If you are changing the heuristic for your FE only, you need to push this method into the various FE's FEInliner.cpp file.
4112
//Note, this function is shared by all FE's. If you are changing the heuristic for your FE only, you need to push this method into the various FE's FEInliner.cpp file.
int32_t TR_MultipleCallTargetInliner::scaleSizeBasedOnBlockFrequency(int32_t bytecodeSize, int32_t frequency, int32_t borderFrequency, TR_ResolvedMethod * calleeResolvedMethod, TR::Node *callNode, int32_t coldBorderFrequency)
   {
   const int32_t maxFrequency = MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT;

   bool largeCompiledCallee = !comp()->getOption(TR_InlineVeryLargeCompiledMethods)
                              && isLargeCompiledMethod(calleeResolvedMethod, bytecodeSize, frequency);
   if (largeCompiledCallee)
      {
      // Already-compiled very large callees get a uniform size penalty.
      bytecodeSize = bytecodeSize * TR::Options::_inlinerVeryLargeCompiledMethodAdjustFactor;
      }
   else if (frequency > borderFrequency)
      {
      // Hot block: shrink the size to encourage inlining (floored at 10).
      int32_t sizeBeforeScaling = comp()->trace(OMR::inlining) ? bytecodeSize : 0;

      float scale = getScalingFactor((float)(maxFrequency - frequency) / (float)maxFrequency);
      bytecodeSize = (int32_t)((float)bytecodeSize * scale);
      if (bytecodeSize < 10)
         bytecodeSize = 10;

      heuristicTrace(tracer(),"exceedsSizeThreshold (mct): Scaled down size for call from %d to %d", sizeBeforeScaling, bytecodeSize);
      }
   else if (frequency < coldBorderFrequency)
      {
      // Cold block: inflate the size to discourage inlining.
      int32_t sizeBeforeScaling = comp()->trace(OMR::inlining) ? bytecodeSize : 0;

      // To avoid a division-by-zero crash. Semantically freqs of 0 and 1 should be pretty close given maxFrequency of 10K.
      int adjFrequency = (frequency != 0) ? frequency : 1;

      float scale = (float)adjFrequency / (float)maxFrequency;
      float scaledWeight = (float)bytecodeSize / (scale * scale);
      bytecodeSize = (scaledWeight > 0x7fffffff) ? 0x7fffffff : ((int32_t)scaledWeight);

      heuristicTrace(tracer(),"exceedsSizeThreshold: Scaled up size for call from %d to %d", sizeBeforeScaling, bytecodeSize);
      }
   return bytecodeSize;
   }
4154
4155
4156
/**
 * Decide whether an already-compiled callee is too large/hot to inline here.
 *
 * Returns true when the callee's existing jitted body should keep it from
 * being inlined into the current compile:
 *  - in warm-or-cooler compiles: hot/scorching bodies are never inlined, and
 *    warm-or-hotter bodies with high fan-in and large bytecode size are
 *    rejected unless the caller block is frequent enough to exempt them;
 *  - in compiles below scorching: scorching bodies are rejected, except when
 *    this is a veryHot profiling compile preparing an upgrade.
 * Interpreted callees are never "large compiled" and return false.
 *
 * @param calleeResolvedMethod the candidate callee
 * @param bytecodeSize         the callee's (possibly adjusted) bytecode size
 * @param callerBlockFrequency frequency of the caller block containing the call
 */
bool TR_MultipleCallTargetInliner::isLargeCompiledMethod(TR_ResolvedMethod *calleeResolvedMethod, int32_t bytecodeSize, int32_t callerBlockFrequency)
   {
   // Note: removed an unused local that fetched getPersistentIdentifier();
   // its value was never consumed.
   if (!calleeResolvedMethod->isInterpreted())
      {
      TR_PersistentJittedBodyInfo * bodyInfo = ((TR_ResolvedJ9Method*) calleeResolvedMethod)->getExistingJittedBodyInfo();
      if ((comp()->getMethodHotness() <= warm))
         {
         if (bodyInfo &&
             (bodyInfo->getHotness() >= warm))
            {
            // hot and scorching bodies should never be inlined to warm or cooler bodies
            if (bodyInfo->getHotness() >= hot)
               {
               return true;
               }

            // Allow inlining of big methods into high frequency blocks
            if (callerBlockFrequency > comp()->getOptions()->getLargeCompiledMethodExemptionFreqCutoff())
               return false;

            int32_t veryLargeCompiledMethodThreshold = comp()->getOptions()->getInlinerVeryLargeCompiledMethodThreshold();
            int32_t veryLargeCompiledMethodFaninThreshold = comp()->getOptions()->getInlinerVeryLargeCompiledMethodFaninThreshold();
            // Subdue inliner in low frequency blocks
            if (callerBlockFrequency > 0)
               {
               if ((2 * callerBlockFrequency) < comp()->getOptions()->getLargeCompiledMethodExemptionFreqCutoff())
                  {
                  // Well below the exemption cutoff: tighten both thresholds.
                  veryLargeCompiledMethodThreshold = 100;
                  veryLargeCompiledMethodFaninThreshold = 0;
                  }
               }

            // Reject callees that are both widely called and large.
            uint32_t numCallers = 0, totalWeight = 0;
            ((TR_ResolvedJ9Method *) calleeResolvedMethod)->getFaninInfo(&numCallers, &totalWeight);
            if ((numCallers > veryLargeCompiledMethodFaninThreshold) &&
                (bytecodeSize > veryLargeCompiledMethodThreshold))
               {
               return true;
               }
            }
         }
      else if (comp()->getMethodHotness() < scorching)
         {
         // scorching compiled methods should not be inlined in compiles below scorching
         // unless we are preparing to upgrade the compile and need profiling info
         if (bodyInfo &&
             (bodyInfo->getHotness() >= scorching) &&
             !(comp()->isProfilingCompilation() && comp()->getMethodHotness() == veryHot))
            {
            return true;
            }
         }
      }
   return false;
   }
4212
4213
4214
bool
4215
TR_MultipleCallTargetInliner::exceedsSizeThreshold(TR_CallSite *callSite, int bytecodeSize, TR::Block *block, TR_ByteCodeInfo & bcInfo, int32_t numLocals, TR_ResolvedMethod * callerResolvedMethod, TR_ResolvedMethod * calleeResolvedMethod,TR::Node *callNode, bool allConsts)
4216
{
4217
if (alwaysWorthInlining(calleeResolvedMethod, callNode))
4218
return false;
4219
4220
TR_J9InlinerPolicy *j9InlinerPolicy = (TR_J9InlinerPolicy *)getPolicy();
4221
static char *polymorphicCalleeSizeThresholdStr = feGetEnv("TR_InlinerPolymorphicConservatismCalleeSize");
4222
int polymorphicCalleeSizeThreshold = polymorphicCalleeSizeThresholdStr ? atoi(polymorphicCalleeSizeThresholdStr) : 10;
4223
static char *polymorphicRootSizeThresholdStr = feGetEnv("TR_InlinerPolymorphicConservatismRootSize");
4224
int polymorphicRootSizeThreshold = polymorphicRootSizeThresholdStr ? atoi(polymorphicRootSizeThresholdStr) : 30;
4225
static char *trustedInterfacePattern = feGetEnv("TR_TrustedPolymorphicInterfaces");
4226
static TR::SimpleRegex *trustedInterfaceRegex = trustedInterfacePattern ? TR::SimpleRegex::create(trustedInterfacePattern) : NULL;
4227
// we need to be conservative about inlining potentially highly polymorphic interface calls for
4228
// functional frameworks like scala - we limit this to hot and above
4229
// if the callsite is highly polymorphic but the following conditions are meet, still inline the callee
4230
// 1. the compiling method is scorching
4231
// 2. the callee is scorching OR queued for veryhot/scorching compile
4232
int32_t outterMethodSize = getJ9InitialBytecodeSize(callSite->_callerResolvedMethod, 0, comp());
4233
if (comp()->getMethodHotness() > warm && callSite->isInterface()
4234
&& bytecodeSize > polymorphicCalleeSizeThreshold
4235
&& outterMethodSize > polymorphicRootSizeThreshold
4236
&& ((bytecodeSize * 100) / outterMethodSize) < 6
4237
&& (!callSite->_ecsPrexArgInfo || !callSite->_ecsPrexArgInfo->get(0) || !callSite->_ecsPrexArgInfo->get(0)->getClass())
4238
&& comp()->fej9()->maybeHighlyPolymorphic(comp(), callSite->_callerResolvedMethod, callSite->_cpIndex, callSite->_interfaceMethod, callSite->_receiverClass)
4239
&& (!trustedInterfaceRegex || !TR::SimpleRegex::match(trustedInterfaceRegex, callSite->_interfaceMethod->signature(trMemory()), false)))
4240
{
4241
TR_PersistentJittedBodyInfo *bodyInfo = NULL;
4242
if (!calleeResolvedMethod->isInterpretedForHeuristics() && !calleeResolvedMethod->isJITInternalNative())
4243
{
4244
bodyInfo = ((TR_ResolvedJ9Method*) calleeResolvedMethod)->getExistingJittedBodyInfo();
4245
}
4246
if (((!bodyInfo && !calleeResolvedMethod->isInterpretedForHeuristics() && !calleeResolvedMethod->isJITInternalNative()) //jitted method without bodyInfo must be scorching
4247
|| (bodyInfo && bodyInfo->getHotness() == scorching)
4248
|| comp()->fej9()->isQueuedForVeryHotOrScorching(calleeResolvedMethod, comp()))
4249
&& (comp()->getMethodHotness() == scorching))
4250
debugTrace(tracer(), "### inline this callee because the compiling method and callee are both scorching even though it's potentially highly polymorphic callsite initialCalleeSymbol = %s callerResolvedMethod = %s calleeResolvedMethod = %s\n",
4251
tracer()->traceSignature(callSite->_interfaceMethod), tracer()->traceSignature(callerResolvedMethod), tracer()->traceSignature(calleeResolvedMethod));
4252
else
4253
{
4254
debugTrace(tracer(), "### exceeding size threshold for potentially highly polymorphic callsite initialCalleeSymbol = %s callerResolvedMethod = %s calleeResolvedMethod = %s\n",
4255
tracer()->traceSignature(callSite->_interfaceMethod), tracer()->traceSignature(callerResolvedMethod), tracer()->traceSignature(calleeResolvedMethod));
4256
TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "inliner.highlyPolymorphicFail/(%s)/%s/(%s)/sizes=%d.%d", comp()->signature(), comp()->getHotnessName(comp()->getMethodHotness()), callSite->_interfaceMethod->signature(trMemory()), bytecodeSize, outterMethodSize));
4257
return true;
4258
}
4259
}
4260
4261
if (comp()->getMethodHotness() > warm && !comp()->isServerInlining())
4262
return TR_InlinerBase::exceedsSizeThreshold(callSite, bytecodeSize, block, bcInfo,numLocals,callerResolvedMethod,calleeResolvedMethod,callNode,allConsts);
4263
4264
heuristicTrace(tracer(),"### Checking multiple call target inliner sizeThreshold. bytecodeSize = %d, block = %p, numLocals = %p, callerResolvedMethod = %s,"
4265
" calleeResolvedMethod = %s opt server is %d methodHotness = %d warm = %d\n",
4266
bytecodeSize,block,numLocals,tracer()->traceSignature(callerResolvedMethod),tracer()->traceSignature(calleeResolvedMethod), comp()->isServerInlining(),comp()->getMethodHotness(), warm);
4267
4268
int32_t origbytecodeSize=bytecodeSize;
4269
bool isCold = false;
4270
int32_t frequency1 = 0, frequency2 = 0;
4271
4272
4273
// In the old days we used 2000,50 for optServer, 2500,1000 for noOptServer
4274
// Now we want to use 6000, 1500 for optServer
4275
4276
if (block)
4277
{
4278
int32_t borderFrequency;
4279
int32_t veryColdBorderFrequency;
4280
4281
getBorderFrequencies(borderFrequency, veryColdBorderFrequency, calleeResolvedMethod, callNode);
4282
4283
4284
// HACK: Get frequency from both sources, and use both. You're
4285
// only cold if you're cold according to both.
4286
4287
bool isLambdaFormGeneratedMethod = comp()->fej9()->isLambdaFormGeneratedMethod(callerResolvedMethod);
4288
// TODO: we should ignore frequency for thunk archetype, however, this require performance evaluation
4289
bool frequencyIsInaccurate = isLambdaFormGeneratedMethod;
4290
4291
frequency1 = comp()->convertNonDeterministicInput(comp()->fej9()->getIProfilerCallCount(bcInfo, comp()), MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT, randomGenerator(), 0);
4292
frequency2 = comp()->convertNonDeterministicInput(block->getFrequency(), MAX_BLOCK_COUNT + MAX_COLD_BLOCK_COUNT, randomGenerator(), 0);
4293
if (frequency1 > frequency2 && callerResolvedMethod->convertToMethod()->isArchetypeSpecimen())
4294
frequency2 = frequency1;
4295
4296
if ((frequency1 <= 0) && ((0 <= frequency2) && (frequency2 <= MAX_COLD_BLOCK_COUNT)) &&
4297
!alwaysWorthInlining(calleeResolvedMethod, callNode) &&
4298
!frequencyIsInaccurate)
4299
{
4300
isCold = true;
4301
}
4302
4303
debugTrace(tracer(), "exceedsSizeThreshold: Call with block_%d has frequency1 %d frequency2 %d ", block->getNumber(), frequency1, frequency2);
4304
4305
if (allowBiggerMethods() &&
4306
!comp()->getMethodSymbol()->doJSR292PerfTweaks() &&
4307
calleeResolvedMethod &&
4308
!frequencyIsInaccurate &&
4309
!j9InlinerPolicy->isInlineableJNI(calleeResolvedMethod, callNode))
4310
{
4311
bytecodeSize = scaleSizeBasedOnBlockFrequency(bytecodeSize,frequency2,borderFrequency, calleeResolvedMethod,callNode,veryColdBorderFrequency);
4312
}
4313
else if (getPolicy()->aggressiveSmallAppOpts())
4314
{
4315
traceMsg(comp(), "Reached new code 2\n");
4316
int32_t blockNestingDepth = 1;
4317
if (_isInLoop)
4318
{
4319
char *tmptmp =0;
4320
if (calleeResolvedMethod)
4321
tmptmp = TR::Compiler->cls.classSignature(comp(), calleeResolvedMethod->containingClass(), trMemory());
4322
bool doit = false;
4323
4324
if (j9InlinerPolicy->aggressivelyInlineInLoops())
4325
{
4326
doit = true;
4327
}
4328
4329
if (doit && calleeResolvedMethod && !strcmp(tmptmp,"Ljava/math/BigDecimal;"))
4330
{
4331
traceMsg(comp(), "Reached code for block nesting depth %d\n", blockNestingDepth);
4332
if ((_isInLoop || (blockNestingDepth > 1)) &&
4333
(bytecodeSize > 10))
4334
{
4335
if (comp()->trace(OMR::inlining))
4336
heuristicTrace(tracer(),"Exceeds Size Threshold: Scaled down size for call block %d from %d to %d\n", block->getNumber(), bytecodeSize, 10);
4337
bytecodeSize = 10;
4338
}
4339
}
4340
else
4341
heuristicTrace(tracer(),"Omitting Big Decimal method from size readjustment, calleeResolvedMethod = %p tmptmp = %s",calleeResolvedMethod, tmptmp);
4342
}
4343
}
4344
}
4345
4346
// reduce size if your args are consts
4347
if (callNode)
4348
{
4349
heuristicTrace(tracer(),"In ExceedsSizeThreshold. Reducing size from %d",bytecodeSize);
4350
4351
int32_t originalbcSize = bytecodeSize;
4352
uint32_t numArgs = callNode->getNumChildren();
4353
bool allconstsfromCallNode = true;
4354
4355
uint32_t i = callNode->getFirstArgumentIndex();
4356
4357
if( callNode->getOpCode().isCall() &&
4358
!callNode->getSymbolReference()->isUnresolved() &&
4359
callNode->getSymbolReference()->getSymbol()->getMethodSymbol() &&
4360
!callNode->getSymbolReference()->getSymbol()->castToMethodSymbol()->isHelper() &&
4361
!callNode->getSymbolReference()->getSymbol()->castToMethodSymbol()->isSystemLinkageDispatch() &&
4362
!callNode->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic() )
4363
++i;
4364
4365
for (; i < numArgs; ++i)
4366
{
4367
4368
// printf("callNode = %p\n");fflush(stdout);
4369
// printf("callNode->getOpCode().isCall() = %p\n");fflush(stdout);
4370
// printf("callNode->getSymbolReference() = %p\n");fflush(stdout);
4371
// pr
4372
4373
TR::Node * arg = callNode->getChild(i);
4374
if (arg->getOpCode().isLoadConst())
4375
{
4376
heuristicTrace(tracer(),"Node %p is load const\n",arg);
4377
bytecodeSize = bytecodeSize - (bytecodeSize/10);
4378
}
4379
else if (arg->getOpCodeValue() == TR::aload && arg->getSymbolReference()->getSymbol()->isConstObjectRef())
4380
{
4381
heuristicTrace(tracer(),"Node %p is aload const\n",arg);
4382
bytecodeSize = bytecodeSize - (bytecodeSize/10);
4383
}
4384
else
4385
{
4386
heuristicTrace(tracer(),"Node %p is not const\n",arg);
4387
allconstsfromCallNode = false;
4388
}
4389
}
4390
4391
4392
if (!allconstsfromCallNode)
4393
bytecodeSize = originalbcSize;
4394
4395
else if (bytecodeSize < originalbcSize && originalbcSize > 100)
4396
{
4397
/*
4398
char tmpian[1024];
4399
comp()->fej9()->sampleSignature(calleeResolvedMethod->getPersistentIdentifier(), tmpian, 1024, trMemory());
4400
printf("R size of bc %d to %d meth %s comp %s\n",originalbcSize,bytecodeSize,tmpian,comp()->signature());
4401
fflush(stdout);
4402
*/
4403
}
4404
4405
heuristicTrace(tracer()," to %d because of const arguments", bytecodeSize);
4406
4407
/* if ( bytecodeSize != originalbcSize )
4408
{
4409
char tmpian[1024];
4410
comp()->fej9()->sampleSignature(calleeResolvedMethod->getPersistentIdentifier(), tmpian, 1024, trMemory());
4411
printf("R size of bc %d to %d meth %s comp %s\n",originalbcSize,bytecodeSize,tmpian,comp()->signature());
4412
fflush(stdout);
4413
}
4414
*/
4415
}
4416
else if (allConsts)
4417
{
4418
4419
int32_t originalbcSize=bytecodeSize;
4420
heuristicTrace(tracer(),"In ExceedsSizeThreshold. Reducing size from %d",bytecodeSize);
4421
4422
int32_t numArgs = calleeResolvedMethod->numberOfExplicitParameters();
4423
for (int32_t i=0 ; i < numArgs; ++i)
4424
{
4425
bytecodeSize = bytecodeSize - (bytecodeSize/10);
4426
}
4427
4428
heuristicTrace(tracer()," to %d because of const arguments",bytecodeSize);
4429
4430
/*
4431
if (bytecodeSize < originalbcSize && originalbcSize > 100)
4432
{
4433
char tmpian[1024];
4434
comp()->fe()->sampleSignature(calleeResolvedMethod->getPersistentIdentifier(), tmpian, 1024, trMemory());
4435
printf("R size of bc %d to %d meth %s comp %s\n",originalbcSize,bytecodeSize,tmpian,comp()->signature());
4436
fflush(stdout);
4437
}
4438
*/
4439
}
4440
4441
static const char *qq;
4442
static uint32_t min_size = ( qq = feGetEnv("TR_Min_FanIn_Size")) ? atoi(qq) : MIN_FAN_IN_SIZE;
4443
static const char *q;
4444
static uint32_t multiplier = ( q = feGetEnv("TR_SizeMultiplier")) ? atoi (q) : SIZE_MULTIPLIER;
4445
uint32_t calculatedSize = bytecodeSize; //(bytecodeSize - MIN_FAN_IN_SIZE);
4446
4447
if (!comp()->getOption(TR_DisableInlinerFanIn)) // TODO: make the default for everybody
4448
{
4449
// In JIT, having low caller information is equivalent to lack of information. We want to exclude only cases where we know we have alot of fan-in
4450
if (j9InlinerPolicy->adjustFanInSizeInExceedsSizeThreshold(bytecodeSize, calculatedSize, calleeResolvedMethod, callerResolvedMethod, bcInfo.getByteCodeIndex()))
4451
{
4452
return true;
4453
}
4454
}
4455
// else
4456
// {
4457
// calculatedSize=bytecodeSize;
4458
// }
4459
4460
4461
if (isCold && (bytecodeSize > _methodInColdBlockByteCodeSizeThreshold))
4462
{
4463
if (block)
4464
{
4465
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/coldCallee/tooManyBytecodes", block->getFirstRealTreeTop());
4466
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/coldCallee/tooManyBytecodes:#bytecodeSize", block->getFirstRealTreeTop(), bytecodeSize);
4467
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/coldCallee/tooManyBytecodes:#excess", block->getFirstRealTreeTop(), bytecodeSize-_methodInColdBlockByteCodeSizeThreshold);
4468
}
4469
heuristicTrace(tracer(),"### Exceeds Size Threshold because call is cold and has a bytecodeSize %d > _methodInColdBlockByteCodeSizeThreshold %d", bytecodeSize,_methodInColdBlockByteCodeSizeThreshold);
4470
return true; // exceeds size threshold
4471
}
4472
4473
if(bytecodeSize > _methodInWarmBlockByteCodeSizeThreshold || calculatedSize > _methodInWarmBlockByteCodeSizeThreshold*multiplier)
4474
{
4475
if (block)
4476
{
4477
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/warmCallee/tooManyBytecodes", block->getFirstRealTreeTop());
4478
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/warmCallee/tooManyBytecodes:#bytecodeSize", block->getFirstRealTreeTop(), bytecodeSize);
4479
TR::DebugCounter::prependDebugCounter(comp(), "inliner.callSites/failed/warmCallee/tooManyBytecodes:#excess", block->getFirstRealTreeTop(), bytecodeSize - _methodInWarmBlockByteCodeSizeThreshold);
4480
}
4481
4482
if( bytecodeSize <= _methodInWarmBlockByteCodeSizeThreshold )
4483
{
4484
heuristicTrace(tracer(),"### Exceeds Size Threshold because calculatedSize %d > _methodInWarmBlockByteCodeSizeThreshold*multiplier %d (excessive Fan In)",calculatedSize,_methodInWarmBlockByteCodeSizeThreshold*multiplier);
4485
4486
static const char *r = feGetEnv("TR_PrintFanIn");
4487
if(r)
4488
{
4489
char calleeName2[1024];
4490
4491
4492
printf("Method %p %s excluded because it has fan in ratio of %f threshold = %d size = %d original = %d.\n",
4493
calleeResolvedMethod,
4494
comp()->fej9()->sampleSignature(calleeResolvedMethod->getPersistentIdentifier(), calleeName2, 1024, comp()->trMemory()),
4495
(float)calculatedSize/(float) bytecodeSize,_methodInWarmBlockByteCodeSizeThreshold*multiplier,calculatedSize,bytecodeSize);
4496
4497
}
4498
}
4499
else
4500
heuristicTrace(tracer(),"### Exceeds Size Threshold because bytecodeSize %d > _methodInWarmBlockByteCodeSizeThreshold %d",bytecodeSize,_methodInWarmBlockByteCodeSizeThreshold);
4501
4502
return true; // Exceeds size threshold
4503
}
4504
4505
if (isWarm(comp()))
4506
{
4507
if (OWNING_METHOD_MAY_NOT_BE_THE_CALLER && calleeResolvedMethod->owningMethodDoesntMatter())
4508
{
4509
// callerResolvedMethod may not correspond to the caller listed in bcInfo, so it's
4510
// not safe to call isWarmCallGraphTooBig.
4511
}
4512
else if (comp()->isServerInlining() &&
4513
!alwaysWorthInlining(calleeResolvedMethod, NULL) &&
4514
callerResolvedMethod->isWarmCallGraphTooBig(bcInfo.getByteCodeIndex(), comp()) &&
4515
!isHot(comp()))
4516
{
4517
heuristicTrace(tracer(),"### Avoiding estimation (even though size is reasonable) of call %s.(Exceeding Size Threshold)", tracer()->traceSignature(calleeResolvedMethod));
4518
return true;
4519
}
4520
}
4521
4522
heuristicTrace(tracer(),"### Did not exceed size threshold, bytecodeSize %d <= inlineThreshold %d",bytecodeSize, _methodInWarmBlockByteCodeSizeThreshold);
4523
4524
// Does not exceed size threshold
4525
return false;
4526
}
4527
4528
bool TR_J9InlinerPolicy::canInlineMethodWhileInstrumenting(TR_ResolvedMethod *method)
   {
   // When selective method enter/exit is enabled, the VM may need to observe
   // every invocation; inlining is only allowed if the front end says method
   // inlining is still permitted with event hooks active.
   const bool blockedByEventHooks =
      TR::Compiler->vm.isSelectiveMethodEnterExitEnabled(comp()) &&
      !comp()->fej9()->methodsCanBeInlinedEvenIfEventHooksEnabled(comp());

   // Likewise, a method under tracing must stay a distinct invocation unless
   // the front end allows traced methods to be inlined.
   const bool blockedByTracing =
      comp()->fej9()->isAnyMethodTracingEnabled(method->getPersistentIdentifier()) &&
      !comp()->fej9()->traceableMethodsCanBeInlined();

   return !(blockedByEventHooks || blockedByTracing);
   }
4536
4537
bool TR_J9InlinerPolicy::shouldRemoveDifferingTargets(TR::Node *callNode)
   {
   // Let the base (OMR) policy veto first.
   if (!OMR_InlinerPolicy::shouldRemoveDifferingTargets(callNode))
      return false;

   // MethodHandle.invokeBasic call sites legitimately have differing targets,
   // so keep them; every other recognized method follows the base behaviour.
   TR::RecognizedMethod rm =
      callNode->getSymbol()->castToMethodSymbol()->getRecognizedMethod();

   return rm != TR::java_lang_invoke_MethodHandle_invokeBasic;
   }
4547
4548
/**
 * Refine an inline guard by inserting extra preexistence-based tests when
 * profiling fixed an argument's class.
 *
 * For each call argument whose TR_PrexArgument carries a fixed profiled class,
 * this splices two new tests between \p block1 and \p block2:
 *   1. a null test (ifacmpeq vs aconst NULL) that branches to the inlined
 *      path entry (\p block2) when the argument is null, and
 *   2. a VFT test (ifacmpne vs the profiled class constant) that branches to
 *      the slow/virtual path (\p block4) when the receiver class differs.
 * The argument is first stored to a fresh temp so both tests load a stable
 * value.
 *
 * @param callNode           the call being guarded
 * @param block1             [in/out] block preceding the guard; updated to the
 *                           last test block created
 * @param block2             [in/out] fall-through (inlined) successor; updated
 *                           as new blocks are chained in
 * @param appendTestToBlock1 [in/out] whether the first test may be appended
 *                           directly to block1 (cleared once blocks are split)
 * @param callerSymbol       caller method symbol (owns the CFG and temps)
 * @param cursorTree         tree after which the temp store is inserted
 * @param virtualGuard       [in/out] updated to the new guard tree when the
 *                           test is appended to block1
 * @param block4             slow-path (virtual call) block
 */
void
TR_J9InlinerUtil::refineInlineGuard(TR::Node *callNode, TR::Block *&block1, TR::Block *&block2,
                  bool &appendTestToBlock1, TR::ResolvedMethodSymbol * callerSymbol, TR::TreeTop *cursorTree,
                  TR::TreeTop *&virtualGuard, TR::Block *block4)
   {
   TR::CFG * callerCFG = callerSymbol->getFlowGraph();
   TR_PrexArgInfo *argInfo = comp()->getCurrentInlinedCallArgInfo();
   if (argInfo)
      {
      if (comp()->usesPreexistence()) // Mark the preexistent arguments (maybe redundant if VP has already done that and provided the info in argInfo)
         {
         // Walk arguments last-to-first; index into argInfo is relative to the
         // first argument child of the call node.
         int32_t firstArgIndex = callNode->getFirstArgumentIndex();
         for (int32_t c = callNode->getNumChildren() -1; c >= firstArgIndex; c--)
            {
            TR::Node *argument = callNode->getChild(c);
            TR_PrexArgument *p = argInfo->get(c - firstArgIndex);
            if (p && p->usedProfiledInfo())
               {
               TR_OpaqueClassBlock *pc = p->getFixedProfiledClass();
               if (pc)
                  {
                  //printf("Creating new guards in method %s caller %s callee %s\n", comp()->signature(), callerSymbol->getResolvedMethod()->signature(trMemory()), calleeSymbol->getResolvedMethod()->signature(trMemory()));
                  //fflush(stdout);

                  // newBlock will host the null test; newBlock2 hosts the VFT
                  // test and is wired in afterwards.
                  TR::Block *origBlock1 = block1;
                  TR::Block *newBlock = block1;
                  TR::Block *newBlock2 = TR::Block::createEmptyBlock(callNode, comp(), block1->getFrequency());
                  callerCFG->addNode(newBlock2);
                  if (!appendTestToBlock1)
                     {
                     // Cannot reuse block1: split a fresh block between block1
                     // and block2 to hold the null test.
                     newBlock = TR::Block::createEmptyBlock(callNode, comp());
                     callerCFG->addNode(newBlock);
                     callerCFG->addEdge(block1, newBlock);
                     callerCFG->addEdge(newBlock, block2);
                     //callerCFG->addEdge(newBlock, newBlock2); //block4);
                     callerCFG->copyExceptionSuccessors(block1, newBlock);
                     callerCFG->removeEdge(block1, block2);
                     }

                  TR::SymbolReferenceTable *symRefTab = comp()->getSymRefTab();

                  // Class-pointer constant for the profiled class.
                  TR::Node * aconstNode = TR::Node::aconst(callNode, (uintptr_t)pc);
                  aconstNode->setIsClassPointerConstant(true);

                  TR::Node *guard = NULL;
                  TR::DataType dataType = argument->getDataType();
                  TR::SymbolReference *newSymbolReference = comp()->getSymRefTab()->createTemporary(callerSymbol, dataType);

                  // Store the argument to a temp so both guard tests observe
                  // the same value; insert the store right after cursorTree.
                  TR::Node *storeNode = TR::Node::createWithSymRef(comp()->il.opCodeForDirectStore(argument->getDataType()), 1, 1, argument, newSymbolReference);
                  TR::TreeTop *storeTree = TR::TreeTop::create(comp(), storeNode);
                  TR::TreeTop *nextTree = cursorTree->getNextTreeTop();

                  cursorTree->join(storeTree);
                  storeTree->join(nextTree);
                  cursorTree = storeTree;

                  // Test 1: if the argument is null, skip the VFT test and go
                  // straight to block2.
                  TR::Node * aconstNullNode = TR::Node::create(callNode, TR::aconst, 0);
                  TR::Node * nullcmp = TR::Node::createWithSymRef(argument, comp()->il.opCodeForDirectLoad(argument->getDataType()), 0, newSymbolReference);
                  guard = TR::Node::createif(TR::ifacmpeq, nullcmp, aconstNullNode, block2->getEntry());

                  TR::TreeTop *newTreeTop = newBlock->append(TR::TreeTop::create(comp(), guard));
                  if (!appendTestToBlock1)
                     {
                     newBlock->setDoNotProfile();
                     block1->getExit()->join(newBlock->getEntry());
                     newBlock->getExit()->join(block2->getEntry());
                     }
                  else
                     virtualGuard = newTreeTop;

                  block1 = newBlock;
                  block2 = block1->getNextBlock();
                  //appendTestToBlock1 = false;

                  // Wire in the second test block (VFT compare); it falls
                  // through to block2 and branches to the slow path block4.
                  newBlock = newBlock2;
                  callerCFG->addEdge(block1, newBlock);
                  callerCFG->addEdge(newBlock, block2);
                  callerCFG->addEdge(newBlock, block4);
                  if (appendTestToBlock1)
                     callerCFG->removeEdge(origBlock1, block4);
                  callerCFG->copyExceptionSuccessors(block1, newBlock);
                  // callerCFG->removeEdge(block1, block2);

                  // Test 2: compare the argument's VFT against the profiled
                  // class; mismatch goes to the virtual-call path (block4).
                  TR::Node * vft = TR::Node::createWithSymRef(TR::aloadi, 1, 1, TR::Node::createWithSymRef(argument, comp()->il.opCodeForDirectLoad(argument->getDataType()), 0, newSymbolReference), symRefTab->findOrCreateVftSymbolRef());
                  guard = TR::Node::createif(TR::ifacmpne, vft, aconstNode, block4->getEntry());

                  //argument->recursivelyDecReferenceCount();
                  //callNode->setAndIncChild(c, TR::Node::create(comp()->il.opCodeForDirectLoad(argument->getDataType()), 0, newSymbolReference));

                  newTreeTop = newBlock->append(TR::TreeTop::create(comp(), guard));
                  //if (!appendTestToBlock1)
                     {
                     newBlock->setDoNotProfile();
                     block1->getExit()->join(newBlock->getEntry());
                     newBlock->getExit()->join(block2->getEntry());
                     }
                  //else
                  // virtualGuard = newTreeTop;

                  block1 = newBlock;
                  block2 = block1->getNextBlock();
                  // From here on, further tests must go into their own blocks.
                  appendTestToBlock1 = false;
                  }
               }
            }
         }
      }
   }
4657
4658
void
4659
TR_J9InlinerUtil::refineInliningThresholds(TR::Compilation *comp, int32_t &callerWeightLimit, int32_t &maxRecursiveCallByteCodeSizeEstimate, int32_t &methodByteCodeSizeThreshold, int32_t &methodInWarmBlockByteCodeSizeThreshold, int32_t &methodInColdBlockByteCodeSizeThreshold, int32_t &nodeCountThreshold, int32_t size)
4660
{
4661
comp->fej9()->setInlineThresholds(comp, callerWeightLimit, maxRecursiveCallByteCodeSizeEstimate, methodByteCodeSizeThreshold,
4662
methodInWarmBlockByteCodeSizeThreshold, methodInColdBlockByteCodeSizeThreshold, nodeCountThreshold, size);
4663
}
4664
4665
/**
 * Gate an inline candidate on correctness and caller-budget checks.
 *
 * Verifies the call's arguments map onto the callee's parameters, then
 * rejects the target if the caller has exhausted its inlined-call-site budget
 * or its node/async-check budget (unless the target is force-inlined, or the
 * callee is tiny).
 *
 * @return true if the target passes all checks and may be inlined.
 */
bool
TR_J9InlinerPolicy::doCorrectnessAndSizeChecksForInlineCallTarget(TR_CallStack *callStack, TR_CallTarget *calltarget, bool inlinefromgraph, TR_PrexArgInfo *argInfo)
   {
   // I think it would be a good idea to dump the list of Call Targets before doing the first inlineCallTarget.
   // You could get creative in your dumps: calltargets in order, calltargets by callsite, etc.

   TR_LinkHead<TR_ParameterMapping> map;
   if (!validateArguments(calltarget, map))
      {
      // validateArguments only fails when args/parms cannot be matched; that
      // is tolerable only on front ends that explicitly allow the mismatch.
      TR_ASSERT(comp()->fej9()->canAllowDifferingNumberOrTypesOfArgsAndParmsInInliner(), "Error, call target has a parameter mapping issue.");

      return false;
      }

   debugTrace(tracer(),"bool inlinecallTarget: calltarget %p calltarget->mycallsite %p calltarget->alreadyInlined = %d inlinefromgraph = %d currentNumberOfNodes = %d",calltarget,calltarget->_myCallSite,calltarget->_alreadyInlined, inlinefromgraph, _inliner->getCurrentNumberOfNodes());

   TR_ASSERT(!(calltarget->_alreadyInlined && calltarget->_myCallSite->_callNode->isTheVirtualCallNodeForAGuardedInlinedCall()), "inlineCallTarget: trying to inline the virtual call node for a guarded inline call that's already been inlined!");
   int32_t nodeCount = _inliner->getCurrentNumberOfNodes();

   int32_t sitesSize = (int32_t)(comp()->getNumInlinedCallSites());

   // Budget check 1: too many inlined call sites already in this caller.
   if (sitesSize >= inliner()->getMaxInliningCallSites() && !inliner()->forceInline(calltarget))
      {
      tracer()->insertCounter(Exceeded_Caller_SiteSize,calltarget->_myCallSite->_callNodeTreeTop);
      if (comp()->trace(OMR::inlining))
         traceMsg(comp(), "inliner: failed: Caller has too many call sites %s\n", tracer()->traceSignature(calltarget->_calleeSymbol));
      return false;
      }

   // Very small callees (<= 20 bytecodes) are exempt from the node budget.
   bool ignoreThisSmallMethod = getInitialBytecodeSize(calltarget->_calleeMethod, calltarget->_calleeSymbol, comp()) <= 20;
   //static int si, sj; ++sj;
   // Budget check 2: caller node count / async-check (loop) count. The cheap
   // cached count is checked first; only if it trips do we pay for an
   // accurate recount below.
   if (((_inliner->getNumAsyncChecks() > HIGH_LOOP_COUNT-5) || ((uint32_t)nodeCount > _inliner->getNodeCountThreshold())) && !_inliner->forceInline(calltarget) && !ignoreThisSmallMethod)
      {
      // getCurrentNumberOfNodes may be unreliable so we must recompute
      if (((uint32_t)(nodeCount = comp()->generateAccurateNodeCount()) > _inliner->getNodeCountThreshold()) || (_inliner->getNumAsyncChecks() > HIGH_LOOP_COUNT-5))
         {
         tracer()->insertCounter(Exceeded_Caller_Node_Budget,calltarget->_myCallSite->_callNodeTreeTop);

         // Global statistics for compile-time diagnostics.
         TR::Options::INLINE_calleeHasTooManyNodes++;
         TR::Options::INLINE_calleeHasTooManyNodesSum += nodeCount;
         if (comp()->trace(OMR::inlining))
            traceMsg(comp(), "inliner: failed: Caller has too many nodes %s while considering callee %s nodeCount = %d nodeCountThreshold = %d\n",comp()->signature(), tracer()->traceSignature(calltarget->_calleeSymbol),nodeCount,_inliner->getNodeCountThreshold());
         return false;
         }
      }

   return true;
   }
4714
4715
/**
 * Validate that the call site's arguments can be mapped onto the callee's
 * parameters, and build that mapping.
 *
 * Counts are adjusted for direct-JNI static calls, which carry an extra
 * (dummy) receiver child on the call node. On mismatch (count or data type,
 * except Aggregate-typed parameters which accept any type) the call target
 * is removed from its call site and false is returned.
 *
 * @param calltarget the candidate target; its _calleeSymbol's parameter list
 *                   is (re)built here
 * @param map        [out] populated parameter mapping; each entry's
 *                   _parameterNode is set to the corresponding argument node
 * @return true if every argument matched its parameter.
 */
bool
TR_J9InlinerPolicy::validateArguments(TR_CallTarget *calltarget, TR_LinkHead<TR_ParameterMapping> &map)
   {
   calltarget->_calleeSymbol->setParameterList();

   ListIterator<TR::ParameterSymbol> parms(&(calltarget->_calleeSymbol->getParameterList()));

   int32_t numParms = calltarget->_calleeSymbol->getParameterList().getSize();
   int32_t numArgs = (int32_t) (calltarget->_myCallSite->_callNode->getNumChildren());

   // Children before the first argument index are not arguments (e.g. VFT).
   numArgs = numArgs - calltarget->_myCallSite->_callNode->getFirstArgumentIndex();

   // Direct-JNI static calls carry one extra child that is not a parameter.
   if (calltarget->_calleeSymbol->getResolvedMethod()->isJNINative() && calltarget->_calleeSymbol->getResolvedMethod()->isStatic() && calltarget->_myCallSite->_callNode->isPreparedForDirectJNI())
      numArgs--;

   if (numArgs != numParms)
      {
      heuristicTrace(tracer(), "Number of Parameters %d and Arguments %d Differ. Removing Call Target for Safety's sake.", numParms, numArgs);
      calltarget->_myCallSite->removecalltarget(calltarget, tracer(), Cant_Match_Parms_to_Args);

      TR_ASSERT(comp()->fej9()->canAllowDifferingNumberOrTypesOfArgsAndParmsInInliner() , "Can't match args to parms, arg number mismatch");

      return false;
      }

   inliner()->createParmMap(calltarget->_calleeSymbol, map);


   TR_ParameterMapping * parm = map.getFirst();
   int32_t argNodeIndex = calltarget->_myCallSite->_callNode->getFirstArgumentIndex();
   // Skip the extra direct-JNI child when it sits at the argument start.
   if (argNodeIndex == 0 && calltarget->_calleeSymbol->getResolvedMethod()->isJNINative() && calltarget->_calleeSymbol->getResolvedMethod()->isStatic() && calltarget->_myCallSite->_callNode->isPreparedForDirectJNI())
      argNodeIndex++;

   // Pair each parameter with its argument node and check data types.
   for ( ; parm ; parm = parm->getNext(), ++argNodeIndex )
      {
      TR::Node *arg = calltarget->_myCallSite->_callNode->getChild(argNodeIndex);

      parm->_parameterNode = arg;

      // Aggregate-typed parameters are exempt from the type match.
      if (arg->getDataType() != parm->_parmSymbol->getDataType() &&
          (parm->_parmSymbol->getDataType() != TR::Aggregate))
         {
         heuristicTrace(tracer(), "For argNodeIndex %d, data type of node %p does not match data type of parameter. Removing Call Target for Safety's sake.", argNodeIndex, arg);
         calltarget->_myCallSite->removecalltarget(calltarget, tracer(), Cant_Match_Parms_to_Args);

         if (!comp()->fej9()->canAllowDifferingNumberOrTypesOfArgsAndParmsInInliner())
            TR_ASSERT(0, "Can't match args to parms. Data type mismatch.");

         return false;
         }
      }
   return true;
   }
4768
4769
bool
4770
TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite, TR::Compilation* comp)
4771
{
4772
TR_ResolvedMethod * initialCalleeMethod = callsite->_initialCalleeMethod;
4773
4774
if (initialCalleeMethod)
4775
{
4776
switch (initialCalleeMethod->getRecognizedMethod())
4777
{
4778
/*
4779
* Inline this group of methods when the compiling method is shared method handle thunk.
4780
* Otherwise, they can be folded away by VP and should not be inlined here.
4781
*/
4782
case TR::java_lang_invoke_DirectHandle_nullCheckIfRequired:
4783
case TR::java_lang_invoke_PrimitiveHandle_initializeClassIfRequired:
4784
case TR::java_lang_invoke_MethodHandle_invokeExactTargetAddress:
4785
{
4786
TR::IlGeneratorMethodDetails & details = comp->ilGenRequest().details();
4787
if (details.isMethodHandleThunk())
4788
{
4789
J9::MethodHandleThunkDetails & thunkDetails = static_cast<J9::MethodHandleThunkDetails &>(details);
4790
return thunkDetails.isCustom();
4791
}
4792
return true;
4793
}
4794
4795
default:
4796
break;
4797
}
4798
}
4799
return (callsite->_callNode && comp->fej9()->supressInliningRecognizedInitialCallee(callsite, comp));
4800
}
4801
4802
/**
 * Return true if \p method's bytecode exactly matches the NumberFormat
 * pattern below. Such methods are deliberately NOT inlined so that, when
 * compiled on their own, string peepholes can apply the decimalFormat
 * optimization to them.
 */
static bool
isDecimalFormatPattern(TR::Compilation *comp, TR_ResolvedMethod *method)
   {
   // look for the NumberFormat pattern
   //
   // [ 0], 0, JBaload0getfield
   // [ 1], 1, JBgetfield Ljava/text/NumberFormat;
   // [ 4], 4, JBaload1 Ljava/math/BigDecimal;
   // [ 5], 5, JBinvokevirtual BigDecimal.doubleValue()D or BigDecimal.floatValue()F
   // [ 8], 8, JBf2d if floatValue
   // [ 8], 8, JBinvokevirtual NumberFormat.format(D)Ljava/lang/String;
   // [ b], 11, JBreturn1
   //
   // don't inline such methods as we want to run stringpeepholes when compiled on their own
   // so that the decimalFormat optimization can be applied
   //

   TR_J9ByteCodeIterator bci(0, static_cast<TR_ResolvedJ9Method *> (method), comp->fej9(), comp);

   // maxbytecode could be 12 or 13 depending on whether
   // doubleValue or floatValue is called
   //
   if (bci.maxByteCodeIndex() > 13)
      return false;
   int32_t bcCount = 0;
   TR::DataType type = TR::NoType;
   uint32_t fieldOffset;
   bool isUnresolvedInCP;
   TR_J9ByteCode bc = bci.first();
   if (bc == J9BCunknown) return false;
   if (bc != J9BCaload0) return false;
   bc = bci.next(); // matched 1st bc

   // 2nd bc: getfield of a resolved Address-typed field (the NumberFormat).
   if (bc == J9BCgetfield)
      {
      bool isVolatile, isPrivate, resolved;
      resolved = method->fieldAttributes(comp, bci.next2Bytes(), &fieldOffset, &type, &isVolatile, 0, &isPrivate, false, &isUnresolvedInCP);
      if (!resolved || isUnresolvedInCP)
         return false;
      else if (type != TR::Address)
         return false;

      // TODO: make sure the field is recognized as a NumberFormat
      //
      bc = bci.next(); //matched 2nd bc
      }
   else
      return false;

   if (bc != J9BCaload1)
      return false;
   bc = bci.next(); // matched 3rd bc

   // 4th bc: invokevirtual of BigDecimal.doubleValue()D or floatValue()F.
   bool isFloat = false;
   if (bc == J9BCinvokevirtual)
      {
      int32_t cpIndex = bci.next2Bytes();
      TR_ResolvedMethod *resolvedMethod = method->getResolvedVirtualMethod(comp, cpIndex, true, &isUnresolvedInCP);
      if (resolvedMethod &&
          (resolvedMethod->getRecognizedMethod() == TR::java_math_BigDecimal_doubleValue ||
           resolvedMethod->getRecognizedMethod() == TR::java_math_BigDecimal_floatValue))
         {
         if (resolvedMethod->getRecognizedMethod() == TR::java_math_BigDecimal_floatValue)
            isFloat = true;
         bc = bci.next(); // matched 4th bc
         }
      else
         return false;
      }
   else
      return false;

   // floatValue variant carries an extra f2d widening before format(D).
   if (isFloat)
      {
      if (bc != J9BCf2d)
         return false;
      bc = bci.next(); // matched 5th bc if floatValue
      }

   // Next bc: invokevirtual of NumberFormat.format(D)Ljava/lang/String;.
   if (bc == J9BCinvokevirtual)
      {
      int32_t cpIndex = bci.next2Bytes();
      TR_ResolvedMethod *resolvedMethod = method->getResolvedVirtualMethod(comp, cpIndex, true, &isUnresolvedInCP);
      if (resolvedMethod &&
          resolvedMethod->getRecognizedMethod() == TR::java_text_NumberFormat_format)
         {
         bc = bci.next(); // matched 5th (or 6th) bc
         }
      else
         return false;
      }
   else
      return false;

   if (bc != J9BCgenericReturn)
      return false; // matched 6th (or 7th) bc

   ///traceMsg(comp, "pattern matched successfully\n");
   return true;
   }
4902
4903
/**
 * JSR292-specific inlineability check: decides whether a MethodHandle-related
 * callee may be inlined by this policy.
 */
TR_InlinerFailureReason
TR_J9JSR292InlinerPolicy::checkIfTargetInlineable(TR_CallTarget* target, TR_CallSite* callsite, TR::Compilation* comp)
   {
   // for GPU, skip all the heuristic for JSR292 related methods
   if (comp->hasIntStreamForEach())
      return DontInline_Callee;

   TR_ResolvedMethod * resolvedMethod = target->_calleeSymbol ? target->_calleeSymbol->getResolvedMethod():target->_calleeMethod;

   // This policy only governs JSR292 (MethodHandle machinery) callees.
   if (!isJSR292Method(resolvedMethod))
      return DontInline_Callee;

   // Some JSR292 callees are unconditionally worth inlining.
   if (isJSR292AlwaysWorthInlining(resolvedMethod))
      return InlineableTarget;

   // An archetype-specimen caller (or invokeExact itself) may inline other
   // archetype specimens; otherwise the caller must be warm or hotter.
   const bool callerIsArchetype =
      comp->getCurrentMethod()->convertToMethod()->isArchetypeSpecimen() ||
      comp->getCurrentMethod()->getRecognizedMethod() == TR::java_lang_invoke_MethodHandle_invokeExact;

   if (callerIsArchetype || comp->getMethodHotness() >= warm)
      return InlineableTarget;

   // Cold-or-below non-archetype caller: no inlining of JSR292 methods.
   return DontInline_Callee;
   }
4932
4933
/**
 * General J9 inlineability check for a call target.
 *
 * Rejects AOT-cold compiles, non-compilable/non-inlineable callees, JNI
 * natives (unless an inlineable-JNI direct call), methods that ilgen or
 * UnsafeFastPath will reduce anyway, LambdaForm-generated reinvoke()
 * bodies, GPU-sensitive methods, and recognized methods better handled by
 * dedicated optimizations (crypto, intrinsic array copies/case conversion,
 * DAA intrinsics, NumberFormat patterns).
 *
 * @return InlineableTarget when the target may be inlined, otherwise the
 *         specific failure reason.
 */
TR_InlinerFailureReason
TR_J9InlinerPolicy::checkIfTargetInlineable(TR_CallTarget* target, TR_CallSite* callsite, TR::Compilation* comp)
   {
   if (comp->compileRelocatableCode() && comp->getMethodHotness() <= cold)
      {
      // If we are an AOT cold compile, don't inline
      return DontInline_Callee;
      }

   TR_ResolvedMethod * resolvedMethod = target->_calleeSymbol ? target->_calleeSymbol->getResolvedMethod():target->_calleeMethod;

   // Inlineable JNI methods on a direct call skip the generic compilability
   // and native checks; everything else must pass them.
   if (!isInlineableJNI(resolvedMethod,callsite->_callNode) || callsite->isIndirectCall())
      {
      if (!target->_calleeMethod->isCompilable(comp->trMemory()) || !target->_calleeMethod->isInlineable(comp))
         {
         return Not_Compilable_Callee;
         }

      if (target->_calleeMethod->isJNINative())
         {
         return JNI_Callee;
         }
      }

   TR::RecognizedMethod rm = resolvedMethod->getRecognizedMethod();

   // Don't inline methods that are going to be reduced in ilgen or UnsafeFastPath
   switch (rm)
      {
      case TR::com_ibm_jit_JITHelpers_getByteFromArray:
      case TR::com_ibm_jit_JITHelpers_getByteFromArrayByIndex:
      case TR::com_ibm_jit_JITHelpers_getByteFromArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_getCharFromArray:
      case TR::com_ibm_jit_JITHelpers_getCharFromArrayByIndex:
      case TR::com_ibm_jit_JITHelpers_getCharFromArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_getIntFromArray:
      case TR::com_ibm_jit_JITHelpers_getIntFromArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_getIntFromObject:
      case TR::com_ibm_jit_JITHelpers_getIntFromObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_getLongFromArray:
      case TR::com_ibm_jit_JITHelpers_getLongFromArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_getLongFromObject:
      case TR::com_ibm_jit_JITHelpers_getLongFromObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_getObjectFromArray:
      case TR::com_ibm_jit_JITHelpers_getObjectFromArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_getObjectFromObject:
      case TR::com_ibm_jit_JITHelpers_getObjectFromObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_putByteInArray:
      case TR::com_ibm_jit_JITHelpers_putByteInArrayByIndex:
      case TR::com_ibm_jit_JITHelpers_putByteInArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_putCharInArray:
      case TR::com_ibm_jit_JITHelpers_putCharInArrayByIndex:
      case TR::com_ibm_jit_JITHelpers_putCharInArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_putIntInArray:
      case TR::com_ibm_jit_JITHelpers_putIntInArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_putIntInObject:
      case TR::com_ibm_jit_JITHelpers_putIntInObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_putLongInArray:
      case TR::com_ibm_jit_JITHelpers_putLongInArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_putLongInObject:
      case TR::com_ibm_jit_JITHelpers_putLongInObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_putObjectInArray:
      case TR::com_ibm_jit_JITHelpers_putObjectInArrayVolatile:
      case TR::com_ibm_jit_JITHelpers_putObjectInObject:
      case TR::com_ibm_jit_JITHelpers_putObjectInObjectVolatile:
      case TR::com_ibm_jit_JITHelpers_byteToCharUnsigned:
      case TR::com_ibm_jit_JITHelpers_acmplt:
      case TR::com_ibm_jit_JITHelpers_isArray:
      case TR::com_ibm_jit_JITHelpers_getJ9ClassFromObject32:
      case TR::com_ibm_jit_JITHelpers_getJ9ClassFromObject64:
      case TR::com_ibm_jit_JITHelpers_getClassInitializeStatus:
      case TR::java_lang_StringUTF16_getChar:
      case TR::java_lang_StringUTF16_putChar:
      case TR::java_lang_StringUTF16_toBytes:
      case TR::java_lang_invoke_MethodHandle_asType:
         return DontInline_Callee;
      default:
         break;
      }

   /**
    * Do not inline LambdaForm generated reinvoke() methods as they are on the
    * slow path and may consume inlining budget.
    */
   if (comp->fej9()->isLambdaFormGeneratedMethod(resolvedMethod))
      {
      if (resolvedMethod->nameLength() == strlen("reinvoke") &&
          !strncmp(resolvedMethod->nameChars(), "reinvoke", strlen("reinvoke")))
         {
         traceMsg(comp, "Intentionally avoided inlining generated %.*s.%.*s%.*s\n",
                  resolvedMethod->classNameLength(), resolvedMethod->classNameChars(),
                  resolvedMethod->nameLength(), resolvedMethod->nameChars(),
                  resolvedMethod->signatureLength(), resolvedMethod->signatureChars());
         return DontInline_Callee;
         }
      }

   // GPU pipelines: evaluate() must stay outlined so it can be offloaded.
   if (comp->getOptions()->getEnableGPU(TR_EnableGPU))
      {
      switch (rm)
         {
         case TR::java_util_stream_AbstractPipeline_evaluate:
            traceMsg(comp, "Intentionally avoided inlining evaluate\n");
            return Recognized_Callee;
            break;
         default:
            break;
         }
      }

   // GPU math: keep recognized Math methods outlined for device codegen.
   if (comp->getOptions()->getEnableGPU(TR_EnableGPUEnableMath))
      {
      switch (rm)
         {
         case TR::java_lang_Math_abs_F:
         case TR::java_lang_Math_abs_D:
         case TR::java_lang_Math_exp:
         case TR::java_lang_Math_log:
         case TR::java_lang_Math_sqrt:
         case TR::java_lang_Math_sin:
         case TR::java_lang_Math_cos:
            traceMsg(comp, "Intentionally avoided inlining MathMethod\n");
            return Recognized_Callee;
         default:
            break;
         }
      }

#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
   if (comp->fej9()->inlineRecognizedCryptoMethod(target, comp))
      {
      return Recognized_Callee;
      }
#endif

   if (
      // Case-conversion intrinsic helpers are reduced later; cover all four
      // Latin1/UTF16 toUpper/toLower variants. (The previous code listed
      // toUpperIntrinsicLatin1 and toLowerIntrinsicUTF16 twice each and
      // never checked the other two variants.)
      rm == TR::com_ibm_jit_JITHelpers_toUpperIntrinsicLatin1 ||
      rm == TR::com_ibm_jit_JITHelpers_toLowerIntrinsicLatin1 ||
      rm == TR::com_ibm_jit_JITHelpers_toUpperIntrinsicUTF16 ||
      rm == TR::com_ibm_jit_JITHelpers_toLowerIntrinsicUTF16 ||

      rm == TR::java_lang_String_compressedArrayCopy_BIBII ||
      rm == TR::java_lang_String_compressedArrayCopy_BICII ||
      rm == TR::java_lang_String_compressedArrayCopy_CIBII ||
      rm == TR::java_lang_String_compressedArrayCopy_CICII ||
      rm == TR::java_lang_String_decompressedArrayCopy_BIBII ||
      rm == TR::java_lang_String_decompressedArrayCopy_BICII ||
      rm == TR::java_lang_String_decompressedArrayCopy_CIBII ||
      rm == TR::java_lang_String_decompressedArrayCopy_CICII ||
      rm == TR::java_lang_Math_max_D ||
      rm == TR::java_lang_Math_min_D ||
      //DAA Intrinsic methods will get reduced if intrinsics are on, so don't consider it as a target
      (resolvedMethod->isDAAMarshallingIntrinsicMethod() && !comp->getOption(TR_DisableMarshallingIntrinsics)) ||
      (resolvedMethod->isDAAPackedDecimalIntrinsicMethod() && !comp->getOption(TR_DisablePackedDecimalIntrinsics)) ||

      // dont inline methods that contain the NumberFormat pattern
      // this is because we want to catch the opportunity with stringpeepholes
      // and stringpeepholes runs before inliner. so if the calleemethod contained
      // the pattern and it got inlined, we would never find the pattern
      isDecimalFormatPattern(comp, target->_calleeMethod))
      {
      return Recognized_Callee;
      }

   return InlineableTarget;
   }
5099
5100
bool TR_J9InlinerPolicy::isJSR292Method(TR_ResolvedMethod *resolvedMethod)
   {
   // A JSR292 method is either one of the always-worth-inlining group
   // (archetype specimens, VarHandle operations, small helpers/getters,
   // LambdaForm-generated code) or MethodHandle.invokeExact itself.
   if (isJSR292AlwaysWorthInlining(resolvedMethod))
      return true;

   return resolvedMethod->getRecognizedMethod() == TR::java_lang_invoke_MethodHandle_invokeExact;
   }
5110
5111
bool TR_J9InlinerPolicy::isJSR292AlwaysWorthInlining(TR_ResolvedMethod *resolvedMethod)
   {
   // Methods from the JSR292 implementation that should always be inlined:
   // the invokeExact intrinsic, VarHandle operation methods, the small JSR292
   // getters/helpers, archetype specimens, and LambdaForm-generated methods.
   TR::RecognizedMethod rm = resolvedMethod->getRecognizedMethod();

   return rm == TR::java_lang_invoke_MethodHandle_invokeExact
      || TR_J9MethodBase::isVarHandleOperationMethod(rm)
      || isJSR292SmallGetterMethod(resolvedMethod)
      || isJSR292SmallHelperMethod(resolvedMethod)
      || resolvedMethod->convertToMethod()->isArchetypeSpecimen()
      || TR::comp()->fej9()->isLambdaFormGeneratedMethod(resolvedMethod);
   }
5134
5135
bool TR_J9InlinerPolicy::isJSR292SmallHelperMethod(TR_ResolvedMethod *resolvedMethod)
   {
   // Small JSR292 runtime helpers that are cheap enough to always inline.
   switch (resolvedMethod->getRecognizedMethod())
      {
      case TR::java_lang_invoke_ConvertHandleFilterHelpers_object2J:
      case TR::java_lang_invoke_ConvertHandleFilterHelpers_number2J:
      case TR::java_lang_invoke_MethodHandle_doCustomizationLogic:
      case TR::java_lang_invoke_MethodHandle_undoCustomizationLogic:
         return true;

      default:
         return false;
      }
   }
5151
5152
bool TR_J9InlinerPolicy::isJSR292SmallGetterMethod(TR_ResolvedMethod *resolvedMethod)
   {
   // Small JSR292 getters that are trivially profitable to inline.
   switch (resolvedMethod->getRecognizedMethod())
      {
      case TR::java_lang_invoke_MutableCallSite_getTarget:
      case TR::java_lang_invoke_MethodHandle_type:
      case TR::java_lang_invoke_DirectMethodHandle_internalMemberName:
      case TR::java_lang_invoke_MethodHandleImpl_CountingWrapper_getTarget:
         return true;

      default:
         return false;
      }
   }
5169
5170
void
5171
TR_J9InlinerUtil::estimateAndRefineBytecodeSize(TR_CallSite* callsite, TR_CallTarget* calltarget, TR_CallStack *callStack, int32_t &bytecodeSize)
5172
{
5173
if (comp()->getOptLevel() >= warm && bytecodeSize > 100)
5174
{
5175
//We call to calculateCodeSize to simply get an estimate.
5176
//We don't want the original calltarget to be modified and become inconsistent in any way
5177
//Please see 196749 for more details
5178
5179
calltarget->_originatingBlock = (callsite->_callerBlock != NULL) ? callsite->_callerBlock : (callsite->_callNodeTreeTop ? callsite->_callNodeTreeTop->getEnclosingBlock() : 0);
5180
bool estimateIsFine = false;
5181
if (calltarget->_originatingBlock && calltarget->_calleeSymbol)
5182
{
5183
TR_CallTarget callTargetClone (*calltarget);
5184
TR_EstimateCodeSize::raiiWrapper ecsWrapper(inliner(), tracer(), inliner()->getMaxRecursiveCallByteCodeSizeEstimate());
5185
TR_EstimateCodeSize *ecs = ecsWrapper.getCodeEstimator();
5186
vcount_t origVisitCount = comp()->getVisitCount();
5187
estimateIsFine = ecs->calculateCodeSize(&callTargetClone, callStack, false);
5188
comp()->setVisitCount(origVisitCount);
5189
5190
if (estimateIsFine)
5191
{
5192
if (comp()->trace(OMR::inlining))
5193
traceMsg( comp(), "Partial estimate for this target %d, full size %d, real bytecode size %d\n", callTargetClone._partialSize, callTargetClone._fullSize, bytecodeSize);
5194
5195
bytecodeSize = callTargetClone._fullSize;
5196
if (comp()->trace(OMR::inlining))
5197
traceMsg( comp(), "Reducing bytecode size to %d\n", bytecodeSize);
5198
}
5199
}
5200
}
5201
}
5202
5203
TR_PrexArgInfo* TR_PrexArgInfo::argInfoFromCaller(TR::Node* callNode, TR_PrexArgInfo* callerArgInfo)
   {
   // Build arg info for this call by copying, for every argument child that is
   // a load of a caller parm, the prex knowledge the caller already recorded.
   TR::Compilation* compilation = TR::comp();
   bool tracePrex = compilation->trace(OMR::inlining) || compilation->trace(OMR::invariantArgumentPreexistence);

   int32_t firstArgIndex = callNode->getFirstArgumentIndex();
   TR_PrexArgInfo* argInfo = new (compilation->trHeapMemory()) TR_PrexArgInfo(callNode->getNumArguments(), compilation->trMemory());

   for (int32_t childIndex = firstArgIndex; childIndex < callNode->getNumChildren(); childIndex++)
      {
      TR::Node* child = callNode->getChild(childIndex);
      if (!TR_PrexArgInfo::hasArgInfoForChild(child, callerArgInfo))
         continue;

      int32_t ordinal = childIndex - firstArgIndex;
      argInfo->set(ordinal, TR_PrexArgInfo::getArgForChild(child, callerArgInfo));
      if (tracePrex)
         traceMsg(compilation, "Arg %d is from caller\n", ordinal);
      }
   return argInfo;
   }
5226
5227
/** \brief
 *     Compute preexistence arg info for an inlining target by merging the
 *     info derived from the target's guard with the info computed from the
 *     call site (and, if supplied, the caller's own arg info).
 *
 *     As a side effect, the virtual guard may be improved: for a
 *     non-overridden guard or an interface guard whose receiver class is
 *     initialized, non-abstract and has not been extended, a VFT test is used
 *     instead.
 *
 *  \return
 *     The merged TR_PrexArgInfo, or NULL if args propagation is disabled or
 *     no call node is available.
 */
TR_PrexArgInfo *
TR_J9InlinerUtil::computePrexInfo(TR_CallTarget *target, TR_PrexArgInfo *callerArgInfo)
   {
   if (comp()->getOption(TR_DisableInlinerArgsPropagation))
      return NULL;

   TR_CallSite *site = target->_myCallSite;
   if (!site || !site->_callNode)
      return NULL;

   bool tracePrex = comp()->trace(OMR::inlining) || comp()->trace(OMR::invariantArgumentPreexistence);

   auto prexArgInfoFromTarget = createPrexArgInfoForCallTarget(target->_guard, target->_calleeMethod);
   auto prexArgInfoFromCallSite = TR_J9InlinerUtil::computePrexInfo(inliner(), site, callerArgInfo);
   auto prexArgInfo = TR_PrexArgInfo::enhance(prexArgInfoFromTarget, prexArgInfoFromCallSite, comp());

   if (tracePrex && prexArgInfo)
      {
      traceMsg(comp(), "PREX.inl: argInfo for target %p\n", target);
      prexArgInfo->dumpTrace();
      }

   // At this stage, we can improve the type of the virtual guard we are going to use
   // For a non-overridden guard or for an interface guard, if it makes sense, try to use a vft-test
   // based virtual guard
   //
   // Only consult the CH table once we know there is a receiver class to ask
   // about (the lookup used to run unconditionally, even with a NULL class).
   if (target->_calleeSymbol->hasThisCalls() &&
       target->_receiverClass &&
       !TR::Compiler->cls.isAbstractClass(comp(), target->_receiverClass) &&
       !fe()->classHasBeenExtended(target->_receiverClass))
      {
      TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();
      TR_PersistentClassInfo *thisClassInfo = chTable->findClassInfoAfterLocking(target->_receiverClass, comp());
      if (thisClassInfo &&
          thisClassInfo->isInitialized() &&
          ((target->_guard->_kind == TR_NonoverriddenGuard && target->_guard->_type == TR_NonoverriddenTest) ||
           (target->_guard->_kind == TR_InterfaceGuard)) &&
          performTransformation(comp(), "O^O VIRTUAL GUARD IMPROVE: Changed guard kind %s type %s to use VFT test\n", tracer()->getGuardKindString(target->_guard), tracer()->getGuardTypeString(target->_guard)))
         {
         target->_guard->_type = TR_VftTest;
         target->_guard->_thisClass = target->_receiverClass;
         }
      }

   return prexArgInfo;
   }
5272
5273
/** \brief
 *     Convenience overload: compute prex arg info for \p target with no
 *     caller argument info to merge in.
 */
TR_PrexArgInfo *
TR_J9InlinerUtil::computePrexInfo(TR_CallTarget *target)
   {
   return computePrexInfo(target, NULL);
   }
5278
5279
/** \brief
5280
* Find the def to an auto or parm before treetop in a extended basic block
5281
*
5282
* \return
5283
* The treetop containing the def (the store)
5284
*/
5285
static TR::TreeTop*
5286
defToAutoOrParmInEBB(TR::Compilation* comp, TR::TreeTop* treetop, TR::SymbolReference* symRef, TR::Node** valueNode)
5287
{
5288
for (;treetop != NULL; treetop= treetop->getPrevTreeTop())
5289
{
5290
auto ttNode = treetop->getNode();
5291
if (ttNode->getOpCodeValue() == TR::BBStart)
5292
{
5293
auto block = ttNode->getBlock();
5294
if (!block->isExtensionOfPreviousBlock())
5295
return NULL;
5296
else
5297
continue;
5298
}
5299
5300
auto storeNode = ttNode->getStoreNode();
5301
if (storeNode &&
5302
storeNode->getOpCode().isStoreDirect() &&
5303
storeNode->getSymbolReference() == symRef)
5304
{
5305
auto child = storeNode->getFirstChild();
5306
// If the child is also an auto, keep walking the trees to find the child's def
5307
if (child->getOpCode().hasSymbolReference() &&
5308
child->getSymbolReference()->getSymbol()->isAuto() &&
5309
!child->getSymbolReference()->hasKnownObjectIndex())
5310
{
5311
symRef = child->getSymbolReference();
5312
continue;
5313
}
5314
5315
if (valueNode)
5316
*valueNode = child;
5317
5318
return treetop;
5319
}
5320
}
5321
5322
return NULL;
5323
}
5324
5325
/** \brief
5326
* Find the first occurrence of the load in a extended basic block
5327
*
5328
* \return
5329
* The treetop containing the first occurrence of the load
5330
*/
5331
static TR::TreeTop*
5332
getFirstOccurrenceOfLoad(TR::Compilation* comp, TR::TreeTop* treetop, TR::Node* loadNode)
5333
{
5334
// Get the first treetop of this EBB.
5335
auto treetopEntry = treetop->getEnclosingBlock()->startOfExtendedBlock()->getEntry();
5336
auto visitCount = comp->incOrResetVisitCount();
5337
5338
for (treetop = treetopEntry; treetop != NULL; treetop = treetop->getNextTreeTop())
5339
{
5340
auto ttNode = treetop->getNode();
5341
if (ttNode->containsNode(loadNode, visitCount))
5342
{
5343
return treetop;
5344
}
5345
}
5346
return NULL;
5347
}
5348
5349
/** \brief
 *     Compute preexistence arg info for a call site by inspecting each
 *     address-typed argument node: the method handle receiver of an archetype
 *     specimen, symrefs/nodes with known object indices, parm loads with
 *     fixed or preexistent types, autos defined from known objects in the
 *     same extended basic block, and java/lang/Class constants. Info from
 *     \p callerArgInfo, when supplied, is merged in at the end.
 *
 *  \return
 *     The populated TR_PrexArgInfo, or NULL if nothing useful was found or
 *     args propagation is disabled.
 */
TR_PrexArgInfo *
TR_J9InlinerUtil::computePrexInfo(TR_InlinerBase *inliner, TR_CallSite* site, TR_PrexArgInfo *callerArgInfo)
   {
   TR::Compilation* comp = inliner->comp();

   if (comp->getOption(TR_DisableInlinerArgsPropagation))
      return NULL;

   if (!site->_callNode)
      return NULL;

   auto callNode = site->_callNode;

   // We want to avoid degrading info we already have from another source like VP.
   // Unfortunately that desire mucks up this function with a lot of logic that
   // looks like constraint merging, and is redundant with what VP already does.
   //
   TR_PrexArgInfo *prexArgInfo = NULL;
   // Interface call doesn't have a resolved _intialCalleeMethod, so callee can be NULL
   auto callee = site->_initialCalleeMethod;

   bool tracePrex = comp->trace(OMR::inlining) || comp->trace(OMR::invariantArgumentPreexistence);
   if (tracePrex)
      traceMsg(comp, "PREX.inl: Populating prex argInfo for [%p] %s %s\n", callNode, callNode->getOpCode().getName(), callNode->getSymbol()->castToMethodSymbol()->getMethod()->signature(inliner->trMemory(), stackAlloc));

   int32_t firstArgIndex = callNode->getFirstArgumentIndex();
   for (int32_t c = callNode->getNumChildren() -1; c >= firstArgIndex; c--)
      {
      int32_t argOrdinal = c - firstArgIndex;

      TR::Node *argument = callNode->getChild(c);
      if (tracePrex)
         {
         traceMsg(comp, "PREX.inl: Child %d [%p] n%dn %s %s\n",
                  c, argument,
                  argument->getGlobalIndex(),
                  argument->getOpCode().getName(),
                  argument->getOpCode().hasSymbolReference()? argument->getSymbolReference()->getName(comp->getDebug()) : "");
         }

      // Only address-typed arguments carrying a symbol reference can hold prex info.
      if (!argument->getOpCode().hasSymbolReference() || argument->getDataType() != TR::Address)
         continue;

      auto symRef = argument->getSymbolReference();
      auto symbol = symRef->getSymbol();

      TR_PrexArgument* prexArg = NULL;

      if (c == callNode->getFirstArgumentIndex() &&
          callee &&
          callee->convertToMethod()->isArchetypeSpecimen() &&
          callee->getMethodHandleLocation() &&
          comp->getOrCreateKnownObjectTable())
         {
         // Here's a situation where inliner is taking it upon itself to draw
         // conclusions about known objects. VP won't get a chance to figure this
         // out before we go ahead and do the inlining, so we'd better populate
         // the prex info now.
         //
         // (If VP did this stuff instead of inliner, it might work a bit more naturally.)
         //
         TR::KnownObjectTable::Index methodHandleIndex = comp->getKnownObjectTable()->getOrCreateIndexAt(callee->getMethodHandleLocation());
         prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(methodHandleIndex, comp);
         if (tracePrex)
            {
            TR::Node *mh = callNode->getArgument(0);
            traceMsg(comp, "PREX.inl: %p: %p is known object obj%d in inlined call [%p]\n", prexArg, mh, methodHandleIndex, callNode);
            }
         }
      else if (symRef->hasKnownObjectIndex())
         {
         prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(symRef->getKnownObjectIndex(), comp);
         if (tracePrex)
            traceMsg(comp, "PREX.inl: %p: is symref known object obj%d\n", prexArg, symRef->getKnownObjectIndex());
         }
      else if (argument->hasKnownObjectIndex())
         {
         prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(argument->getKnownObjectIndex(), comp);
         if (tracePrex)
            traceMsg(comp, "PREX.inl: %p: is node known object obj%d\n", prexArg, argument->getKnownObjectIndex());
         }
      else if (argument->getOpCodeValue() == TR::aload)
         {
         OMR::ParameterSymbol *parmSymbol = symbol->getParmSymbol();
         if (parmSymbol && !prexArg)
            {
            if (parmSymbol->getFixedType())
               {
               prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(TR_PrexArgument::ClassIsFixed, (TR_OpaqueClassBlock *) parmSymbol->getFixedType());
               if (tracePrex)
                  {
                  char *sig = TR::Compiler->cls.classSignature(comp, (TR_OpaqueClassBlock*)parmSymbol->getFixedType(), inliner->trMemory());
                  traceMsg(comp, "PREX.inl: %p: is load of parm with fixed class %p %s\n", prexArg, parmSymbol->getFixedType(), sig);
                  }
               }
            if (parmSymbol->getIsPreexistent())
               {
               int32_t len = 0;
               const char *sig = parmSymbol->getTypeSignature(len);
               TR_OpaqueClassBlock *clazz = comp->fe()->getClassFromSignature(sig, len, site->_callerResolvedMethod);

               if (clazz)
                  {
                  prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(TR_PrexArgument::ClassIsPreexistent, clazz);
                  if (tracePrex)
                     traceMsg(comp, "PREX.inl: %p: is preexistent\n", prexArg);
                  }
               }
            }
         else if (symbol->isAuto())
            {
            // Chase the auto back to its def within this extended basic block;
            // if the def stores a known object, the argument is that object.
            TR::Node* valueNode = NULL;
            TR::TreeTop* ttForFirstOccurrence = getFirstOccurrenceOfLoad(comp, site->_callNodeTreeTop, argument);
            TR_ASSERT_FATAL(ttForFirstOccurrence, "Could not get a treetop for the first occurence of %p", argument);
            defToAutoOrParmInEBB(comp, ttForFirstOccurrence, symRef, &valueNode);
            if (valueNode &&
                valueNode->getOpCode().hasSymbolReference() &&
                valueNode->getSymbolReference()->hasKnownObjectIndex())
               {
               prexArg = new (inliner->trHeapMemory()) TR_PrexArgument(valueNode->getSymbolReference()->getKnownObjectIndex(), comp);
               if (tracePrex)
                  traceMsg(comp, "PREX.inl: %p: is known object obj%d, argument n%dn has def from n%dn %s %s\n",
                           prexArg,
                           prexArg->getKnownObjectIndex(),
                           argument->getGlobalIndex(),
                           valueNode->getGlobalIndex(),
                           valueNode->getOpCode().getName(),
                           valueNode->getSymbolReference()->getName(comp->getDebug()));
               }
            }
         }
      else if (symRef == comp->getSymRefTab()->findJavaLangClassFromClassSymbolRef())
         {
         TR::Node *argFirstChild = argument->getFirstChild();
         if (argFirstChild->getOpCodeValue() == TR::loadaddr &&
             argFirstChild->getSymbol()->isStatic() &&
             !argFirstChild->getSymbolReference()->isUnresolved() &&
             argFirstChild->getSymbol()->isClassObject() &&
             argFirstChild->getSymbol()->castToStaticSymbol()->getStaticAddress())
            {
            uintptr_t objectReferenceLocation = (uintptr_t)argFirstChild->getSymbolReference()->getSymbol()->castToStaticSymbol()->getStaticAddress();
            TR::KnownObjectTable *knot = comp->getOrCreateKnownObjectTable();
            if (knot)
               {
               TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
               auto knownObjectIndex = knot->getOrCreateIndexAt((uintptr_t*)(objectReferenceLocation + fej9->getOffsetOfJavaLangClassFromClassField()));
               prexArg = new (comp->trHeapMemory()) TR_PrexArgument(knownObjectIndex, comp);
               // Fixed format string: it used to have a single %d for two
               // varargs, so prexArg was printed as an int and the index lost.
               if (tracePrex)
                  traceMsg(comp, "PREX.inl: %p: is known java/lang/Class obj%d\n", prexArg, knownObjectIndex);
               }
            }
         }

      if (prexArg)
         {
         if (!prexArgInfo)
            prexArgInfo = new (inliner->trHeapMemory()) TR_PrexArgInfo(callNode->getNumArguments(), inliner->trMemory());
         prexArgInfo->set(argOrdinal, prexArg);
         }
      }

   if (tracePrex)
      traceMsg(comp, "PREX.inl: Done populating prex argInfo for %s %p\n", callNode->getOpCode().getName(), callNode);

   if (tracePrex && prexArgInfo)
      {
      traceMsg(comp, "PREX.inl: argInfo for callsite %p\n", site);
      prexArgInfo->dumpTrace();
      }

   if (callerArgInfo)
      {
      if (tracePrex)
         traceMsg(comp, "PREX.inl: Propagating prex argInfo from caller for [%p] %s %s\n",
                  callNode,
                  callNode->getOpCode().getName(),
                  callNode->getSymbol()->castToMethodSymbol()->getMethod()->signature(inliner->trMemory(), stackAlloc));

      TR_PrexArgInfo* argsFromCaller = TR_PrexArgInfo::argInfoFromCaller(callNode, callerArgInfo);
      prexArgInfo = TR_PrexArgInfo::enhance(prexArgInfo, argsFromCaller, comp);

      if (tracePrex)
         {
         traceMsg(comp, "PREX.inl: argInfo for callsite %p after propagating argInfo from caller\n", site);
         prexArgInfo->dumpTrace();
         }
      }

   return prexArgInfo;
   }
5539
5540
bool TR_J9InlinerUtil::needTargetedInlining(TR::ResolvedMethodSymbol *callee)
   {
   // Trees from archetype specimens may not match the archetype method's bytecodes,
   // so there may be some calls things that inliner missed.
   //
   // Tactically, we also inline again based on hasMethodHandleInvokes because EstimateCodeSize
   // doesn't yet cope with invokeHandle, invokeHandleGeneric, and invokeDynamic (but it should).
   //
   return callee->getMethod()->isArchetypeSpecimen() || callee->hasMethodHandleInvokes();
   }
5553
5554
/** Find arguments which refer to constant classes
 If a parameter refers to a constant class, set the known object index in _ecsPrexArgInfo
 of the target.
*/
void TR_J9InlinerUtil::checkForConstClass(TR_CallTarget *target, TR_LogTracer *tracer)
   {
   // Env var kill switch; evaluated once per process.
   static char *disableCCI=feGetEnv("TR_DisableConstClassInlining");

   if (disableCCI || !tracer || !target) return;

   TR_CallSite *site = target->_myCallSite;
   if (!site) return;

   TR::Node* callNode = site->_callNode;
   if (!callNode) return;

   // Without existing ECS prex arg info there is nothing to improve.
   TR_PrexArgInfo *ecsArgInfo = target->_ecsPrexArgInfo;
   if (!ecsArgInfo) return;

   TR::Compilation * comp = tracer->comp();
   bool tracePrex = comp->trace(OMR::inlining) || comp->trace(OMR::invariantArgumentPreexistence);

   if (tracePrex)
      traceMsg(comp, "checkForConstClass parm for [%p] %s %s\n", callNode, callNode->getOpCode().getName(), callNode->getSymbol()->castToMethodSymbol()->getMethod()->signature(comp->trMemory(), stackAlloc));

   // loop over args
   int32_t firstArgIndex = callNode->getFirstArgumentIndex();
   for (int32_t c = callNode->getNumChildren()-1; c >= firstArgIndex; c--)
      {
      int32_t argOrdinal = c - firstArgIndex;

      // Check that argOrdinal is a valid index for ecsArgInfo.
      if (argOrdinal >= ecsArgInfo->getNumArgs())
         {
         // NOTE(review): this trace is not gated on tracePrex like the others —
         // presumably intentional so the skip is always visible; confirm.
         traceMsg(comp, "checkForConstClass skipping c=%d because argOrdinal(%d) >= numArgs(%d)\n", c, argOrdinal, ecsArgInfo->getNumArgs());
         continue;
         }

      TR_PrexArgument *prexArgument = ecsArgInfo->get(argOrdinal);

      // Existing knowledge for this slot; only upgraded below if weaker than KNOWN_OBJECT.
      PrexKnowledgeLevel priorKnowledge = TR_PrexArgument::knowledgeLevel(prexArgument);

      TR::Node *argument = callNode->getChild(c);
      if (tracePrex)
         {
         traceMsg(comp, "checkForConstClass: Child %d [%p] arg %p %s%s %s\n",
                  c, argument, prexArgument, TR_PrexArgument::priorKnowledgeStrings[priorKnowledge],
                  argument->getOpCode().getName(),
                  argument->getOpCode().hasSymbolReference()? argument->getSymbolReference()->getName(comp->getDebug()) : "");
         }

      // knownObjectIndex is only read when knownObjectClass has been set below.
      TR::KnownObjectTable::Index knownObjectIndex;
      bool knownObjectClass = false;

      // Recognize a load of java/lang/Class from a resolved static class symbol
      // and register the corresponding heap object in the known object table.
      if (argument->getOpCode().hasSymbolReference() &&
          (argument->getSymbolReference() == comp->getSymRefTab()->findJavaLangClassFromClassSymbolRef()))
         {
         TR::Node *argFirstChild = argument->getFirstChild();
         if (argFirstChild->getOpCode().hasSymbolReference() &&
             argFirstChild->getSymbol()->isStatic() &&
             !argFirstChild->getSymbolReference()->isUnresolved() &&
             argFirstChild->getSymbol()->isClassObject())
            {
            uintptr_t objectReferenceLocation = (uintptr_t)argFirstChild->getSymbolReference()->getSymbol()->castToStaticSymbol()->getStaticAddress();
            if (objectReferenceLocation)
               {
               TR::KnownObjectTable *knot = comp->getOrCreateKnownObjectTable();
               if (knot)
                  {
                  TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
                  // The java/lang/Class object lives at a fixed offset from the J9Class.
                  knownObjectIndex = knot->getOrCreateIndexAt((uintptr_t*)(objectReferenceLocation + fej9->getOffsetOfJavaLangClassFromClassField()));
                  knownObjectClass = true;
                  }
               }
            }
         }

      if (argument->getOpCode().hasSymbolReference() && (knownObjectClass || argument->getSymbolReference()->hasKnownObjectIndex()))
         {
         // Only upgrade slots whose current knowledge is weaker than KNOWN_OBJECT.
         if (priorKnowledge < KNOWN_OBJECT)
            {
            if (knownObjectClass)
               {
               ecsArgInfo->set(argOrdinal, new (comp->trStackMemory()) TR_PrexArgument(knownObjectIndex, comp));
               if (tracePrex)
                  traceMsg(comp, "checkForConstClass: %p: is known object obj%d (knownObjectClass)\n", ecsArgInfo->get(argOrdinal), knownObjectIndex);
               }
            else
               {
               ecsArgInfo->set(argOrdinal, new (comp->trStackMemory()) TR_PrexArgument(argument->getSymbolReference()->getKnownObjectIndex(), comp));
               if (tracePrex)
                  traceMsg(comp, "checkForConstClass: %p: is known object obj%d\n", ecsArgInfo->get(argOrdinal), argument->getSymbolReference()->getKnownObjectIndex());
               }
            }
         }

      } // for each arg

   return;

   } // checkForConstClass
5655
5656
//@TODO this can be re-used as we start building prexargs for every callsite
5657
TR_PrexArgInfo* TR_PrexArgInfo::buildPrexArgInfoForMethodSymbol(TR::ResolvedMethodSymbol* methodSymbol, TR_LogTracer* tracer)
5658
{
5659
int numArgs = methodSymbol->getParameterList().getSize();
5660
TR_ResolvedMethod *feMethod = methodSymbol->getResolvedMethod();
5661
ListIterator<TR::ParameterSymbol> parms(&methodSymbol->getParameterList());
5662
5663
TR::Compilation *comp = tracer->comp();
5664
5665
TR_PrexArgInfo *argInfo = new (comp->trHeapMemory()) TR_PrexArgInfo(numArgs, comp->trMemory());
5666
heuristicTrace(tracer, "PREX-CSI: Populating parmInfo of current method %s\n", feMethod->signature(comp->trMemory()));
5667
int index = 0;
5668
/**
5669
* For non-static method, first slot of the paramters is populated with the class owning the method. Most of the time,
5670
* we should be able to get the class pointer using class signature but in case of hidden classes which according to
5671
* JEP 371, cannot be used directly by bytecode instructions in other classes also is not possible to refer to them in
5672
* paramters except for the method from the same hidden class. In any case, for non-static methods instead of making
5673
* VM query to get the owning class, extract that information from resolvedMethod.
5674
*/
5675
for (TR::ParameterSymbol *p = parms.getFirst(); p != NULL; index++, p = parms.getNext())
5676
{
5677
TR_ASSERT(index < numArgs, "out of bounds!");
5678
int32_t len = 0;
5679
const char *sig = p->getTypeSignature(len);
5680
5681
if (*sig == 'L' || *sig == 'Q')
5682
{
5683
TR_OpaqueClassBlock *clazz = (index == 0 && !methodSymbol->isStatic()) ? feMethod->containingClass() : comp->fe()->getClassFromSignature(sig, len, feMethod);
5684
if (clazz)
5685
{
5686
argInfo->set(index, new (comp->trHeapMemory()) TR_PrexArgument(TR_PrexArgument::ClassIsPreexistent, clazz));
5687
heuristicTrace(tracer, "PREX-CSI: Parm %d class %p in %p is %.*s\n", index, argInfo->get(index)->getClass(), argInfo->get(index), len, sig);
5688
}
5689
}
5690
}
5691
return argInfo;
5692
}
5693
5694
5695
// Extract a method's defining class (resolved in the caller's context) along
// with its name and signature character ranges.
static void populateClassNameSignature(TR::Method *m, TR_ResolvedMethod* caller, TR_OpaqueClassBlock* &c, char* &nc, int32_t &nl, char* &sc, int32_t &sl)
   {
   int32_t classNameLen = m->classNameLength();
   char* classSig = TR::Compiler->cls.classNameToSignature(m->classNameChars(), classNameLen, TR::comp());
   c = caller->fe()->getClassFromSignature(classSig, classNameLen, caller);
   nc = m->nameChars();
   nl = m->nameLength();
   sc = m->signatureChars();
   sl = m->signatureLength();
   }
5705
5706
static char* classSignature (TR::Method * m, TR::Compilation* comp) //tracer helper
   {
   // Convert the method's class name into a null-terminated signature string.
   int32_t ignoredLen = m->classNameLength();
   return TR::Compiler->cls.classNameToSignature(m->classNameChars(), ignoredLen /*don't care, cos this gives us a null terminated string*/, comp);
   }
5711
5712
// Decide whether the call under treetop tt corresponds to callsite: the
// bytecode indices must match, the call must not be a JIT helper, the classes
// must be compatible, and the method names and signatures must be identical.
static bool treeMatchesCallSite(TR::TreeTop* tt, TR::ResolvedMethodSymbol* callerSymbol, TR_CallSite* callsite, TR_LogTracer* tracer)
   {
   // Cheap filter first: a call node at the same bytecode index.
   if (tt->getNode()->getNumChildren()>0 &&
      tt->getNode()->getFirstChild()->getOpCode().isCall() &&
      tt->getNode()->getFirstChild()->getByteCodeIndex() == callsite->_bcInfo.getByteCodeIndex())
      {
      TR::Node* callNode = tt->getNode()->getFirstChild();

      TR::MethodSymbol* callNodeMS = callNode->getSymbolReference()->getSymbol()->castToMethodSymbol();
      TR_ASSERT(callNodeMS, "isCall returned true!");

      // JIT helper calls can share a bytecode index with the real call; skip them.
      if (callNodeMS->isHelper())
         {
         return false;
         }

      TR_OpaqueClassBlock *callSiteClass, *callNodeClass;

      char *callSiteNameChars, *callNodeNameChars,
           *callSiteSignatureChars, *callNodeSignatureChars;

      int32_t callSiteNameLength, callNodeNameLength,
              callSiteSignatureLength, callNodeSignatureLength;

      // An unresolved interface call has no _initialCalleeMethod; fall back to
      // the interface TR::Method in that case.
      populateClassNameSignature (callsite->_initialCalleeMethod ?
            callsite->_initialCalleeMethod->convertToMethod() : //TR_ResolvedMethod doesn't extend TR::Method
            callsite->_interfaceMethod,
         callerSymbol->getResolvedMethod(),
         callSiteClass,
         callSiteNameChars, callSiteNameLength,
         callSiteSignatureChars, callSiteSignatureLength
      );


      populateClassNameSignature (callNodeMS->getMethod(),
         callerSymbol->getResolvedMethod(),
         callNodeClass,
         callNodeNameChars, callNodeNameLength,
         callNodeSignatureChars, callNodeSignatureLength
      );



      //make sure classes are compatible
      // The node's class must be an instance of the callsite's class; if either
      // class failed to resolve, conservatively treat it as a mismatch.
      if (!callNodeClass || !callSiteClass || callerSymbol->getResolvedMethod()->fe()->isInstanceOf (callNodeClass, callSiteClass, true, true, true) != TR_yes)
         {
         if (tracer->heuristicLevel())
            {
            TR::Compilation* comp = TR::comp(); //won't be evaluated unless tracing is on
            heuristicTrace(tracer, "ARGS PROPAGATION: Incompatible classes: callSiteClass %p (%s) callNodeClass %p (%s)",
               callSiteClass,
               classSignature(callsite->_initialCalleeMethod ?
                  callsite->_initialCalleeMethod->convertToMethod() :
                  callsite->_interfaceMethod,
                  comp),
               callNodeClass,
               classSignature(callNodeMS->getMethod(), comp)
            );
            }
         return false;
         }

      //compare names and signatures
      if (callSiteNameLength != callNodeNameLength ||
          strncmp(callSiteNameChars, callNodeNameChars, callSiteNameLength) ||
          callSiteSignatureLength != callNodeSignatureLength ||
          strncmp(callSiteSignatureChars, callNodeSignatureChars, callSiteSignatureLength))
         {
         heuristicTrace(tracer, "ARGS PROPAGATION: Signature mismatch: callSite class %.*s callNode class %.*s",
            callSiteNameLength, callSiteNameChars, callNodeNameLength, callNodeNameChars);
         return false;
         }

      //heuristicTrace(tracer, "ARGS PROPAGATION: matched the node!!!");
      return true;
      }

   return false;
   }
5793
5794
TR::TreeTop* TR_PrexArgInfo::getCallTree(TR::ResolvedMethodSymbol* methodSymbol, TR_CallSite* callsite, TR_LogTracer* tracer)
   {
   // Prefer the treetop the callsite already cached.
   if (callsite->_callNodeTreeTop)
      return callsite->_callNodeTreeTop;

   // Otherwise scan the caller's trees for a call matching the callsite.
   TR::TreeTop* cursor = methodSymbol->getFirstTreeTop();
   while (cursor)
      {
      if (treeMatchesCallSite(cursor, methodSymbol, callsite, tracer))
         return cursor;
      cursor = cursor->getNextTreeTop();
      }

   heuristicTrace(tracer, "ARGS PROPAGATION: Couldn't find a matching node for callsite %p bci %d", callsite, callsite->_bcInfo.getByteCodeIndex());
   return NULL;
   }
5808
5809
TR::Node* TR_PrexArgInfo::getCallNode(TR::ResolvedMethodSymbol* methodSymbol, TR_CallSite* callsite, TR_LogTracer* tracer)
   {
   // Prefer the node the callsite already cached.
   if (callsite->_callNode)
      return callsite->_callNode;

   // Otherwise scan the caller's trees for a call matching the callsite.
   TR::TreeTop* cursor = methodSymbol->getFirstTreeTop();
   while (cursor)
      {
      if (treeMatchesCallSite(cursor, methodSymbol, callsite, tracer))
         return cursor->getNode()->getFirstChild();
      cursor = cursor->getNextTreeTop();
      }

   heuristicTrace(tracer, "ARGS PROPAGATION: Couldn't find a matching node for callsite %p bci %d", callsite, callsite->_bcInfo.getByteCodeIndex());
   return NULL;
   }
5823
5824
bool TR_PrexArgInfo::hasArgInfoForChild (TR::Node *child, TR_PrexArgInfo * argInfo)
   {
   // True iff child is a load of a parm whose ordinal has prex info recorded.
   if (!child->getOpCode().hasSymbolReference())
      return false;

   if (!child->getSymbolReference()->getSymbol()->isParm())
      return false;

   int32_t ordinal = child->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal();
   return ordinal < argInfo->getNumArgs() && argInfo->get(ordinal) != NULL;
   }
5835
5836
TR_PrexArgument* TR_PrexArgInfo::getArgForChild(TR::Node *child, TR_PrexArgInfo* argInfo)
   {
   // Caller must have established hasArgInfoForChild(child, argInfo) first.
   TR_ASSERT(child->getOpCode().hasSymbolReference() &&
             child->getSymbolReference()->getSymbol()->isParm() &&
             child->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal() < argInfo->getNumArgs() && argInfo->get(child->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal())
             , "hasArgInfoForChild should have returned false");

   int32_t ordinal = child->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal();
   return argInfo->get(ordinal);
   }
5845
5846
void TR_PrexArgInfo::propagateReceiverInfoIfAvailable (TR::ResolvedMethodSymbol* methodSymbol, TR_CallSite* callsite,
5847
TR_PrexArgInfo * argInfo, TR_LogTracer *tracer)
5848
{
5849
//this implies we have some argInfo available
5850
TR_ASSERT(argInfo, "otherwise we shouldn't even peek");
5851
TR::Node* callNode = TR_PrexArgInfo::getCallNode(methodSymbol, callsite, tracer);
5852
TR::Compilation *comp = tracer->comp();
5853
heuristicTrace(tracer, "ARGS PROPAGATION: trying to propagate receiver's info for callsite %p at %p", callsite, callNode);
5854
if (!callNode || comp->getOption(TR_DisableInlinerArgsPropagation))
5855
return;
5856
5857
uint32_t numOfArgs = callNode->getNumChildren()-callNode->getFirstArgumentIndex();
5858
5859
if (numOfArgs<1)
5860
return;
5861
//TR_ASSERT(numOfArgs > 0, "argsinfo index out of bounds");
5862
5863
TR::Node* child = callNode->getChild(callNode->getFirstArgumentIndex());
5864
5865
if (TR_PrexArgInfo::hasArgInfoForChild(child, argInfo))
5866
{
5867
heuristicTrace(tracer, "ARGS PROPAGATION: the receiver for callsite %p is also one of the caller's args", callsite);
5868
callsite->_ecsPrexArgInfo = new (comp->trHeapMemory()) TR_PrexArgInfo(numOfArgs, comp->trMemory());
5869
callsite->_ecsPrexArgInfo->set(0, TR_PrexArgInfo::getArgForChild(child, argInfo));
5870
}
5871
}
5872
5873
bool TR_PrexArgInfo::validateAndPropagateArgsFromCalleeSymbol(TR_PrexArgInfo* argsFromSymbol, TR_PrexArgInfo* argsFromTarget, TR_LogTracer *tracer)
   {
   if (!argsFromSymbol || !argsFromTarget || tracer->comp()->getOption(TR_DisableInlinerArgsPropagation))
      {
      heuristicTrace(tracer, "ARGS PROPAGATION: argsFromSymbol %p or argsFromTarget %p are missing\n", argsFromSymbol, argsFromTarget);
      return true;
      }

   heuristicTrace(tracer, "ARGS PROPAGATION: argsFromSymbol (from calleeSymbol)");
   if (tracer->heuristicLevel())
      argsFromSymbol->dumpTrace();

   // Validation: class info from the target must be compatible (in either
   // direction) with the class info derived from the callee symbol —
   // incompatibility might mean we are inlining dead code.
   TR_FrontEnd* fe = tracer->comp()->fe();
   int32_t numArgsToEnhance = std::min(argsFromTarget->getNumArgs(), argsFromSymbol->getNumArgs());
   for (int32_t slot = 0; slot < numArgsToEnhance; slot++)
      {
      TR_PrexArgument* fromTarget = argsFromTarget->get(slot);
      if (!fromTarget || !fromTarget->getClass()) //no incoming class info
         continue;

      TR_PrexArgument* fromSymbol = argsFromSymbol->get(slot);
      if (!fromSymbol || !fromSymbol->getClass())
         {
         heuristicTrace(tracer, "ARGS PROPAGATION: No class info for arg %d from symbol. ", slot);
         return false; //TODO: This can be relaxed
         //just make a copy of incoming args
         //and clear the info for this particular slot
         }

      if (fe->isInstanceOf(fromSymbol->getClass(), fromTarget->getClass(), true, true, true) != TR_yes &&
          fe->isInstanceOf(fromTarget->getClass(), fromSymbol->getClass(), true, true, true) != TR_yes)
         {
         return false;
         }
      }

   TR_PrexArgInfo::enhance(argsFromTarget, argsFromSymbol, tracer->comp()); //otherwise just pick more specific

   heuristicTrace(tracer, "ARGS PROPAGATION: final argInfo after merging argsFromTarget %p", argsFromTarget);
   if (tracer->heuristicLevel())
      argsFromTarget->dumpTrace();

   return true;
   }
5921
5922
5923
/**
 * Walk the callee's trees and clear the prex arg info for every parameter
 * that is written to (i.e. is not invariant inside the callee), since any
 * class/known-object info for such a parameter only holds on entry.
 */
void TR_PrexArgInfo::clearArgInfoForNonInvariantArguments(TR::ResolvedMethodSymbol* methodSymbol, TR_LogTracer* tracer)
   {
   if (tracer->comp()->getOption(TR_DisableInlinerArgsPropagation))
      return;

   bool cleanedAnything = false;
   for (TR::TreeTop * tt = methodSymbol->getFirstTreeTop(); tt; tt = tt->getNextTreeTop())
      {
      TR::Node* storeNode = tt->getNode()->getStoreNode();

      if (!storeNode)
         continue;

      // Assert BEFORE dereferencing the symRef. (Previously the assert sat
      // after a combined check that had already dereferenced it, so it could
      // never fire on the condition it was guarding.)
      TR_ASSERT(storeNode->getSymbolReference(), "stores should have symRefs");
      if (!storeNode->getSymbolReference()->getSymbol()->isParm())
         continue;

      TR::ParameterSymbol* parmSymbol = storeNode->getSymbolReference()->getSymbol()->getParmSymbol();
      if (parmSymbol->getOrdinal() < getNumArgs())
         {
         debugTrace(tracer, "ARGS PROPAGATION: unsetting an arg [%i] of argInfo %p", parmSymbol->getOrdinal(), this);
         set(parmSymbol->getOrdinal(), NULL);
         cleanedAnything = true;
         }
      }

   if (cleanedAnything)
      {
      debugTrace(tracer, "ARGS PROPAGATION: argInfo %p after clear arg info for non-invariant arguments", this);
      if (tracer->heuristicLevel())
         dumpTrace();
      }
   }
5954
5955
void TR_PrexArgInfo::propagateArgsFromCaller(TR::ResolvedMethodSymbol* methodSymbol, TR_CallSite* callsite,
5956
TR_PrexArgInfo * argInfo, TR_LogTracer *tracer)
5957
{
5958
if (tracer->comp()->getOption(TR_DisableInlinerArgsPropagation))
5959
return;
5960
5961
TR_ASSERT(argInfo, "otherwise we shouldn't even peek");
5962
TR::Node* callNode = TR_PrexArgInfo::getCallNode(methodSymbol, callsite, tracer);
5963
heuristicTrace(tracer, "ARGS PROPAGATION: trying to propagate arg info from caller symbol to callsite %p at %p", callsite, callNode);
5964
5965
if (!callNode)
5966
return;
5967
5968
//If we are dealing with indirect calls, temporary use callsite->_ecsPrexArgInfo->get(0)
5969
//instead of argInfo->get(0). This is because the former might have been reseted by
5970
//findCallSiteTarget if it couldn't use argInfo->get(0).
5971
//In such case, propagating argInfo->get(0) any longer might be incorrect.
5972
5973
TR_PrexArgument* receiverPrexArg = NULL;
5974
TR::Node *receiverChild = callNode->getChild(callNode->getFirstArgumentIndex());
5975
if (callsite->_ecsPrexArgInfo)
5976
{
5977
if (TR_PrexArgInfo::hasArgInfoForChild(receiverChild, argInfo))
5978
{
5979
receiverPrexArg = TR_PrexArgInfo::getArgForChild(receiverChild, argInfo);
5980
argInfo->set(receiverChild->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal(), callsite->_ecsPrexArgInfo->get(0));
5981
}
5982
}
5983
5984
heuristicTrace(tracer, "ARGS PROPAGATION: argsFromTarget before args propagation");
5985
for (int i = 0; i < callsite->numTargets(); i++)
5986
if (tracer->heuristicLevel())
5987
callsite->getTarget(i)->_ecsPrexArgInfo->dumpTrace();
5988
5989
for (int i = callNode->getFirstArgumentIndex(); i < callNode->getNumChildren(); i++)
5990
{
5991
TR::Node* child = callNode->getChild(i);
5992
if (TR_PrexArgInfo::hasArgInfoForChild(child, argInfo))
5993
{
5994
heuristicTrace(tracer, "ARGS PROPAGATION: arg %d at callsite %p matches caller's arg %d", i, callsite, child->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal());
5995
5996
for (int j = 0; j < callsite->numTargets(); j++)
5997
{
5998
if (!callsite->getTarget(j)->_ecsPrexArgInfo)
5999
continue;
6000
6001
TR_PrexArgInfo* targetArgInfo = callsite->getTarget(j)->_ecsPrexArgInfo;
6002
6003
if (i - callNode->getFirstArgumentIndex() >= targetArgInfo->getNumArgs())
6004
continue;
6005
6006
if (!targetArgInfo->get(i - callNode->getFirstArgumentIndex()))
6007
targetArgInfo->set(i - callNode->getFirstArgumentIndex(), TR_PrexArgInfo::getArgForChild(child, argInfo));
6008
}
6009
}
6010
}
6011
6012
// Call checkForConstClass on each target so that uses of constant classes
6013
// are identified. (The information will be used by applyArgumentHeuristics)
6014
for (int j = 0; j < callsite->numTargets(); j++)
6015
{
6016
TR_J9InlinerUtil::checkForConstClass(callsite->getTarget(j), tracer);
6017
}
6018
6019
//Restoring argInfo (see setting receiverPrexArg above)
6020
if (receiverPrexArg)
6021
{
6022
argInfo->set(receiverChild->getSymbolReference()->getSymbol()->getParmSymbol()->getOrdinal(), receiverPrexArg);
6023
}
6024
6025
if (tracer->heuristicLevel())
6026
{
6027
heuristicTrace(tracer, "ARGS PROPAGATION: ArgInfo after propagating the args from the caller");
6028
for (int i = 0; i < callsite->numTargets(); i++)
6029
callsite->getTarget(i)->_ecsPrexArgInfo->dumpTrace();
6030
}
6031
}
6032
6033
void
6034
TR_J9InlinerUtil::refineColdness(TR::Node* node, bool& isCold)
6035
{
6036
bool inlineableJNI = false;
6037
TR::SymbolReference * symRef = node->getSymbolReference();
6038
if(symRef->getSymbol()->isResolvedMethod()
6039
&& symRef->getSymbol()->castToResolvedMethodSymbol()->getResolvedMethod())
6040
inlineableJNI = static_cast<TR_J9InlinerPolicy*>(inliner()->getPolicy())->isInlineableJNI(symRef->getSymbol()->castToResolvedMethodSymbol()->getResolvedMethod(),node);
6041
6042
isCold = isCold && !inlineableJNI;
6043
}
6044
6045
/**
 * Create (once per inlined site index) the TR_MethodBranchProfileInfo for an
 * inlined callee and seed it with an initial block frequency and a scale
 * factor derived from the caller block containing the call.
 */
void
TR_J9InlinerUtil::computeMethodBranchProfileInfo (TR::Block * cfgBlock, TR_CallTarget* calltarget, TR::ResolvedMethodSymbol* callerSymbol)
   {
   if (cfgBlock) //isn't this equal to genILSucceeded??
      {

      TR::ResolvedMethodSymbol * calleeSymbol = calltarget->_calleeSymbol;
      TR::TreeTop * callNodeTreeTop = calltarget->_myCallSite->_callNodeTreeTop;

      TR_MethodBranchProfileInfo *mbpInfo = TR_MethodBranchProfileInfo::getMethodBranchProfileInfo(cfgBlock->getEntry()->getNode()->getInlinedSiteIndex(), comp());
      if (!mbpInfo)
         {
         // No info recorded for this inlined site index yet: create it.
         TR::Block *block = callNodeTreeTop->getEnclosingBlock();

         mbpInfo = TR_MethodBranchProfileInfo::addMethodBranchProfileInfo (cfgBlock->getEntry()->getNode()->getInlinedSiteIndex(), comp());

         calleeSymbol->getFlowGraph()->computeInitialBlockFrequencyBasedOnExternalProfiler(comp());
         uint32_t firstBlockFreq = calleeSymbol->getFlowGraph()->getInitialBlockFrequency();

         int32_t blockFreq = block->getFrequency();
         if (blockFreq < 0)
            blockFreq = 6; // frequency unknown: fall back to a small positive default

         // Scale callee frequencies by the relative frequency of the calling
         // block within the caller (and the caller's own initial frequency).
         float freqScaleFactor = 0.0;
         if (callerSymbol->getFirstTreeTop()->getNode()->getBlock()->getFrequency() > 0)
            {
            freqScaleFactor = (float)(blockFreq)/callerSymbol->getFirstTreeTop()->getNode()->getBlock()->getFrequency();
            if (callerSymbol->getFlowGraph()->getInitialBlockFrequency() > 0)
               freqScaleFactor *= (float)(callerSymbol->getFlowGraph()->getInitialBlockFrequency())/(float)firstBlockFreq;
            }
         mbpInfo->setInitialBlockFrequency(firstBlockFreq);
         mbpInfo->setCallFactor(freqScaleFactor);

         calleeSymbol->getFlowGraph()->setFrequencies();

         if (comp()->getOption(TR_TraceBFGeneration))
            {
            traceMsg(comp(), "Setting initial block count for a call with index %d to be %d, call factor %f where block %d (%p) and blockFreq = %d\n", cfgBlock->getEntry()->getNode()->getInlinedSiteIndex(), firstBlockFreq, freqScaleFactor, block->getNumber(), block, blockFreq);
            traceMsg(comp(), "first block freq %d and initial block freq %d\n", callerSymbol->getFirstTreeTop()->getNode()->getBlock()->getFrequency(), callerSymbol->getFlowGraph()->getInitialBlockFrequency());
            }
         }
      }
   }
6088
6089
TR_TransformInlinedFunction *
TR_J9InlinerUtil::getTransformInlinedFunction(TR::ResolvedMethodSymbol *callerSymbol, TR::ResolvedMethodSymbol *calleeSymbol, TR::Block *blockContainingTheCall, TR::TreeTop *callNodeTreeTop,
   TR::Node *callNode, TR_ParameterToArgumentMapper & pam, TR_VirtualGuardSelection *guard, List<TR::SymbolReference> & tempList,
   List<TR::SymbolReference> & availableTemps, List<TR::SymbolReference> & availableBasicBlockTemps)
   {
   // Factory hook: hand back the J9-specific transformer, allocated from the
   // compilation's stack region (freed wholesale at end of compilation).
   TR_J9TransformInlinedFunction *transformer =
      new (comp()->trStackMemory()) TR_J9TransformInlinedFunction(
         comp(), tracer(), callerSymbol, calleeSymbol, blockContainingTheCall,
         callNodeTreeTop, callNode, pam, guard, tempList, availableTemps, availableBasicBlockTemps);
   return transformer;
   }
6096
6097
// J9-specific inlined-function transformer; all state is carried by the base
// class, so this constructor only forwards its arguments.
TR_J9TransformInlinedFunction::TR_J9TransformInlinedFunction(
   TR::Compilation *c, TR_InlinerTracer *tracer,TR::ResolvedMethodSymbol * callerSymbol, TR::ResolvedMethodSymbol * calleeSymbol,
   TR::Block * callNodeBlock, TR::TreeTop * callNodeTreeTop, TR::Node * callNode,
   TR_ParameterToArgumentMapper & mapper, TR_VirtualGuardSelection *guard,
   List<TR::SymbolReference> & temps, List<TR::SymbolReference> & availableTemps,
   List<TR::SymbolReference> & availableTemps2)
   : TR_TransformInlinedFunction(c, tracer, callerSymbol, calleeSymbol, callNodeBlock, callNodeTreeTop, callNode, mapper, guard, temps, availableTemps, availableTemps2)
   {
   }
6106
6107
void
6108
TR_J9TransformInlinedFunction::transform(){
6109
TR_ResolvedMethod * calleeResolvedMethod = _calleeSymbol->getResolvedMethod();
6110
if (calleeResolvedMethod->isSynchronized() && !_callNode->canDesynchronizeCall())
6111
{
6112
if (comp()->trace(OMR::inlining))
6113
traceMsg(comp(), "Wrapping in try region for synchronized method\n");
6114
transformSynchronizedMethod(calleeResolvedMethod);
6115
}
6116
TR_TransformInlinedFunction::transform();
6117
}
6118
6119
void
TR_J9TransformInlinedFunction::transformSynchronizedMethod(TR_ResolvedMethod * calleeMethod)
   {
   // If an inlined synchronized method ends with a throw then we have to unlock the monitor.
   // The stack unwinder does this if the function isn't inlined, but the unwinder doesn't know
   // about the inlined version (unless or until we enhance the meta data).
   // If we change to use the meta data, care must be taken since some call
   // sites may have been desynchronized even though the method is marked as
   // synchronized.
   //
   // isSynchronized=true, putCatchInCaller=false: the handler is placed in the
   // callee and performs the monitor exit before rethrowing.
   wrapCalleeInTryRegion(true, false, calleeMethod);
   }
6131
6132
/**
 * Wrap the inlined callee's trees in a try region whose handler either exits
 * the monitor and rethrows (synchronized callee) or simply rethrows.
 * Under heavy catch-block profiling, additionally creates specialized catch
 * blocks for explicit throws of freshly allocated, resolved exception classes
 * so later passes may convert those throws into gotos.
 */
void
TR_J9TransformInlinedFunction::wrapCalleeInTryRegion(bool isSynchronized, bool putCatchInCaller, TR_ResolvedMethod * calleeMethod)
   {
   TR_InlinerDelimiter delimiter(tracer(),"tif.wrapCalleeInTryRegion");
   int32_t handlerIndex = calleeMethod->numberOfExceptionHandlers();
   TR::TreeTop * prevTreeTop = _calleeSymbol->getLastTreeTop(), * originalLastTreeTop = prevTreeTop;

   TR::CFG *calleeCFG = _calleeSymbol->getFlowGraph();
   TR::Block *catchBlock = NULL;
   TR::Block *block = NULL;
   TR_ScratchList<TR::Block> newCatchBlocks(trMemory());

   TR_CatchBlockProfileInfo * catchInfo = TR_CatchBlockProfileInfo::get(comp());
   if (catchInfo && catchInfo->getCatchCounter() >= TR_CatchBlockProfileInfo::EDOThreshold)
      {
      // For each explicit throw in the callee add an explicit catch block so that we have a chance
      // of converting throws to gotos.
      //
      for (TR::TreeTop * tt = _calleeSymbol->getFirstTreeTop(); tt != originalLastTreeTop; tt = tt->getNextTreeTop())
         {
         TR::Node * node = tt->getNode();
         if (node->getOpCodeValue() == TR::BBStart)
            block = node->getBlock();
         // Pattern: athrow(New(loadaddr <resolved class>)) -- note that 'node'
         // is deliberately rebound at each step of the match.
         else if (node->getNumChildren() > 0 &&
            (node = node->getFirstChild())->getOpCodeValue() == TR::athrow &&
            (node = node->getFirstChild())->getOpCodeValue() == TR::New &&
            (node = node->getFirstChild())->getOpCodeValue() == TR::loadaddr &&
            node->getSymbol()->isClassObject() && !node->getSymbolReference()->isUnresolved())
            {
            TR::SymbolReference * symRef = node->getSymbolReference();
            int32_t catchBlockHandler = handlerIndex++;
            prevTreeTop = createThrowCatchBlock(isSynchronized, putCatchInCaller, calleeCFG, block, prevTreeTop, symRef, catchBlockHandler, newCatchBlocks);
            }
         }
      }

   // The catch-all handler appended at the end of the callee.
   if (isSynchronized)
      catchBlock = appendCatchBlockForInlinedSyncMethod(calleeMethod, prevTreeTop, 0, handlerIndex);
   else
      catchBlock = appendCatchBlockToRethrowException(calleeMethod, prevTreeTop, putCatchInCaller, 0, handlerIndex, true);

   // Add an exception edge from every pre-existing callee block to the new
   // handler, excluding the monenter block and sync-return blocks of a
   // synchronized callee and any OSR code/catch blocks.
   TR::Block * monEnterBlock = _calleeSymbol->getFirstTreeTop()->getNode()->getBlock();
   for (TR::CFGNode * n = calleeCFG->getFirstNode(); n; n = n->getNext())
      if (!catchBlock->hasSuccessor(n) &&
         (!isSynchronized || (n != monEnterBlock && !isSyncReturnBlock(comp(), toBlock(n)))) &&
         !toBlock(n)->isOSRCodeBlock() &&
         !toBlock(n)->isOSRCatchBlock())
         calleeCFG->addExceptionEdge(n, catchBlock);

   // now add the catch blocks (important to do it here so that the above iterator doesn't find these blocks)
   calleeCFG->addNode(catchBlock);

   ListIterator<TR::Block> bi(&newCatchBlocks);
   for (TR::Block * b = bi.getFirst(); b; b = bi.getNext())
      calleeCFG->addNode(b);

   if (comp()->trace(OMR::inlining))
      comp()->dumpMethodTrees("Callee Trees", _calleeSymbol);
   }
6191
6192
/**
 * Create a catch block specialized for one explicit throw site in the callee
 * (see wrapCalleeInTryRegion), wire its exception edge from 'block', collect
 * any newly created blocks in newCatchBlocks, and return the tree top after
 * which the next handler should be appended.
 */
TR::TreeTop *
TR_J9TransformInlinedFunction::createThrowCatchBlock(bool isSynchronized, bool putCatchInCaller,
   TR::CFG *calleeCFG, TR::Block *block, TR::TreeTop *prevTreeTop,
   TR::SymbolReference *symRef, int32_t handlerIndex,
   TR_ScratchList<TR::Block> & newCatchBlocks)
   {
   TR_InlinerDelimiter delimiter(tracer(),"tif.createThrowCatchBlock");
   TR::Block *catchBlock;
   if (isSynchronized)
      {
      // Synchronized callee: the handler must exit the monitor before rethrow.
      catchBlock = appendCatchBlockForInlinedSyncMethod(
         symRef->getOwningMethod(comp()), prevTreeTop, symRef->getCPIndex(), handlerIndex, false);
      catchBlock->setSpecializedDesyncCatchBlock();
      catchBlock->setIsSynchronizedHandler();
      }
   else
      catchBlock = appendCatchBlockToRethrowException(
         symRef->getOwningMethod(comp()), prevTreeTop, putCatchInCaller, symRef->getCPIndex(), handlerIndex, false);

   TR::TreeTop *lastRealTree = catchBlock->getLastRealTreeTop();
   if (!lastRealTree->getNode()->getOpCode().isBranch()) // if !isSynchronized, this condition will be true
      prevTreeTop = catchBlock->getExit();
   else
      {
      // The synchronized handler ends in a branch to the rethrow block; the
      // monexit and rethrow blocks that follow must be registered as new
      // blocks too (they were created with addBlocks=false above).
      TR::Block *monexitBlock = catchBlock->getExit()->getNextTreeTop()->getNode()->getBlock();
      TR::Block *rethrowBlock = lastRealTree->getNode()->getBranchDestination()->getNode()->getBlock();
      prevTreeTop = rethrowBlock->getExit();
      newCatchBlocks.add(monexitBlock);
      newCatchBlocks.add(rethrowBlock);
      }
   calleeCFG->addExceptionEdge(block, catchBlock);
   newCatchBlocks.add(catchBlock);

   return prevTreeTop;
   }
6227
6228
/**
 * Append a catch handler to the callee that simply rethrows the caught
 * exception (optionally extending the live range of 'this' first). The block
 * is joined after prevTreeTop and given an edge to the CFG's exit; the caller
 * is responsible for adding the block to the CFG (see wrapCalleeInTryRegion).
 */
TR::Block *
TR_J9TransformInlinedFunction::appendCatchBlockToRethrowException(
   TR_ResolvedMethod * calleeMethod, TR::TreeTop * prevTreeTop, bool putCatchInCaller, int32_t catchType, int32_t handlerIndex, bool addBlocks)
   {
   TR_InlinerDelimiter delimiter(tracer(),"tif.appendCatchBlockToRethrowException");
   TR::SymbolReferenceTable * symRefTab = comp()->getSymRefTab();

   // Pick the node whose byte-code info the new block inherits.
   TR::Node *modelNode;
   if (putCatchInCaller)
      modelNode = _callNode;
   else
      modelNode = _calleeSymbol->getFirstTreeTop()->getNode();
   //TR::Node * lastNode = prevTreeTop->getNode();

   TR::Block * catchBlock = TR::Block::createEmptyBlock(modelNode, comp());
   catchBlock->setHandlerInfo(catchType, (uint8_t)comp()->getInlineDepth(), handlerIndex, calleeMethod, comp());

   if (comp()->getOption(TR_EnableThisLiveRangeExtension))
      {
      // Store 'this' into the range-extension temp so it stays live through
      // the handler (skipped for final classes without finalizers).
      if (!_calleeSymbol->isStatic() &&
         (!comp()->fej9()->isClassFinal(_calleeSymbol->getResolvedMethod()->containingClass()) ||
         comp()->fej9()->hasFinalizer(_calleeSymbol->getResolvedMethod()->containingClass())))
         {
         TR::Node *anchoredThis = TR::Node::createWithSymRef(modelNode, TR::aload, 0, symRefTab->findOrCreateAutoSymbol(_calleeSymbol, 0, TR::Address));
         TR::SymbolReference *tempSymRef = comp()->getSymRefTab()->findOrCreateThisRangeExtensionSymRef(_calleeSymbol);
         TR::TreeTop *storeTT = TR::TreeTop::create(comp(), TR::Node::createStore(tempSymRef, anchoredThis));
         catchBlock->append(storeTT);
         }
      }

   // rethrow the exception
   // (a dead 'TR::SymbolReference * tempSymRef = 0;' local that was never read
   // has been removed here)
   TR::Node * loadExcpSymbol = TR::Node::createWithSymRef(modelNode, TR::aload, 0, symRefTab->findOrCreateExcpSymbolRef());
   catchBlock->append(TR::TreeTop::create(comp(), TR::Node::createWithSymRef(TR::athrow, 1, 1, loadExcpSymbol, symRefTab->findOrCreateAThrowSymbolRef(_calleeSymbol))));

   TR::CFG * calleeCFG = _calleeSymbol->getFlowGraph();
   calleeCFG->addEdge(catchBlock, calleeCFG->getEnd());

   prevTreeTop->join(catchBlock->getEntry());
   return catchBlock;
   }
6270
// } RTSJ Support ends
6271
6272
/**
 * Append a catch handler for an inlined synchronized method: it saves the
 * caught exception into a temp, exits the method's monitor (guarding against
 * a null monitor object for instance methods), and rethrows via the
 * "throw unreported exception" helper. Returns the new catch block; for
 * instance methods two extra blocks (monexit, rethrow) are also created and,
 * when addBlocks is true, added to the CFG immediately.
 */
TR::Block *
TR_J9TransformInlinedFunction::appendCatchBlockForInlinedSyncMethod(
   TR_ResolvedMethod * calleeResolvedMethod, TR::TreeTop * prevTreeTop, int32_t catchType, int32_t handlerIndex, bool addBlocks)
   {
   TR_InlinerDelimiter delimiter(tracer(),"tif.appendCatchBlockForInlinedSyncMethod");
   TR::SymbolReferenceTable * symRefTab = comp()->getSymRefTab();

   TR::Node * lastNode = _calleeSymbol->getFirstTreeTop()->getNode(); //prevTreeTop->getNode();
   TR::Block * catchBlock = TR::Block::createEmptyBlock(lastNode, comp());
   catchBlock->setHandlerInfo(catchType, (uint8_t)comp()->getInlineDepth(), handlerIndex, calleeResolvedMethod, comp());
   catchBlock->setIsSynchronizedHandler();
   catchBlock->setIsSyntheticHandler();

   // store the exception symbol into a temp
   // (tempSymRef is filled in by storeValueInATemp and used for the rethrow below)
   //
   TR::SymbolReference * tempSymRef = 0;
   TR::Node * excpSymbol = TR::Node::createWithSymRef(lastNode, TR::aload, 0, symRefTab->findOrCreateExcpSymbolRef());
   OMR_InlinerUtil::storeValueInATemp(comp(), excpSymbol, tempSymRef, catchBlock->getEntry(), _callerSymbol, _tempList, _availableTemps, &_availableTemps2);

   // unlock the monitor
   // Static method: the monitor is the java/lang/Class object of the
   // containing class; instance method: it is 'this' (auto slot 0).
   //
   TR::Node * monitorArg, *monitorArgHandle;
   if (_calleeSymbol->isStatic())
      {
      monitorArgHandle = TR::Node::createWithSymRef(lastNode, TR::loadaddr, 0,
         symRefTab->findOrCreateClassSymbol (_calleeSymbol, 0, _calleeSymbol->getResolvedMethod()->containingClass()));
      monitorArgHandle = TR::Node::createWithSymRef(TR::aloadi, 1, 1, monitorArgHandle, symRefTab->findOrCreateJavaLangClassFromClassSymbolRef());
      }
   else
      monitorArgHandle = TR::Node::createWithSymRef(lastNode, TR::aload, 0, symRefTab->findOrCreateAutoSymbol(_calleeSymbol, 0, TR::Address));

   TR::CFG * calleeCFG = _calleeSymbol->getFlowGraph();
   TR::Block *monexitBlock = catchBlock;
   TR::Block *rethrowBlock = catchBlock;
   bool createdStoreForMonitorExit = false;
   if (!_calleeSymbol->isStatic())
      {
      // Instance method: split into catch -> (null check) -> monexit -> rethrow
      // so the monexit is skipped when the receiver is null.
      monexitBlock = TR::Block::createEmptyBlock(lastNode, comp());
      rethrowBlock = TR::Block::createEmptyBlock(lastNode, comp());
      if (addBlocks)
         {
         calleeCFG->addNode(monexitBlock);
         calleeCFG->addNode(rethrowBlock);
         }

      monitorArg = monitorArgHandle;

      if (!comp()->getOption(TR_DisableLiveMonitorMetadata) &&
         _calleeSymbol->isSynchronised() &&
         _calleeSymbol->getSyncObjectTemp())
         {
         TR::TreeTop *storeTT = TR::TreeTop::create(comp(), (TR::Node::create(lastNode,TR::monexitfence,0)));
         catchBlock->append(storeTT);
         createdStoreForMonitorExit = true;
         }

      // if (monitor == null) goto rethrowBlock
      TR::Node *ifNode = TR::Node::createif(TR::ifacmpeq, monitorArg->duplicateTree(), TR::Node::aconst(monitorArg, 0),rethrowBlock->getEntry());
      catchBlock->append(TR::TreeTop::create(comp(), ifNode));
      ifNode->getByteCodeInfo().setDoNotProfile(1);

      catchBlock->getExit()->join(monexitBlock->getEntry());
      monexitBlock->getExit()->join(rethrowBlock->getEntry());
      calleeCFG->addEdge(monexitBlock, rethrowBlock);
      calleeCFG->addEdge(catchBlock, rethrowBlock);
      calleeCFG->addEdge(catchBlock, monexitBlock);
      }
   else
      monitorArg = monitorArgHandle;


   // add the store to track liveMonitors
   //
   if (!comp()->getOption(TR_DisableLiveMonitorMetadata) &&
      !createdStoreForMonitorExit &&
      _calleeSymbol->isSynchronised() &&
      _calleeSymbol->getSyncObjectTemp())
      {
      // NOTE(review): addrNode is created but never used here -- candidate for removal.
      TR::Node *addrNode = TR::Node::create(monitorArg, TR::iconst, 0, 0);
      TR::TreeTop *storeTT = TR::TreeTop::create(comp(), (TR::Node::create(lastNode,TR::monexitfence,0)));
      monexitBlock->append(storeTT);
      }

   TR::Node *monexitNode = TR::Node::createWithSymRef(TR::monexit, 1, 1, monitorArg, symRefTab->findOrCreateMonitorExitSymbolRef(_calleeSymbol));
   monexitNode->setSyncMethodMonitor(true);
   monexitBlock->append(TR::TreeTop::create(comp(), monexitNode));

   if (comp()->getOption(TR_EnableThisLiveRangeExtension))
      {
      // Extend the live range of 'this' across the handler (skipped for final
      // classes without finalizers).
      if (!_calleeSymbol->isStatic() &&
         (!comp()->fej9()->isClassFinal(_calleeSymbol->getResolvedMethod()->containingClass()) ||
         comp()->fej9()->hasFinalizer(_calleeSymbol->getResolvedMethod()->containingClass())))
         {
         TR::Node *anchoredThis = TR::Node::createWithSymRef(lastNode, TR::aload, 0, symRefTab->findOrCreateAutoSymbol(_calleeSymbol, 0, TR::Address));
         // NOTE(review): this inner tempSymRef shadows the exception temp
         // declared above; both are live, but the shadowing is easy to misread.
         TR::SymbolReference *tempSymRef = comp()->getSymRefTab()->findOrCreateThisRangeExtensionSymRef(_calleeSymbol);
         TR::TreeTop *storeTT = TR::TreeTop::create(comp(), TR::Node::createStore(tempSymRef, anchoredThis));
         monexitBlock->append(storeTT);
         }
      }


   // rethrow the exception
   //
   TR::Node * temp = TR::Node::createWithSymRef(lastNode, TR::aload, 0, tempSymRef);
   rethrowBlock->append(TR::TreeTop::create(comp(), TR::Node::createWithSymRef(TR::athrow, 1, 1, temp, symRefTab->findOrCreateThrowUnreportedExceptionSymbolRef(_calleeSymbol))));

   calleeCFG->addEdge(rethrowBlock, calleeCFG->getEnd());

   prevTreeTop->join(catchBlock->getEntry());
   return catchBlock;
   }
6382
6383
bool
6384
TR_J9TransformInlinedFunction::isSyncReturnBlock(TR::Compilation *comp, TR::Block * b)
6385
{
6386
TR::TreeTop * tt = b->getEntry();
6387
if (!tt) return false;
6388
6389
tt = tt->getNextTreeTop();
6390
TR::Node * node = tt->getNode();
6391
6392
if (node->getOpCode().getOpCodeValue() == TR::monexitfence)
6393
tt = tt->getNextTreeTop();
6394
6395
if (node->getOpCode().isStore() && (node->getSymbolReference() == comp->getSymRefTab()->findThisRangeExtensionSymRef()))
6396
tt = tt->getNextTreeTop();
6397
6398
node = tt->getNode();
6399
if (node->getOpCodeValue() == TR::treetop || node->getOpCode().isNullCheck())
6400
node = node->getFirstChild();
6401
6402
if (node->getOpCodeValue() != TR::monexit)
6403
return false;
6404
6405
tt = tt->getNextTreeTop();
6406
if (!tt || !tt->getNode()->getOpCode().isReturn())
6407
return false;
6408
6409
return true;
6410
}
6411
6412
/*
6413
* if the initialCalleeMethod of this callsite is not overridden, add this method as the target of the callsite
6414
*/
6415
/**
 * If the class hierarchy table proves the callsite's initial callee is not
 * overridden anywhere below the receiver's class, commit to it as the single
 * call target under a hierarchy guard. Returns true when a target was added.
 */
bool
TR_J9InlinerUtil::addTargetIfMethodIsNotOverridenInReceiversHierarchy(TR_IndirectCallSite *callsite)
   {
   TR_PersistentCHTable *chTable = comp()->getPersistentInfo()->getPersistentCHTable();

   if( !chTable->isOverriddenInThisHierarchy(callsite->_initialCalleeMethod, callsite->_receiverClass, callsite->_vftSlot, comp()) &&
      !comp()->getOption(TR_DisableHierarchyInlining))
      {
      if(comp()->trace(OMR::inlining))
         {
         int32_t len;
         // An obsolete (redefined) class has no valid name chars to print.
         bool isClassObsolete = comp()->getPersistentInfo()->isObsoleteClass((void*)callsite->_receiverClass, comp()->fe());
         if(!isClassObsolete)
            {
            char *s = TR::Compiler->cls.classNameChars(comp(), callsite->_receiverClass, len);
            heuristicTrace(tracer(),"Virtual call to %s is not overridden in the hierarchy of thisClass %*s\n",tracer()->traceSignature(callsite->_initialCalleeMethod), len, s);
            }
         else
            {
            heuristicTrace(tracer(),"Virtual call to %s is not overridden in the hierarchy of thisClass <obsolete class>\n",tracer()->traceSignature(callsite->_initialCalleeMethod));
            }
         }

      // Extended receiver class -> guard with a method test; otherwise the
      // cheaper VFT test against the exact receiver class suffices.
      TR_VirtualGuardSelection *guard = (fe()->classHasBeenExtended(callsite->_receiverClass)) ?
         new (comp()->trHeapMemory()) TR_VirtualGuardSelection(TR_HierarchyGuard, TR_MethodTest) :
         new (comp()->trHeapMemory()) TR_VirtualGuardSelection(TR_HierarchyGuard, TR_VftTest, callsite->_receiverClass);
      callsite->addTarget(comp()->trMemory(),inliner(),guard,callsite->_initialCalleeMethod,callsite->_receiverClass,heapAlloc);
      return true;
      }
   return false;
   }
6446
6447
int32_t
TR_J9InlinerUtil::getCallCount(TR::Node *callNode)
   {
   // Ask the J9 front end's interpreter profiler for the invocation count
   // recorded at this call node's byte-code location.
   auto *fej9 = comp()->fej9();
   return fej9->getIProfilerCallCount(callNode->getByteCodeInfo(), comp());
   }
6452
6453
TR_ResolvedMethod*
TR_J9InlinerUtil::findSingleJittedImplementer(TR_IndirectCallSite *callsite)
   {
   // Delegate to the persistent class-hierarchy table, which tracks which
   // implementers of the callsite's slot have been jitted.
   TR_PersistentCHTable *chTable = comp()->getPersistentInfo()->getPersistentCHTable();
   return chTable->findSingleJittedImplementer(callsite->_receiverClass, callsite->_vftSlot, callsite->_callerResolvedMethod, comp(), callsite->_initialCalleeSymbol);
   }
6458
6459
bool
6460
TR_J9InlinerUtil::addTargetIfThereIsSingleImplementer (TR_IndirectCallSite *callsite)
6461
{
6462
static bool disableSingleJittedImplementerInlining = feGetEnv("TR_DisableSingleJittedImplementerInlining") ? true : false;
6463
TR_ResolvedMethod *implementer; // A temp to be used to find an implementer in abstract implementer analysis
6464
//findSingleJittedImplementer J9Virtual also knows about interfaces needs to be virtual
6465
if (!disableSingleJittedImplementerInlining && comp()->getMethodHotness() >= hot &&
6466
(implementer = callsite->findSingleJittedImplementer(inliner())))
6467
{
6468
if (comp()->trace(OMR::inlining))
6469
traceMsg(comp(), "inliner: Abstract method %s currently has a single jitted implementation %s\n",
6470
inliner()->tracer()->traceSignature(callsite->_initialCalleeMethod), implementer->signature(comp()->trMemory()));
6471
6472
if (!comp()->cg()->getSupportsProfiledInlining())
6473
{
6474
return false;
6475
}
6476
6477
TR_VirtualGuardSelection *guard;
6478
if (callsite->_receiverClass && !fe()->classHasBeenExtended(callsite->_receiverClass))
6479
guard = new (comp()->trHeapMemory()) TR_VirtualGuardSelection(TR_ProfiledGuard, TR_VftTest, implementer->classOfMethod());
6480
else
6481
guard = new (comp()->trHeapMemory()) TR_VirtualGuardSelection(TR_ProfiledGuard, TR_MethodTest);
6482
callsite->addTarget(comp()->trMemory(),inliner(),guard,implementer,implementer->classOfMethod(),heapAlloc);
6483
return true;
6484
}
6485
return false;
6486
}
6487
6488
/**
 * Build the initial prex arg info for a new call target: fixes the receiver
 * class under a VFT-test guard, and records the receiver as a known object
 * for archetype-specimen methods / LambdaForm mutable-callsite targets.
 * Returns NULL when args propagation or context-sensitive inlining is off.
 */
TR_PrexArgInfo*
TR_J9InlinerUtil::createPrexArgInfoForCallTarget(TR_VirtualGuardSelection *guard, TR_ResolvedMethod *implementer)
   {
   TR_PrexArgInfo *myPrexArgInfo = NULL;
   //if CSI (context sensitive inlining + args propagation) enabled we still want to create an argInfo for args propagation
   if (!comp()->getOption(TR_DisableInlinerArgsPropagation) && comp()->fej9()->supportsContextSensitiveInlining())
      {
      //rather than sticking in a not-null check in TR_J9EstimateCodeSize::realEstimateCodeSize and duplicating the line below
      //we might as well put a supportsContextSensitiveInlining check in here
      myPrexArgInfo = new (comp()->trHeapMemory()) TR_PrexArgInfo(implementer->numberOfParameters(), comp()->trMemory());
      if( guard->_type == TR_VftTest)
         {

         TR_ASSERT(implementer, "no implementer!\n");
         TR_ASSERT(!implementer->isStatic(), "method is static\n");

         // A VFT-test guard fixes the receiver's class exactly; record that.
         myPrexArgInfo->set(0, new (comp()->trHeapMemory()) TR_PrexArgument(TR_PrexArgument::ClassIsFixed, guard->_thisClass));

         if (tracer()->heuristicLevel())
            {
            int32_t len;
            char *s = TR::Compiler->cls.classNameChars(comp(), guard->_thisClass, len);
            heuristicTrace(tracer(),"Created an argInfo to fix receiver to class %s",s);
            }
         }

      bool isArchetypeSpecimen =
         implementer->convertToMethod()->isArchetypeSpecimen()
         && implementer->getMethodHandleLocation() != NULL;

      bool isMCS = guard->_kind == TR_MutableCallSiteTargetGuard;

      bool isLambdaFormMCS =
         isMCS && comp()->fej9()->isLambdaFormGeneratedMethod(implementer);

      if ((isArchetypeSpecimen || isLambdaFormMCS) && comp()->getOrCreateKnownObjectTable())
         {
         // Record the receiver MethodHandle as a known object. For LambdaForm
         // mutable call sites the known-object index comes from the guard's
         // epoch; otherwise it is looked up from the handle's location.
         TR::KnownObjectTable::Index mhIndex = TR::KnownObjectTable::UNKNOWN;
         if (isLambdaFormMCS)
            {
            mhIndex = guard->_mutableCallSiteEpoch;
            }
         else
            {
            uintptr_t *mhLocation = implementer->getMethodHandleLocation();
            mhIndex = comp()->getKnownObjectTable()->getOrCreateIndexAt(mhLocation);
            }

         auto prexArg = new (comp()->trHeapMemory()) TR_PrexArgument(mhIndex, comp());
         if (isMCS)
            prexArg->setTypeInfoForInlinedBody();
         myPrexArgInfo->set(0, prexArg);
         }
      }
   return myPrexArgInfo;
   }
6544
6545
TR_InnerPreexistenceInfo *
TR_J9InlinerUtil::createInnerPrexInfo(TR::Compilation * c, TR::ResolvedMethodSymbol *methodSymbol, TR_CallStack *callStack,
   TR::TreeTop *callTree, TR::Node *callNode,
   TR_VirtualGuardKind guardKind)
   {
   // Factory hook: produce the J9-specific inner-preexistence analysis,
   // allocated from the compilation's stack region.
   TR_J9InnerPreexistenceInfo *prexInfo =
      new (comp()->trStackMemory()) TR_J9InnerPreexistenceInfo(c, methodSymbol, callStack, callTree, callNode, guardKind);
   return prexInfo;
   }
6552
6553
//---------------------------------------------------------------------
6554
// TR_J9InnerPreexistenceInfo::ParmInfo
6555
//---------------------------------------------------------------------
6556
// Pairs an inner (callee) parameter with the outer (caller) parameter that
// feeds it; _isInvariant starts true (may be cleared later by analysis
// elsewhere in this file -- not visible from this constructor).
TR_J9InnerPreexistenceInfo::ParmInfo::ParmInfo(TR::ParameterSymbol *innerParm, TR::ParameterSymbol *outerParm)
   : _innerParm(innerParm), _outerParm(outerParm), _isInvariant(true)
   {}
6559
6560
/**
 * Exploit inner preexistence for a guarded inlined call: either register the
 * accumulated inner assumptions on the outer virtual guard, or -- when the
 * call's receiver preexists -- remove the guard outright and devirtualize.
 * Returns true only when the guard was removed.
 */
bool
TR_J9InnerPreexistenceInfo::perform(TR::Compilation *comp, TR::Node *guardNode, bool & disableTailRecursion)
   {
   static char *disable = feGetEnv("TR_DisableIPREX");
   // Bail out when inner preexistence is disabled or unsupported: FSD, HCR
   // mode, HCR/breakpoint guards, and relocatable (AOT) compiles.
   if (disable ||
      !comp->getOptimizer()->isEnabled(OMR::innerPreexistence) ||
      comp->getOption(TR_FullSpeedDebug) ||
      comp->getHCRMode() != TR::none ||
      guardNode->isHCRGuard() ||
      guardNode->isBreakpointGuard() ||
      comp->compileRelocatableCode())
      return false;

   // perform() is a misnomer -- most of the work is already done by the constructor
   // at this stage - we just find what is the best way to utilize the information
   //
   if (!comp->performVirtualGuardNOPing())
      return false;

   // If we have inner assumptions - then we must register the assumptions on the
   // virtual guard
   //
   if (hasInnerAssumptions())
      {
      TR_VirtualGuard *virtualGuard = comp->findVirtualGuardInfo(guardNode);
      TR_ASSERT(virtualGuard, "Must have an outer guard to have inner assumptions");

      disableTailRecursion = true;
      ListIterator<TR_InnerAssumption> it(&getInnerAssumptions());
      for (TR_InnerAssumption *a = it.getFirst(); a; a = it.getNext())
         virtualGuard->addInnerAssumption(a);
      }
   else
      {
      // Else, see if we can directly devirtualize this call by using inner preexistence
      //
      TR_VirtualGuard *virtualGuard = comp->findVirtualGuardInfo(guardNode);
      PreexistencePoint *point = getPreexistencePoint(0); // ie. see if the 'this' for the call preexists
      if (point &&
         performTransformation(comp, "%sIPREX: remove virtual guard for inlined call %p to %s because it inner preexists parm ordinal %d of %s\n",
            OPT_DETAILS, _callNode, _methodSymbol->getResolvedMethod()->signature(trMemory()),
            point->_ordinal, point->_callStack->_methodSymbol->getResolvedMethod()->signature(trMemory())))
         {
         TR_ASSERT(virtualGuard, "we cannot directly devirtualize anything thats not guarded");

         //_callNode->devirtualizeCall(_callTree);

         // Add an inner assumption on the outer guard
         //
         TR_InnerAssumption *a = new (comp->trHeapMemory()) TR_InnerAssumption(point->_ordinal, virtualGuard);
         ((TR_J9InnerPreexistenceInfo *)point->_callStack->_innerPrexInfo)->addInnerAssumption(a);
         disableTailRecursion = true;

         // Tell compilation that this guard is to be removed
         //
         comp->removeVirtualGuard(virtualGuard);

         // "Remove" the guard node
         // Replacing the first child with the second makes the compare
         // 'x != x', i.e. trivially false, so the guard never branches.
         //
         TR_ASSERT(guardNode->getOpCodeValue() == TR::ificmpne ||
            guardNode->getOpCodeValue() == TR::iflcmpne ||
            guardNode->getOpCodeValue() == TR::ifacmpne,
            "Wrong kind of if discovered for a virtual guard");
         guardNode->getFirstChild()->recursivelyDecReferenceCount();
         guardNode->setAndIncChild(0, guardNode->getSecondChild());
         guardNode->resetIsTheVirtualGuardForAGuardedInlinedCall();

         // FIXME:
         //printf("---$$$--- inner prex in %s\n", comp->signature());

         // Let tree simplification clean up the now-degenerate if.
         ((TR::Optimizer*)comp->getOptimizer())->setRequestOptimization(OMR::treeSimplification, true);

         return true;
         }
      }
   return false;
   }
6637
6638
//---------------------------------------------------------------------
6639
// TR_J9InnerPreexistenceInfo
6640
//---------------------------------------------------------------------
6641
6642
// Build the inner-preexistence bookkeeping for 'methodSymbol', which is being
// inlined at 'callNode'/'treeTop' within 'callStack'.  Records, for every
// address-typed parameter of the method, whether it is invariant (never
// re-stored inside the method) and which caller parameter (if any) feeds it
// directly.
TR_J9InnerPreexistenceInfo::TR_J9InnerPreexistenceInfo(TR::Compilation * c, TR::ResolvedMethodSymbol *methodSymbol,
      TR_CallStack *callStack, TR::TreeTop *treeTop,
      TR::Node *callNode, TR_VirtualGuardKind guardKind)
   :TR_InnerPreexistenceInfo(c, methodSymbol, callStack, treeTop, callNode, guardKind)
   {
   static char *disable = feGetEnv("TR_DisableIPREX");
   // Leave the parameter table unbuilt when inner preexistence cannot be used
   // for this compile.
   if (!c->getOptimizer()->isEnabled(OMR::innerPreexistence) ||
       c->compileRelocatableCode() ||
       disable ||
       !_methodSymbol ||
       c->getHCRMode() == TR::traditional)
      return;

   _numArgs = methodSymbol->getParameterList().getSize();
   _parameters = (ParmInfo **) trMemory()->allocateStackMemory(_numArgs * sizeof(ParmInfo*));
   memset(_parameters, 0, _numArgs * sizeof(ParmInfo*));

   // Initialize the Parameter Info Array
   // Only address-typed (reference) parameters are tracked; slots for all
   // other parameters stay NULL.
   //
   ListIterator<TR::ParameterSymbol> parmIt(&methodSymbol->getParameterList());
   int32_t ordinal = 0;
   for (TR::ParameterSymbol *p = parmIt.getFirst(); p; p = parmIt.getNext(), ordinal++)
      {
      if (p->getDataType() == TR::Address)
         {
         _parameters[ordinal] = new (trStackMemory()) ParmInfo(p);
         }
      }

   // Walk the IL of the method to find out which parms are invariant
   // (a direct address-typed store to a parm marks it not invariant)
   //
   for (TR::TreeTop *tt = methodSymbol->getFirstTreeTop();
        tt; tt = tt->getNextRealTreeTop())
      {
      TR::Node *node = tt->getNode();
      if (node->getOpCodeValue() == TR::treetop)
         node = node->getFirstChild();

      if (node->getOpCode().isStoreDirect() && node->getDataType() == TR::Address)
         {
         TR::Symbol *symbol = node->getSymbolReference()->getSymbol();
         if (symbol->isParm())
            {
            getParmInfo(symbol->getParmSymbol()->getOrdinal())->setNotInvariant();
            }
         }
      }

   // Figure out how the parms of the caller method tie together with the parms
   // of this method
   //
   if (_callNode) // we are being inlined
      {
      TR::Node *node = _callNode;
      TR_ASSERT(callStack, "must have a call stack if we are being inlined from somewhere\n");

      // NOTE(review): the loop index 'c' below shadows the TR::Compilation*
      // parameter 'c' of this constructor.
      int32_t firstArgIndex = node->getFirstArgumentIndex();
      for (int32_t c = node->getNumChildren() - 1; c >= firstArgIndex; --c)
         {
         TR::Node *argument = node->getChild(c);
         // An argument that is a plain aload of one of the caller's parms ties
         // the callee parm at ordinal (c - firstArgIndex) to that caller parm.
         if (argument->getOpCodeValue() == TR::aload)
            {
            TR::ParameterSymbol *parmSymbol = argument->getSymbolReference()->getSymbol()->getParmSymbol();
            if (parmSymbol && c - firstArgIndex < ordinal)
               {
               ParmInfo *parmInfo = getParmInfo(c - firstArgIndex);
               if (parmInfo) parmInfo->setOuterSymbol(parmSymbol);
               }
            }
         }
      }

   }
6715
6716
// Find the call-stack point at which the value of parm 'ordinal' of this
// (inlined) method preexists, by delegating to the caller's prex info.
// Returns NULL when no usable preexistence point exists (inner assumptions
// already registered, parm not tied to a caller parm, or no caller).
TR_J9InnerPreexistenceInfo::PreexistencePoint *
TR_J9InnerPreexistenceInfo::getPreexistencePoint(int32_t ordinal)
   {
   if (hasInnerAssumptions()) return 0;
   ParmInfo *parmInfo = getParmInfo(ordinal);
   // _parameters slots are only allocated for address-typed parms (and the
   // table is not built at all when the constructor bails out early), so the
   // entry may be NULL -- guard before dereferencing.
   if (!parmInfo || !parmInfo->_outerParm) return 0;
   if (!_callStack) return 0;

   return ((TR_J9InnerPreexistenceInfo *)_callStack->_innerPrexInfo)->getPreexistencePointImpl(parmInfo->_outerParm->getOrdinal(), _callStack);
   }
6726
6727
// Recursive helper: walk outward through the call stack looking for the
// outermost caller whose invariant parameter ultimately supplies parm
// 'ordinal' of this method.  'prevCallStack' is the frame of the callee that
// asked about this ordinal.  Returns the preexistence point (call-stack entry
// plus parm ordinal) to use, or NULL when none is available.
TR_J9InnerPreexistenceInfo::PreexistencePoint *
TR_J9InnerPreexistenceInfo::getPreexistencePointImpl(int32_t ordinal, TR_CallStack *prevCallStack)
   {
   ParmInfo *parmInfo = getParmInfo(ordinal);
   // _parameters slots are only allocated for address-typed parms, so the
   // entry may be NULL -- a missing or non-invariant entry means the value
   // cannot be proven to preexist.
   if (!parmInfo || !parmInfo->isInvariant()) return 0;
   if (!_callStack) return 0;

   PreexistencePoint *point = 0;
   // If this parm is fed directly by one of our caller's parms, try to push
   // the preexistence point one frame further out first.
   if (parmInfo->_outerParm)
      point = ((TR_J9InnerPreexistenceInfo *)_callStack->_innerPrexInfo)->getPreexistencePointImpl(parmInfo->_outerParm->getOrdinal(), _callStack);

   if (!point)
      {
      if (_guardKind != TR_ProfiledGuard && (_guardKind != TR_NoGuard || !comp()->hasIntStreamForEach())) // FIXME: this limitation can be removed by doing the tree transformation
         point = new (trStackMemory()) PreexistencePoint(prevCallStack, ordinal);
      }

   return point;
   }
6747
6748
// Answers whether argument privatization should be skipped for the given
// recognized method.  Only applies when aggressive JSR292 optimization
// mode '2' is requested via the TR_aggressiveJSR292Opts environment
// variable; otherwise arguments are always privatized.
bool TR_J9InlinerPolicy::dontPrivatizeArgumentsForRecognizedMethod(TR::RecognizedMethod recognizedMethod)
   {
   static char *aggressiveJSR292Opts = feGetEnv("TR_aggressiveJSR292Opts");
   bool mode2Enabled = (aggressiveJSR292Opts != NULL) && (strchr(aggressiveJSR292Opts, '2') != NULL);
   if (!mode2Enabled)
      return false;

   return recognizedMethod == TR::java_lang_invoke_MethodHandle_invokeExactTargetAddress;
   }
6764
6765
bool
6766
TR_J9InlinerPolicy::replaceSoftwareCheckWithHardwareCheck(TR_ResolvedMethod *calleeMethod)
6767
{
6768
if (calleeMethod && comp()->cg()->getSupportsBDLLHardwareOverflowCheck() &&
6769
((strncmp(calleeMethod->signature(comp()->trMemory()), "java/math/BigDecimal.noLLOverflowAdd(JJJ)Z", 42) == 0) ||
6770
(strncmp(calleeMethod->signature(comp()->trMemory()), "java/math/BigDecimal.noLLOverflowMul(JJJ)Z", 42) == 0)))
6771
return true;
6772
else return false;
6773
}
6774
6775
bool
6776
TR_J9InlinerPolicy::suitableForRemat(TR::Compilation *comp, TR::Node *callNode, TR_VirtualGuardSelection *guard)
6777
{
6778
float profiledGuardProbabilityThreshold = 0.6f;
6779
static char *profiledGuardProbabilityThresholdStr = feGetEnv("TR_ProfiledGuardRematProbabilityThreshold");
6780
if (profiledGuardProbabilityThresholdStr)
6781
{
6782
profiledGuardProbabilityThreshold = ((float)atof(profiledGuardProbabilityThresholdStr));
6783
}
6784
6785
bool suitableForRemat = true;
6786
TR_AddressInfo *valueInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp, AddressInfo));
6787
if (guard->isHighProbablityProfiledGuard())
6788
{
6789
if (comp->getMethodHotness() <= warm && comp->getPersistentInfo()->getJitState() == STARTUP_STATE)
6790
{
6791
suitableForRemat = false;
6792
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledPrivArgRemat/unsuitableForRemat/warmHighProb"));
6793
}
6794
else
6795
{
6796
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledPrivArgRemat/suitableForRemat/highProb"));
6797
}
6798
}
6799
else if (valueInfo)
6800
{
6801
if (valueInfo->getTopProbability() >= profiledGuardProbabilityThreshold)
6802
{
6803
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledPrivArgRemat/suitableForRemat/probability=%d", ((int32_t)(valueInfo->getTopProbability() * 100))));
6804
}
6805
else
6806
{
6807
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledPrivArgRemat/unsuitableForRemat/probability=%d", ((int32_t)(valueInfo->getTopProbability() * 100))));
6808
suitableForRemat = false;
6809
}
6810
}
6811
else
6812
{
6813
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledPrivArgRemat/unsuitableForRemat/noinfo"));
6814
suitableForRemat = false;
6815
}
6816
return suitableForRemat;
6817
}
6818
6819
// J9-specific inliner tracer; all tracing behavior comes from the base
// TR_InlinerTracer.
TR_J9InlinerTracer::TR_J9InlinerTracer(TR::Compilation *comp, TR_FrontEnd *fe, TR::Optimization *opt)
   : TR_InlinerTracer(comp, fe, opt)
   {}
6822
6823
// Factory: allocate a J9-specific inliner tracer on the compilation heap.
TR_InlinerTracer *
TR_J9InlinerUtil::getInlinerTracer(TR::Optimization *optimization)
   {
   return new (comp()->trHeapMemory()) TR_J9InlinerTracer(comp(),fe(),optimization);
   }
6828
6829
// Emit a heuristic-level trace line for each profiled receiver class in
// 'sortedValuesIt', reporting its relative frequency (frequency divided by
// 'totalFrequency').  Obsolete classes are traced by pointer instead of name.
// Does nothing unless heuristic-level tracing is active.
void TR_J9InlinerTracer::dumpProfiledClasses (ListIterator<TR_ExtraAddressInfo>& sortedValuesIt, uint32_t totalFrequency)
   {
   if (!heuristicLevel())
      return;

   for (TR_ExtraAddressInfo *info = sortedValuesIt.getFirst(); info != NULL; info = sortedValuesIt.getNext())
      {
      int32_t frequency = info->_frequency;
      TR_OpaqueClassBlock *receiverClass = (TR_OpaqueClassBlock *) info->_value;
      float relativeFreq = (float)frequency / (float)totalFrequency;
      int32_t len = 1;

      if (comp()->getPersistentInfo()->isObsoleteClass((void*)receiverClass, comp()->fe()))
         {
         heuristicTrace(this, "receiverClass %p is obsolete and has profiled frequency of %f",receiverClass,relativeFreq);
         }
      else
         {
         const char *className = TR::Compiler->cls.classNameChars(comp(), receiverClass, len);
         heuristicTrace(this , "receiverClass %s has a profiled frequency of %f", className,relativeFreq);
         }
      }
   }
6855
6856