GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/codegen/J9CodeGenerator.cpp
1
/*******************************************************************************
2
* Copyright (c) 2000, 2021 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#if defined(J9ZOS390)
24
#pragma csect(CODE,"TRJ9CGBase#C")
25
#pragma csect(STATIC,"TRJ9CGBase#S")
26
#pragma csect(TEST,"TRJ9CGBase#T")
27
#endif
28
29
#include <algorithm>
30
#include "codegen/AheadOfTimeCompile.hpp"
31
#include "codegen/CodeGenerator.hpp"
32
#include "codegen/CodeGenerator_inlines.hpp"
33
#include "codegen/PicHelpers.hpp"
34
#include "codegen/Relocation.hpp"
35
#include "codegen/Instruction.hpp"
36
#include "codegen/MonitorState.hpp"
37
#include "compile/AOTClassInfo.hpp"
38
#include "compile/Compilation.hpp"
39
#include "compile/OSRData.hpp"
40
#include "compile/VirtualGuard.hpp"
41
#include "control/Recompilation.hpp"
42
#include "control/RecompilationInfo.hpp"
43
#include "env/CompilerEnv.hpp"
44
#include "env/VMAccessCriticalSection.hpp"
45
#include "env/VMJ9.h"
46
#include "env/jittypes.h"
47
#include "env/j9method.h"
48
#include "il/AutomaticSymbol.hpp"
49
#include "il/Block.hpp"
50
#include "il/LabelSymbol.hpp"
51
#include "il/Node.hpp"
52
#include "il/Node_inlines.hpp"
53
#include "il/NodePool.hpp"
54
#include "il/ParameterSymbol.hpp"
55
#include "il/StaticSymbol.hpp"
56
#include "il/Symbol.hpp"
57
#include "infra/Assert.hpp"
58
#include "infra/BitVector.hpp"
59
#include "infra/ILWalk.hpp"
60
#include "infra/List.hpp"
61
#include "optimizer/Structure.hpp"
62
#include "optimizer/TransformUtil.hpp"
63
#include "ras/Delimiter.hpp"
64
#include "ras/DebugCounter.hpp"
65
#include "runtime/CodeCache.hpp"
66
#include "runtime/CodeCacheExceptions.hpp"
67
#include "runtime/CodeCacheManager.hpp"
68
#include "env/CHTable.hpp"
69
#include "env/PersistentCHTable.hpp"
70
71
#define OPT_DETAILS "O^O CODE GENERATION: "
72
73
74
J9::CodeGenerator::CodeGenerator(TR::Compilation *comp) :
75
OMR::CodeGeneratorConnector(comp),
76
_gpuSymbolMap(comp->allocator()),
77
_stackLimitOffsetInMetaData(comp->fej9()->thisThreadGetStackLimitOffset()),
78
_uncommonedNodes(comp->trMemory(), stackAlloc),
79
_liveMonitors(NULL),
80
_nodesSpineCheckedList(getTypedAllocator<TR::Node*>(comp->allocator())),
81
_jniCallSites(getTypedAllocator<TR_Pair<TR_ResolvedMethod,TR::Instruction> *>(comp->allocator())),
82
_monitorMapping(std::less<ncount_t>(), MonitorMapAllocator(comp->trMemory()->heapMemoryRegion())),
83
_dummyTempStorageRefNode(NULL)
84
{
85
/**
86
* Do not add CodeGenerator initialization logic here.
87
* Use the \c initialize() method instead.
88
*/
89
}
90
91
void
92
J9::CodeGenerator::initialize()
93
{
94
self()->OMR::CodeGeneratorConnector::initialize();
95
}
96
97
TR_J9VMBase *
98
J9::CodeGenerator::fej9()
99
{
100
return (TR_J9VMBase *)self()->fe();
101
}
102
103
// J9
104
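// Lower an object-reference child of a recognized compareAndSwap call into the compressed
// form the codegen expects: convert the address with a2l, subtract the heap base (skipped
// when the value is known null or a low-memory heap is in use), apply the compressed
// reference right-shift if one is configured, and narrow with l2i. The lowered l2i node
// replaces the original child under 'parent'.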
static TR::Node *lowerCASValues(
105
TR::Node *parent,
106
int32_t childNum,
107
TR::Node *address,
108
TR::Compilation *comp,
109
TR::Node *shftOffset,
110
bool isLowMem,
111
TR::Node *heapBase)
112
{
113
TR::Node *l2iNode = NULL;
114
115
if ((address->getOpCodeValue() == TR::aconst) &&
116
(address->getAddress() == 0))
117
{
118
l2iNode = TR::Node::create(address, TR::iconst, 0, 0);
119
}
120
else
121
{
122
// -J9JIT_COMPRESSED_POINTER-
123
// if the value is known to be null or if using lowMemHeap, do not
124
// generate a compression sequence
125
//
126
TR::Node *a2lNode = TR::Node::create(TR::a2l, 1, address);
127
bool isNonNull = false;
128
if (address->isNonNull())
129
isNonNull = true;
130
131
TR::Node *addNode = NULL;
132
133
if (address->isNull() || isLowMem)
134
{
135
addNode = a2lNode;
136
}
137
else
138
{
139
if (isNonNull)
140
a2lNode->setIsNonZero(true);
141
addNode = TR::Node::create(TR::lsub, 2, a2lNode, heapBase);
142
addNode->setContainsCompressionSequence(true);
143
if (isNonNull)
144
addNode->setIsNonZero(true);
145
}
146
147
if (shftOffset)
148
{
149
addNode = TR::Node::create(TR::lushr, 2, addNode, shftOffset);
150
addNode->setContainsCompressionSequence(true);
151
}
152
153
if (isNonNull)
154
addNode->setIsNonZero(true);
155
156
l2iNode = TR::Node::create(TR::l2i, 1, addNode);
157
if (isNonNull)
158
l2iNode->setIsNonZero(true);
159
160
if (address->isNull())
161
l2iNode->setIsNull(true);
162
}
163
164
parent->setAndIncChild(childNum, l2iNode);
165
address->recursivelyDecReferenceCount();
166
return l2iNode;
167
}
168
169
170
// J9
171
//
172
// convert dual operators from DAG representation to cyclic representation by cloning
// e.g.
//    luaddh
//      xh
//      yh
//      ladd
//        xl
//        yl
//        ==> luaddh <=== replace dummy node with this third child to complete cycle
//
182
void
183
J9::CodeGenerator::lowerDualOperator(
184
TR::Node *parent,
185
int32_t childNumber,
186
TR::TreeTop *treeTop)
187
{
188
if (parent == NULL)
189
{
190
// should never need to process treetops
191
return;
192
}
193
194
// any parent may have an adjunct
195
TR::Node *child = parent->getChild(childNumber);
196
if (child->isAdjunct())
197
{
198
TR_ASSERT(!child->isDualCyclic(), "Visitcount problem: trying to clone node %p when it has already been cloned.\n", child);
199
200
// create clone with space for third child, but still with two children
201
TR::Node *clone = self()->createOrFindClonedNode(child, 3);
202
if (1 && performTransformation(self()->comp(), "%sCreating Cyclic Dual Representation, replacing %p (%s) by %p under %p (childNumber %d).\n",
203
OPT_DETAILS, child, child->getOpCode().getName(), clone, parent, childNumber))
204
{
205
child = clone;
206
parent->setChild(childNumber, child);
207
if (parent->isDualHigh() && (childNumber == 2))
208
{
209
// build cycle
210
TR_ASSERT(!parent->isDualCyclic(), "Attempting to lower a dual operator node %p that has already been lowered.\n", parent);
211
child->setNumChildren(3);
212
child->setAndIncChild(2, parent);
213
}
214
}
215
}
216
}
217
218
219
// J9
220
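// Lower a compressedRefs anchor (or, for recognized CAS calls, the object-reference children
// named in childrenToBeLowered) into explicit compression/decompression IL: indirect loads
// become an Int32 load widened with iu2l and shifted left by the compressed-reference shift,
// while stored values are converted with a2l, shifted right, and narrowed with l2i.
// The resulting tree shapes are illustrated in the comment inside this method.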
void
221
J9::CodeGenerator::lowerCompressedRefs(
222
TR::TreeTop *treeTop,
223
TR::Node *node,
224
vcount_t visitCount,
225
TR_BitVector *childrenToBeLowered)
226
{
227
if (node->getOpCode().isCall() && childrenToBeLowered)
228
{
229
TR_BitVectorIterator bvi(*childrenToBeLowered);
230
while (bvi.hasMoreElements())
231
{
232
int32_t nextChild = bvi.getNextElement();
233
TR::Node *valueChild = node->getChild(nextChild);
234
if (valueChild->getOpCode().is8Byte())
235
{
236
TR::Node *shftOffset = NULL;
237
if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)
238
{
239
shftOffset = TR::Node::create(node, TR::iconst, 0, TR::Compiler->om.compressedReferenceShiftOffset());
240
}
241
242
TR::Node *heapBase = TR::Node::create(node, TR::lconst, 0, 0);
243
lowerCASValues(node, nextChild, valueChild, self()->comp(), shftOffset, true, heapBase);
244
}
245
}
246
247
return;
248
}
249
250
251
TR::Node *loadOrStoreNode = node->getFirstChild();
252
253
/*
   decompression:
   actual = compress + heap_base

   and compression:
   compress = actual - heap_base

   iaload f                 l2a
     aload O                  ladd
                                lshl
                                  i2l
                                    iiload f
                                      aload O
                                  iconst shftKonst
                                lconst HB

   -or- if the field is known to be null
   l2a
     i2l
       iiload f
         aload O


   iastore f                iistore f
     aload O                  aload O
     value                    l2i
                                lshr
                                  lsub
                                    a2l
                                      aload O
                                    lconst HB
                                  iconst shftKonst

   -or- if the field is known to be null
   iistore f
     aload O
     l2i
       a2l       <- nop on most platforms
         aload O

   - J9JIT_COMPRESS_POINTER 32-bit -

   DEPRECATED - do *not* use, kept here for historical reasons

   compress = actual - heapBase + shadowBase = actual + disp
   actual = compress - disp

   iaload f                 i2a
     aload O                  isub
                                iiload f
                                  aload O
                                iconst HB

   iastore f                iistore f
     aload O                  aload O
                              iushr      // iushr only there to distinguish between
                                iadd     // real iistores with iadds as the value
                                  a2i
                                    value
                                  iconst HB
                                iconst 0

*/
316
317
// don't process loads/stores twice
// cannot use visitCounts because compressedRefs
// trees may appear after checks (in which case the node
// would have already been visited, preventing lowering)
//
322
TR::ILOpCodes convertOp = self()->comp()->target().is64Bit() ? TR::l2a : TR::i2a;
323
if (loadOrStoreNode->getOpCodeValue() == convertOp)
324
return;
325
else if (loadOrStoreNode->getOpCode().isStoreIndirect())
326
{
327
convertOp = self()->comp()->target().is64Bit() ? TR::l2i : TR::iushr;
328
if (loadOrStoreNode->getSecondChild()->getOpCodeValue() == convertOp)
329
return;
330
}
331
332
TR::Node *heapBase = node->getSecondChild();
333
334
TR::SymbolReference *symRef = loadOrStoreNode->getSymbolReference();
335
TR::ILOpCodes loadOrStoreOp;
336
bool isLoad = true;
337
338
TR::Node *address = NULL;
339
340
bool shouldBeCompressed = false;
341
if (loadOrStoreNode->getOpCode().isLoadIndirect() ||
342
loadOrStoreNode->getOpCode().isStoreIndirect() ||
343
loadOrStoreNode->getOpCodeValue() == TR::arrayset)
344
{
345
shouldBeCompressed = TR::TransformUtil::fieldShouldBeCompressed(loadOrStoreNode, self()->comp());
346
if (!shouldBeCompressed)
347
{
348
// catch cases when a compressedRefs anchor is created for specific
349
// unsafe loads by inliner
350
//
351
if (loadOrStoreNode->getSymbol()->isUnsafeShadowSymbol())
352
shouldBeCompressed = true;
353
}
354
// Don't de-compress loads created by dynamicLitPool
355
if (loadOrStoreNode->getOpCode().isLoadIndirect() &&
356
loadOrStoreNode->getSymbolReference()->isFromLiteralPool())
357
shouldBeCompressed = false;
358
}
359
360
if (loadOrStoreNode->getOpCode().isLoadIndirect() && shouldBeCompressed)
361
{
362
if (self()->comp()->target().cpu.isZ() && TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
363
{
364
dumpOptDetails(self()->comp(), "converting to ardbari %p under concurrent scavenge on Z.\n", node);
365
self()->createReferenceReadBarrier(treeTop, loadOrStoreNode);
366
return;
367
}
368
369
// base object
370
address = loadOrStoreNode->getFirstChild();
371
loadOrStoreOp = TR::Compiler->om.readBarrierType() != gc_modron_readbar_none || loadOrStoreNode->getOpCode().isReadBar() ? self()->comp()->il.opCodeForIndirectReadBarrier(TR::Int32) :
372
self()->comp()->il.opCodeForIndirectLoad(TR::Int32);
373
}
374
else if ((loadOrStoreNode->getOpCode().isStoreIndirect() ||
375
loadOrStoreNode->getOpCodeValue() == TR::arrayset) &&
376
shouldBeCompressed)
377
{
378
// store value
379
address = loadOrStoreNode->getSecondChild();
380
381
loadOrStoreOp = self()->comp()->il.opCodeForIndirectStore(TR::Int32);
382
isLoad = false;
383
}
384
else
385
{
386
dumpOptDetails(self()->comp(), "compression sequence %p is not in required form\n", node);
387
return;
388
}
389
390
// in future if shifted offsets are used, this value will be
391
// a positive non-zero constant
392
//
393
TR::Node *shftOffset = NULL;
394
if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)
395
shftOffset = TR::Node::create(loadOrStoreNode, TR::iconst, 0, TR::Compiler->om.compressedReferenceShiftOffset());
396
397
if (isLoad)
398
{
399
TR::Node *newLoad = TR::Node::createWithSymRef(loadOrStoreOp, 1, 1, address, symRef);
400
newLoad->setByteCodeInfo(loadOrStoreNode->getByteCodeInfo());
401
402
if (loadOrStoreNode->isNonNull())
403
newLoad->setIsNonZero(true);
404
405
// FIXME: this breaks commoning of address (which could be a regLoad)
406
// it would be nice to get the node flags on the original
407
//TR::Node *newLoad = loadOrStoreNode->duplicateTree();
408
//TR::Node::recreate(newLoad, loadOrStoreOp);
409
410
// -J9JIT_COMPRESSED_POINTER-
411
//
412
TR::Node *iu2lNode = TR::Node::create(TR::iu2l, 1, newLoad);
413
414
TR::Node *addNode = iu2lNode;
415
if (loadOrStoreNode->isNonNull())
416
addNode->setIsNonZero(true);
417
418
// if the load is known to be null or if using lowMemHeap, do not
419
// generate a compression sequence
420
addNode = iu2lNode;
421
if (shftOffset)
422
{
423
addNode = TR::Node::create(TR::lshl, 2, iu2lNode, shftOffset);
424
addNode->setContainsCompressionSequence(true);
425
}
426
427
TR::Node::recreate(loadOrStoreNode, TR::l2a);
428
address->decReferenceCount();
429
loadOrStoreNode->setAndIncChild(0, addNode);
430
loadOrStoreNode->setNumChildren(1);
431
}
432
else
433
{
434
// All evaluators assume that when we load or store an object reference,
// the child being loaded or stored carries a decompression or compression
// sequence, respectively. When storing an object reference, the evaluator
// may in some cases also need the actual (decompressed) object address.
// Because the codegen relies on these assumptions, we should not perform any
// optimization on the compression/decompression sequence here that could
// break them and lead to undefined behaviour.
// See openj9#12597 for more details.
443
444
// -J9JIT_COMPRESSED_POINTER-
445
// if the value is known to be null or if using lowMemHeap, do not
446
// generate a compression sequence
447
//
448
TR::Node *a2lNode = TR::Node::create(TR::a2l, 1, address);
449
bool isNonNull = false;
450
if (address->isNonNull())
451
isNonNull = true;
452
453
TR::Node *addNode = NULL;
454
addNode = a2lNode;
455
456
if (shftOffset)
457
{
458
addNode = TR::Node::create(TR::lushr, 2, addNode, shftOffset);
459
addNode->setContainsCompressionSequence(true);
460
}
461
462
if (isNonNull)
463
addNode->setIsNonZero(true);
464
465
TR::Node *l2iNode = TR::Node::create(TR::l2i, 1, addNode);
466
if (isNonNull)
467
l2iNode->setIsNonZero(true);
468
469
if (address->isNull())
470
l2iNode->setIsNull(true);
471
472
// recreating an arrayset node will replace the TR::arrayset with an istorei, which is undesired
473
// as arrayset nodes can set indirect references
474
if (!loadOrStoreNode->getOpCode().isWrtBar() && loadOrStoreNode->getOpCodeValue() != TR::arrayset)
475
{
476
TR::Node::recreate(loadOrStoreNode, loadOrStoreOp);
477
}
478
479
loadOrStoreNode->setAndIncChild(1, l2iNode);
480
address->recursivelyDecReferenceCount();
481
}
482
}
483
484
bool
485
J9::CodeGenerator::supportVMInternalNatives()
486
{
487
return !self()->comp()->compileRelocatableCode();
488
}
489
490
// J9
491
//
492
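// Scan treetops starting at firstTree and stop at the first monent/monexit encountered.
// Return true if a resolved native call to java/lang/Object.notify or java/lang/Object.wait
// is seen before any monitor node; callers use this to decide that lock reservation is unsafe.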
static bool scanForNativeMethodsUntilMonitorNode(TR::TreeTop *firstTree, TR::Compilation *comp)
493
{
494
TR::TreeTop *currTree = firstTree;
495
while (currTree)
496
{
497
TR::Node *currNode = currTree->getNode();
498
//traceMsg(comp(), "-> Looking at node %p\n", currNode);
499
500
if ((currNode->getOpCodeValue() == TR::monexit) ||
501
(currNode->getOpCodeValue() == TR::monent))
502
{
503
return false;
504
}
505
else if (currNode->getNumChildren() > 0 &&
506
currNode->getFirstChild()->getNumChildren() > 0 &&
507
((currNode->getFirstChild()->getOpCodeValue() == TR::monexit) ||
508
(currNode->getFirstChild()->getOpCodeValue() == TR::monent))
509
)
510
{
511
return false;
512
}
513
514
515
TR::Node *callTestNode = NULL;
516
517
if (currNode->getOpCode().isCall() &&
518
!currNode->getSymbolReference()->isUnresolved() &&
519
currNode->getSymbol()->castToMethodSymbol()->isNative())
520
{
521
callTestNode = currNode;
522
}
523
else if (currNode->getNumChildren() > 0 &&
524
currNode->getFirstChild()->getOpCode().isCall() &&
525
!currNode->getFirstChild()->getSymbolReference()->isUnresolved() &&
526
currNode->getFirstChild()->getSymbol()->castToMethodSymbol()->isNative())
527
{
528
callTestNode = currNode->getFirstChild();
529
}
530
531
if (callTestNode)
532
{
533
TR::ResolvedMethodSymbol *symbol = callTestNode->getSymbol()->castToResolvedMethodSymbol();
534
if (strstr(symbol->signature(comp->trMemory()), "java/lang/Object.notify") ||
535
strstr(symbol->signature(comp->trMemory()), "java/lang/Object.wait"))
536
return true;
537
}
538
539
currTree = currTree->getNextTreeTop();
540
}
541
542
return false;
543
}
544
545
546
void
547
J9::CodeGenerator::preLowerTrees()
548
{
549
550
OMR::CodeGeneratorConnector::preLowerTrees();
551
552
/*
553
* These initializations should move from OMR to J9
554
555
int32_t symRefCount = comp()->getSymRefCount();
556
_localsThatAreStored = new (comp()->trHeapMemory()) TR_BitVector(symRefCount, comp()->trMemory(), heapAlloc);
557
_numLocalsWhenStoreAnalysisWasDone = symRefCount;
558
*/
559
560
// For dual operator lowering
561
_uncommonedNodes.reset();
562
_uncommonedNodes.init(64, true);
563
}
564
565
566
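// If the codegen supports hardware overflow checks (getSupportsBDLLHardwareOverflowCheck()),
// mark the third child of a recognized java_math_BigDecimal_noLLOverflowAdd icall as
// requiring condition codes.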
void
567
J9::CodeGenerator::lowerTreesPreTreeTopVisit(TR::TreeTop *tt, vcount_t visitCount)
568
{
569
OMR::CodeGeneratorConnector::lowerTreesPreTreeTopVisit(tt, visitCount);
570
571
TR::Node *node = tt->getNode();
572
573
if (self()->getSupportsBDLLHardwareOverflowCheck() && node->getNumChildren() > 0 &&
574
node->getFirstChild() && node->getFirstChild()->getOpCodeValue() == TR::icall &&
575
node->getFirstChild()->getSymbol() &&
576
(node->getFirstChild()->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_math_BigDecimal_noLLOverflowAdd))
577
{
578
node->getFirstChild()->getChild(2)->setNodeRequiresConditionCodes(true);
579
}
580
581
}
582
583
584
void
585
J9::CodeGenerator::lowerTreesPreChildrenVisit(TR::Node *parent, TR::TreeTop *treeTop, vcount_t visitCount)
586
{
587
OMR::CodeGeneratorConnector::lowerTreesPreChildrenVisit(parent, treeTop, visitCount);
588
589
/*
rip out a SpineCHK under two conditions:
1. If the first child has already been the first child of another SpineCHK
2. If the first child is not an array access. This can happen when
an array access node is replaced by common subexpression elimination
*/
595
596
bool doIt = false;
597
598
if ( (parent->getOpCodeValue() == TR::BNDCHKwithSpineCHK) || (parent->getOpCodeValue() == TR::SpineCHK) )
599
{
600
TR::Node *firstChild = parent->getFirstChild();
601
TR::ILOpCode opcode = firstChild->getOpCode();
602
603
if( ( (opcode.isLoad() || opcode.isStore() ) && opcode.hasSymbolReference() && firstChild->getSymbolReference() != NULL &&
604
firstChild->getSymbolReference()->getSymbol()->isArrayShadowSymbol()) || opcode.isArrayRef())
605
{
606
// first child of SpineCHK is an array load or store; check if this is the first time we evaluate this node
607
bool found = (std::find(_nodesSpineCheckedList.begin(), _nodesSpineCheckedList.end(), firstChild) != _nodesSpineCheckedList.end());
608
if ( (firstChild->getVisitCount() == visitCount) && found )
609
{
610
// we have checked this array access before; rip out the SpineCHK
611
doIt = true;
612
}
613
else
614
{
615
_nodesSpineCheckedList.push_front(firstChild);
616
}
617
}
618
else
619
{
620
// the first child is not an array access, rip out SpineCHK
621
doIt = true;
622
}
623
624
if( doIt )
625
{
626
int32_t i = 0;
627
int32_t numChildren = parent->getNumChildren();
628
TR::TreeTop *prevTreeTop = treeTop;
629
TR::TreeTop *nextTreeTop = treeTop->getNextTreeTop();
630
while (i < numChildren)
631
{
632
TR::Node *childToBeAnchored = parent->getChild(i);
633
TR::TreeTop *anchorTree = TR::TreeTop::create(self()->comp(), TR::Node::create(TR::treetop, 1, childToBeAnchored), NULL, NULL);
634
prevTreeTop->join(anchorTree);
635
anchorTree->join(nextTreeTop);
636
prevTreeTop = anchorTree;
637
i++;
638
}
639
TR::TransformUtil::removeTree(self()->comp(), treeTop);
640
return;
641
}
642
}
643
644
if (parent->getOpCode().isFunctionCall())
645
{
646
// J9
647
//
648
// Hiding compressedref logic from CodeGen doesn't seem to be good practice; the evaluator always needs the uncompressedref node for the write barrier.
// Therefore, this part is deprecated. It'll be removed once P and Z update their corresponding evaluators.
650
static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject");
651
if (self()->comp()->useCompressedPointers() && (UseOldCompareAndSwapObject || !(self()->comp()->target().cpu.isX86() || self()->comp()->target().cpu.isARM64())))
652
{
653
TR::MethodSymbol *methodSymbol = parent->getSymbol()->castToMethodSymbol();
654
// In Java9 Unsafe could be the jdk.internal JNI method or the sun.misc ordinary method wrapper,
655
// while in Java8 it can only be the sun.misc package which will itself contain the JNI method.
656
// Test for isNative to distinguish between them.
657
if ((methodSymbol->getRecognizedMethod() == TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z) &&
658
methodSymbol->isNative() &&
659
(!TR::Compiler->om.canGenerateArraylets() || parent->isUnsafeGetPutCASCallOnNonArray()) && parent->isSafeForCGToFastPathUnsafeCall())
660
{
661
TR_BitVector childrenToBeLowered(parent->getNumChildren(), self()->comp()->trMemory(), stackAlloc);
662
childrenToBeLowered.set(3);
663
childrenToBeLowered.set(4);
664
self()->lowerCompressedRefs(treeTop, parent, visitCount, &childrenToBeLowered);
665
}
666
}
667
}
668
669
// J9
670
//
671
if (parent->getOpCode().hasSymbolReference() &&
672
(parent->getSymbolReference() == self()->comp()->getSymRefTab()->findThisRangeExtensionSymRef()))
673
TR::Node::recreate(parent, TR::treetop);
674
675
676
// J9
677
//
678
if (parent->getOpCode().isCall() &&
679
!parent->getSymbolReference()->isUnresolved() &&
680
parent->getSymbolReference()->getSymbol()->getMethodSymbol() &&
681
!parent->getSymbolReference()->getSymbol()->castToMethodSymbol()->isHelper() &&
682
!parent->getSymbolReference()->getSymbol()->castToMethodSymbol()->isSystemLinkageDispatch() &&
683
parent->getSymbolReference()->getSymbol()->getResolvedMethodSymbol())
684
{
685
//this code should match the one in genInvoke (Walker.cpp)
686
if (parent->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_getAddressAsPrimitive32 ||
687
parent->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_getAddressAsPrimitive64
688
)
689
parent->removeChild(0);
690
691
if (parent->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_getAddressAsPrimitive32)
692
TR::Node::recreate(parent, TR::a2i);
693
else if (parent->getSymbolReference()->getSymbol()->castToResolvedMethodSymbol()->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_getAddressAsPrimitive64)
694
TR::Node::recreate(parent, TR::a2l);
695
}
696
697
// J9
698
//
699
if (self()->comp()->useCompressedPointers())
700
{
701
if (parent->getOpCodeValue() == TR::compressedRefs)
702
self()->lowerCompressedRefs(treeTop, parent, visitCount, NULL);
703
}
704
else if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
705
{
706
self()->createReferenceReadBarrier(treeTop, parent);
707
}
708
709
// J9
710
//
711
// Prepare to lower dual operators
712
//
713
for (int32_t childCount = 0; childCount < parent->getNumChildren(); childCount++)
714
{
715
self()->lowerDualOperator(parent, childCount, treeTop);
716
}
717
718
}
719
720
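// Convert an aloadi of a collected reference (or a generic int shadow of address type) into
// an ardbari read barrier. If the load was the reference checked by a NULLCHK at this
// treetop, re-anchor the null check on a PassThrough in a new treetop before this one;
// otherwise anchor the converted load under its own treetop.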
void
721
J9::CodeGenerator::createReferenceReadBarrier(TR::TreeTop* treeTop, TR::Node* parent)
722
{
723
if (parent->getOpCodeValue() != TR::aloadi)
724
return;
725
726
TR::Symbol* symbol = parent->getSymbolReference()->getSymbol();
727
// isCollectedReference() responds false to generic int shadows because their type
728
// is int. However, address type generic int shadows refer to collected slots.
729
730
if (symbol == TR::comp()->getSymRefTab()->findGenericIntShadowSymbol() || symbol->isCollectedReference())
731
{
732
TR::Node::recreate(parent, TR::ardbari);
733
if (treeTop->getNode()->getOpCodeValue() == TR::NULLCHK &&
734
treeTop->getNode()->getChild(0)->getOpCodeValue() != TR::PassThrough &&
735
treeTop->getNode()->getChild(0)->getChild(0) == parent)
736
{
737
treeTop->insertBefore(TR::TreeTop::create(self()->comp(),
738
TR::Node::createWithSymRef(TR::NULLCHK, 1, 1,
739
TR::Node::create(TR::PassThrough, 1, parent),
740
treeTop->getNode()->getSymbolReference())));
741
treeTop->getNode()->setSymbolReference(NULL);
742
TR::Node::recreate(treeTop->getNode(), TR::treetop);
743
}
744
else if (treeTop->getNode()->getOpCodeValue() == TR::NULLCHK &&
745
treeTop->getNode()->getChild(0) == parent)
746
{
747
treeTop->insertBefore(TR::TreeTop::create(self()->comp(),
748
TR::Node::createWithSymRef(TR::NULLCHK, 1, 1,
749
TR::Node::create(TR::PassThrough, 1, parent->getChild(0)),
750
treeTop->getNode()->getSymbolReference())));
751
treeTop->getNode()->setSymbolReference(NULL);
752
TR::Node::recreate(treeTop->getNode(), TR::treetop);
753
}
754
else
755
{
756
treeTop->insertBefore(TR::TreeTop::create(self()->comp(), TR::Node::create(parent, TR::treetop, 1, parent)));
757
}
758
}
759
760
}
761
762
void
763
J9::CodeGenerator::lowerTreeIfNeeded(
764
TR::Node *node,
765
int32_t childNumberOfNode,
766
TR::Node *parent,
767
TR::TreeTop *tt)
768
{
769
TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->comp()->fe());
770
OMR::CodeGeneratorConnector::lowerTreeIfNeeded(node, childNumberOfNode, parent, tt);
771
772
if (node->getOpCode().isCall() &&
773
!node->getSymbol()->castToMethodSymbol()->isHelper())
774
{
775
TR::RecognizedMethod rm = node->getSymbol()->castToMethodSymbol()->getRecognizedMethod();
776
777
if(rm == TR::java_lang_invoke_MethodHandle_invokeBasic ||
778
rm == TR::java_lang_invoke_MethodHandle_linkToStatic ||
779
rm == TR::java_lang_invoke_MethodHandle_linkToSpecial ||
780
rm == TR::java_lang_invoke_MethodHandle_linkToVirtual ||
781
rm == TR::java_lang_invoke_MethodHandle_linkToInterface)
782
{
783
// invokeBasic and linkTo* are signature-polymorphic, so the VM needs to know the number of argument slots
784
// for the INL call in order to locate the start of the arguments on the stack. The arg slot count is stored
785
// in vmThread.tempSlot.
786
//
787
// Furthermore, for unresolved invokedynamic and invokehandle bytecodes, we create a dummy TR_ResolvedMethod call to
788
// linkToStatic. The appendix object in the invoke cache array entry could be NULL, which we cannot determine at compile
789
// time when the callSite/invokeCache table entries are unresolved. The VM would have to remove the appendix
790
// object, which would require knowing the number of stack slots of the ROM method signature plus the slots occupied
791
// by the receiver object (for invokehandle only) and appendix object. This would be equivalent to the number of
792
// parameter slots of the linkToStatic call - 1.
793
// To pass that information, we store to vmThread.floatTemp1 field. The name of the field is
794
// misleading, as it is an lconst/iconst being stored. If the linkToStatic call is not for an unresolved
795
// invokedynamic/invokehandle, then the JIT would not create a push of a null appendix object, so the VM would not
796
// need to do any stack adjustments. In those cases, -1 is stored in floatTemp1. This is also done for linkToSpecial,
797
// as it shares the same handling mechanism in the VM. No stack adjustments are necessary for linkToSpecial, so the value
798
// stored in floatTemp1 will always be -1.
799
TR::Node * numArgsNode = NULL;
800
TR::Node * numArgSlotsNode = NULL;
801
TR::Node * tempSlotStoreNode = NULL;
802
TR::Node * floatTemp1StoreNode = NULL;
803
bool is64Bit = self()->comp()->target().is64Bit();
804
TR::ILOpCodes storeOpCode;
805
int32_t numParameterStackSlots = node->getSymbol()->castToResolvedMethodSymbol()->getNumParameterSlots();
806
if (is64Bit)
807
{
808
storeOpCode = TR::lstore;
809
numArgSlotsNode = TR::Node::lconst(node, numParameterStackSlots);
810
}
811
else
812
{
813
storeOpCode = TR::istore;
814
numArgSlotsNode = TR::Node::iconst(node, numParameterStackSlots);
815
}
816
tempSlotStoreNode = TR::Node::createStore(self()->comp()->getSymRefTab()->findOrCreateVMThreadTempSlotFieldSymbolRef(),
817
numArgSlotsNode,
818
storeOpCode);
819
tempSlotStoreNode->setByteCodeIndex(node->getByteCodeIndex());
820
TR::TreeTop::create(self()->comp(), tt->getPrevTreeTop(), tempSlotStoreNode);
821
822
if (rm == TR::java_lang_invoke_MethodHandle_linkToStatic || rm == TR::java_lang_invoke_MethodHandle_linkToSpecial)
823
{
824
int32_t numArgs;
825
if (node->getSymbolReference()->getSymbol()->isDummyResolvedMethod())
826
numArgs = numParameterStackSlots - 1 ;
827
else
828
numArgs = -1;
829
830
numArgsNode = is64Bit ? TR::Node::lconst(node, numArgs) :
831
TR::Node::iconst(node, numArgs);
832
833
floatTemp1StoreNode = TR::Node::createStore(self()->comp()->getSymRefTab()->findOrCreateVMThreadFloatTemp1SymbolRef(),
834
numArgsNode,
835
storeOpCode);
836
floatTemp1StoreNode->setByteCodeIndex(node->getByteCodeIndex());
837
TR::TreeTop::create(self()->comp(), tt->getPrevTreeTop(), floatTemp1StoreNode);
838
}
839
}
840
}
841
842
// J9
843
//
844
// if we find this iterator method inlined in a scorching method,
// we should attempt to prefetch where it's used, for performance;
// structure is needed to determine the loop size and choose the proper prefetch stride
847
if (!self()->shouldBuildStructure() &&
848
(self()->comp()->getMethodHotness() >= scorching) &&
849
!tt->getEnclosingBlock()->isCold() &&
850
strstr(fej9->sampleSignature(node->getOwningMethod(), 0, 0, self()->trMemory()),"java/util/TreeMap$UnboundedValueIterator.next()"))
851
{
852
self()->setShouldBuildStructure();
853
}
854
855
// J9
856
if (node->getOpCode().isCall() &&
857
node->isUnsafePutOrderedCall() &&
858
node->isDontInlinePutOrderedCall())
859
{
860
// Remove this treetop
861
tt->getPrevTreeTop()->setNextTreeTop(tt->getNextTreeTop());
862
tt->getNextTreeTop()->setPrevTreeTop(tt->getPrevTreeTop());
863
tt->getNode()->recursivelyDecReferenceCount();
864
return;
865
}
866
867
// J9
868
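// Rewrite recognized java.nio.Bits.copyToByteArray/copyFromByteArray calls as a primitive
// TR::arraycopy: the Java-side address is computed as array base + contiguous array header
// size + offset, the native address is used directly, and the copy is marked as a forward
// copy of Int8 elements.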
if (!self()->comp()->getOption(TR_DisableUnsafe) &&
869
node->getOpCode().isCall() &&
870
node->getOpCodeValue() == TR::call &&
871
!TR::Compiler->om.canGenerateArraylets() &&
872
((node->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_nio_Bits_copyToByteArray) ||
873
(node->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_nio_Bits_copyFromByteArray)) &&
874
!fej9->isAnyMethodTracingEnabled(node->getSymbol()->castToResolvedMethodSymbol()->getResolvedMethod()->getPersistentIdentifier()) &&
875
performTransformation(self()->comp(), "%s Change recognized nio call to arraycopy [%p] \n", OPT_DETAILS, node))
876
{
877
bool from = false;
878
if (node->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_nio_Bits_copyFromByteArray)
879
from = true;
880
881
TR::Node *nativeSrc;
882
TR::Node *javaTarget;
883
TR::Node *nativeOffset;
884
885
if (from)
886
{
887
nativeSrc = node->getChild(2);
888
javaTarget = node->getFirstChild();
889
nativeOffset = node->getSecondChild();
890
}
891
else
892
{
893
nativeSrc = node->getFirstChild();
894
javaTarget = node->getSecondChild();
895
nativeOffset = node->getChild(2);
896
}
897
898
TR::Node *javaOffset;
899
if (self()->comp()->target().is64Bit())
900
javaOffset = TR::Node::lconst(node, (int64_t) TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
901
else
902
javaOffset = TR::Node::iconst(node, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
903
904
TR::Node *len = node->getChild(3);
905
906
TR::Node *oldNativeSrc = nativeSrc;
907
TR::Node *oldNativeOffset = nativeOffset;
908
TR::Node *oldLen = len;
909
910
if (self()->comp()->target().is32Bit())
911
{
912
nativeSrc = TR::Node::create(TR::l2i, 1, nativeSrc);
913
nativeOffset = TR::Node::create(TR::l2i, 1, nativeOffset);
914
len = TR::Node::create(TR::l2i, 1, len);
915
}
916
917
TR::Node::recreate(node, TR::arraycopy);
918
919
TR::Node *nativeAddr;
920
TR::Node *javaAddr;
921
922
if (self()->comp()->target().is32Bit())
923
{
924
nativeAddr = nativeSrc;
925
javaOffset = TR::Node::create(TR::iadd, 2, javaOffset, nativeOffset);
926
javaAddr = TR::Node::create(TR::aiadd, 2, javaTarget, javaOffset);
927
}
928
else
929
{
930
nativeAddr = nativeSrc;
931
javaOffset = TR::Node::create(TR::ladd, 2, javaOffset, nativeOffset);
932
javaAddr = TR::Node::create(TR::aladd, 2, javaTarget, javaOffset);
933
}
934
935
node->setNumChildren(3);
936
937
if (from)
938
{
939
node->setAndIncChild(0, javaAddr);
940
node->setAndIncChild(1, nativeAddr);
941
node->setAndIncChild(2, len);
942
}
943
else
944
{
945
node->setAndIncChild(0, nativeAddr);
946
node->setAndIncChild(1, javaAddr);
947
node->setAndIncChild(2, len);
948
}
949
950
javaTarget->recursivelyDecReferenceCount();
951
oldNativeSrc->recursivelyDecReferenceCount();
952
oldNativeOffset->recursivelyDecReferenceCount();
953
oldLen->recursivelyDecReferenceCount();
954
955
node->setArrayCopyElementType(TR::Int8);
956
node->setForwardArrayCopy(true);
957
}
958
959
// J9
960
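// For relocatable (AOT) compiles, anchor a loadaddr that feeds an instanceof/checkcast under
// its own treetop ahead of the current tree.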
if (self()->comp()->compileRelocatableCode() && (node->getOpCodeValue() == TR::loadaddr) && parent && ((parent->getOpCodeValue() == TR::instanceof) || (parent->getOpCodeValue() == TR::checkcast)))
961
{
962
TR::TreeTop::create(self()->comp(), tt->getPrevTreeTop(), TR::Node::create(TR::treetop, 1, node));
963
}
964
965
// J9
966
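// On 32-bit targets without support for inlined atomic long volatiles, rewrite resolved
// volatile Int64 loads/stores as calls to the TR_volatileReadLong/TR_volatileWriteLong
// runtime helpers, materializing the field address and preserving any NULLCHK by
// re-anchoring it on a PassThrough of the base object.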
if (node->getOpCode().hasSymbolReference() &&
967
(node->getOpCode().isLoad() ||
968
node->getOpCode().isStore()))
969
{
970
TR::SymbolReference *symRef = node->getSymbolReference();
971
TR::Symbol *symbol = symRef->getSymbol();
972
if (symbol->isVolatile() && node->getDataType() == TR::Int64 && !symRef->isUnresolved() && self()->comp()->target().is32Bit() &&
973
!self()->getSupportsInlinedAtomicLongVolatiles())
974
{
975
bool isLoad = false;
976
TR::SymbolReference * volatileLongSymRef = NULL;
977
if (node->getOpCode().isLoadVar())
978
{
979
volatileLongSymRef = self()->comp()->getSymRefTab()->findOrCreateRuntimeHelper(TR_volatileReadLong, false, false, true);
980
isLoad = true;
981
}
982
else
983
volatileLongSymRef = self()->comp()->getSymRefTab()->findOrCreateRuntimeHelper(TR_volatileWriteLong, false, false, true);
984
985
node->setSymbolReference(volatileLongSymRef);
986
987
TR::Node * address = NULL;
988
if (node->getOpCode().isIndirect())
989
address = node->getFirstChild();
990
991
TR::Node * addrNode = NULL;
992
if (address)
993
{
994
if (symRef->getOffset() == 0)
995
addrNode = address;
996
else
997
{
998
addrNode = TR::Node::create(TR::aiadd, 2, address,
999
TR::Node::create(node, TR::iconst, 0, symRef->getOffset()));
1000
addrNode->setIsInternalPointer(true);
1001
}
1002
}
1003
1004
if (isLoad)
1005
{
1006
if (node->getOpCode().isIndirect())
1007
{
1008
if (tt->getNode()->getOpCodeValue() == TR::NULLCHK)
1009
{
1010
TR::Node * nullchkNode =
1011
TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, TR::Node::create(TR::PassThrough, 1, address), tt->getNode()->getSymbolReference());
1012
tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(self()->comp(), nullchkNode));
1013
TR::Node::recreate(tt->getNode(), TR::treetop);
1014
}
1015
node->setNumChildren(1);
1016
node->setAndIncChild(0, addrNode);
1017
}
1018
else
1019
{
1020
TR::Node * statics = TR::Node::createWithSymRef(node, TR::loadaddr, 0, symRef);
1021
node->setNumChildren(1);
1022
node->setAndIncChild(0, statics);
1023
}
1024
1025
if ((tt->getNode()->getOpCodeValue() != TR::treetop) ||
1026
(tt->getNode()->getFirstChild() != node))
1027
{
1028
TR::Node * ttNode = TR::Node::create(TR::treetop, 1, node);
1029
tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(self()->comp(), ttNode));
1030
}
1031
}
1032
else
1033
{
1034
if (node->getOpCode().isIndirect())
1035
{
1036
if (tt->getNode()->getOpCodeValue() == TR::NULLCHK)
1037
{
1038
TR::Node * nullchkNode =
1039
TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, TR::Node::create(TR::PassThrough, 1, address), tt->getNode()->getSymbolReference());
1040
tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(self()->comp(), nullchkNode));
1041
TR::Node::recreate(tt->getNode(), TR::treetop);
1042
}
1043
1044
node->setNumChildren(2);
1045
node->setChild(0, node->getSecondChild());
1046
node->setAndIncChild(1, addrNode);
1047
}
1048
else
1049
{
1050
TR::Node * statics = TR::Node::createWithSymRef(node, TR::loadaddr, 0, symRef);
1051
node->setNumChildren(2);
1052
node->setAndIncChild(1, statics);
1053
}
1054
1055
TR::Node * ttNode = tt->getNode();
1056
if (ttNode == node)
1057
{
1058
TR::Node * newTTNode = TR::Node::create(TR::treetop, 1, ttNode);
1059
tt->setNode(newTTNode);
1060
}
1061
}
1062
1063
if (isLoad)
1064
TR::Node::recreate(node, TR::lcall);
1065
else
1066
TR::Node::recreate(node, TR::call);
1067
1068
if (address)
1069
address->recursivelyDecReferenceCount();
1070
}
1071
}
1072
1073
// J9 (currentTimeMillis & OSR)
1074
if (node->getOpCode().isStore())
1075
{
1076
if ((node->getType().isInt64() &&
1077
node->isNOPLongStore()) ||
1078
(node->getSymbol()->isAutoOrParm() &&
1079
node->storedValueIsIrrelevant()))
1080
{
1081
TR_ASSERT(node == tt->getNode(), "A store is expected to be the root of its treetop");
1082
1083
// Remove this treetop
1084
tt->getPrevTreeTop()->setNextTreeTop(tt->getNextTreeTop());
1085
tt->getNextTreeTop()->setPrevTreeTop(tt->getPrevTreeTop());
1086
node->recursivelyDecReferenceCount();
1087
}
1088
else
1089
{
1090
// Needed for OSR
1091
//
1092
_localsThatAreStored->set(node->getSymbolReference()->getReferenceNumber());
1093
}
1094
}
1095
// J9
1096
else if (node->getOpCodeValue() == TR::monent ||
1097
node->getOpCodeValue() == TR::monexit ||
1098
node->getOpCodeValue() == TR::tstart )
1099
{
1100
TR_OpaqueClassBlock * monClass = node->getMonitorClass(self()->comp()->getCurrentMethod());
1101
if (monClass)
1102
self()->addMonClass(node, monClass);
1103
//Clear the hidden second child that may be used by code generation
1104
node->setMonitorClassInNode(NULL);
1105
}
1106
1107
1108
// J9
1109
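// For lock reservation: if the class of the method owning this monent is marked reservable
// but a native Object.wait/notify call can be reached before the matching monexit, clear the
// reservable flag (and, under JITServer, tell the client to do the same).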
if (self()->comp()->getOption(TR_ReservingLocks) &&
1110
node->getOpCodeValue() == TR::monent)
1111
{
1112
TR_OpaqueMethodBlock *owningMethod = node->getOwningMethod();
1113
TR_OpaqueClassBlock *classPointer = fej9->getClassOfMethod(owningMethod);
1114
TR_PersistentClassInfo * persistentClassInfo =
1115
self()->comp()->getPersistentInfo()->getPersistentCHTable()->findClassInfoAfterLocking(classPointer, self()->comp());
1116
1117
if (persistentClassInfo && persistentClassInfo->isReservable())
1118
{
1119
bool allowedToReserve = !scanForNativeMethodsUntilMonitorNode(tt->getNextTreeTop(), self()->comp());
1120
if (!allowedToReserve)
1121
{
1122
persistentClassInfo->setReservable(false);
1123
#if defined(J9VM_OPT_JITSERVER)
1124
// This is currently the only place where this flag gets cleared. For JITServer, we should propagate it to the client,
1125
// to avoid having to call scanForNativeMethodsUntilMonitorNode again.
1126
if (auto stream = TR::CompilationInfo::getStream())
1127
{
1128
stream->write(JITServer::MessageType::CHTable_clearReservable, classPointer);
1129
stream->read<JITServer::Void>();
1130
}
1131
#endif /* defined(J9VM_OPT_JITSERVER) */
1132
}
1133
}
1134
}
1135
1136
// J9
1137
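// When debug counters are enabled, prepend counters at this call site: optional per-bytecode,
// per-jitted-body, and per-method invocation counters, plus caller and callee counters keyed
// by caller index and bytecode index.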
if ( self()->comp()->getOptions()->enableDebugCounters()
1138
&& node->getOpCode().isCall()
1139
&& node->getSymbol()->getMethodSymbol() // compjazz 45988: zEmulator arrayset currently isCall and uses a generic int shadow. Can't assume it's a method.
1140
&& !node->getSymbol()->castToMethodSymbol()->isHelper())
1141
{
1142
bool insertByteCode = TR::Options::_debugCounterInsertByteCode;
1143
bool insertJittedBody = TR::Options::_debugCounterInsertJittedBody;
1144
bool insertMethod = TR::Options::_debugCounterInsertMethod;
1145
1146
const char *caller = self()->comp()->signature();
1147
const char *callee = node->getSymbol()->castToMethodSymbol()->getMethod()->signature(self()->trMemory(), stackAlloc);
1148
TR_ByteCodeInfo &bcInfo = node->getByteCodeInfo();
1149
if (insertByteCode)
1150
{
1151
TR::DebugCounter::prependDebugCounter(self()->comp(), TR::DebugCounter::debugCounterName(self()->comp(),
1152
"compilationReport.instructions:byByteCode.numInvocations.(%s)=%d", caller, node->getByteCodeInfo().getByteCodeIndex()), tt);
1153
}
1154
if (insertJittedBody)
1155
{
1156
TR::DebugCounter::prependDebugCounter(self()->comp(), TR::DebugCounter::debugCounterName(self()->comp(),
1157
"compilationReport.instructions:byJittedBody.numInvocations.(%s).%s", caller, self()->comp()->getHotnessName()), tt);
1158
}
1159
if (insertMethod)
1160
{
1161
TR::DebugCounter::prependDebugCounter(self()->comp(), TR::DebugCounter::debugCounterName(self()->comp(),
1162
"compilationReport.instructions:byMethod.numInvocations.(%s)", caller), tt);
1163
}
1164
TR::DebugCounter::prependDebugCounter(self()->comp(), TR::DebugCounter::debugCounterName(self()->comp(), "callers/(%s)/%d=%d", caller, bcInfo.getCallerIndex(), bcInfo.getByteCodeIndex()), tt);
1165
TR::DebugCounter::prependDebugCounter(self()->comp(), TR::DebugCounter::debugCounterName(self()->comp(), "callees/(%s)/(%s)/%d=%d", callee, caller, bcInfo.getCallerIndex(), bcInfo.getByteCodeIndex()), tt);
1166
}
1167
1168
// J9
1169
//
1170
// We uncommon all the address children of a direct to JNI call
1171
// because if the loadaddr is commoned across a GC point, we cannot mark the register
1172
// as collected (since it is not a Java object) and we can have a problem if we do not
1173
// mark in some other way if the stack grows (and is moved) at the GC point. The problem
1174
// will be that the register will continue to point at the old stack.
1175
//
1176
if (node->getOpCode().isCall() &&
1177
node->getSymbol()->getMethodSymbol() &&
1178
node->isPreparedForDirectJNI())
1179
{
1180
int32_t i;
1181
for (i = 0; i < node->getNumChildren(); ++i)
1182
{
1183
TR::Node * n = node->getChild(i);
1184
if (n->getDataType() == TR::Address)
1185
{
1186
//TR_ASSERT((n->getOpCodeValue() == TR::loadaddr), "Address child of JNI call is not a loadaddr\n");
1187
if ((n->getOpCodeValue() == TR::loadaddr) &&
1188
(n->getReferenceCount() > 1) &&
1189
n->getSymbol()->isAutoOrParm())
1190
{
1191
//printf("Uncommoned address child of JNI call in %s\n", comp()->signature());
1192
TR::Node *dupChild = n->duplicateTree();
1193
node->setAndIncChild(i, dupChild);
1194
n->recursivelyDecReferenceCount();
1195
}
1196
}
1197
}
1198
}
1199
1200
// J9
1201
// code to push recompilation of methods whose caller is scorching
1202
if (self()->comp()->getOption(TR_EnableRecompilationPushing) &&
1203
!self()->getCurrentBlock()->isCold() &&
1204
self()->comp()->allowRecompilation() &&
1205
self()->comp()->getMethodHotness()>=veryHot &&
1206
node->getOpCode().isCall() &&
1207
self()->comp()->getPersistentInfo()->getNumLoadedClasses() < TR::Options::_bigAppThreshold &&
1208
node->getSymbol()->getMethodSymbol() &&
1209
!node->isPreparedForDirectJNI())
1210
{
1211
bool pushCall = true;
1212
TR::MethodSymbol *methodSymbol = node->getSymbol()->getMethodSymbol();
1213
TR::SymbolReference *methodSymRef = node->getSymbolReference();
1214
1215
if (methodSymRef->isUnresolved())
1216
{
1217
pushCall = false;
1218
}
1219
else if (!node->getOpCode().isCallDirect() || methodSymbol->isVirtual())
1220
{
1221
TR_ResolvedMethod *resolvedMethod = node->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod();
1222
if (!resolvedMethod || node->isTheVirtualCallNodeForAGuardedInlinedCall() ||
1223
resolvedMethod->virtualMethodIsOverridden() ||
1224
resolvedMethod->isAbstract() ||
1225
(resolvedMethod == self()->comp()->getCurrentMethod()))
1226
{
1227
pushCall = false;
1228
}
1229
}
1230
1231
if (pushCall)
1232
{
1233
if (!((methodSymbol && methodSymbol->getResolvedMethodSymbol() &&
1234
methodSymbol->getResolvedMethodSymbol()->getResolvedMethod() &&
1235
methodSymbol->getResolvedMethodSymbol()->getResolvedMethod()->isInterpretedForHeuristics()) ||
1236
methodSymbol->isVMInternalNative() ||
1237
methodSymbol->isHelper() ||
1238
methodSymbol->isNative() ||
1239
methodSymbol->isSystemLinkageDispatch() ||
1240
methodSymbol->isJITInternalNative()) &&
1241
methodSymbol->getResolvedMethodSymbol() &&
1242
methodSymbol->getResolvedMethodSymbol()->getResolvedMethod())
1243
{
1244
TR_PersistentJittedBodyInfo * bodyInfo = ((TR_ResolvedJ9Method*) methodSymbol->getResolvedMethodSymbol()->getResolvedMethod())->getExistingJittedBodyInfo();
1245
//printf("Pushing node %p\n", node);
1246
//fflush(stdout);
1247
if (bodyInfo &&
1248
bodyInfo->getHotness() <= warm && !bodyInfo->getIsProfilingBody())
1249
{
1250
TR_ResolvedMethod *method = methodSymbol->castToResolvedMethodSymbol()->getResolvedMethod();
1251
int isInLoop = -1;
1252
TR::Block * block = self()->getCurrentBlock();
1253
1254
TR_BlockStructure * blockStructure = block->getStructureOf();
1255
if (blockStructure)
1256
{
1257
TR_Structure *parentStructure = blockStructure->getParent();
1258
while (parentStructure)
1259
{
1260
TR_RegionStructure *region = parentStructure->asRegion();
1261
if (region->isNaturalLoop() ||
1262
region->containsInternalCycles())
1263
{
1264
isInLoop = region->getNumber();
1265
break;
1266
}
1267
parentStructure = parentStructure->getParent();
1268
}
1269
}
1270
1271
//printf ("Scorching method %s is calling warm (or called method) %s, in block_%d with frequency %d, is in loop %d\n", comp()->getMethodSymbol()->signature(), fej9->sampleSignature(method->getPersistentIdentifier(), 0, 0, trMemory()), getCurrentBlock()->getNumber(), getCurrentBlock()->getFrequency(), isInLoop);
1272
if ((self()->getCurrentBlock()->getFrequency() > MAX_COLD_BLOCK_COUNT) || (isInLoop && self()->getCurrentBlock()->getFrequency()==0))
1273
{
1274
bodyInfo->setCounter(1);
1275
bodyInfo->setIsPushedForRecompilation();
1276
}
1277
}
1278
}
1279
}
1280
}
1281
1282
// J9
1283
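// If an instanceof feeds a system-linkage or direct-JNI call at this treetop, anchor it under
// its own treetop first so that it is evaluated before the call's linkage setup.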
if (node->getOpCodeValue() == TR::instanceof)
1284
{
1285
TR::Node * topNode = tt->getNode();
1286
if (topNode->getNumChildren() > 0)
1287
topNode = topNode->getFirstChild();
1288
1289
if (topNode->getOpCode().isCall() &&
1290
topNode->getSymbol()->getMethodSymbol() &&
1291
(topNode->getSymbol()->getMethodSymbol()->isSystemLinkageDispatch() ||
1292
topNode->isPreparedForDirectJNI()))
1293
{
1294
TR::Node * ttNode = TR::Node::create(TR::treetop, 1, node);
1295
tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(self()->comp(), ttNode));
1296
}
1297
}
1298
1299
// J9
1300
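// A NULLCHK whose single-use, non-volatile load child is not otherwise needed can perform the
// check through a PassThrough instead of evaluating the load.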
if (node->getOpCodeValue() == TR::NULLCHK)
1301
{
1302
if ((node->getFirstChild()->getReferenceCount() == 1) &&
1303
node->getFirstChild()->getOpCode().isLoadVar() &&
1304
!node->getFirstChild()->getSymbolReference()->getSymbol()->isVolatile())
1305
{
1306
TR::Node::recreate(node->getFirstChild(), TR::PassThrough);
1307
}
1308
}
1309
1310
// J9
1311
//
1312
// Anchor the node either to relieve register pressure (performance)
// or to ensure instanceof doesn't have a CALL parent node (correctness)
//
1315
char *anchoringReason = "register hog";
1316
switch (node->getOpCodeValue())
1317
{
1318
// Extract heavy register pressure trees when dictated at the start of the walk
1319
// (typically IA32 on system linkage calls when there is one fewer register).
1320
case TR::ldiv:
1321
case TR::lrem:
1322
case TR::lcmp:
1323
{
1324
1325
if (!self()->removeRegisterHogsInLowerTreesWalk())
1326
break;
1327
}
1328
// Instanceof might require this transformation, either because of register pressure
// or to ensure it has no CALL parent node
1330
case TR::instanceof:
1331
{
1332
// For correctness, all we really need here is to ensure instanceof
// doesn't have a native call as a parent. But even a Java call would
// create control flow to set up linkage, which might conflict
// with the instanceof control flow.
1336
if(parent->getOpCode().isCall())
1337
{
1338
anchoringReason = "call-like";
1339
}
1340
1341
else if (!self()->removeRegisterHogsInLowerTreesWalk())
1342
break;
1343
1344
TR::Node *ttNode = TR::Node::create(TR::treetop, 1, node);
1345
tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(self()->comp(), ttNode));
1346
if (self()->comp()->getOption(TR_TraceCG))
1347
traceMsg(self()->comp(), "Anchoring %s node %s [%p] under treetop [%p]\n", anchoringReason, node->getOpCode().getName(), node, ttNode);
1348
break;
1349
}
1350
default:
1351
break;
1352
1353
}
1354
1355
}
1356
1357
1358
static bool isArraySizeSymbolRef(TR::SymbolReference *s, TR::SymbolReferenceTable *symRefTab)
1359
{
1360
// TODO: Move to compile/SymbolReferenceTable.hpp
1361
return (s!=NULL) && (s == symRefTab->findContiguousArraySizeSymbolRef() || s == symRefTab->findDiscontiguousArraySizeSymbolRef());
1362
}
1363
1364
1365
void
1366
J9::CodeGenerator::moveUpArrayLengthStores(TR::TreeTop *insertionPoint)
1367
{
1368
// 174954: Until TR::arraylength has a symref with proper aliasing, we have to
1369
// make sure that stores to the array length field occur before all arraylength
1370
// trees. Good news is that all such stores are inserted by escape
1371
// analysis, so they always have a loadaddr as one child and a constant as
1372
// the other, so they can be trivially moved to the top of the block.
1373
//
1374
for (TR::TreeTop *tt = insertionPoint->getNextTreeTop(); tt; tt = tt->getNextTreeTop())
1375
{
1376
if (tt->getNode()->getOpCodeValue() == TR::BBStart && !tt->getNode()->getBlock()->isExtensionOfPreviousBlock())
1377
break;
1378
TR::Node *store = tt->getNode()->getStoreNode();
1379
if (store && store->getOpCode().isStoreIndirect() && isArraySizeSymbolRef(store->getSymbolReference(), self()->symRefTab()))
1380
{
1381
if (store->getFirstChild()->getOpCodeValue() != TR::loadaddr)
1382
{
1383
dumpOptDetails(self()->comp(), "MOVE UP ARRAY LENGTH STORES: WARNING! First child of %p is %s; expected loadaddr\n", store, store->getFirstChild()->getOpCode().getName());
1384
}
1385
else if (!store->getSecondChild()->getOpCode().isLoadConst())
1386
{
1387
dumpOptDetails(self()->comp(), "MOVE UP ARRAY LENGTH STORES: WARNING! Second child of %p is %s; expected const\n", store, store->getSecondChild()->getOpCode().getName());
1388
}
1389
else
1390
{
1391
dumpOptDetails(self()->comp(), "MOVE UP ARRAY LENGTH STORES: Moving %s %p up after %p\n", tt->getNode()->getOpCode().getName(), tt->getNode(), insertionPoint->getNode());
1392
tt->unlink(false);
1393
insertionPoint->insertAfter(tt);
1394
insertionPoint = tt;
1395
}
1396
}
1397
}
1398
}
1399
1400
1401
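// Zero out (or, under TR_PoisonDeadSlots, poison) a monitored auto on the edge from 'block'
// to 'succBlock'. If the successor has multiple predecessors and no suitable split block
// exists yet, the edge is split (duplicating any GlRegDeps) and the store is prepended to
// the new block, which is remembered in 'newBlocks'; the store itself is recorded in
// 'fsdStores'.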
void
1402
J9::CodeGenerator::zeroOutAutoOnEdge(
1403
TR::SymbolReference *liveAutoSymRef,
1404
TR::Block *block,
1405
TR::Block *succBlock,
1406
TR::list<TR::Block*> *newBlocks,
1407
TR_ScratchList<TR::Node> *fsdStores)
1408
{
1409
TR::Block *storeBlock = NULL;
1410
if ((succBlock->getPredecessors().size() == 1))
1411
storeBlock = succBlock;
1412
else
1413
{
1414
for (auto blocksIt = newBlocks->begin(); blocksIt != newBlocks->end(); ++blocksIt)
1415
{
1416
if ((*blocksIt)->getSuccessors().front()->getTo()->asBlock() == succBlock)
1417
{
1418
storeBlock = *blocksIt;
1419
break;
1420
}
1421
}
1422
}
1423
1424
if (!storeBlock)
1425
{
1426
TR::TreeTop * startTT = succBlock->getEntry();
1427
TR::Node * startNode = startTT->getNode();
1428
TR::Node * glRegDeps = NULL;
1429
if (startNode->getNumChildren() > 0)
1430
glRegDeps = startNode->getFirstChild();
1431
1432
TR::Block * newBlock = block->splitEdge(block, succBlock, self()->comp(), NULL, false);
1433
1434
if (debug("traceFSDSplit"))
1435
diagnostic("\nSplitting edge, create new intermediate block_%d", newBlock->getNumber());
1436
1437
if (glRegDeps)
1438
{
1439
TR::Node *duplicateGlRegDeps = glRegDeps->duplicateTree();
1440
TR::Node *origDuplicateGlRegDeps = duplicateGlRegDeps;
1441
duplicateGlRegDeps = TR::Node::copy(duplicateGlRegDeps);
1442
newBlock->getEntry()->getNode()->setNumChildren(1);
1443
newBlock->getEntry()->getNode()->setAndIncChild(0, origDuplicateGlRegDeps);
1444
for (int32_t i = origDuplicateGlRegDeps->getNumChildren() - 1; i >= 0; --i)
1445
{
1446
TR::Node * dep = origDuplicateGlRegDeps->getChild(i);
1447
if(self()->comp()->getOption(TR_MimicInterpreterFrameShape) || self()->comp()->getOption(TR_PoisonDeadSlots))
1448
dep->setRegister(NULL); // basically need to do prepareNodeForInstructionSelection
1449
duplicateGlRegDeps->setAndIncChild(i, dep);
1450
}
1451
if(self()->comp()->getOption(TR_MimicInterpreterFrameShape) || self()->comp()->getOption(TR_PoisonDeadSlots))
1452
{
1453
TR::Node *glRegDepsParent;
1454
if ( (newBlock->getSuccessors().size() == 1)
1455
&& newBlock->getSuccessors().front()->getTo()->asBlock()->getEntry() == newBlock->getExit()->getNextTreeTop())
1456
{
1457
glRegDepsParent = newBlock->getExit()->getNode();
1458
}
1459
else
1460
{
1461
glRegDepsParent = newBlock->getExit()->getPrevTreeTop()->getNode();
1462
TR_ASSERT(glRegDepsParent->getOpCodeValue() == TR::Goto, "Expected block to fall through or end in goto; it ends with %s %s\n",
1463
self()->getDebug()->getName(glRegDepsParent->getOpCodeValue()), self()->getDebug()->getName(glRegDepsParent));
1464
}
1465
if (self()->comp()->getOption(TR_TraceCG))
1466
traceMsg(self()->comp(), "zeroOutAutoOnEdge: glRegDepsParent is %s\n", self()->getDebug()->getName(glRegDepsParent));
1467
glRegDepsParent->setNumChildren(1);
1468
glRegDepsParent->setAndIncChild(0, duplicateGlRegDeps);
1469
}
1470
else //original path
1471
{
1472
newBlock->getExit()->getNode()->setNumChildren(1);
1473
newBlock->getExit()->getNode()->setAndIncChild(0, duplicateGlRegDeps);
1474
}
1475
}
1476
1477
newBlock->setLiveLocals(new (self()->trHeapMemory()) TR_BitVector(*succBlock->getLiveLocals()));
1478
newBlock->getEntry()->getNode()->setLabel(generateLabelSymbol(self()));
1479
1480
1481
if (self()->comp()->getOption(TR_PoisonDeadSlots))
1482
{
1483
if (self()->comp()->getOption(TR_TraceCG))
1484
traceMsg(self()->comp(), "POISON DEAD SLOTS --- New Block Created %d\n", newBlock->getNumber());
1485
newBlock->setIsCreatedAtCodeGen();
1486
}
1487
1488
newBlocks->push_front(newBlock);
1489
storeBlock = newBlock;
1490
}
1491
TR::Node *storeNode;
1492
1493
if (self()->comp()->getOption(TR_PoisonDeadSlots))
1494
storeNode = self()->generatePoisonNode(block, liveAutoSymRef);
1495
else
1496
storeNode = TR::Node::createStore(liveAutoSymRef, TR::Node::aconst(block->getEntry()->getNode(), 0));
1497
1498
if (storeNode)
1499
{
1500
TR::TreeTop *storeTree = TR::TreeTop::create(self()->comp(), storeNode);
1501
storeBlock->prepend(storeTree);
1502
fsdStores->add(storeNode);
1503
}
1504
}
1505
1506
1507
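// Top-level driver for instruction selection: computes live monitor state for each block when
// the method may contain monitors, establishes the default pre-prologue size, and then walks
// the treetops selecting instructions.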
void
1508
J9::CodeGenerator::doInstructionSelection()
1509
{
1510
J9::SetMonitorStateOnBlockEntry::LiveMonitorStacks liveMonitorStacks(
1511
(J9::SetMonitorStateOnBlockEntry::LiveMonitorStacksComparator()),
1512
J9::SetMonitorStateOnBlockEntry::LiveMonitorStacksAllocator(self()->comp()->trMemory()->heapMemoryRegion()));
1513
1514
1515
1516
// Set default value for pre-prologue size
1517
//
1518
TR::ResolvedMethodSymbol * methodSymbol = self()->comp()->getJittedMethodSymbol();
1519
self()->setPrePrologueSize(4 + (methodSymbol->isJNI() ? 4 : 0));
1520
1521
if (self()->comp()->getOption(TR_TraceCG))
1522
diagnostic("\n<selection>");
1523
1524
if (self()->comp()->getOption(TR_TraceCG) || debug("traceGRA"))
1525
self()->comp()->getDebug()->setupToDumpTreesAndInstructions("Performing Instruction Selection");
1526
1527
self()->beginInstructionSelection();
1528
1529
{
1530
TR::StackMemoryRegion stackMemoryRegion(*self()->trMemory());
1531
1532
TR_BitVector * liveLocals = self()->getLiveLocals();
1533
TR_BitVector nodeChecklistBeforeDump(self()->comp()->getNodeCount(), self()->trMemory(), stackAlloc, growable);
1534
1535
/*
1536
To enable instruction scheduling (both in the compiler and in out-of-order hardware),
1537
we would prefer not to reuse the same memory to represent multiple temps inside
1538
a tight loop. At higher opt levels, therefore, we only free variable size symrefs at
1539
back edges. To prevent pathological cases from consuming too much stack space, we set a
1540
cap on the number of extended basic blocks before we stop waiting for a back edge and free
1541
our temps anyway.
1542
*/
1543
const uint32_t MAX_EBBS_BEFORE_FREEING_VARIABLE_SIZED_SYMREFS = 10;
1544
uint32_t numEBBsSinceFreeingVariableSizeSymRefs = 0;
1545
1546
TR_BitVector * liveMonitors = 0;
1547
TR_Stack<TR::SymbolReference *> * liveMonitorStack = 0;
1548
int32_t numMonitorLocals = 0;
1549
static bool traceLiveMonEnv = feGetEnv("TR_traceLiveMonitors") ? true : false;
1550
bool traceLiveMon = self()->comp()->getOption(TR_TraceLiveMonitorMetadata) || traceLiveMonEnv;
1551
1552
_lmmdFailed = false;
1553
if (self()->comp()->getMethodSymbol()->mayContainMonitors())
1554
{
1555
if(traceLiveMon)
1556
traceMsg(self()->comp(),"In doInstructionSelection: Method may contain monitors\n");
1557
1558
if (!liveLocals)
1559
self()->comp()->getMethodSymbol()->resetLiveLocalIndices();
1560
1561
ListIterator<TR::AutomaticSymbol> locals(&self()->comp()->getMethodSymbol()->getAutomaticList());
1562
for (TR::AutomaticSymbol * a = locals.getFirst(); a; a = locals.getNext())
1563
if (a->holdsMonitoredObject())
1564
{
1565
if(traceLiveMon)
1566
traceMsg(self()->comp(),"\tSymbol %p contains monitored object\n",a);
1567
if (!liveLocals)
1568
{
1569
if(traceLiveMon)
1570
traceMsg(self()->comp(),"\tsetting LiveLocalIndex to %d on symbol %p\n",numMonitorLocals+1,a);
1571
a->setLiveLocalIndex(numMonitorLocals++, self()->fe());
1572
}
1573
else if (a->getLiveLocalIndex() + 1 > numMonitorLocals)
1574
{
1575
if(traceLiveMon)
1576
traceMsg(self()->comp(),"\tsetting numMonitorLocals to %d while considering symbol %p\n",a->getLiveLocalIndex()+1,a);
1577
numMonitorLocals = a->getLiveLocalIndex() + 1;
1578
}
1579
}
1580
1581
if (numMonitorLocals)
1582
{
1583
J9::SetMonitorStateOnBlockEntry monitorState(self()->comp(), &liveMonitorStacks);
1584
1585
if(traceLiveMon)
1586
traceMsg(self()->comp(),"\tCreated monitorState %p\n",&monitorState);
1587
1588
monitorState.set(_lmmdFailed, traceLiveMon);
1589
if (traceLiveMon)
1590
traceMsg(self()->comp(), "found numMonitorLocals %d\n", numMonitorLocals);
1591
}
1592
else if(traceLiveMon)
1593
traceMsg(self()->comp(),"\tnumMonitorLocals = %d\n",numMonitorLocals);
1594
}
1595
1596
TR::SymbolReference **liveLocalSyms = NULL;
1597
TR_BitVector *unsharedSymsBitVector = NULL;
1598
int32_t maxLiveLocalIndex = -1;
1599
TR::list<TR::Block*> newBlocks(getTypedAllocator<TR::Block*>(self()->comp()->allocator()));
1600
TR_ScratchList<TR::Node> fsdStores(self()->trMemory());
1601
if (self()->comp()->getOption(TR_MimicInterpreterFrameShape) || self()->comp()->getOption(TR_PoisonDeadSlots))
1602
{
1603
if (self()->comp()->areSlotsSharedByRefAndNonRef() || self()->comp()->getOption(TR_PoisonDeadSlots))
1604
{
1605
TR_ScratchList<TR::SymbolReference> participatingLocals(self()->trMemory());
1606
1607
TR::SymbolReference *autoSymRef = NULL;
1608
int32_t symRefNumber;
1609
int32_t symRefCount = self()->comp()->getSymRefCount();
1610
TR::SymbolReferenceTable *symRefTab = self()->comp()->getSymRefTab();
1611
for (symRefNumber = symRefTab->getIndexOfFirstSymRef(); symRefNumber < symRefCount; symRefNumber++)
1612
{
1613
autoSymRef = symRefTab->getSymRef(symRefNumber);
1614
if (autoSymRef &&
1615
autoSymRef->getSymbol() &&
1616
autoSymRef->getSymbol()->isAuto() &&
1617
(autoSymRef->getSymbol()->castToAutoSymbol()->getLiveLocalIndex() != (uint16_t)-1))
1618
{
1619
TR::AutomaticSymbol * autoSym = autoSymRef->getSymbol()->castToAutoSymbol();
1620
if (methodSymbol->getAutomaticList().find(autoSym))
1621
{
1622
participatingLocals.add(autoSymRef);
1623
1624
if (autoSym->getLiveLocalIndex() > maxLiveLocalIndex)
1625
maxLiveLocalIndex = autoSym->getLiveLocalIndex();
1626
}
1627
}
1628
}
1629
1630
liveLocalSyms = (TR::SymbolReference **)self()->trMemory()->allocateStackMemory((maxLiveLocalIndex+1)*sizeof(TR::SymbolReference *));
1631
memset(liveLocalSyms, 0, (maxLiveLocalIndex+1)*sizeof(TR::SymbolReference *));
1632
unsharedSymsBitVector = new (self()->trStackMemory()) TR_BitVector(maxLiveLocalIndex+1, self()->trMemory(), stackAlloc);
1633
1634
ListIterator<TR::SymbolReference> participatingLocalsIt(&participatingLocals);
1635
for (autoSymRef = participatingLocalsIt.getFirst(); autoSymRef; autoSymRef = participatingLocalsIt.getNext())
1636
{
1637
TR::AutomaticSymbol * autoSym = autoSymRef->getSymbol()->castToAutoSymbol();
1638
liveLocalSyms[autoSym->getLiveLocalIndex()] = autoSymRef;
1639
if (!autoSym->isSlotSharedByRefAndNonRef())
1640
{
1641
//dumpOptDetails("Unshared symRef %d live local index %d\n", autoSymRef->getReferenceNumber(), autoSym->getLiveLocalIndex());
1642
unsharedSymsBitVector->set(autoSym->getLiveLocalIndex());
1643
}
1644
}
1645
}
1646
else
1647
liveLocals = NULL;
1648
}
1649
1650
bool fixedUpBlock = false;
1651
1652
for (TR::TreeTop *tt = self()->comp()->getStartTree(); tt; tt = self()->getCurrentEvaluationTreeTop()->getNextTreeTop())
1653
{
1654
if(traceLiveMon)
1655
traceMsg(self()->comp(),"\tWalking TreeTops at tt %p with node %p\n",tt,tt->getNode());
1656
1657
TR::Instruction *prevInstr = self()->getAppendInstruction();
1658
TR::Node * node = tt->getNode();
1659
TR::ILOpCodes opCode = node->getOpCodeValue();
1660
1661
TR::Node * firstChild = node->getNumChildren() > 0 ? node->getFirstChild() : 0;
1662
1663
if (opCode == TR::BBStart)
1664
{
1665
fixedUpBlock = false;
1666
TR::Block *block = node->getBlock();
1667
self()->setCurrentEvaluationBlock(block);
1668
self()->resetMethodModifiedByRA();
1669
1670
liveMonitorStack = (liveMonitorStacks.find(block->getNumber()) != liveMonitorStacks.end()) ?
1671
liveMonitorStacks[block->getNumber()] :
1672
NULL;
1673
1674
if (!block->isExtensionOfPreviousBlock())
1675
{
1676
// If we are keeping track of live locals, set up the live locals for
1677
// this block
1678
//
1679
if (liveLocals)
1680
{
1681
if (block->getLiveLocals())
1682
liveLocals = new (self()->trHeapMemory()) TR_BitVector(*block->getLiveLocals());
1683
else
1684
{
1685
liveLocals = new (self()->trHeapMemory()) TR_BitVector(*liveLocals);
1686
liveLocals->empty();
1687
}
1688
1689
if (self()->comp()->areSlotsSharedByRefAndNonRef() && unsharedSymsBitVector)
1690
{
1691
*liveLocals |= *unsharedSymsBitVector;
1692
}
1693
}
1694
1695
1696
if (liveMonitorStack)
1697
{
1698
liveMonitors = new (self()->trHeapMemory()) TR_BitVector(numMonitorLocals, self()->trMemory());
1699
if (traceLiveMon)
1700
traceMsg(self()->comp(), "created liveMonitors bitvector at block_%d for stack %p size %d\n",
1701
block->getNumber(), liveMonitorStack,
1702
liveMonitorStack->size());
1703
for (int32_t i = liveMonitorStack->size() - 1; i >= 0; --i)
1704
{
1705
if (traceLiveMon)
1706
traceMsg(self()->comp(), "about to set liveMonitors for symbol %p\n",(*liveMonitorStack)[i]->getSymbol());
1707
liveMonitors->set((*liveMonitorStack)[i]->getSymbol()->castToRegisterMappedSymbol()->getLiveLocalIndex());
1708
if (traceLiveMon)
1709
traceMsg(self()->comp(), "setting livemonitor %d at block_%d\n",
1710
(*liveMonitorStack)[i]->getSymbol()->castToRegisterMappedSymbol()->getLiveLocalIndex(),
1711
block->getNumber());
1712
}
1713
}
1714
else
1715
{
1716
liveMonitors = 0;
1717
if (traceLiveMon)
1718
traceMsg(self()->comp(), "no liveMonitorStack for block_%d\n", block->getNumber());
1719
}
1720
numEBBsSinceFreeingVariableSizeSymRefs++;
1721
}
1722
1723
if (self()->getDebug())
1724
self()->getDebug()->roundAddressEnumerationCounters();
1725
1726
#if DEBUG
1727
// Verify that we are only being more conservative by inheriting the live
1728
// local information from the previous block.
1729
//
1730
else if (liveLocals && debug("checkBlockEntryLiveLocals"))
1731
{
1732
TR_BitVector *extendedBlockLocals = new (self()->trStackMemory()) TR_BitVector(*(block->getLiveLocals()));
1733
*extendedBlockLocals -= *(liveLocals);
1734
1735
TR_ASSERT(extendedBlockLocals->isEmpty(),
1736
"Live local information is *less* pessimistic!\n");
1737
}
1738
#endif
1739
}
1740
else if (opCode == TR::BBEnd)
1741
{
1742
TR::Block *b = self()->getCurrentEvaluationBlock();
1743
1744
// checks for consistent monitorStack
1745
//
1746
for (auto e = b->getSuccessors().begin(); e != b->getSuccessors().end(); ++e)
1747
if ((*e)->getTo() == self()->comp()->getFlowGraph()->getEnd())
1748
{
1749
// block could end in a throw,
1750
//
1751
TR::TreeTop *lastRealTT = b->getLastRealTreeTop();
1752
// last warm blocks could end in a goto
1753
//
1754
if (lastRealTT->getNode()->getOpCode().isGoto())
1755
lastRealTT = lastRealTT->getPrevTreeTop();
1756
1757
TR::Node *n = lastRealTT->getNode();
1758
if (n->getOpCodeValue() == TR::treetop ||
1759
n->getOpCode().isCheck())
1760
n = n->getFirstChild();
1761
1762
bool endsInThrow = false;
1763
if (n->getOpCode().isCall() &&
1764
(n->getSymbolReference()->getReferenceNumber() == TR_aThrow))
1765
endsInThrow = true;
1766
else if (n->getOpCodeValue() == TR::Return)
1767
{
1768
// a check that is going to fail
1769
//
1770
TR::TreeTop *prev = lastRealTT->getPrevTreeTop();
1771
if ((prev->getNode()->getOpCodeValue() == TR::asynccheck) ||
1772
(prev->getNode()->getOpCodeValue() == TR::treetop))
1773
prev = prev->getPrevTreeTop();
1774
if (prev->getNode()->getOpCode().isCheck())
1775
endsInThrow = true;
1776
}
1777
1778
if (liveMonitorStack &&
1779
liveMonitorStack->size() != 0 &&
1780
!endsInThrow)
1781
dumpOptDetails(self()->comp(), "liveMonitorStack must be empty, unbalanced monitors found! %d\n",
1782
liveMonitorStack->size());
1783
1784
if (traceLiveMon)
1785
{
1786
traceMsg(self()->comp(), "liveMonitorStack %p at CFG end (syncMethod %d)", liveMonitorStack,
1787
self()->comp()->getMethodSymbol()->isSynchronised());
1788
if (liveMonitorStack)
1789
traceMsg(self()->comp(), " size %d\n", liveMonitorStack->size());
1790
else
1791
traceMsg(self()->comp(), " size empty\n");
1792
}
1793
break;
1794
}
1795
1796
bool endOfEBB = !(b->getNextBlock() && b->getNextBlock()->isExtensionOfPreviousBlock());
1797
if (endOfEBB &&
1798
(b->branchesBackwards() ||
1799
numEBBsSinceFreeingVariableSizeSymRefs > MAX_EBBS_BEFORE_FREEING_VARIABLE_SIZED_SYMREFS))
1800
{
1801
if (self()->traceBCDCodeGen())
1802
traceMsg(self()->comp(),"\tblock_%d branches backwards, so free all symbols in the _variableSizeSymRefPendingFreeList\n",b->getNumber());
1803
self()->freeAllVariableSizeSymRefs();
1804
numEBBsSinceFreeingVariableSizeSymRefs = 0;
1805
}
1806
}
1807
1808
if (((opCode == TR::BBEnd) && !fixedUpBlock) ||
1809
node->getOpCode().isBranch() ||
1810
node->getOpCode().isSwitch())
1811
{
1812
fixedUpBlock = true;
1813
//GCMAP
1814
if ( (self()->comp()->getOption(TR_MimicInterpreterFrameShape) && self()->comp()->areSlotsSharedByRefAndNonRef() ) || self()->comp()->getOption(TR_PoisonDeadSlots))
1815
{
1816
// TODO : look at last warm block code above
1817
//
1818
if ((!self()->comp()->getOption(TR_PoisonDeadSlots)&& liveLocals) || (self()->comp()->getOption(TR_PoisonDeadSlots) && self()->getCurrentEvaluationBlock()->getLiveLocals()))
1819
{
1820
newBlocks.clear();
1821
TR::Block *block = self()->getCurrentEvaluationBlock();
1822
1823
TR_BitVectorIterator bvi(self()->comp()->getOption(TR_PoisonDeadSlots) ? *block->getLiveLocals(): *liveLocals);
1824
1825
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
1826
traceMsg(self()->comp(), "POISON DEAD SLOTS --- Parent Block Number: %d\n", block->getNumber());
1827
1828
1829
while (bvi.hasMoreElements())
1830
{
1831
int32_t liveLocalIndex = bvi.getNextElement();
1832
TR_ASSERT((liveLocalIndex <= maxLiveLocalIndex), "Symbol has live local index higher than computed max\n");
1833
TR::SymbolReference * liveAutoSymRef = liveLocalSyms[liveLocalIndex];
1834
1835
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
1836
traceMsg(self()->comp(), "POISON DEAD SLOTS --- Parent Block: %d, Maintained Live Local: %d\n", block->getNumber(), liveAutoSymRef->getReferenceNumber());
1837
1838
if(self()->comp()->getOption(TR_PoisonDeadSlots) && (!liveAutoSymRef || block->isCreatedAtCodeGen()))
1839
{
1840
//Don't process a block we created to poison a dead slot.
1841
continue;
1842
}
1843
1844
if(!liveAutoSymRef)
1845
{
1846
continue;
1847
}
1848
TR::AutomaticSymbol * liveAutoSym = liveAutoSymRef->getSymbol()->castToAutoSymbol();
1849
1850
//For slot poisoning, a monitored object is still in the GCMaps even if its liveness has ended.
1851
//
1852
if ((liveAutoSym->getType().isAddress() && liveAutoSym->isSlotSharedByRefAndNonRef()) || (self()->comp()->getOption(TR_PoisonDeadSlots) && !liveAutoSymRef->getSymbol()->holdsMonitoredObject()))
1853
{
1854
for (auto succ = block->getSuccessors().begin(); succ != block->getSuccessors().end();)
1855
{
1856
auto next = succ;
1857
++next;
1858
if ((*succ)->getTo() == self()->comp()->getFlowGraph()->getEnd())
1859
{
1860
succ = next;
1861
continue;
1862
}
1863
TR::Block *succBlock = (*succ)->getTo()->asBlock();
1864
1865
if (self()->comp()->getOption(TR_PoisonDeadSlots) && succBlock->isExtensionOfPreviousBlock())
1866
{
1867
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
1868
traceMsg(self()->comp(), "POISON DEAD SLOTS --- Successor Block Number %d is extension of Parent Block %d ... skipping \n", succBlock->getNumber(), block->getNumber());
1869
succ = next;
1870
continue; // We cannot poison in an extended block, as GC maps are still live for maintained live locals; even if this successor is the target of a jump, ignore it, since it could be an extension.
1871
}
1872
1873
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
1874
traceMsg(self()->comp(), "POISON DEAD SLOTS --- Successor Block Number %d, of Parent Block %d \n", succBlock->getNumber(), block->getNumber());
1875
1876
TR_BitVector *succLiveLocals = succBlock->getLiveLocals();
1877
if (succLiveLocals && !succLiveLocals->get(liveLocalIndex)) //added
1878
{
1879
bool zeroOut = true;
1880
TR_BitVectorIterator sbvi(*succLiveLocals);
1881
while (sbvi.hasMoreElements())
1882
{
1883
int32_t succLiveLocalIndex = sbvi.getNextElement();
1884
TR_ASSERT((succLiveLocalIndex <= maxLiveLocalIndex), "Symbol has live local index higher than computed max\n");
1885
TR::SymbolReference * succLiveAutoSymRef = liveLocalSyms[succLiveLocalIndex];
1886
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
1887
traceMsg(self()->comp(), "POISON DEAD SLOTS --- Successor Block %d contains live local %d\n", succBlock->getNumber(), succLiveAutoSymRef->getReferenceNumber());
1888
1889
TR::AutomaticSymbol * succLiveAutoSym = succLiveAutoSymRef->getSymbol()->castToAutoSymbol();
1890
if ( (succLiveAutoSym->getType().isAddress() && succLiveAutoSym->isSlotSharedByRefAndNonRef()) || self()->comp()->getOption(TR_PoisonDeadSlots))
1891
{
1892
if ((succLiveAutoSym->getGCMapIndex() == liveAutoSym->getGCMapIndex()) ||
1893
((TR::Symbol::convertTypeToNumberOfSlots(succLiveAutoSym->getDataType()) == 2) &&
1894
((succLiveAutoSym->getGCMapIndex()+1) == liveAutoSym->getGCMapIndex())))
1895
{
1896
zeroOut = false;
1897
break;
1898
}
1899
}
1900
}
1901
1902
if (zeroOut)
1903
{
1904
self()->comp()->getFlowGraph()->setStructure(0);
1905
self()->zeroOutAutoOnEdge(liveAutoSymRef, block, succBlock, &newBlocks, &fsdStores);
1906
}
1907
}
1908
succ = next;
1909
}
1910
1911
// TODO : Think about exc edges case below
1912
//
1913
for (auto esucc = block->getExceptionSuccessors().begin(); esucc != block->getExceptionSuccessors().end(); ++esucc)
1914
{
1915
TR::Block *esuccBlock = (*esucc)->getTo()->asBlock();
1916
//since we have asked the liveness analysis to assume that uses in the OSR block don't exist
1917
//in certain cases, this code thinks that some locals are dead in the OSR block so it tries
1918
//to zero them out. But we don't want that to happen
1919
if (esuccBlock->isOSRCodeBlock() || esuccBlock->isOSRCatchBlock())
1920
continue;
1921
TR_BitVector *esuccLiveLocals = esuccBlock->getLiveLocals();
1922
TR_ASSERT(esuccLiveLocals, "No live locals for successor block\n");
1923
if (!esuccLiveLocals->get(liveLocalIndex))
1924
{
1925
bool zeroOut = true;
1926
TR_BitVectorIterator sbvi(*esuccLiveLocals);
1927
while (sbvi.hasMoreElements())
1928
{
1929
int32_t succLiveLocalIndex = sbvi.getNextElement();
1930
TR_ASSERT((succLiveLocalIndex <= maxLiveLocalIndex), "Symbol has live local index higher than computed max\n");
1931
TR::SymbolReference * succLiveAutoSymRef = liveLocalSyms[succLiveLocalIndex];
1932
TR::AutomaticSymbol * succLiveAutoSym = succLiveAutoSymRef->getSymbol()->castToAutoSymbol();
1933
if ( succLiveAutoSym->getType().isAddress() && ( succLiveAutoSym->isSlotSharedByRefAndNonRef() || self()->comp()->getOption(TR_PoisonDeadSlots)))
1934
{
1935
if ((succLiveAutoSym->getGCMapIndex() == liveAutoSym->getGCMapIndex()) ||
1936
((TR::Symbol::convertTypeToNumberOfSlots(succLiveAutoSym->getDataType()) == 2) &&
1937
((succLiveAutoSym->getGCMapIndex()+1) == liveAutoSym->getGCMapIndex())))
1938
{
1939
zeroOut = false;
1940
break;
1941
}
1942
}
1943
}
1944
1945
if (zeroOut)
1946
{
1947
TR::TreeTop *cursorTree = esuccBlock->getEntry()->getNextTreeTop();
1948
TR::TreeTop *endTree = esuccBlock->getExit();
1949
bool storeExists = false;
1950
while (cursorTree != endTree)
1951
{
1952
TR::Node *cursorNode = cursorTree->getNode();
1953
if (cursorNode->getOpCode().isStore())
1954
{
1955
if ((cursorNode->getSymbolReference() == liveAutoSymRef) &&
1956
(cursorNode->getFirstChild()->getOpCodeValue() == TR::aconst) &&
1957
(cursorNode->getFirstChild()->getAddress() == 0 || cursorNode->getFirstChild()->getAddress() == 0xdeadf00d ))
1958
{
1959
storeExists = true;
1960
break;
1961
}
1962
}
1963
else
1964
break;
1965
1966
cursorTree = cursorTree->getNextTreeTop();
1967
}
1968
1969
if (!storeExists)
1970
{
1971
TR::Node *storeNode;
1972
if (self()->comp()->getOption(TR_PoisonDeadSlots))
1973
storeNode = self()->generatePoisonNode(block, liveAutoSymRef);
1974
else
1975
storeNode = TR::Node::createStore(liveAutoSymRef, TR::Node::aconst(block->getEntry()->getNode(), 0));
1976
if (storeNode)
1977
{
1978
TR::TreeTop *storeTree = TR::TreeTop::create(self()->comp(), storeNode);
1979
esuccBlock->prepend(storeTree);
1980
fsdStores.add(storeNode);
1981
}
1982
}
1983
}
1984
}
1985
}
1986
}
1987
}
1988
}
1989
}
1990
}
1991
1992
self()->setLiveLocals(liveLocals);
1993
self()->setLiveMonitors(liveMonitors);
1994
1995
if (self()->comp()->getOption(TR_TraceCG) || debug("traceGRA"))
1996
{
1997
// any evaluator that handles multiple trees will need to dump
1998
// the others
1999
self()->comp()->getDebug()->saveNodeChecklist(nodeChecklistBeforeDump);
2000
self()->comp()->getDebug()->dumpSingleTreeWithInstrs(tt, NULL, true, false, true, true);
2001
trfprintf(self()->comp()->getOutFile(),"\n------------------------------\n");
2002
trfflush(self()->comp()->getOutFile());
2003
}
2004
2005
self()->setLastInstructionBeforeCurrentEvaluationTreeTop(self()->getAppendInstruction());
2006
self()->setCurrentEvaluationTreeTop(tt);
2007
self()->setImplicitExceptionPoint(NULL);
2008
2009
bool doEvaluation = true;
2010
if ((node->getOpCode().isStore() &&
2011
node->getSymbol()->holdsMonitoredObject() &&
2012
!node->isLiveMonitorInitStore()) || node->getOpCode().getOpCodeValue() == TR::monexitfence)
2013
{
2014
if (traceLiveMon)
2015
{
2016
traceMsg(self()->comp(), "liveMonitorStack %p ", liveMonitorStack);
2017
if (liveMonitorStack)
2018
traceMsg(self()->comp(), " size %d\n", liveMonitorStack->size());
2019
else
2020
traceMsg(self()->comp(), " size empty\n");
2021
traceMsg(self()->comp(), "Looking at Node %p with symbol %p",node,node->getSymbol());
2022
}
2023
bool isMonent = node->getOpCode().getOpCodeValue() != TR::monexitfence;
2024
if (isMonent)
2025
{
2026
// monent
2027
if (liveMonitors)
2028
liveMonitors = new (self()->trHeapMemory()) TR_BitVector(*liveMonitors);
2029
else
2030
liveMonitors = new (self()->trHeapMemory()) TR_BitVector(numMonitorLocals, self()->trMemory());
2031
2032
// add this monent to the block's stack
2033
//
2034
if (liveMonitorStack)
2035
{
2036
liveMonitorStack->push(node->getSymbolReference());
2037
if (traceLiveMon)
2038
traceMsg(self()->comp(), "pushing symref %p (#%u) onto monitor stack\n", node->getSymbolReference(), node->getSymbolReference()->getReferenceNumber());
2039
}
2040
2041
liveMonitors->set(node->getSymbol()->castToRegisterMappedSymbol()->getLiveLocalIndex());
2042
2043
if (traceLiveMon)
2044
traceMsg(self()->comp(), "monitor %p went live at node %p\n", node->getSymbol(), node);
2045
}
2046
else if (!isMonent)
2047
{
2048
if (liveMonitorStack)
2049
{
2050
// monexit
2051
TR_ASSERT(liveMonitors, "inconsistent live monitor state");
2052
2053
// pop this monexit from the block's stack
2054
//
2055
if (!liveMonitorStack->isEmpty())
2056
{
2057
liveMonitors = new (self()->trHeapMemory()) TR_BitVector(*liveMonitors);
2058
2059
if (self()->comp()->getOption(TR_PoisonDeadSlots))
2060
{
2061
TR::SymbolReference *symRef = liveMonitorStack->pop();
2062
liveMonitors->reset(symRef->getSymbol()->castToRegisterMappedSymbol()->getLiveLocalIndex());
2063
TR::Node *storeNode = NULL;
2064
2065
if (self()->comp()->getOption(TR_TraceCG) && self()->comp()->getOption(TR_PoisonDeadSlots))
2066
traceMsg(self()->comp(), "POISON DEAD SLOTS --- MonExit Block Number: %d\n", self()->getCurrentEvaluationBlock()->getNumber());
2067
2068
storeNode = self()->generatePoisonNode(self()->getCurrentEvaluationBlock(), symRef);
2069
if (storeNode)
2070
{
2071
TR::TreeTop *storeTree = TR::TreeTop::create(self()->comp(), storeNode);
2072
self()->getCurrentEvaluationBlock()->prepend(storeTree);
2073
fsdStores.add(storeNode);
2074
}
2075
}
2076
else
2077
{
2078
TR::SymbolReference *symref = liveMonitorStack->pop();
2079
2080
if (traceLiveMon)
2081
traceMsg(self()->comp(), "popping symref %p (#%u) off monitor stack\n", symref, symref->getReferenceNumber());
2082
liveMonitors->reset(symref->getSymbol()->castToRegisterMappedSymbol()->getLiveLocalIndex());
2083
}
2084
2085
}
2086
else
2087
{
2088
dumpOptDetails(self()->comp(), "liveMonitorStack %p is inconsistently empty at node %p!\n", liveMonitorStack, node);
2089
if (liveMonitors)
2090
liveMonitors->empty();
2091
}
2092
2093
if (traceLiveMon)
2094
traceMsg(self()->comp(), "monitor %p went dead at node %p\n", node->getSymbol(), node);
2095
}
2096
// no need to generate code for this store
2097
//
2098
doEvaluation = false;
2099
}
2100
}
2101
2102
#ifdef TR_TARGET_S390
2103
if (self()->getAddStorageReferenceHints())
2104
self()->addStorageReferenceHints(node);
2105
#endif
2106
2107
2108
if (doEvaluation)
2109
self()->evaluate(node);
2110
2111
2112
if (self()->comp()->getOption(TR_TraceCG) || debug("traceGRA"))
2113
{
2114
TR::Instruction *lastInstr = self()->getAppendInstruction();
2115
tt->setLastInstruction(lastInstr == prevInstr ? 0 : lastInstr);
2116
}
2117
2118
if (liveLocals)
2119
{
2120
TR::AutomaticSymbol * liveSym = 0;
2121
if (debug("checkBlockEntryLiveLocals"))
2122
{
2123
// Check for a store into a local.
2124
// If so, this local becomes live at this point.
2125
//
2126
if (node->getOpCode().isStore())
2127
{
2128
liveSym = node->getSymbol()->getAutoSymbol();
2129
}
2130
2131
// Check for a loadaddr of a local object.
2132
// If so, this local object becomes live at this point.
2133
//
2134
else if (opCode == TR::treetop)
2135
{
2136
if (firstChild->getOpCodeValue() == TR::loadaddr)
2137
liveSym = firstChild->getSymbol()->getLocalObjectSymbol();
2138
}
2139
if (liveSym && liveSym->getLiveLocalIndex() == (uint16_t)-1)
2140
liveSym = NULL;
2141
}
2142
else
2143
{
2144
// Check for a store into a collected local reference.
2145
// If so, this local becomes live at this point.
2146
//
2147
if ((opCode == TR::astore) &&
2148
((!self()->comp()->getOption(TR_MimicInterpreterFrameShape)) ||
2149
(!self()->comp()->areSlotsSharedByRefAndNonRef()) ||
2150
(!fsdStores.find(node))))
2151
{
2152
liveSym = node->getSymbol()->getAutoSymbol();
2153
}
2154
2155
// Check for a loadaddr of a local object containing collected references.
2156
// If so, this local object becomes live at this point.
2157
//
2158
else if (opCode == TR::treetop)
2159
{
2160
if (firstChild->getOpCodeValue() == TR::loadaddr)
2161
liveSym = firstChild->getSymbol()->getLocalObjectSymbol();
2162
}
2163
if (liveSym && !liveSym->isCollectedReference())
2164
liveSym = NULL;
2165
}
2166
2167
bool newLiveLocals = false;
2168
if (liveSym)
2169
{
2170
liveLocals = new (self()->trHeapMemory()) TR_BitVector(*liveLocals);
2171
newLiveLocals = true;
2172
if (liveSym)
2173
liveLocals->set(liveSym->getLiveLocalIndex());
2174
}
2175
2176
// In interpreter-frame mode a reference can share a slot with a nonreference so when we see a store to a nonreference
2177
// we need to make sure that any reference that it shares a slot with is marked as dead
2178
//
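// Illustrative example (added for readability): if a two-slot non-reference store
// maps to GC map index k, any address-typed auto whose GC map index is k or k+1 is
// reset in liveLocals by the loop below, so its overwritten slot is no longer
// reported as a live reference.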
2179
if (self()->comp()->getOption(TR_MimicInterpreterFrameShape) && node->getOpCode().isStore() && opCode != TR::astore)
2180
{
2181
TR::AutomaticSymbol * nonRefStoreSym = node->getSymbol()->getAutoSymbol();
2182
if (nonRefStoreSym && (nonRefStoreSym->getGCMapIndex() != -1)) //defect 147894: don't check this for GCMapIndex = -1
2183
{
2184
bool isTwoSlots = ( TR::Symbol::convertTypeToNumberOfSlots(nonRefStoreSym->getDataType()) == 2);
2185
if (isTwoSlots || nonRefStoreSym->isSlotSharedByRefAndNonRef())
2186
{
2187
ListIterator<TR::AutomaticSymbol> autoIterator(&methodSymbol->getAutomaticList());
2188
if (!newLiveLocals)
2189
liveLocals = new (self()->trHeapMemory()) TR_BitVector(*liveLocals);
2190
for (TR::AutomaticSymbol * autoSym = autoIterator.getFirst(); autoSym; autoSym = autoIterator.getNext())
2191
if (autoSym->getType().isAddress() && autoSym->getLiveLocalIndex() != (uint16_t)-1)
2192
{
2193
if (autoSym->getGCMapIndex() == nonRefStoreSym->getGCMapIndex())
2194
liveLocals->reset(autoSym->getLiveLocalIndex());
2195
else if (isTwoSlots && autoSym->getGCMapIndex() == nonRefStoreSym->getGCMapIndex() + 1)
2196
liveLocals->reset(autoSym->getLiveLocalIndex());
2197
}
2198
}
2199
}
2200
}
2201
}
2202
2203
bool compactVSSStack = false;
2204
if (!self()->comp()->getOption(TR_DisableVSSStackCompaction))
2205
{
2206
if (self()->comp()->getMethodHotness() < hot)
2207
compactVSSStack = true;
2208
else if (self()->comp()->getOption(TR_ForceVSSStackCompaction))
2209
compactVSSStack = true;
2210
}
2211
2212
if (compactVSSStack && !_variableSizeSymRefPendingFreeList.empty())
2213
{
2214
TR::Node *ttNode = (node->getOpCodeValue() == TR::treetop) ? node->getFirstChild() : node;
2215
auto it = _variableSizeSymRefPendingFreeList.begin();
2216
TR::SymbolReference *symRef;
2217
while (it != _variableSizeSymRefPendingFreeList.end())
2218
{
2219
//Element is removed within freeVariableSizeSymRef. Need a reference to next element
2220
auto next = it;
2221
++next;
2222
TR_ASSERT((*it)->getSymbol()->isVariableSizeSymbol(),"symRef #%d must contain a variable size symbol\n",(*it)->getReferenceNumber());
2223
auto *sym = (*it)->getSymbol()->getVariableSizeSymbol();
2224
bool found = (std::find(_variableSizeSymRefFreeList.begin(), _variableSizeSymRefFreeList.end(), (*it)) != _variableSizeSymRefFreeList.end());
2225
if (self()->traceBCDCodeGen())
2226
{
2227
if (sym->getNodeToFreeAfter())
2228
traceMsg(self()->comp(),"pending free temps : looking at symRef #%d (%s) refCount %d sym->getNodeToFreeAfter() %p ttNode %p (find sym in list %d)\n",
2229
(*it)->getReferenceNumber(),self()->getDebug()->getName(sym),sym->getReferenceCount(),
2230
sym->getNodeToFreeAfter(),ttNode,found);
2231
else
2232
traceMsg(self()->comp(),"pending free temps : looking at symRef #%d (%s) refCount %d (find sym in list %d)\n",
2233
(*it)->getReferenceNumber(),self()->getDebug()->getName(sym),sym->getReferenceCount(),found);
2234
}
2235
if (!sym->isAddressTaken() && !found)
2236
{
2237
TR::Node *nodeToFreeAfter = sym->getNodeToFreeAfter();
2238
bool nodeToFreeAfterIsCurrentNode = nodeToFreeAfter && (ttNode==nodeToFreeAfter);
2239
if (sym->getReferenceCount() == 0 &&
2240
(!nodeToFreeAfter || nodeToFreeAfterIsCurrentNode))
2241
{
2242
self()->freeVariableSizeSymRef(*it); // will also remove sym from the pending free list
2243
}
2244
else if (sym->getReferenceCount() > 0 && nodeToFreeAfterIsCurrentNode)
2245
{
2246
if (self()->traceBCDCodeGen())
2247
traceMsg(self()->comp(),"\treset nodeToFreeAfter %p->NULL for sym %p with refCount %d > 0\n",nodeToFreeAfter,sym,sym->getReferenceCount());
2248
sym->setNodeToFreeAfter(NULL);
2249
}
2250
else
2251
{
2252
// We'd like to assert the following, but refcounts are unsigned, so we can't
2253
//TR_ASSERT(sym->getReferenceCount() >= 0,"sym %p refCount %d should be >= 0\n",sym,sym->getReferenceCount());
2254
}
2255
}
2256
it = next;
2257
}
2258
}
2259
2260
if (self()->comp()->getOption(TR_TraceCG) || debug("traceGRA"))
2261
{
2262
self()->comp()->getDebug()->restoreNodeChecklist(nodeChecklistBeforeDump);
2263
if (tt == self()->getCurrentEvaluationTreeTop())
2264
{
2265
trfprintf(self()->comp()->getOutFile(),"------------------------------\n");
2266
self()->comp()->getDebug()->dumpSingleTreeWithInstrs(tt, prevInstr->getNext(), true, true, true, false);
2267
}
2268
else
2269
{
2270
// dump all the trees that the evaluator handled
2271
trfprintf(self()->comp()->getOutFile(),"------------------------------");
2272
for (TR::TreeTop *dumptt = tt; dumptt != self()->getCurrentEvaluationTreeTop()->getNextTreeTop(); dumptt = dumptt->getNextTreeTop())
2273
{
2274
trfprintf(self()->comp()->getOutFile(),"\n");
2275
self()->comp()->getDebug()->dumpSingleTreeWithInstrs(dumptt, NULL, true, false, true, false);
2276
}
2277
// all instructions are on the tt tree
2278
self()->comp()->getDebug()->dumpSingleTreeWithInstrs(tt, prevInstr->getNext(), false, true, false, false);
2279
}
2280
trfflush(self()->comp()->getOutFile());
2281
}
2282
}
2283
2284
if (self()->traceBCDCodeGen())
2285
traceMsg(self()->comp(),"\tinstruction selection is complete so free all symbols in the _variableSizeSymRefPendingFreeList\n");
2286
2287
self()->freeAllVariableSizeSymRefs();
2288
2289
#if defined(TR_TARGET_S390)
2290
// Virtual function insertInstructionPrefetches is implemented only for the s390 platform;
2291
// for all other platforms the function is empty
2292
//
2293
self()->insertInstructionPrefetches();
2294
#endif
2295
} // Stack memory region ends
2296
2297
if (self()->comp()->getOption(TR_TraceCG) || debug("traceGRA"))
2298
self()->comp()->incVisitCount();
2299
2300
if (self()->getDebug())
2301
self()->getDebug()->roundAddressEnumerationCounters();
2302
2303
self()->endInstructionSelection();
2304
2305
if (self()->comp()->getOption(TR_TraceCG))
2306
diagnostic("</selection>\n");
2307
}
2308
2309
bool
2310
J9::CodeGenerator::allowGuardMerging()
2311
{
2312
return self()->fej9()->supportsGuardMerging();
2313
}
2314
2315
void
2316
J9::CodeGenerator::populateOSRBuffer()
2317
{
2318
2319
if (!self()->comp()->getOption(TR_EnableOSR))
2320
return;
2321
2322
//The following struct definitions come from VM include files and are intended as
2323
//a legend for the OSR buffer
2324
2325
// typedef struct J9OSRBuffer {
2326
// U_32 numberOfFrames;
2327
// // frames here - reachable by "(J9OSRFrame*)(buffer + 1)"
2328
// } J9OSRBuffer;
2329
// typedef struct J9OSRFrame {
2330
// J9Method *method;
2331
// U_8 *bytecodePC;
2332
// U_32 maxStack;
2333
// U_32 pendingStackHeight;
2334
// // stack slots here - reachable by "(UDATA*)(frame + 1)"
2335
// } J9OSRFrame;
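// Illustrative sketch (added; not from the VM headers above): each frame's data is
// its J9OSRFrame header followed by its stack slots, e.g.
//   UDATA *slots = (UDATA *)(frame + 1);   // first stack slot of this frame
// so a frame occupies roughly sizeof(J9OSRFrame) plus one UDATA per slot, and
// osrFrameIndex is advanced by osrMethodData->getTotalDataSize() once the frame
// has been populated below.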
2336
2337
self()->comp()->getOSRCompilationData()->buildSymRefOrderMap();
2338
const TR_Array<TR_OSRMethodData *>& methodDataArray = self()->comp()->getOSRCompilationData()->getOSRMethodDataArray();
2339
bool traceOSR = self()->comp()->getOption(TR_TraceOSR);
2340
uint32_t maxScratchBufferSize = 0;
2341
const int firstSymChildIndex = 2;
2342
2343
/*
2344
for (int32_t i = 0; i < methodDataArray.size(); ++i)
2345
for (int32_t j = i+1; j < methodDataArray.size(); ++j)
2346
TR_ASSERT((methodDataArray[i] == NULL && methodDataArray[j] == NULL) || (methodDataArray[i] != methodDataArray[j]),
2347
"methodDataArray elements %d and %d are equal\n", i, j);
2348
*/
2349
2350
TR::Block * block = NULL;
2351
for(TR::TreeTop * tt = self()->comp()->getStartTree(); tt; tt = tt->getNextTreeTop())
2352
{
2353
// Write pending pushes, parms, and locals to vmthread's OSR buffer
2354
2355
TR::Node* n = tt->getNode();
2356
if (n->getOpCodeValue() == TR::BBStart)
2357
{
2358
block = n->getBlock();
2359
continue;
2360
}
2361
if (n->getOpCodeValue() == TR::treetop && n->getNumChildren() == 1)
2362
n = n->getFirstChild();
2363
else
2364
continue;
2365
if (n->getOpCodeValue() != TR::call ||
2366
n->getSymbolReference()->getReferenceNumber() != TR_prepareForOSR)
2367
continue;
2368
2369
TR::Node *callNode = n;
2370
TR_OSRMethodData* osrMethodData = methodDataArray[callNode->getChild(1)->getInt()+1];
2371
TR_ASSERT(osrMethodData != NULL && osrMethodData->getOSRCodeBlock() != NULL,
2372
"osr method data or its block is NULL\n");
2373
2374
if (traceOSR)
2375
traceMsg(self()->comp(), "Lowering trees in OSR block_%d...\n", block->getNumber());
2376
2377
//osrFrameIndex is a field in the vmThread that is initialized by the VM to the offset
2378
//of the start of the first (deepest) frame in the OSR buffer
2379
//Once we are done with generating code that populates the current frame, we generate code
2380
//that advances this field so that it points to the next frame, at the end of the OSR code block
2381
//of each frame
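// Illustrative note (added): osrFrameIndex therefore walks the OSR buffer from the
// deepest frame outward; each TR_prepareForOSR call below fills its own frame, and
// the advance emitted at the end of this loop moves the index to the start of the
// next frame.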
2382
TR::ResolvedMethodSymbol *methSym = osrMethodData->getMethodSymbol();
2383
TR::Node *osrBufferNode = TR::Node::createLoad(callNode, self()->symRefTab()->findOrCreateOSRBufferSymbolRef());
2384
TR::Node *osrFrameIndex = TR::Node::createLoad(callNode, self()->symRefTab()->findOrCreateOSRFrameIndexSymbolRef());
2385
TR::Node *osrScratchBufferNode = TR::Node::createLoad(callNode, self()->symRefTab()->findOrCreateOSRScratchBufferSymbolRef());
2386
2387
TR::TreeTop* insertionPoint = tt->getPrevRealTreeTop();
2388
bool inlinesAnyMethod = osrMethodData->inlinesAnyMethod();
2389
2390
if (traceOSR)
2391
traceMsg(self()->comp(), "callerIndex %d: max pending push slots=%d, # of auto slots=%d, # of arg slots=%d\n",
2392
osrMethodData->getInlinedSiteIndex(), methSym->getNumPPSlots(),
2393
methSym->getResolvedMethod()->numberOfTemps(), methSym->getNumParameterSlots());
2394
2395
uint32_t numOfSymsThatShareSlot = 0;
2396
int32_t scratchBufferOffset = 0;
2397
for (int32_t child = firstSymChildIndex; child+2 < callNode->getNumChildren(); child += 3)
2398
{
2399
TR::Node* loadNode = callNode->getChild(child);
2400
int32_t symRefNumber = callNode->getChild(child+1)->getInt();
2401
int32_t symRefOrder = callNode->getChild(child+2)->getInt();
2402
TR::SymbolReference* symRef = self()->symRefTab()->getSymRef(symRefNumber);
2403
2404
int32_t specialCasedSlotIndex = -1;
2405
//if (methSym->getSyncObjectTemp() == symRef)
2406
if (symRef->getSymbol()->holdsMonitoredObject())
2407
specialCasedSlotIndex = methSym->getSyncObjectTempIndex();
2408
//else if (methSym->getThisTempForObjectCtor() == symRef)
2409
else if (symRef->getSymbol()->isThisTempForObjectCtor())
2410
specialCasedSlotIndex = methSym->getThisTempForObjectCtorIndex();
2411
//else if (methSym->getATCDeferredCountTemp() == symRef)
2412
2413
int32_t slotIndex = symRef->getCPIndex() >= methSym->getFirstJitTempIndex()
2414
? methSym->getFirstJitTempIndex()
2415
: symRef->getCPIndex();
2416
2417
if (symRef->getCPIndex() >= methSym->getFirstJitTempIndex())
2418
{
2419
TR_ASSERT(((slotIndex == methSym->getSyncObjectTempIndex()) ||
2420
(slotIndex == methSym->getThisTempForObjectCtorIndex())), "Unknown temp sym ref being written to the OSR buffer; probably needs special casing\n");
2421
}
2422
2423
if (specialCasedSlotIndex != -1)
2424
slotIndex = specialCasedSlotIndex;
2425
2426
int32_t symSize = symRef->getSymbol()->getSize();
2427
bool sharedSlot = (symRefOrder != -1);
2428
if (sharedSlot)
2429
{
2430
insertionPoint = self()->genSymRefStoreToArray(callNode, osrScratchBufferNode, NULL, loadNode, scratchBufferOffset, insertionPoint);
2431
osrMethodData->addScratchBufferOffset(slotIndex, symRefOrder, scratchBufferOffset);
2432
scratchBufferOffset += symSize;
2433
numOfSymsThatShareSlot++;
2434
}
2435
else
2436
{
2437
TR::DataType dt = symRef->getSymbol()->getDataType();
2438
bool takesTwoSlots = dt == TR::Int64 || dt == TR::Double;
2439
int32_t offset = osrMethodData->slotIndex2OSRBufferIndex(slotIndex, symSize, takesTwoSlots);
2440
insertionPoint = self()->genSymRefStoreToArray(callNode, osrBufferNode, osrFrameIndex, loadNode, offset, insertionPoint);
2441
}
2442
}
2443
2444
/*
2445
* dead slots are bookkept together with shared slots under involuntary OSR
2446
* increase this number to indicate the existence of entries for dead slots
2447
*/
2448
if (osrMethodData->hasSlotSharingOrDeadSlotsInfo() && numOfSymsThatShareSlot == 0)
2449
numOfSymsThatShareSlot++;
2450
2451
osrMethodData->setNumOfSymsThatShareSlot(numOfSymsThatShareSlot);
2452
maxScratchBufferSize = (maxScratchBufferSize > scratchBufferOffset) ? maxScratchBufferSize : scratchBufferOffset;
2453
2454
if (traceOSR)
2455
{
2456
traceMsg(self()->comp(), "%s %s %s: written out bytes in OSR buffer\n",
2457
osrMethodData->getInlinedSiteIndex() == -1 ? "Method," : "Inlined method,",
2458
inlinesAnyMethod? "inlines another method,": "doesn't inline any method,",
2459
methSym->signature(self()->trMemory()));
2460
}
2461
int32_t totalNumOfSlots = osrMethodData->getTotalNumOfSlots();
2462
//The OSR helper call will print the contents of the OSR buffer (if trace option is on)
2463
//and populate the OSR buffer with the correct values of the shared slots (if there is any)
2464
bool emitCall = false;
2465
2466
if ((numOfSymsThatShareSlot > 0) ||
2467
self()->comp()->getOption(TR_EnablePrepareForOSREvenIfThatDoesNothing))
2468
emitCall = true;
2469
2470
int32_t startIndex = 0;
2471
if (emitCall)
2472
startIndex = firstSymChildIndex;
2473
2474
for (int32_t i = startIndex; i < callNode->getNumChildren(); i++)
2475
callNode->getChild(i)->recursivelyDecReferenceCount();
2476
2477
if (emitCall)
2478
{
2479
callNode->setNumChildren(firstSymChildIndex+1);
2480
TR_ASSERT(totalNumOfSlots < (1 << 16) - 1, "only 16 bits are reserved for number of slots");
2481
TR_ASSERT(numOfSymsThatShareSlot < (1 << 16) -1, "only 16 bits are reserved for number of syms that share slots");
2482
callNode->setAndIncChild(firstSymChildIndex,
2483
TR::Node::create(callNode, TR::iconst, 0, totalNumOfSlots | (numOfSymsThatShareSlot << 16)));
2484
insertionPoint = tt;
2485
}
2486
else
2487
{
2488
TR::TreeTop *prev = tt->getPrevTreeTop();
2489
TR::TreeTop *next = tt->getNextTreeTop();
2490
prev->join(next);
2491
insertionPoint = prev;
2492
}
2493
2494
//at the end of each OSR code block, we need to advance osrFrameIndex such that
2495
//it points to the beginning of the next osr frame
2496
//osrFrameIndex += osrMethodData->getTotalDataSize();
2497
TR::TreeTop* osrFrameIndexAdvanceTreeTop = TR::TreeTop::create(self()->comp(),
2498
TR::Node::createStore(self()->symRefTab()->findOrCreateOSRFrameIndexSymbolRef(),
2499
TR::Node::create(TR::iadd, 2, osrFrameIndex,
2500
TR::Node::create(callNode, TR::iconst, 0, osrMethodData->getTotalDataSize())
2501
)
2502
)
2503
);
2504
insertionPoint->insertTreeTopsAfterMe(osrFrameIndexAdvanceTreeTop);
2505
}
2506
2507
2508
for (int32_t i = 0; i < methodDataArray.size(); i++)
2509
{
2510
TR_OSRMethodData* osrMethodData = methodDataArray[i];
2511
//osrMethodData can be NULL when the inlined method didn't cause a call to ILGen (e.g., a jni method)
2512
if (methodDataArray[i] == NULL)
2513
continue;
2514
//Initialize the number of syms that share slots to zero if it hasn't already been initialized.
2515
if (osrMethodData->getNumOfSymsThatShareSlot() == -1)
2516
{
2517
osrMethodData->setNumOfSymsThatShareSlot(0);
2518
}
2519
}
2520
2521
self()->comp()->getOSRCompilationData()->setMaxScratchBufferSize(maxScratchBufferSize);
2522
}
2523
2524
static void addValidationRecords(TR::CodeGenerator *cg)
2525
{
2526
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->comp()->fe());
2527
2528
TR::list<TR::AOTClassInfo*>* classInfo = cg->comp()->_aotClassInfo;
2529
if (!classInfo->empty())
2530
{
2531
for (auto info = classInfo->begin(); info != classInfo->end(); ++info)
2532
{
2533
traceMsg(cg->comp(), "processing AOT class info: %p in %s\n", *info, cg->comp()->signature());
2534
traceMsg(cg->comp(), "ramMethod: %p cp: %p cpIndex: %x relo %d\n", (*info)->_method, (*info)->_constantPool, (*info)->_cpIndex, (*info)->_reloKind);
2535
traceMsg(cg->comp(), "clazz: %p classChain: %p\n", (*info)->_clazz, (*info)->_classChain);
2536
2537
TR_OpaqueMethodBlock *ramMethod = (*info)->_method;
2538
2539
int32_t siteIndex = -1;
2540
2541
if (ramMethod != cg->comp()->getCurrentMethod()->getPersistentIdentifier()) // && info->_reloKind != TR_ValidateArbitraryClass)
2542
{
2543
int32_t i;
2544
for (i = 0; i < cg->comp()->getNumInlinedCallSites(); i++)
2545
{
2546
TR_InlinedCallSite &ics = cg->comp()->getInlinedCallSite(i);
2547
TR_OpaqueMethodBlock *inlinedMethod = fej9->getInlinedCallSiteMethod(&ics);
2548
2549
traceMsg(cg->comp(), "\tinline site %d inlined method %p\n", i, inlinedMethod);
2550
if (ramMethod == inlinedMethod)
2551
{
2552
traceMsg(cg->comp(), "\t\tmatch!\n");
2553
siteIndex = i;
2554
break;
2555
}
2556
}
2557
2558
if (i >= (int32_t) cg->comp()->getNumInlinedCallSites())
2559
{
2560
// this assumption isn't associated with a method directly in the compilation
2561
// so we can't use a constant pool approach to validate: transform into TR_ValidateArbitraryClass
2562
// kind of overkill for TR_ValidateStaticField, but still correct
2563
(*info)->_reloKind = TR_ValidateArbitraryClass;
2564
siteIndex = -1; // invalidate main compiled method
2565
traceMsg(cg->comp(), "\ttransformed into TR_ValidateArbitraryClass\n");
2566
}
2567
}
2568
2569
traceMsg(cg->comp(), "Found inlined site %d\n", siteIndex);
2570
2571
TR_ASSERT(siteIndex < (int32_t) cg->comp()->getNumInlinedCallSites(), "did not find AOTClassInfo %p method in inlined site table", *info);
2572
2573
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::ExternalRelocation(NULL,
2574
(uint8_t *)(intptr_t)siteIndex,
2575
(uint8_t *)(*info),
2576
(*info)->_reloKind, cg),
2577
__FILE__, __LINE__, NULL);
2578
}
2579
}
2580
}
2581
2582
static void addSVMValidationRecords(TR::CodeGenerator *cg)
2583
{
2584
TR::SymbolValidationManager::SymbolValidationRecordList &validationRecords = cg->comp()->getSymbolValidationManager()->getValidationRecordList();
2585
if (cg->comp()->getOption(TR_UseSymbolValidationManager))
2586
{
2587
// Add the flags in TR_AOTMethodHeader on the compile run
2588
TR_AOTMethodHeader *aotMethodHeaderEntry = cg->comp()->getAotMethodHeaderEntry();
2589
aotMethodHeaderEntry->flags |= TR_AOTMethodHeader_UsesSymbolValidationManager;
2590
2591
for (auto it = validationRecords.begin(); it != validationRecords.end(); it++)
2592
{
2593
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::ExternalRelocation(NULL,
2594
(uint8_t *)(*it),
2595
(*it)->_kind, cg),
2596
__FILE__, __LINE__, NULL);
2597
}
2598
}
2599
}
2600
2601
static TR_ExternalRelocationTargetKind getReloKindFromGuardSite(TR::CodeGenerator *cg, TR_AOTGuardSite *site)
2602
{
2603
TR_ExternalRelocationTargetKind type;
2604
2605
switch (site->getType())
2606
{
2607
case TR_DirectMethodGuard:
2608
if (site->getGuard()->getSymbolReference()->getSymbol()->getMethodSymbol()->isStatic())
2609
type = TR_InlinedStaticMethodWithNopGuard;
2610
else if (site->getGuard()->getSymbolReference()->getSymbol()->getMethodSymbol()->isSpecial())
2611
type = TR_InlinedSpecialMethodWithNopGuard;
2612
else if (site->getGuard()->getSymbolReference()->getSymbol()->getMethodSymbol()->isVirtual())
2613
type = TR_InlinedVirtualMethodWithNopGuard;
2614
else
2615
TR_ASSERT(0, "unexpected AOTDirectMethodGuard method symbol");
2616
break;
2617
2618
case TR_NonoverriddenGuard:
2619
type = TR_InlinedVirtualMethodWithNopGuard;
2620
break;
2621
case TR_RemovedNonoverriddenGuard:
2622
type = TR_InlinedVirtualMethod;
2623
break;
2624
2625
case TR_InterfaceGuard:
2626
type = TR_InlinedInterfaceMethodWithNopGuard;
2627
break;
2628
case TR_RemovedInterfaceGuard:
2629
traceMsg(cg->comp(), "TR_RemovedInterfaceMethod\n");
2630
type = TR_InlinedInterfaceMethod;
2631
break;
2632
2633
case TR_AbstractGuard:
2634
type = TR_InlinedAbstractMethodWithNopGuard;
2635
break;
2636
2637
case TR_HCRGuard:
2638
// devinmp: TODO/FIXME this should arrange to create an AOT
2639
// relocation which, when loaded, creates a
2640
// TR_PatchNOPedGuardSiteOnClassRedefinition or similar.
2641
// Here we would previously create a TR_HCR relocation,
2642
// which is for replacing J9Class or J9Method pointers.
2643
// These would be the 'unresolved' variant
2644
// (TR_RedefinedClassUPicSite), which would (hopefully) never
2645
// get patched. If it were patched, it seems like it would
2646
// replace code with a J9Method pointer.
2647
if (!cg->comp()->getOption(TR_UseOldHCRGuardAOTRelocations))
2648
type = TR_NoRelocation;
2649
else
2650
type = TR_HCR;
2651
break;
2652
2653
case TR_MethodEnterExitGuard:
2654
if (site->getGuard()->getCallNode()->getOpCodeValue() == TR::MethodEnterHook)
2655
type = TR_CheckMethodEnter;
2656
else if (site->getGuard()->getCallNode()->getOpCodeValue() == TR::MethodExitHook)
2657
type = TR_CheckMethodExit;
2658
else
2659
TR_ASSERT(0,"Unexpected TR_MethodEnterExitGuard at site %p guard %p node %p\n",
2660
site, site->getGuard(), site->getGuard()->getCallNode());
2661
break;
2662
2663
case TR_RemovedProfiledGuard:
2664
traceMsg(cg->comp(), "TR_ProfiledInlinedMethodRelocation\n");
2665
type = TR_ProfiledInlinedMethodRelocation;
2666
break;
2667
2668
case TR_ProfiledGuard:
2669
if (site->getGuard()->getTestType() == TR_MethodTest)
2670
{
2671
type = TR_ProfiledMethodGuardRelocation;
2672
traceMsg(cg->comp(), "TR_ProfiledMethodGuardRelocation\n");
2673
}
2674
else if (site->getGuard()->getTestType() == TR_VftTest)
2675
{
2676
type = TR_ProfiledClassGuardRelocation;
2677
traceMsg(cg->comp(), "TR_ProfiledClassGuardRelocation\n");
2678
}
2679
else
2680
TR_ASSERT(false, "unexpected profiled guard test type");
2681
break;
2682
2683
case TR_BreakpointGuard:
2684
traceMsg(cg->comp(), "TR_Breakpoint\n");
2685
type = TR_Breakpoint;
2686
break;
2687
2688
default:
2689
TR_ASSERT(false, "got an unknown/non-AOT guard at AOT site");
2690
cg->comp()->failCompilation<J9::AOTRelocationRecordGenerationFailure>("Unknown/non-AOT guard at AOT site");
2691
break;
2692
}
2693
2694
return type;
2695
}
2696
2697
static void processAOTGuardSites(TR::CodeGenerator *cg, uint32_t inlinedCallSize, TR_InlinedSiteHastTableEntry *orderedInlinedSiteListTable)
2698
{
2699
TR::list<TR_AOTGuardSite*> *aotGuardSites = cg->comp()->getAOTGuardPatchSites();
2700
for(auto it = aotGuardSites->begin(); it != aotGuardSites->end(); ++it)
2701
{
2702
// first, figure out the appropriate relocation record type from the guard type and symbol
2703
TR_ExternalRelocationTargetKind type = getReloKindFromGuardSite(cg, (*it));
2704
2705
switch (type) // relocation record type
2706
{
2707
case TR_InlinedStaticMethodWithNopGuard:
2708
case TR_InlinedSpecialMethodWithNopGuard:
2709
case TR_InlinedVirtualMethodWithNopGuard:
2710
case TR_InlinedInterfaceMethodWithNopGuard:
2711
case TR_InlinedAbstractMethodWithNopGuard:
2712
case TR_ProfiledClassGuardRelocation:
2713
case TR_ProfiledMethodGuardRelocation:
2714
case TR_ProfiledInlinedMethodRelocation:
2715
case TR_InlinedVirtualMethod:
2716
case TR_InlinedInterfaceMethod:
2717
{
2718
TR_ASSERT(inlinedCallSize, "TR_AOT expects inlinedCallSize to be larger than 0\n");
2719
intptr_t inlinedSiteIndex = (intptr_t)(*it)->getGuard()->getCurrentInlinedSiteIndex();
2720
TR_InlinedSiteLinkedListEntry *entry = (TR_InlinedSiteLinkedListEntry *)cg->comp()->trMemory()->allocateMemory(sizeof(TR_InlinedSiteLinkedListEntry), heapAlloc);
2721
2722
entry->reloType = type;
2723
entry->location = (uint8_t *)(*it)->getLocation();
2724
entry->destination = (uint8_t *)(*it)->getDestination();
2725
entry->guard = (uint8_t *)(*it)->getGuard();
2726
entry->next = NULL;
2727
2728
if (orderedInlinedSiteListTable[inlinedSiteIndex].first)
2729
{
2730
orderedInlinedSiteListTable[inlinedSiteIndex].last->next = entry;
2731
orderedInlinedSiteListTable[inlinedSiteIndex].last = entry;
2732
}
2733
else
2734
{
2735
orderedInlinedSiteListTable[inlinedSiteIndex].first = entry;
2736
orderedInlinedSiteListTable[inlinedSiteIndex].last = entry;
2737
}
2738
}
2739
break;
2740
2741
case TR_CheckMethodEnter:
2742
case TR_CheckMethodExit:
2743
case TR_HCR:
2744
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::ExternalRelocation((uint8_t *)(*it)->getLocation(),
2745
(uint8_t *)(*it)->getDestination(),
2746
type, cg),
2747
__FILE__, __LINE__, NULL);
2748
break;
2749
2750
case TR_Breakpoint:
2751
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::ExternalRelocation((uint8_t *)(*it)->getLocation(),
2752
(uint8_t *)(intptr_t)(*it)->getGuard()->getCurrentInlinedSiteIndex(),
2753
(uint8_t *)(*it)->getDestination(),
2754
type, cg),
2755
__FILE__, __LINE__, NULL);
2756
break;
2757
2758
case TR_NoRelocation:
2759
break;
2760
2761
default:
2762
TR_ASSERT(false, "got an unknown/non-AOT guard at AOT site");
2763
cg->comp()->failCompilation<J9::AOTRelocationRecordGenerationFailure>("Unknown/non-AOT guard at AOT site");
2764
break;
2765
}
2766
}
2767
}
2768
2769
static void addInlinedSiteRelocation(TR::CodeGenerator *cg,
2770
TR_ExternalRelocationTargetKind reloType,
2771
uint8_t *reloLocation,
2772
int32_t inlinedSiteIndex,
2773
TR::SymbolReference *callSymref,
2774
TR_OpaqueClassBlock *receiver,
2775
uint8_t *destinationAddress)
2776
{
2777
TR_ASSERT_FATAL(reloType != TR_NoRelocation, "TR_NoRelocation specified as reloType for inlinedSiteIndex=%d, reloLocation=%p, callSymref=%p, receiver=%p",
2778
inlinedSiteIndex, reloLocation, callSymref, receiver);
2779
2780
TR_RelocationRecordInformation *info = new (cg->comp()->trHeapMemory()) TR_RelocationRecordInformation();
2781
info->data1 = static_cast<uintptr_t>(inlinedSiteIndex);
2782
info->data2 = reinterpret_cast<uintptr_t>(callSymref);
2783
info->data3 = reinterpret_cast<uintptr_t>(receiver);
2784
info->data4 = reinterpret_cast<uintptr_t>(destinationAddress);
2785
2786
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::ExternalRelocation(reloLocation, (uint8_t *)info, reloType, cg),
2787
__FILE__,__LINE__, NULL);
2788
}
2789
2790
static void addInliningTableRelocations(TR::CodeGenerator *cg, uint32_t inlinedCallSize, TR_InlinedSiteHastTableEntry *orderedInlinedSiteListTable)
2791
{
2792
// If there are inlined calls, now add the relocation records in descending order
2793
// of inlined site index (at relocation time, the order is reversed)
2794
if (inlinedCallSize > 0)
2795
{
2796
for (int32_t counter = inlinedCallSize - 1; counter >= 0 ; counter--)
2797
{
2798
TR_InlinedSiteLinkedListEntry *currentSite = orderedInlinedSiteListTable[counter].first;
2799
2800
if (currentSite)
2801
{
2802
do
2803
{
2804
TR_VirtualGuard *guard = reinterpret_cast<TR_VirtualGuard *>(currentSite->guard);
2805
2806
addInlinedSiteRelocation(cg, currentSite->reloType, currentSite->location, counter, guard->getSymbolReference(), guard->getThisClass(), currentSite->destination);
2807
2808
currentSite = currentSite->next;
2809
}
2810
while(currentSite);
2811
}
2812
else
2813
{
2814
TR_AOTMethodInfo *methodInfo = cg->comp()->getInlinedAOTMethodInfo(counter);
2815
2816
addInlinedSiteRelocation(cg, methodInfo->reloKind, NULL, counter, methodInfo->callSymRef, methodInfo->receiver, NULL);
2817
}
2818
}
2819
}
2820
}
2821
2822
void
2823
J9::CodeGenerator::processRelocations()
2824
{
2825
// Project neutral non-AOT processRelocation
2826
// This should be done first to ensure that the
2827
// external relocations are generated after the
2828
// code is in its final form.
2829
OMR::CodeGeneratorConnector::processRelocations();
2830
2831
if (self()->comp()->compileRelocatableCode())
2832
{
2833
uint32_t inlinedCallSize = self()->comp()->getNumInlinedCallSites();
2834
2835
// Create temporary hashtable for ordering AOT guard relocations
2836
TR_InlinedSiteHastTableEntry *orderedInlinedSiteListTable = NULL;
2837
if (inlinedCallSize > 0)
2838
{
2839
orderedInlinedSiteListTable= (TR_InlinedSiteHastTableEntry*)self()->comp()->trMemory()->allocateMemory(sizeof(TR_InlinedSiteHastTableEntry) * inlinedCallSize, heapAlloc);
2840
memset(orderedInlinedSiteListTable, 0, sizeof(TR_InlinedSiteHastTableEntry)*inlinedCallSize);
2841
}
2842
2843
// Traverse list of AOT-specific guards and create relocation records
2844
processAOTGuardSites(self(), inlinedCallSize, orderedInlinedSiteListTable);
2845
2846
// Add non-SVM validation records
2847
addValidationRecords(self());
2848
2849
// If there are inlined calls, now add the relocation records in descending order of inlined site index (at relocation time, the order is reversed)
2850
addInliningTableRelocations(self(), inlinedCallSize, orderedInlinedSiteListTable);
2851
}
2852
2853
#if defined(J9VM_OPT_JITSERVER)
2854
if (self()->comp()->compileRelocatableCode() || self()->comp()->isOutOfProcessCompilation())
2855
#else
2856
if (self()->comp()->compileRelocatableCode())
2857
#endif /* defined(J9VM_OPT_JITSERVER) */
2858
{
2859
// Add SVM validation records
2860
addSVMValidationRecords(self());
2861
2862
// Now call the platform specific processing of relocations
2863
self()->getAheadOfTimeCompile()->processRelocations();
2864
}
2865
2866
// Traverse the AOT/external labels
2867
for (auto aotIterator = self()->getExternalRelocationList().begin(); aotIterator != self()->getExternalRelocationList().end(); ++aotIterator)
2868
{
2869
(*aotIterator)->apply(self());
2870
}
2871
}
2872
2873
#if defined(J9VM_OPT_JITSERVER)
2874
void J9::CodeGenerator::addExternalRelocation(TR::Relocation *r, const char *generatingFileName, uintptr_t generatingLineNumber, TR::Node *node, TR::ExternalRelocationPositionRequest where)
2875
{
2876
TR_ASSERT(generatingFileName, "External relocation location has improper NULL filename specified");
2877
if (self()->comp()->compileRelocatableCode() || self()->comp()->isOutOfProcessCompilation())
2878
{
2879
TR::RelocationDebugInfo *genData = new(self()->trHeapMemory()) TR::RelocationDebugInfo;
2880
genData->file = generatingFileName;
2881
genData->line = generatingLineNumber;
2882
genData->node = node;
2883
self()->addExternalRelocation(r, genData, where);
2884
}
2885
}
2886
2887
void J9::CodeGenerator::addExternalRelocation(TR::Relocation *r, TR::RelocationDebugInfo* info, TR::ExternalRelocationPositionRequest where)
2888
{
2889
if (self()->comp()->compileRelocatableCode() || self()->comp()->isOutOfProcessCompilation())
2890
{
2891
TR_ASSERT(info, "External relocation location does not have associated debug information");
2892
r->setDebugInfo(info);
2893
switch (where)
2894
{
2895
case TR::ExternalRelocationAtFront:
2896
_externalRelocationList.push_front(r);
2897
break;
2898
2899
case TR::ExternalRelocationAtBack:
2900
_externalRelocationList.push_back(r);
2901
break;
2902
2903
default:
2904
TR_ASSERT_FATAL(
2905
false,
2906
"invalid TR::ExternalRelocationPositionRequest %d",
2907
where);
2908
break;
2909
}
2910
}
2911
}
2912
#endif /* defined(J9VM_OPT_JITSERVER) */
2913
2914
void J9::CodeGenerator::addProjectSpecializedRelocation(uint8_t *location, uint8_t *target, uint8_t *target2,
2915
TR_ExternalRelocationTargetKind kind, char *generatingFileName, uintptr_t generatingLineNumber, TR::Node *node)
2916
{
2917
(target2 == NULL) ?
2918
self()->addExternalRelocation(new (self()->trHeapMemory()) TR::ExternalRelocation(location, target, kind, self()),
2919
generatingFileName, generatingLineNumber, node) :
2920
self()->addExternalRelocation(new (self()->trHeapMemory()) TR::ExternalRelocation(location, target, target2, kind, self()),
2921
generatingFileName, generatingLineNumber, node);
2922
}
2923
2924
void J9::CodeGenerator::addProjectSpecializedRelocation(TR::Instruction *instr, uint8_t *target, uint8_t *target2,
2925
TR_ExternalRelocationTargetKind kind, char *generatingFileName, uintptr_t generatingLineNumber, TR::Node *node)
2926
{
2927
(target2 == NULL) ?
2928
self()->addExternalRelocation(new (self()->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(instr, target, kind, self()),
2929
generatingFileName, generatingLineNumber, node) :
2930
self()->addExternalRelocation(new (self()->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(instr, target, target2, kind, self()),
2931
generatingFileName, generatingLineNumber, node);
2932
}
2933
2934
void J9::CodeGenerator::addProjectSpecializedPairRelocation(uint8_t *location, uint8_t *location2, uint8_t *target,
2935
TR_ExternalRelocationTargetKind kind, char *generatingFileName, uintptr_t generatingLineNumber, TR::Node *node)
2936
{
2937
self()->addExternalRelocation(new (self()->trHeapMemory()) TR::ExternalOrderedPair32BitRelocation(location, location2, target, kind, self()),
2938
generatingFileName, generatingLineNumber, node);
2939
}
2940
2941
2942
TR::Node *
J9::CodeGenerator::createOrFindClonedNode(TR::Node *node, int32_t numChildren)
   {
   TR_HashId index;
   if (!_uncommonedNodes.locate(node->getGlobalIndex(), index))
      {
      // has not been uncommoned already, clone and store for later
      TR::Node *clone = TR::Node::copy(node, numChildren);
      _uncommonedNodes.add(node->getGlobalIndex(), index, clone);
      node = clone;
      }
   else
      {
      // found previously cloned node
      node = (TR::Node *) _uncommonedNodes.getData(index);
      }
   return node;
   }

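// For relocatable (AOT) and out-of-process (JITServer) compilations, record a
// TR_HCR external relocation against the first instruction of the address
// materialization sequence; otherwise register a class-redefinition PIC site
// for it and mark the compilation as carrying class redefinition assumptions.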
void
J9::CodeGenerator::jitAddUnresolvedAddressMaterializationToPatchOnClassRedefinition(void *firstInstruction)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());
#if defined(J9VM_OPT_JITSERVER)
   if (self()->comp()->compileRelocatableCode() || self()->comp()->isOutOfProcessCompilation())
#else
   if (self()->comp()->compileRelocatableCode())
#endif /* defined(J9VM_OPT_JITSERVER) */
      {
      self()->addExternalRelocation(new (self()->trHeapMemory()) TR::ExternalRelocation((uint8_t *)firstInstruction, 0, TR_HCR, self()),
         __FILE__,__LINE__, NULL);
      }
   else
      {
      createClassRedefinitionPicSite((void*)-1, firstInstruction, 1 /* see OMR::RuntimeAssumption::isForAddressMaterializationSequence */, true, self()->comp()->getMetadataAssumptionList());
      self()->comp()->setHasClassRedefinitionAssumptions();
      }
   }

// J9
//
void
J9::CodeGenerator::compressedReferenceRematerialization()
   {
   TR::TreeTop * tt;
   TR::Node *node;
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(self()->fe());

   static bool disableRematforCP = feGetEnv("TR_DisableWrtBarOpt") != NULL;

   // The compressedrefs remat opt removes decompression/compression sequences from
   // loads/stores where there is no GC point between the load and the store,
   // and the load doesn't need to be dereferenced.
   // The opt needs to be disabled for the following cases:
   // 1. Under Guarded Storage (Concurrent Scavenger), the guarded load cannot be skipped, because the
   //    object that is loaded may not be in the root set and, as a consequence, may get moved.
   // 2. For read barriers in field watch, the VM helpers are GC points and therefore the object might be moved.
   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none || self()->comp()->getOption(TR_EnableFieldWatch))
      {
      if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
         traceMsg(self()->comp(), "The compressedrefs remat opt is disabled because Concurrent Scavenger is enabled\n");
      if (self()->comp()->getOption(TR_EnableFieldWatch))
         traceMsg(self()->comp(), "The compressedrefs remat opt is disabled because field watch is enabled\n");
      disableRematforCP = true;
      }

   // no need to rematerialize for lowMemHeap
   if (self()->comp()->useCompressedPointers() &&
         (TR::Compiler->om.compressedReferenceShift() != 0) &&
         !disableRematforCP)
      {
      if (self()->comp()->getOption(TR_TraceCG))
         self()->comp()->dumpMethodTrees("Trees before this remat phase", self()->comp()->getMethodSymbol());

      List<TR::Node> rematerializedNodes(self()->trMemory());
      vcount_t visitCount = self()->comp()->incVisitCount();
      TR::SymbolReference *autoSymRef = NULL;
      for (tt = self()->comp()->getStartTree(); tt; tt = tt->getNextTreeTop())
         {
         node = tt->getNode();
         if (node->getOpCodeValue() == TR::BBStart && !node->getBlock()->isExtensionOfPreviousBlock())
            {
            ListIterator<TR::Node> nodesIt(&rematerializedNodes);
            for (TR::Node * rematNode = nodesIt.getFirst(); rematNode != NULL; rematNode = nodesIt.getNext())
               {
               if (rematNode->getReferenceCount() == 0)
                  rematNode->getFirstChild()->recursivelyDecReferenceCount();
               }

            rematerializedNodes.deleteAll();
            }

         bool alreadyVisitedFirstChild = false;
         if ((node->getOpCodeValue() == TR::compressedRefs) &&
               (node->getFirstChild()->getOpCodeValue() == TR::l2a))
            {
            if (node->getFirstChild()->getVisitCount() == visitCount)
               alreadyVisitedFirstChild = true;
            }

         self()->rematerializeCompressedRefs(autoSymRef, tt, NULL, -1, node, visitCount, &rematerializedNodes);

         if ((node->getOpCodeValue() == TR::compressedRefs) &&
               (node->getFirstChild()->getOpCodeValue() == TR::l2a))
            {
            TR::TreeTop *prevTree = tt->getPrevTreeTop();
            TR::TreeTop *nextTree = tt->getNextTreeTop();
            if (node->getFirstChild()->getReferenceCount() > 1)
               {
               if (!alreadyVisitedFirstChild)
                  {
                  if (!rematerializedNodes.find(node->getFirstChild()))
                     {
                     ////traceMsg(comp(), "Adding %p\n", node->getFirstChild());
                     rematerializedNodes.add(node->getFirstChild());
                     }
                  node->getFirstChild()->setVisitCount(visitCount-1);
                  }

               if (rematerializedNodes.find(node->getFirstChild()))
                  {
                  TR::Node *cursorNode = node->getFirstChild()->getFirstChild();
                  while (cursorNode &&
                        (cursorNode->getOpCodeValue() != TR::iu2l))
                     cursorNode = cursorNode->getFirstChild();

                  TR::Node *ttNode = TR::Node::create(TR::treetop, 1, cursorNode);

                  ///traceMsg(comp(), "5 ttNode %p\n", ttNode);
                  TR::TreeTop *treeTop = TR::TreeTop::create(self()->comp(), ttNode);
                  TR::TreeTop *prevTreeTop = tt->getPrevTreeTop();
                  prevTreeTop->join(treeTop);
                  treeTop->join(tt);
                  prevTree = treeTop;
                  }
               }

            node->getFirstChild()->recursivelyDecReferenceCount();
            node->getSecondChild()->recursivelyDecReferenceCount();
            prevTree->join(nextTree);
            }

         if (node->canGCandReturn())
            {
            ListIterator<TR::Node> nodesIt(&rematerializedNodes);
            for (TR::Node * rematNode = nodesIt.getFirst(); rematNode != NULL; rematNode = nodesIt.getNext())
               {
               if (rematNode->getVisitCount() != visitCount)
                  {
                  rematNode->setVisitCount(visitCount);
                  TR::Node *ttNode = TR::Node::create(TR::treetop, 1, rematNode);

                  ///traceMsg(comp(), "5 ttNode %p\n", ttNode);
                  TR::TreeTop *treeTop = TR::TreeTop::create(self()->comp(), ttNode);
                  TR::TreeTop *prevTree = tt->getPrevTreeTop();
                  prevTree->join(treeTop);
                  treeTop->join(tt);
                  }
               }
            rematerializedNodes.deleteAll();
            }
         }
      if (self()->comp()->getOption(TR_TraceCG))
         self()->comp()->dumpMethodTrees("Trees after this remat phase", self()->comp()->getMethodSymbol());

      if (self()->shouldYankCompressedRefs())
         {
         visitCount = self()->comp()->incVisitCount();
         vcount_t secondVisitCount = self()->comp()->incVisitCount();
         TR::TreeTop *nextTree = NULL;
         for (tt = self()->comp()->getStartTree(); tt; tt = nextTree)
            {
            node = tt->getNode();
            nextTree = tt->getNextTreeTop();
            self()->yankCompressedRefs(tt, NULL, -1, node, visitCount, secondVisitCount);
            }

         if (self()->comp()->getOption(TR_TraceCG))
            self()->comp()->dumpMethodTrees("Trees after this yank phase", self()->comp()->getMethodSymbol());
         }
      }

   if (self()->comp()->useCompressedPointers() &&
         !disableRematforCP)
      {
      for (tt = self()->comp()->getStartTree(); tt; tt = tt->getNextTreeTop())
         {
         node = tt->getNode();

         if ((node->getOpCodeValue() == TR::compressedRefs) &&
               (node->getFirstChild()->getOpCodeValue() == TR::l2a))
            {
            TR::TreeTop *prevTree = tt->getPrevTreeTop();
            TR::TreeTop *nextTree = tt->getNextTreeTop();

            if (nextTree->getNode()->getOpCode().isNullCheck())
               {
               TR::Node *firstChild = nextTree->getNode()->getFirstChild();
               TR::Node *reference = NULL;
               if (firstChild->getOpCodeValue() == TR::l2a)
                  {
                  TR::ILOpCodes loadOp = self()->comp()->il.opCodeForIndirectLoad(TR::Int32);
                  while (firstChild->getOpCodeValue() != loadOp)
                     firstChild = firstChild->getFirstChild();
                  reference = firstChild->getFirstChild();
                  }
               else
                  reference = nextTree->getNode()->getNullCheckReference();

               if (reference == node->getFirstChild())
                  {
                  node->getFirstChild()->recursivelyDecReferenceCount();
                  node->getSecondChild()->recursivelyDecReferenceCount();
                  prevTree->join(nextTree);
                  }
               }
            }
         }
      }

   }

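// The remat walk below looks for decompression sequences of roughly this shape
// (an illustrative sketch only; the exact children depend on the heap base and
// the compressed-reference shift):
//
//    l2a
//      ladd        (containsCompressionSequence, heap base != 0)
//        lshl
//          iu2l
//            iloadi <compressed field>
//        lconst <heap base>
//
// or, when the heap base is 0 and offsets are shifted, just an l2a of an lshl.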
void
3171
J9::CodeGenerator::rematerializeCompressedRefs(
3172
TR::SymbolReference * & autoSymRef,
3173
TR::TreeTop *tt,
3174
TR::Node *parent,
3175
int32_t childNum,
3176
TR::Node *node,
3177
vcount_t visitCount,
3178
List<TR::Node> *rematerializedNodes)
3179
{
3180
if (node->getVisitCount() == visitCount)
3181
return;
3182
3183
node->setVisitCount(visitCount);
3184
3185
bool alreadyVisitedNullCheckReference = false;
3186
bool alreadyVisitedReferenceInNullTest = false;
3187
bool alreadyVisitedReferenceInStore = false;
3188
3189
TR::Node *reference = NULL;
3190
TR::Node *address = NULL;
3191
3192
if (node->getOpCode().isNullCheck())
3193
{
3194
// check for either
3195
// a) HB!=0
3196
// l2a
3197
// ladd (compressionSequence)
3198
// b) HB=0, shifted offsets
3199
// l2a
3200
// lshl
3201
//
3202
if ((node->getFirstChild()->getOpCodeValue() == TR::l2a) &&
3203
(((node->getFirstChild()->getFirstChild()->getOpCodeValue() == TR::ladd) &&
3204
node->getFirstChild()->getFirstChild()->containsCompressionSequence()) ||
3205
(node->getFirstChild()->getFirstChild()->getOpCodeValue() == TR::lshl)))
3206
{
3207
TR::ILOpCodes loadOp = self()->comp()->il.opCodeForIndirectLoad(TR::Int32);
3208
TR::Node *n = node->getFirstChild();
3209
while (n->getOpCodeValue() != loadOp)
3210
n = n->getFirstChild();
3211
reference = n->getFirstChild();
3212
}
3213
else
3214
{
3215
reference = node->getNullCheckReference();
3216
}
3217
3218
if (reference->getVisitCount() == visitCount)
3219
alreadyVisitedNullCheckReference = true;
3220
}
3221
3222
if ((node->getOpCodeValue() == TR::ifacmpeq) ||
3223
(node->getOpCodeValue() == TR::ifacmpne))
3224
{
3225
TR::Node *cmpValue = node->getFirstChild();
3226
if (cmpValue->getVisitCount() == visitCount)
3227
alreadyVisitedReferenceInNullTest = true;
3228
}
3229
3230
if (node->getOpCode().isStoreIndirect())
3231
{
3232
// check for either
3233
// a) HB!=0
3234
// l2a
3235
// lsub (compressionSequence)
3236
// b) HB=0, shifted offsets
3237
// l2a
3238
// lshr
3239
//
3240
bool isCompressed = false;
3241
if ((node->getSecondChild()->getOpCodeValue() == TR::l2i) &&
3242
(((node->getSecondChild()->getFirstChild()->getOpCodeValue() == TR::lsub) ||
3243
(node->getSecondChild()->getFirstChild()->getOpCodeValue() == TR::lushr)) &&
3244
node->getSecondChild()->getFirstChild()->containsCompressionSequence()))
3245
{
3246
TR::Node *n = node->getSecondChild()->getFirstChild();
3247
while (n->getOpCodeValue() != TR::a2l)
3248
n = n->getFirstChild();
3249
address = n->getFirstChild();
3250
isCompressed = true;
3251
}
3252
3253
if (address && (address->getVisitCount() == visitCount))
3254
alreadyVisitedReferenceInStore = true;
3255
3256
// check for loads that have occurred before this store
3257
// if so, anchor the load right before the store
3258
//
3259
self()->anchorRematNodesIfNeeded(node, tt, rematerializedNodes);
3260
}
3261
else if ((node->getOpCodeValue() == TR::arraycopy) || (node->getOpCodeValue() == TR::arrayset))
3262
{
3263
self()->anchorRematNodesIfNeeded(node, tt, rematerializedNodes);
3264
}
3265
3266
if (node->getOpCodeValue() == TR::l2a)
3267
{
3268
rematerializedNodes->remove(node);
3269
}
3270
3271
if ((node->getOpCodeValue() == TR::l2a) &&
3272
((node->getFirstChild()->getOpCodeValue() == TR::ladd &&
3273
node->getFirstChild()->containsCompressionSequence()) ||
3274
((node->getFirstChild()->getOpCodeValue() == TR::lshl) &&
3275
self()->isAddressScaleIndexSupported((1 << TR::Compiler->om.compressedReferenceShiftOffset())))))
3276
{
3277
if (parent &&
3278
(node->getReferenceCount() > 1) &&
3279
((parent->getOpCode().isStoreIndirect() && (childNum == 0)) ||
3280
parent->getOpCode().isLoadVar() ||
3281
(self()->getSupportsConstantOffsetInAddressing() && parent->getOpCode().isArrayRef() &&
3282
(self()->canFoldLargeOffsetInAddressing() || parent->getSecondChild()->getOpCode().isLoadConst()))) &&
3283
performTransformation(self()->comp(), "%sRematerializing node %p(%s) in decompression sequence\n", OPT_DETAILS, node, node->getOpCode().getName()))
3284
{
3285
if ((node->getReferenceCount() > 1) &&
3286
!rematerializedNodes->find(node))
3287
{
3288
rematerializedNodes->add(node);
3289
}
3290
3291
TR::Node *dupNode = NULL;
3292
TR::Node *cursorNode = node;
3293
TR::Node *cursorParent = parent;
3294
int32_t cursorChildNum = childNum;
3295
while (cursorNode &&
3296
((cursorNode->getOpCodeValue() != TR::iu2l) ||
3297
(cursorNode->getFirstChild()->getOpCodeValue() != TR::iloadi)))
3298
{
3299
TR::Node *copyCursorNode = TR::Node::copy(cursorNode);
3300
copyCursorNode->setReferenceCount(0);
3301
if (cursorNode == node)
3302
dupNode = copyCursorNode;
3303
3304
for (int32_t j = 0; j < cursorNode->getNumChildren(); ++j)
3305
{
3306
TR::Node *cursorChild = cursorNode->getChild(j);
3307
copyCursorNode->setAndIncChild(j, cursorChild);
3308
}
3309
3310
cursorParent->setAndIncChild(cursorChildNum, copyCursorNode);
3311
cursorNode->decReferenceCount();
3312
3313
cursorParent = cursorNode;
3314
cursorChildNum = 0;
3315
cursorNode = cursorNode->getFirstChild();
3316
}
3317
3318
node->setVisitCount(visitCount-1);
3319
dupNode->setVisitCount(visitCount);
3320
node = dupNode;
3321
}
3322
else
3323
{
3324
if (node->getReferenceCount() > 1)
3325
{
3326
// on x86, prevent remat of the l2a again thereby allowing
3327
// nodes to use the result of the add already done
3328
//
3329
if (!self()->canFoldLargeOffsetInAddressing())
3330
{
3331
if (!rematerializedNodes->find(node))
3332
rematerializedNodes->add(node);
3333
node->setVisitCount(visitCount-1);
3334
}
3335
}
3336
else
3337
rematerializedNodes->remove(node);
3338
3339
if (parent &&
3340
((parent->getOpCode().isArrayRef() &&
3341
!self()->canFoldLargeOffsetInAddressing() &&
3342
!parent->getSecondChild()->getOpCode().isLoadConst()) ||
3343
!self()->getSupportsConstantOffsetInAddressing()) &&
3344
performTransformation(self()->comp(), "%sYanking %p(%s) in decompression sequence\n", OPT_DETAILS, node, node->getOpCode().getName()))
3345
{
3346
if ((node->getOpCodeValue() == TR::l2a) &&
3347
(node->getFirstChild()->getOpCodeValue() == TR::ladd))
3348
{
3349
TR::TreeTop *cursorTree = tt;
3350
while (cursorTree)
3351
{
3352
bool addTree = false;
3353
TR::Node *cursorNode = cursorTree->getNode();
3354
if (cursorNode->getOpCodeValue() == TR::NULLCHK)
3355
{
3356
TR::Node *nullchkRef = cursorNode->getNullCheckReference();
3357
if ((nullchkRef->getOpCodeValue() == TR::l2a) &&
3358
(nullchkRef->getFirstChild() == node->getFirstChild()->getFirstChild()))
3359
{
3360
addTree = true;
3361
}
3362
}
3363
3364
if (!addTree && (cursorNode->getOpCodeValue() == TR::treetop) &&
3365
(cursorNode->getFirstChild() == node->getFirstChild()->getFirstChild()))
3366
{
3367
addTree = true;
3368
}
3369
3370
if (addTree)
3371
{
3372
TR::Node *ttNode = TR::Node::create(TR::treetop, 1, node);
3373
3374
if (self()->comp()->getOption(TR_TraceCG))
3375
traceMsg(self()->comp(), "Placing treetop %p (to hide delay) after tree %p for l2a %p\n", ttNode, cursorNode, node);
3376
3377
TR::TreeTop *treeTop = TR::TreeTop::create(self()->comp(), ttNode);
3378
TR::TreeTop *nextTT = cursorTree->getNextTreeTop();
3379
cursorTree->join(treeTop);
3380
treeTop->join(nextTT);
3381
break;
3382
}
3383
else
3384
{
3385
if ((cursorNode->getOpCodeValue() == TR::BBStart) &&
3386
(!cursorNode->getBlock()->isExtensionOfPreviousBlock()))
3387
break;
3388
}
3389
3390
cursorTree = cursorTree->getPrevTreeTop();
3391
}
3392
}
3393
}
3394
}
3395
}
3396
3397
for (int32_t i = 0; i < node->getNumChildren(); ++i)
3398
{
3399
TR::Node *child = node->getChild(i);
3400
self()->rematerializeCompressedRefs(autoSymRef, tt, node, i, child, visitCount, rematerializedNodes);
3401
}
3402
3403
static bool disableBranchlessPassThroughNULLCHK = feGetEnv("TR_disableBranchlessPassThroughNULLCHK") != NULL;
3404
if (node->getOpCode().isNullCheck() && reference &&
3405
(self()->performsChecksExplicitly() || (disableBranchlessPassThroughNULLCHK && node->getFirstChild()->getOpCodeValue() == TR::PassThrough)) &&
3406
((node->getFirstChild()->getOpCodeValue() == TR::l2a) ||
3407
(reference->getOpCodeValue() == TR::l2a)) &&
3408
performTransformation(self()->comp(), "%sTransforming null check reference %p in null check node %p to be checked explicitly\n", OPT_DETAILS, reference, node))
3409
{
3410
if (node->getFirstChild()->getOpCodeValue() != TR::PassThrough)
3411
{
3412
TR::Node *immChild = node->getFirstChild();
3413
TR::Node *ttNode = NULL;
3414
bool addedToList = false;
3415
if (node->getOpCode().isResolveCheck())
3416
{
3417
ttNode = TR::Node::createWithSymRef(TR::ResolveCHK, 1, 1, immChild, node->getSymbolReference());
3418
TR::Node::recreate(node, TR::NULLCHK);
3419
}
3420
else
3421
{
3422
if (immChild->getOpCodeValue() == TR::l2a)
3423
{
3424
if ((immChild->getReferenceCount() > 1) &&
3425
!rematerializedNodes->find(immChild))
3426
{
3427
rematerializedNodes->add(immChild);
3428
addedToList = true;
3429
}
3430
3431
immChild->setVisitCount(visitCount-1);
3432
TR::Node *anchorNode = TR::Node::create(TR::treetop, 1, immChild->getFirstChild()->getFirstChild());
3433
TR::TreeTop *anchorTree = TR::TreeTop::create(self()->comp(), anchorNode);
3434
immChild->getFirstChild()->getFirstChild()->setVisitCount(visitCount-1);
3435
TR::TreeTop *nextTT = tt->getNextTreeTop();
3436
tt->join(anchorTree);
3437
anchorTree->join(nextTT);
3438
3439
TR::Node *n = immChild->getFirstChild();
3440
{
3441
while ((n != reference) &&
3442
(n->getOpCodeValue() != TR::l2a))
3443
{
3444
n->setVisitCount(visitCount-1);
3445
n = n->getFirstChild();
3446
}
3447
}
3448
}
3449
else
3450
ttNode = TR::Node::create(TR::treetop, 1, immChild);
3451
}
3452
3453
if (ttNode)
3454
{
3455
TR::TreeTop *treeTop = TR::TreeTop::create(self()->comp(), ttNode);
3456
immChild->setVisitCount(visitCount-1);
3457
TR::TreeTop *nextTT = tt->getNextTreeTop();
3458
tt->join(treeTop);
3459
treeTop->join(nextTT);
3460
}
3461
3462
TR::Node *passThroughNode = TR::Node::create(TR::PassThrough, 1, reference);
3463
passThroughNode->setVisitCount(visitCount);
3464
node->setAndIncChild(0, passThroughNode);
3465
if (ttNode || !addedToList)
3466
immChild->recursivelyDecReferenceCount();
3467
else
3468
immChild->decReferenceCount();
3469
}
3470
3471
if ((reference->getOpCodeValue() == TR::l2a) &&
3472
(!alreadyVisitedNullCheckReference || (reference->getReferenceCount() == 1)) &&
3473
(((reference->getFirstChild()->getOpCodeValue() == TR::ladd) &&
3474
reference->getFirstChild()->containsCompressionSequence()) ||
3475
reference->getFirstChild()->getOpCodeValue() == TR::lshl) &&
3476
performTransformation(self()->comp(), "%sStrength reducing null check reference %p in null check node %p \n", OPT_DETAILS, reference, node))
3477
{
3478
bool addedToList = false;
3479
if (node->getFirstChild()->getOpCodeValue() == TR::PassThrough)
3480
{
3481
if ((reference->getReferenceCount() > 1) &&
3482
!rematerializedNodes->find(reference))
3483
{
3484
rematerializedNodes->add(reference);
3485
addedToList = true;
3486
}
3487
3488
TR::Node *passThroughNode = node->getFirstChild();
3489
TR::Node *grandChild = reference->getFirstChild()->getFirstChild();
3490
TR::Node *l2aNode = TR::Node::create(TR::l2a, 1, grandChild);
3491
if (reference->isNonNull())
3492
l2aNode->setIsNonNull(true);
3493
else if (reference->isNull())
3494
l2aNode->setIsNull(true);
3495
passThroughNode->setAndIncChild(0, l2aNode);
3496
if (addedToList)
3497
reference->decReferenceCount();
3498
else
3499
reference->recursivelyDecReferenceCount();
3500
reference->setVisitCount(visitCount-1);
3501
}
3502
}
3503
}
3504
3505
if ((node->getOpCodeValue() == TR::ifacmpeq) ||
3506
(node->getOpCodeValue() == TR::ifacmpne))
3507
{
3508
TR::Node *reference = node->getFirstChild();
3509
TR::Node *secondChild = node->getSecondChild();
3510
3511
if ((reference->getOpCodeValue() == TR::l2a) &&
3512
(!alreadyVisitedReferenceInNullTest || (reference->getReferenceCount() == 1)) &&
3513
(((reference->getFirstChild()->getOpCodeValue() == TR::ladd) &&
3514
reference->getFirstChild()->containsCompressionSequence())||
3515
reference->getFirstChild()->getOpCodeValue() == TR::lshl))
3516
{
3517
if ((secondChild->getOpCodeValue() == TR::aconst) &&
3518
(secondChild->getAddress() == 0) &&
3519
performTransformation(self()->comp(), "%sTransforming reference %p in null comparison node %p \n", OPT_DETAILS, reference, node))
3520
{
3521
bool addedToList = false;
3522
if ((reference->getReferenceCount() > 1) &&
3523
!rematerializedNodes->find(reference))
3524
{
3525
rematerializedNodes->add(reference);
3526
addedToList = true;
3527
}
3528
3529
TR::Node *compressedValue = reference->getFirstChild()->getFirstChild();
3530
TR::Node *l2aNode = TR::Node::create(TR::l2a, 1, compressedValue);
3531
if (reference->isNonNull())
3532
l2aNode->setIsNonNull(true);
3533
else if (reference->isNull())
3534
l2aNode->setIsNull(true);
3535
3536
node->setAndIncChild(0, l2aNode);
3537
if (addedToList)
3538
reference->decReferenceCount();
3539
else
3540
reference->recursivelyDecReferenceCount();
3541
reference->setVisitCount(visitCount-1);
3542
}
3543
}
3544
}
3545
3546
if (address && node->getOpCode().isStoreIndirect())
3547
{
3548
if (address->getOpCodeValue() == TR::l2a && (address->getReferenceCount() == 1 || !alreadyVisitedReferenceInStore) &&
3549
((address->getFirstChild()->getOpCodeValue() == TR::ladd && address->getFirstChild()->containsCompressionSequence()) ||
3550
address->getFirstChild()->getOpCodeValue() == TR::lshl))
3551
{
3552
// Check for write barriers that we can skip and which are not underneath an ArrayStoreCHK. In these cases we are safe
3553
// to optimize the write barrier to a simple store, thus avoiding the need to compress / uncompress the pointer.
3554
if (node->getOpCode().isWrtBar() && node->skipWrtBar())
3555
{
3556
// This check is overly conservative to ensure functional correctness.
3557
bool isPossiblyUnderArrayStoreCheck = tt->getNode()->getOpCodeValue() == TR::ArrayStoreCHK || (node->getReferenceCount() > 1 && !tt->getNode()->getOpCode().isResolveCheck());
3558
3559
if (!isPossiblyUnderArrayStoreCheck && performTransformation(self()->comp(), "%sStoring compressed pointer [%p] directly into %p in tree %p\n", OPT_DETAILS, address, node, tt->getNode()))
3560
{
3561
bool addedToList = false;
3562
if ((address->getReferenceCount() > 1) && !rematerializedNodes->find(address))
3563
{
3564
rematerializedNodes->add(address);
3565
addedToList = true;
3566
}
3567
3568
TR::Node *l2iNode = NULL;
3569
TR::ILOpCodes loadOp = self()->comp()->il.opCodeForIndirectLoad(TR::Int32);
3570
TR::Node *n = address;
3571
while (n->getOpCodeValue() != loadOp)
3572
n = n->getFirstChild();
3573
l2iNode = n;
3574
3575
if (node->getOpCode().isWrtBar())
3576
{
3577
int32_t lastChildNum = node->getNumChildren()-1;
3578
node->getChild(lastChildNum)->recursivelyDecReferenceCount();
3579
node->setNumChildren(lastChildNum);
3580
}
3581
3582
TR::Node::recreate(node, self()->comp()->il.opCodeForIndirectStore(TR::Int32));
3583
3584
TR::Node *immChild = node->getSecondChild();
3585
node->setAndIncChild(1, l2iNode);
3586
3587
address->incReferenceCount();
3588
immChild->recursivelyDecReferenceCount();
3589
3590
if (addedToList)
3591
address->decReferenceCount();
3592
else
3593
address->recursivelyDecReferenceCount();
3594
3595
address->setVisitCount(visitCount-1);
3596
}
3597
}
3598
}
3599
}
3600
}
3601
3602
3603
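// Attempt to move ("yank") a treetop-anchored l2a decompression sequence, together
// with the NULLCHK that guards it, earlier in its extended basic block: walk the
// preceding trees and stop at any store, call, aliased symbol reference, exception
// point, or block boundary that would make the motion unsafe.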
void
3604
J9::CodeGenerator::yankCompressedRefs(
3605
TR::TreeTop *tt,
3606
TR::Node *parent,
3607
int32_t childNum,
3608
TR::Node *node,
3609
vcount_t visitCount,
3610
vcount_t secondVisitCount)
3611
{
3612
if (node->getVisitCount() >= visitCount)
3613
return;
3614
3615
node->setVisitCount(visitCount);
3616
3617
for (int32_t i = 0; i < node->getNumChildren(); ++i)
3618
{
3619
TR::Node *child = node->getChild(i);
3620
self()->yankCompressedRefs(tt, node, i, child, visitCount, secondVisitCount);
3621
}
3622
3623
if (parent &&
3624
(parent->getOpCodeValue() == TR::treetop) &&
3625
(node->getOpCodeValue() == TR::l2a) &&
3626
(node->getFirstChild()->getOpCodeValue() == TR::ladd &&
3627
node->getFirstChild()->containsCompressionSequence()))
3628
{
3629
3630
//printf("Looking at node %p in %s\n", node, comp()->signature()); fflush(stdout);
3631
TR::TreeTop *firstTree = tt;
3632
TR::TreeTop *lastTree = tt;
3633
bool nullCheckTree = false;
3634
bool exprNeedsChecking = true;
3635
if ((node->getFirstChild()->getFirstChild()->getOpCodeValue() == TR::iu2l) &&
3636
(node->getFirstChild()->getFirstChild()->getFirstChild()->getOpCodeValue() == TR::iloadi) &&
3637
((node->getFirstChild()->getFirstChild()->getFirstChild()->getFirstChild()->getOpCode().isLoadVarDirect() &&
3638
node->getFirstChild()->getFirstChild()->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->isAutoOrParm()) ||
3639
(node->getFirstChild()->getFirstChild()->getFirstChild()->getFirstChild()->getOpCodeValue() == TR::aRegStore)))
3640
exprNeedsChecking = false;
3641
3642
TR::TreeTop *prevTree = tt->getPrevTreeTop();
3643
TR::Node *prevNode = prevTree->getNode();
3644
if (prevNode->getOpCodeValue() == TR::NULLCHK)
3645
{
3646
if (prevNode->getFirstChild()->getOpCodeValue() == TR::PassThrough)
3647
{
3648
TR::Node *reference = prevNode->getNullCheckReference();
3649
if ((reference == node) ||
3650
((reference->getOpCodeValue() == TR::l2a) &&
3651
(reference->getFirstChild() == node->getFirstChild()->getFirstChild())))
3652
{
3653
nullCheckTree = true;
3654
firstTree = prevTree;
3655
prevTree = prevTree->getPrevTreeTop();
3656
prevNode = prevTree->getNode();
3657
}
3658
}
3659
}
3660
3661
if ((prevNode->getOpCodeValue() == TR::treetop) &&
3662
(prevNode->getFirstChild() == node->getFirstChild()->getFirstChild()))
3663
firstTree = prevTree;
3664
else
3665
firstTree = tt;
3666
3667
if (firstTree != tt)
3668
{
3669
TR_BitVector symbolReferencesInNode(self()->comp()->getSymRefCount(), self()->comp()->trMemory(), stackAlloc);
3670
3671
////bool canYank = collectSymRefs(node, &symbolReferencesInNode, secondVisitCount);
3672
// since symRefs need to be collected for each treetop, we'll need a fresh visitCount
3673
// for every walk of a tree
3674
//
3675
bool canYank = self()->collectSymRefs(node, &symbolReferencesInNode, self()->comp()->incVisitCount());
3676
3677
TR_BitVector intersection(self()->comp()->getSymRefCount(), self()->comp()->trMemory(), stackAlloc);
3678
3679
//printf("canYank %d node %d in %s\n", canYank, node, comp()->signature()); fflush(stdout);
3680
3681
if (canYank)
3682
{
3683
TR::TreeTop *cursorTree = firstTree->getPrevTreeTop();
3684
int32_t numTrees = 0;
3685
while (cursorTree)
3686
{
3687
numTrees++;
3688
TR::Node *cursorNode = cursorTree->getNode();
3689
//printf("canYank %d node %p cursor %p in %s\n", canYank, node, cursorNode, comp()->signature()); fflush(stdout);
3690
TR::Node *childNode = NULL;
3691
if (cursorNode->getNumChildren() > 0)
3692
childNode = cursorNode->getFirstChild();
3693
3694
if (cursorNode && cursorNode->getOpCode().hasSymbolReference() &&
3695
(cursorNode->getOpCode().isStore() || cursorNode->getOpCode().isCall()))
3696
{
3697
if (symbolReferencesInNode.get(cursorNode->getSymbolReference()->getReferenceNumber()))
3698
{
3699
break;
3700
}
3701
3702
intersection.empty();
3703
cursorNode->getSymbolReference()->getUseDefAliases().getAliasesAndUnionWith(intersection);
3704
intersection &= symbolReferencesInNode;
3705
if (!intersection.isEmpty())
3706
{
3707
break;
3708
}
3709
}
3710
3711
if (childNode && childNode->getOpCode().hasSymbolReference())
3712
{
3713
if (childNode && childNode->getOpCode().hasSymbolReference() &&
3714
(childNode->getOpCode().isStore() || childNode->getOpCode().isCall()))
3715
{
3716
if (symbolReferencesInNode.get(childNode->getSymbolReference()->getReferenceNumber()))
3717
{
3718
break;
3719
}
3720
3721
intersection.empty();
3722
childNode->getSymbolReference()->getUseDefAliases().getAliasesAndUnionWith(intersection);
3723
intersection &= symbolReferencesInNode;
3724
if (!intersection.isEmpty())
3725
{
3726
break;
3727
}
3728
}
3729
}
3730
3731
if (nullCheckTree)
3732
{
3733
if (cursorNode->getOpCode().isStore())
3734
{
3735
if (cursorNode->getSymbol()->isStatic() ||
3736
cursorNode->getSymbol()->isShadow() ||
3737
!cursorNode->getSymbolReference()->getUseonlyAliases().isZero(self()->comp()))
3738
{
3739
break;
3740
}
3741
}
3742
}
3743
3744
if (cursorNode->exceptionsRaised())
3745
{
3746
if (nullCheckTree || exprNeedsChecking)
3747
break;
3748
}
3749
3750
if (cursorNode->getOpCodeValue() == TR::BBStart)
3751
{
3752
break;
3753
}
3754
3755
cursorTree = cursorTree->getPrevTreeTop();
3756
}
3757
3758
if (cursorTree != firstTree->getPrevTreeTop())
3759
{
3760
/////printf("Yanking l2a node %p past %d trees in %s\n", node, numTrees, comp()->signature()); fflush(stdout);
3761
TR::TreeTop *nextTree = cursorTree->getNextTreeTop();
3762
TR::TreeTop *prevTreeAtSrc = firstTree->getPrevTreeTop();
3763
TR::TreeTop *nextTreeAtSrc = lastTree->getNextTreeTop();
3764
prevTreeAtSrc->join(nextTreeAtSrc);
3765
cursorTree->join(firstTree);
3766
lastTree->join(nextTree);
3767
}
3768
}
3769
}
3770
}
3771
}
3772
3773
3774
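// Called for indirect stores, arraycopy and arrayset nodes during the remat walk:
// if a rematerialized l2a is fed by an indirect load whose symbol is (or aliases
// with) the one this node may write, anchor that l2a under a treetop immediately
// before the current tree and drop it from the rematerialized list, so the load
// is not delayed past the store.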
void
J9::CodeGenerator::anchorRematNodesIfNeeded(
      TR::Node *node,
      TR::TreeTop *tt,
      List<TR::Node> *rematerializedNodes)
   {
   TR::SymbolReference *symRef = node->getSymbolReference();
   TR::SparseBitVector aliases(self()->comp()->allocator());
   if (symRef->sharesSymbol())
      symRef->getUseDefAliases().getAliases(aliases);

   ListIterator<TR::Node> nodesIt(rematerializedNodes);
   for (TR::Node * rematNode = nodesIt.getFirst(); rematNode != NULL; rematNode = nodesIt.getNext())
      {
      if (rematNode->getOpCodeValue() == TR::l2a)
         {
         TR::Node *load = rematNode->getFirstChild();
         while (load->getOpCodeValue() != TR::iu2l)
            load = load->getFirstChild();
         load = load->getFirstChild();
         if (load->getOpCode().isLoadIndirect() &&
               ((load->getSymbolReference() == node->getSymbolReference()) ||
                (aliases.ValueAt(load->getSymbolReference()->getReferenceNumber()))))
            {
            rematerializedNodes->remove(rematNode);
            rematNode->setVisitCount(self()->comp()->getVisitCount());
            if (self()->comp()->getOption(TR_TraceCG))
               {
               if (node->getOpCode().isStoreIndirect())
                  traceMsg(self()->comp(), "Found previous load %p same as store %p, anchoring load\n", load, node);
               else
                  traceMsg(self()->comp(), "Found previous load %p aliases with node %p, anchoring load\n", load, node);
               }
            TR::Node *ttNode = TR::Node::create(TR::treetop, 1, rematNode);
            TR::TreeTop *treeTop = TR::TreeTop::create(self()->comp(), ttNode);
            TR::TreeTop *prevTree = tt->getPrevTreeTop();
            prevTree->join(treeTop);
            treeTop->join(tt);
            }
         }
      }
   }

/**
 * Insert asynccheck nodes before method returns. Without this, methods
 * with no loops or calls will never be sampled, and will be stuck
 * forever at their initial opt-level. (Important for mpegaudio,
 * which has some large, warm methods with no loops or calls).
 */
void J9::CodeGenerator::insertEpilogueYieldPoints()
   {
   // Look for all returns, and insert an async check before them
   TR::CFG * cfg = self()->comp()->getFlowGraph();

   for (TR::TreeTop * treeTop = self()->comp()->getStartTree(); treeTop; treeTop = treeTop->getNextTreeTop())
      {
      TR::Node * node = treeTop->getNode();
      TR::ILOpCodes opCode = node->getOpCodeValue();

      if (opCode == TR::BBStart)
         {
         TR::Block * block = node->getBlock();

         TR::TreeTop * tt1 = block->getLastRealTreeTop();
         TR::Node * node1 = tt1->getNode();

         if (node1->getOpCode().isReturn())
            {
            TR::TreeTop *prevTT = tt1->getPrevTreeTop();
            if (node1->getNumChildren()>0)
               {
               // anchor the return value
               TR::Node *ttNode = TR::Node::create(TR::treetop, 1, node1->getFirstChild());
               TR::TreeTop *anchorTree = TR::TreeTop::create(self()->comp(), ttNode);
               prevTT->join(anchorTree);
               anchorTree->join(tt1);
               prevTT = anchorTree;
               }

            TR::Node *asyncNode = TR::Node::createWithSymRef(node, TR::asynccheck, 0,
               self()->comp()->getSymRefTab()->findOrCreateAsyncCheckSymbolRef(self()->comp()->getMethodSymbol()));
            TR::TreeTop *asyncTree = TR::TreeTop::create(self()->comp(), asyncNode);
            prevTT->join(asyncTree);
            asyncTree->join(tt1);
            treeTop = tt1->getNextTreeTop();
#if 0
            // Asynccheck's need to be at the beginning of blocks
            TR::Block * returnBlock = block->split(tt1, cfg);
            treeTop = tt1->getNextTreeTop();
            TR::Node *asyncNode = TR::Node::createWithSymRef(node, TR::asynccheck, 1, 0,
               comp()->getSymRefTab()->findOrCreateAsyncCheckSymbolRef(comp()->getMethodSymbol()));
            TR::TreeTop *asyncTree = TR::TreeTop::create(comp(), asyncNode);

            returnBlock->prepend(asyncTree);
#endif
            }
         }
      }
   }

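// Build and insert (after insertionPoint) a treetop that stores loadNode into
// arrayAddressNode + firstOffset + secondOffset through a generic int shadow
// symbol reference, widening the offset to 64 bits on 64-bit targets.
// Returns the newly created treetop.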
TR::TreeTop *
J9::CodeGenerator::genSymRefStoreToArray(
      TR::Node* refNode,
      TR::Node* arrayAddressNode,
      TR::Node* firstOffset,
      TR::Node* loadNode,
      int32_t secondOffset,
      TR::TreeTop* insertionPoint)
   {
   TR::Node* offsetNode;

   if (firstOffset)
      offsetNode = TR::Node::create(TR::iadd, 2,
         firstOffset,
         TR::Node::create(refNode, TR::iconst, 0, secondOffset));
   else
      offsetNode = TR::Node::create(refNode, TR::iconst, 0, secondOffset);

   if (self()->comp()->target().is64Bit())
      {
      offsetNode = TR::Node::create(TR::i2l, 1, offsetNode);
      }

   TR::Node* addrNode = TR::Node::create(self()->comp()->target().is64Bit()?TR::aladd:TR::aiadd,
      2, arrayAddressNode, offsetNode);
   TR::Node* storeNode =
      TR::Node::createWithSymRef(self()->comp()->il.opCodeForIndirectStore(loadNode->getDataType()), 2, 2,
         addrNode, loadNode,
         self()->symRefTab()->findOrCreateGenericIntShadowSymbolReference(0));
   TR::TreeTop* storeTreeTop = TR::TreeTop::create(self()->comp(), storeNode);
   insertionPoint->insertTreeTopsAfterMe(storeTreeTop);
   return storeTreeTop;
   }

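// Collect, into symRefs, the symbol reference (and its use-def aliases) of every
// variable load under node. Returns false as soon as a node with a symbol
// reference that is not a variable load is found, i.e. when the subtree is not
// safe to treat as a pure sequence of loads.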
bool
J9::CodeGenerator::collectSymRefs(
      TR::Node *node,
      TR_BitVector *symRefs,
      vcount_t visitCount)
   {
   if (node->getVisitCount() >= visitCount)
      return true;

   node->setVisitCount(visitCount);

   if (node->getOpCode().hasSymbolReference())
      {
      if (node->getOpCode().isLoadVar())
         {
         TR::SymbolReference *symRef = node->getSymbolReference();
         symRef->getUseDefAliases().getAliasesAndUnionWith(*symRefs);

         symRefs->set(symRef->getReferenceNumber());
         }
      else
         return false;
      }

   for (int32_t i = 0; i < node->getNumChildren(); ++i)
      {
      TR::Node *child = node->getChild(i);
      if (!self()->collectSymRefs(child, symRefs, visitCount))
         return false;
      }

   return true;
   }

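// Return true when this virtual guard node is expected to be generated as a
// NOP-able guard: it must be a nopable inline / HCR / OSR guard on a platform
// that supports virtual guard NOPing, the guard must actually require NOPing
// (or not be removable), and the branch must be one of the expected
// not-equal compares (ificmpne / ifacmpne / iflcmpne).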
bool
J9::CodeGenerator::willGenerateNOPForVirtualGuard(TR::Node *node)
   {
   TR::Compilation *comp = self()->comp();

   if (!(node->isNopableInlineGuard() || node->isHCRGuard() || node->isOSRGuard())
         || !self()->getSupportsVirtualGuardNOPing())
      return false;

   TR_VirtualGuard *virtualGuard = comp->findVirtualGuardInfo(node);

   if (!((comp->performVirtualGuardNOPing() || node->isHCRGuard() || node->isOSRGuard() || self()->needClassAndMethodPointerRelocations()) &&
         comp->isVirtualGuardNOPingRequired(virtualGuard)) &&
         virtualGuard->canBeRemoved())
      return false;

   if ( node->getOpCodeValue() != TR::ificmpne
         && node->getOpCodeValue() != TR::ifacmpne
         && node->getOpCodeValue() != TR::iflcmpne)
      {
      // not expecting reversed comparison
      // Raise an assume if the optimizer requested that this virtual guard must be NOPed
      //
      TR_ASSERT(virtualGuard->canBeRemoved(), "virtualGuardHelper: a non-removable virtual guard cannot be NOPed");

      return false;
      }

   return true;
   }

/** \brief
3979
 * This codegen phase walks the blocks in the CFG looking for virtual guards that perform a TR_MethodTest
 * and guard an inlined interface call.
3981
*
3982
* \details
3983
 * A virtual guard performing a TR_MethodTest looks like the following:
3984
* n1n BBStart <block_X>
3985
* ...
3986
* n2n ifacmpne goto -> nXXn
3987
* n3n aloadi <offset of inlined method in VTable>
3988
* n4n aload <vft>
3989
* n5n aconst <J9Method of inlined method>
3990
* n6n BBEnd <block_X>
3991
 * For a virtual dispatch sequence this check is known to be safe, but for an interface call the classes implementing
 * the interface can have VTables of different sizes. That makes the check above unsafe when the VTable of the receiver's
 * class is smaller: the load in n3n then points at a garbage location, which can cause a segmentation fault if that
 * location is not memory mapped, or can cause wrong code to be executed if it happens to contain the J9Method pointer
 * of the same inlined method.
 * For virtual guards of this kind that are not NOP'd we therefore need a range check to make sure the address we are
 * about to access points at a valid VTable slot. There are two main places the range check could be added. The first is
 * when the virtual guard is created; the downside is that every optimization that moves guards around (for example loop
 * versioner and virtual guard head merger) would also have to move the range check. The other way, used here, is to scan
 * for guards of this type after optimization is finished, in this codegen phase, and add the range check test then.
 * At the end of this function, the trees around the method test look like this:
4003
* BBStart <block_X>
4004
* ...
4005
* ifacmple goto nXXn
4006
* aloadi <offset of VTableHeader.size from J9Class*>
4007
* aload <vft>
4008
* aconst <Index of the inlined method in VTable of class of inlined method>
4009
* BBEnd <block_X>
4010
*
4011
* BBStart <block_Y>
4012
* ifacmpne goto -> nXXn
4013
* aloadi <offset of inlined method in VTable>
4014
* aload <vft>
4015
* aconst <J9Method of inlined method>
4016
* BBEnd <block_Y>
4017
*/
4018
void
4019
J9::CodeGenerator::fixUpProfiledInterfaceGuardTest()
4020
{
4021
TR::Compilation *comp = self()->comp();
4022
TR::CFG * cfg = comp->getFlowGraph();
4023
TR::NodeChecklist checklist(comp);
4024
for (TR::AllBlockIterator iter(cfg, comp); iter.currentBlock() != NULL; ++iter)
4025
{
4026
TR::Block *block = iter.currentBlock();
4027
TR::TreeTop *treeTop = block->getLastRealTreeTop();
4028
TR::Node *node = treeTop->getNode();
4029
if (node->getOpCode().isIf() && node->isTheVirtualGuardForAGuardedInlinedCall() && !checklist.contains(node))
4030
{
4031
TR_VirtualGuard *vg = comp->findVirtualGuardInfo(node);
4032
// Only virtual guards that perform a TR_MethodTest and will not be NOP'd need the range check.
4033
if (vg && vg->getTestType() == TR_MethodTest && !(self()->willGenerateNOPForVirtualGuard(node)))
4034
{
4035
TR::SymbolReference *callSymRef = vg->getSymbolReference();
4036
TR_ASSERT_FATAL(callSymRef != NULL, "Guard n%dn for the inlined call should have stored symbol reference for the call", node->getGlobalIndex());
4037
if (callSymRef->getSymbol()->castToMethodSymbol()->isInterface())
4038
{
4039
TR::DebugCounter::incStaticDebugCounter(comp, TR::DebugCounter::debugCounterName(comp, "profiledInterfaceTest/({%s}{%s})", comp->signature(), comp->getHotnessName(comp->getMethodHotness())));
4040
dumpOptDetails(comp, "Need to add a rangecheck before n%dn in block_%d\n",node->getGlobalIndex(), block->getNumber());
4041
4042
// We need a VFT load of the receiver object to read VTableHeader.size for the range check. Since this runs during the
// codegen phase, the only concrete way to get at that information is through the aloadi child of the guard, whose single
// child is the VFT load of the receiver. We could instead have anchored the aloadi under a treetop when the virtual guard
// was created during inlining and read the information from that treetop, but then every optimization that moves virtual
// guards around would have to keep that treetop together with the guard, which would be very difficult to enforce.
// The children of a virtual guard are self contained, and it is very unlikely that other optimizations will manipulate
// them; it is also very unlikely that another aloadi with the same VTable offset on the same receiver exists, so this
// child is not commoned and has a single reference under the guard, which means splitting the block will not spill it
// to a temp slot. In the rare case where the child of the virtual guard has been manipulated, an illegal memory
// reference load would already have occurred before the guard (which is already a bug, as described above), and it is
// safer to fail the compilation.
4052
TR::Node *vTableLoad = node->getFirstChild();
4053
if (!(vTableLoad->getOpCodeValue() == TR::aloadi && comp->getSymRefTab()->isVtableEntrySymbolRef(vTableLoad->getSymbolReference())))
4054
comp->failCompilation<TR::CompilationException>("Abort compilation as Virtual Guard has generated illegal memory reference");
4055
TR::Node *vTableSizeOfReceiver = NULL;
4056
TR::Node *rangeCheckTest = NULL;
4057
if (self()->comp()->target().is64Bit())
4058
{
4059
vTableSizeOfReceiver = TR::Node::createWithSymRef(TR::lloadi, 1, 1, vTableLoad->getFirstChild(),
4060
comp->getSymRefTab()->findOrCreateVtableEntrySymbolRef(comp->getMethodSymbol(),
4061
sizeof(J9Class)+ offsetof(J9VTableHeader, size)));
4062
rangeCheckTest = TR::Node::createif(TR::iflcmple, vTableSizeOfReceiver,
4063
TR::Node::lconst(node, (vTableLoad->getSymbolReference()->getOffset() - sizeof(J9Class) - sizeof(J9VTableHeader)) / sizeof(UDATA)) ,
4064
node->getBranchDestination());
4065
}
4066
else
4067
{
4068
vTableSizeOfReceiver = TR::Node::createWithSymRef(TR::iloadi, 1, 1, vTableLoad->getFirstChild(),
4069
comp->getSymRefTab()->findOrCreateVtableEntrySymbolRef(comp->getMethodSymbol(),
4070
sizeof(J9Class)+ offsetof(J9VTableHeader, size)));
4071
rangeCheckTest = TR::Node::createif(TR::ificmple, vTableSizeOfReceiver,
4072
TR::Node::iconst(node, (vTableLoad->getSymbolReference()->getOffset() - sizeof(J9Class) - sizeof(J9VTableHeader)) / sizeof(UDATA)) ,
4073
node->getBranchDestination());
4074
}
4075
TR::TreeTop *rangeTestTT = TR::TreeTop::create(comp, treeTop->getPrevTreeTop(), rangeCheckTest);
4076
TR::Block *newBlock = block->split(treeTop, cfg, false, false);
4077
cfg->addEdge(block, node->getBranchDestination()->getEnclosingBlock());
4078
newBlock->setIsExtensionOfPreviousBlock();
4079
if (node->getNumChildren() == 3)
4080
{
4081
TR::Node *currentBlockGlRegDeps = node->getChild(2);
4082
TR::Node *exitGlRegDeps = TR::Node::create(TR::GlRegDeps, currentBlockGlRegDeps->getNumChildren());
4083
for (int i = 0; i < currentBlockGlRegDeps->getNumChildren(); i++)
4084
{
4085
TR::Node *child = currentBlockGlRegDeps->getChild(i);
4086
exitGlRegDeps->setAndIncChild(i, child);
4087
}
4088
rangeCheckTest->addChildren(&exitGlRegDeps, 1);
4089
}
4090
// Splitting the block above means the actual virtual guard now ends a newly created block, and the AllBlockIterator
// guarantees that it will also visit that new block. To avoid examining (and splitting for) the same guard twice,
// record the guard in the NodeChecklist so each guard is processed only once.
4095
checklist.add(node);
4096
}
4097
}
4098
}
4099
}
4100
}
4101
4102
4103
4104
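// Replace loads of parameters in the first (extended) basic block with RegLoad
// nodes attached to the entry BBStart's GlRegDeps, so incoming linkage registers
// can be used directly by GRA. Gives up if prepareForGRA fails or if the first
// block can be reached from elsewhere in the method.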
void
4105
J9::CodeGenerator::allocateLinkageRegisters()
4106
{
4107
if (self()->comp()->isGPUCompilation())
4108
return;
4109
4110
TR::Delimiter d(self()->comp(), self()->comp()->getOptions()->getAnyOption(TR_TraceOptDetails|TR_CountOptTransformations), "AllocateLinkageRegisters");
4111
4112
if (!self()->prepareForGRA())
4113
{
4114
dumpOptDetails(self()->comp(), " prepareForGRA failed -- giving up\n");
4115
return;
4116
}
4117
4118
TR::Block *firstBlock = self()->comp()->getStartBlock();
4119
const int32_t numParms = self()->comp()->getMethodSymbol()->getParameterList().getSize();
4120
4121
if (numParms == 0) return ;
4122
4123
TR_BitVector globalRegsWithRegLoad(self()->getNumberOfGlobalRegisters(), self()->comp()->trMemory(), stackAlloc); // indexed by global register number
4124
TR_BitVector killedParms(numParms, self()->comp()->trMemory(), stackAlloc); // indexed by parm->getOrdinal()
4125
TR::Node **regLoads = (TR::Node**)self()->trMemory()->allocateStackMemory(numParms*sizeof(regLoads[0])); // indexed by parm->getOrdinal() to give the RegLoad for a given parm
4126
memset(regLoads, 0, numParms*sizeof(regLoads[0]));
4127
4128
// If the first block is in a loop, then it can be reached by parm stores in other blocks.
4129
// Conservatively, don't use RegLoads for any parm that is stored anywhere in the method.
4130
//
4131
if (firstBlock->getPredecessors().size() > 1)
4132
{
4133
// Rather than put regStores in all predecessors, we give up.
4134
//
4135
dumpOptDetails(self()->comp(), " First basic block is in a loop -- giving up\n");
4136
return;
4137
}
4138
4139
// Initialize regLoads and usedGlobalRegs from the RegLoads already present on the BBStart node
4140
//
4141
TR::Node *bbStart = self()->comp()->getStartTree()->getNode();
4142
TR_ASSERT(bbStart->getOpCodeValue() == TR::BBStart, "assertion failure");
4143
TR::Node *oldRegDeps = (bbStart->getNumChildren() > 0)? bbStart->getFirstChild() : NULL;
4144
if (oldRegDeps)
4145
{
4146
TR_ASSERT(oldRegDeps->getOpCodeValue() == TR::GlRegDeps, "assertion failure");
4147
for (uint16_t i=0; i < oldRegDeps->getNumChildren(); i++)
4148
{
4149
TR::Node *regLoad = oldRegDeps->getChild(i);
4150
TR_ASSERT(regLoad->getSymbol() && regLoad->getSymbol()->isParm(), "First basic block can have only parms live on entry");
4151
dumpOptDetails(self()->comp(), " Parm %d has RegLoad %s\n", regLoad->getSymbol()->getParmSymbol()->getOrdinal(), self()->comp()->getDebug()->getName(regLoad));
4152
regLoads[regLoad->getSymbol()->getParmSymbol()->getOrdinal()] = regLoad;
4153
if (regLoad->getType().isInt64() && self()->comp()->target().is32Bit() && !self()->use64BitRegsOn32Bit())
4154
{
4155
globalRegsWithRegLoad.set(regLoad->getLowGlobalRegisterNumber());
4156
globalRegsWithRegLoad.set(regLoad->getHighGlobalRegisterNumber());
4157
}
4158
else
4159
{
4160
globalRegsWithRegLoad.set(regLoad->getGlobalRegisterNumber());
4161
}
4162
}
4163
}
4164
if (self()->comp()->getOption(TR_TraceOptDetails))
4165
{
4166
dumpOptDetails(self()->comp(), " Initial globalRegsWithRegLoad: ");
4167
self()->getDebug()->print(self()->comp()->getOptions()->getLogFile(), &globalRegsWithRegLoad);
4168
dumpOptDetails(self()->comp(), "\n");
4169
}
4170
4171
4172
// Recursively replace parm loads with regLoads; create new RegLoads as necessary
4173
//
4174
vcount_t visitCount = self()->comp()->incVisitCount();
4175
int32_t numRegLoadsAdded = 0;
4176
for(TR::TreeTop *tt = firstBlock->getFirstRealTreeTop(); tt; tt = tt->getNextTreeTop())
4177
{
4178
TR::Node *node = tt->getNode();
4179
if (node->getOpCodeValue() == TR::BBStart && !node->getBlock()->isExtensionOfPreviousBlock())
4180
break;
4181
numRegLoadsAdded += self()->changeParmLoadsToRegLoads(node, regLoads, &globalRegsWithRegLoad, killedParms, visitCount);
4182
if (node->getOpCode().isStoreDirect() && node->getSymbol()->isParm())
4183
{
4184
killedParms.set(node->getSymbol()->getParmSymbol()->getOrdinal());
4185
if (self()->comp()->getOption(TR_TraceOptDetails))
4186
{
4187
dumpOptDetails(self()->comp(), " Found store %s\n killedParms is now ", self()->comp()->getDebug()->getName(node));
4188
self()->getDebug()->print(self()->comp()->getOptions()->getLogFile(), &killedParms);
4189
dumpOptDetails(self()->comp(), "\n");
4190
}
4191
}
4192
}
4193
4194
// Make sure all RegLoads are present on the BBStart's regdeps
4195
//
4196
if (numRegLoadsAdded > 0)
4197
{
4198
uint16_t numOldRegDeps = oldRegDeps? oldRegDeps->getNumChildren() : 0;
4199
uint16_t numNewRegDeps = numOldRegDeps + numRegLoadsAdded;
4200
4201
// Create GlRegDeps
4202
//
4203
TR::Node *newRegDeps = TR::Node::create(bbStart, TR::GlRegDeps, numNewRegDeps);
4204
uint16_t childNum=0;
4205
4206
for (int32_t parmNum=0; parmNum < numParms; parmNum++)
4207
if (regLoads[parmNum])
4208
newRegDeps->setAndIncChild(childNum++, regLoads[parmNum]);
4209
4210
// Remove existing regdeps from oldRegDeps
4211
//
4212
for (childNum = 0; childNum < numOldRegDeps; childNum++)
4213
oldRegDeps->getChild(childNum)->decReferenceCount();
4214
4215
// Stick the new regDeps on bbStart
4216
//
4217
bbStart->setAndIncChild(0, newRegDeps);
4218
bbStart->setNumChildren(1);
4219
4220
dumpOptDetails(self()->comp(), " Created new GlRegDeps %s on BBStart %s\n",
4221
self()->comp()->getDebug()->getName(newRegDeps),
4222
self()->comp()->getDebug()->getName(bbStart));
4223
}
4224
}
4225
4226
4227
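// Reorder the children of a commutative packed-decimal add under a store so that
// the load of the store's own location comes first, improving the chance that
// location can be used as an accumulator rather than needing a temporary copy.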
void
J9::CodeGenerator::swapChildrenIfNeeded(TR::Node *store, char *optDetails)
   {
   TR::Node *valueChild = store->getValueChild();

   // swap children to increase the chances of being able to use location "a" as an accumulator instead of needing a temp copy
   //
   // could also do this for other commutative operations -- like pdmul -- but the advantage isn't as clear with multiply,
   // as the relative size of the operands and how the instruction is actually encoded are also important factors in determining the best operand ordering
   // reorder:
   //    pdstore "a"
   //       pdadd
   //          x
   //          pdload "a"
   // to:
   //    pdstore "a"
   //       pdadd
   //          pdload "a"
   //          x
   //
   if (valueChild->getOpCode().isCommutative() && (valueChild->getOpCode().isPackedAdd()))
      {
      if (valueChild->getFirstChild()->getOpCode().isLoadVar() &&
          valueChild->getSecondChild()->getOpCode().isLoadVar() &&
          valueChild->getFirstChild()->getSymbolReference() == valueChild->getSecondChild()->getSymbolReference())
         {
         // avoid continual swapping of this case
         //    pdstore "a"
         //       pdadd
         //          pdload "a"
         //          pdload "a"
         }
      else if (valueChild->getSecondChild()->getOpCode().isLoadVar() &&
               (valueChild->getSecondChild()->getSymbolReference() == store->getSymbolReference()) &&
               !self()->comp()->getOption(TR_DisableBCDArithChildOrdering) &&
               performTransformation(self()->comp(), "%s%s valueChild %s [%s] second child %s [%s] symRef matches store symRef (#%d) so swap children\n",
                  optDetails, store->getOpCode().getName(),valueChild->getOpCode().getName(),
                  valueChild->getName(self()->comp()->getDebug()), valueChild->getSecondChild()->getOpCode().getName(),valueChild->getSecondChild()->getName(self()->comp()->getDebug()),store->getSymbolReference()->getReferenceNumber()))
         {
         valueChild->swapChildren();
         }
      }
   }

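// Recursive helper for allocateLinkageRegisters: walk the subtree and turn each
// qualifying load of a parameter that arrives in a linkage register into an
// xRegLoad on that register (handling 64-bit values in register pairs on 32-bit
// targets and small zLinux aggregates passed by value); record the RegLoads in
// regLoads[] and return how many new ones were created.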
uint16_t
4273
J9::CodeGenerator::changeParmLoadsToRegLoads(TR::Node *node, TR::Node **regLoads, TR_BitVector *globalRegsWithRegLoad, TR_BitVector &killedParms, vcount_t visitCount)
4274
{
4275
if (node->getVisitCount() == visitCount)
4276
{
4277
return 0;
4278
}
4279
else
4280
node->setVisitCount(visitCount);
4281
4282
uint16_t numNewRegLoads = 0;
4283
4284
if (node->getOpCode().isLoadAddr() && node->getOpCode().hasSymbolReference() && node->getSymbol()->isParm())
4285
{
4286
killedParms.set(node->getSymbol()->getParmSymbol()->getOrdinal());
4287
if (self()->comp()->getOption(TR_TraceOptDetails))
4288
{
4289
dumpOptDetails(self()->comp(), " Found loadaddr %s\n killedParms is now ", self()->comp()->getDebug()->getName(node));
4290
self()->getDebug()->print(self()->comp()->getOptions()->getLogFile(), &killedParms);
4291
dumpOptDetails(self()->comp(), "\n");
4292
}
4293
}
4294
4295
if (node->getOpCode().isLoadVar() && node->getSymbol()->isParm())
4296
{
4297
TR::ParameterSymbol *parm = node->getSymbol()->getParmSymbol();
4298
int8_t lri = parm->getLinkageRegisterIndex();
4299
TR::ILOpCodes regLoadOp = self()->comp()->il.opCodeForRegisterLoad(parm->getDataType());
4300
4301
if (regLoads[parm->getOrdinal()] == NULL && lri != -1 && !killedParms.isSet(parm->getOrdinal()))
4302
{
4303
// Transmute this node into a regLoad
4304
4305
if ((node->getType().isInt64() && self()->comp()->target().is32Bit() && !self()->use64BitRegsOn32Bit()))
4306
{
4307
if (self()->getDisableLongGRA())
4308
{
4309
dumpOptDetails(self()->comp(), " GRA not supported for longs; leaving %s unchanged\n", self()->comp()->getDebug()->getName(node));
4310
}
4311
else
4312
{
4313
// Endianness affects how longs are passed
4314
//
4315
int8_t lowLRI, highLRI;
4316
if (self()->comp()->target().cpu.isBigEndian())
4317
{
4318
highLRI = lri;
4319
lowLRI = lri+1;
4320
}
4321
else
4322
{
4323
lowLRI = lri;
4324
highLRI = lri+1;
4325
}
4326
TR_GlobalRegisterNumber lowReg = self()->getLinkageGlobalRegisterNumber(lowLRI, node->getDataType());
4327
TR_GlobalRegisterNumber highReg = self()->getLinkageGlobalRegisterNumber(highLRI, node->getDataType());
4328
4329
if (lowReg != -1 && highReg != -1 && !globalRegsWithRegLoad->isSet(lowReg) && !globalRegsWithRegLoad->isSet(highReg)
4330
&& performTransformation(self()->comp(), "O^O LINKAGE REGISTER ALLOCATION: transforming %s into %s\n", self()->comp()->getDebug()->getName(node), self()->comp()->getDebug()->getName(regLoadOp)))
4331
{
4332
// Both halves are in regs, and both regs are available.
4333
// Transmute load into regload
4334
//
4335
if(parm->getDataType() == TR::Aggregate)
4336
{
4337
dumpOptDetails(self()->comp(), "\tNot doing transformation for parm %p because it is an aggregate.\n",node);
4338
}
4339
else
4340
{
4341
TR::Node::recreate(node, self()->comp()->il.opCodeForRegisterLoad(parm->getDataType()));
4342
node->setLowGlobalRegisterNumber(lowReg);
4343
node->setHighGlobalRegisterNumber(highReg);
4344
4345
// Update state to include the new regLoad
4346
//
4347
regLoads[parm->getOrdinal()] = node;
4348
globalRegsWithRegLoad->set(lowReg);
4349
globalRegsWithRegLoad->set(highReg);
4350
numNewRegLoads++;
4351
}
4352
}
4353
}
4354
}
4355
else if (self()->comp()->target().cpu.isZ() && self()->comp()->target().isLinux() && parm->getDataType() == TR::Aggregate &&
4356
(parm->getSize() <= 2 || parm->getSize() == 4 || parm->getSize() == 8))
4357
{
4358
// On zLinux, aggregates with a size of 1, 2, 4 or 8 bytes are passed by value in registers;
// otherwise they are passed by reference via a buffer.
// Here the in-register value is re-typed according to the aggregate's size.
4361
TR::DataType dt = TR::NoType;
4362
if (parm->getSize() == 8)
4363
dt = (node->getOpCode().isDouble()) ? TR::Double : TR::Int64;
4364
else if (parm->getSize() == 4)
4365
dt = (node->getOpCode().isFloat()) ? TR::Float : TR::Int32;
4366
else if (parm->getSize() == 2)
4367
dt = TR::Int16;
4368
else if (parm->getSize() == 1)
4369
dt = TR::Int8;
4370
4371
// if not 64 bit and data type is 64 bit, need to place it into two registers
4372
if ((self()->comp()->target().is32Bit() && !self()->use64BitRegsOn32Bit()) && dt == TR::Int64)
4373
{
4374
TR_GlobalRegisterNumber lowReg = self()->getLinkageGlobalRegisterNumber(lri+1, dt);
4375
TR_GlobalRegisterNumber highReg = self()->getLinkageGlobalRegisterNumber(lri, dt);
4376
4377
if (lowReg != -1 && highReg != -1 && !globalRegsWithRegLoad->isSet(lowReg) && !globalRegsWithRegLoad->isSet(highReg) &&
4378
performTransformation(self()->comp(), "O^O LINKAGE REGISTER ALLOCATION: transforming aggregate parm %s into xRegLoad\n", self()->comp()->getDebug()->getName(node)))
4379
{
4380
TR::Node::recreate(node, self()->comp()->il.opCodeForRegisterLoad(dt));
4381
4382
node->setLowGlobalRegisterNumber(lowReg);
4383
node->setHighGlobalRegisterNumber(highReg);
4384
4385
globalRegsWithRegLoad->set(lowReg);
4386
globalRegsWithRegLoad->set(highReg);
4387
4388
regLoads[parm->getOrdinal()] = node;
4389
numNewRegLoads++;
4390
}
4391
}
4392
else
4393
{
4394
TR_GlobalRegisterNumber reg = self()->getLinkageGlobalRegisterNumber(lri, dt);
4395
4396
if (reg != -1 && !globalRegsWithRegLoad->isSet(reg) &&
4397
performTransformation(self()->comp(), "O^O LINKAGE REGISTER ALLOCATION: transforming aggregate parm %s into xRegLoad\n", self()->comp()->getDebug()->getName(node)))
4398
{
4399
TR::Node::recreate(node, self()->comp()->il.opCodeForRegisterLoad(dt));
4400
4401
node->setGlobalRegisterNumber(reg);
4402
globalRegsWithRegLoad->set(reg);
4403
4404
regLoads[parm->getOrdinal()] = node;
4405
numNewRegLoads++;
4406
}
4407
}
4408
}
4409
else
4410
{
4411
TR_GlobalRegisterNumber reg = self()->getLinkageGlobalRegisterNumber(parm->getLinkageRegisterIndex(), node->getDataType());
4412
if (reg != -1 && !globalRegsWithRegLoad->isSet(reg)
4413
&& performTransformation(self()->comp(), "O^O LINKAGE REGISTER ALLOCATION: transforming %s into %s\n", self()->comp()->getDebug()->getName(node), self()->comp()->getDebug()->getName(regLoadOp)))
4414
{
4415
// Transmute load into regload
4416
//
4417
if(parm->getDataType() == TR::Aggregate) // for aggregates, must look at node type to determine register type as parm type is still 'aggregate'
4418
{
4419
dumpOptDetails(self()->comp(), "\tNot doing transformation for parm %p because it is an aggregate.\n",node);
4420
}
4421
else
4422
{
4423
TR::Node::recreate(node, self()->comp()->il.opCodeForRegisterLoad(parm->getDataType()));
4424
node->setGlobalRegisterNumber(reg);
4425
4426
// Update state to include the new regLoad
4427
//
4428
regLoads[parm->getOrdinal()] = node;
4429
globalRegsWithRegLoad->set(reg);
4430
numNewRegLoads++;
4431
}
4432
}
4433
}
4434
}
4435
else
4436
{
4437
// We already have a regLoad for this parm.
4438
// It's awkward to common the parm at this point because we'd need a pointer to its parent.
4439
// Let's conservatively do nothing, on the assumption that CSE usually
4440
// commons all the parm loads anyway, so we should rarely hit this
4441
// case.
4442
}
4443
}
4444
else
4445
{
4446
for (int i = 0; i < node->getNumChildren(); i++)
4447
numNewRegLoads += self()->changeParmLoadsToRegLoads(node->getChild(i), regLoads, globalRegsWithRegLoad, killedParms, visitCount);
4448
}
4449
4450
return numNewRegLoads;
4451
}
4452
4453
4454
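// Prepare the IL for instruction selection. When OSR is enabled, loads of locals that are never
// stored are first eliminated from the OSR code blocks. A second walk over the treetops calls
// prepareNodeForInstructionSelection (done as a separate walk because the _register and _label
// fields are unioned members of a node), and a final walk assigns label symbols to branch, jump,
// and switch targets, bumps reference counts on automatics, and, when refined alias sets are
// enabled, records which categories of statics, fields, and array shadows this method may kill so
// that the persistent method info can advertise the corresponding "doesn't kill" properties to
// later compilations.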
void
J9::CodeGenerator::setUpForInstructionSelection()
{
self()->comp()->incVisitCount();

// prepareNodeForInstructionSelection is called during a separate walk of the treetops because
// the _register and _label fields are unioned members of a node. prepareNodeForInstructionSelection
// zeros the _register field while the second for loop sets label fields on destination nodes.
//
TR::TreeTop * tt=NULL, *prev = NULL;

if (self()->comp()->getOption(TR_EnableOSR))
{
TR::Block *block;
for (tt = self()->comp()->getStartTree(); tt; tt = tt->getNextTreeTop())
{
if (tt->getNode()->getOpCodeValue() == TR::BBStart)
{
block = tt->getNode()->getBlock();
if (!block->isOSRCodeBlock())
{
tt = block->getExit();
continue;
}
}
self()->eliminateLoadsOfLocalsThatAreNotStored(tt->getNode(), -1);
}

self()->comp()->incVisitCount();
}

for (tt = self()->comp()->getStartTree(); tt; tt = tt->getNextTreeTop())
{
self()->prepareNodeForInstructionSelection(tt->getNode());
}

bool doRefinedAliasing = self()->enableRefinedAliasSets();

if (doRefinedAliasing)
{
_refinedAliasWalkCollector.methodInfo = TR_PersistentMethodInfo::get(self()->comp());
_refinedAliasWalkCollector.killsEverything = !_refinedAliasWalkCollector.methodInfo;
_refinedAliasWalkCollector.killsAddressStatics = false;
_refinedAliasWalkCollector.killsIntStatics = false;
_refinedAliasWalkCollector.killsNonIntPrimitiveStatics = false;
_refinedAliasWalkCollector.killsAddressFields = false;
_refinedAliasWalkCollector.killsIntFields = false;
_refinedAliasWalkCollector.killsNonIntPrimitiveFields = false;
_refinedAliasWalkCollector.killsAddressArrayShadows = false;
_refinedAliasWalkCollector.killsIntArrayShadows = false;
_refinedAliasWalkCollector.killsNonIntPrimitiveArrayShadows = false;
}

for (tt = self()->comp()->getStartTree(); tt; prev=tt, tt = tt->getNextTreeTop())
{
TR::Node * node = tt->getNode();

if ((node->getOpCodeValue() == TR::treetop) ||
node->getOpCode().isAnchor() ||
node->getOpCode().isCheck())
{
node = node->getFirstChild();
if (node->getOpCode().isResolveCheck() && doRefinedAliasing)
{
_refinedAliasWalkCollector.killsEverything = true;
}
}

TR::ILOpCode & opcode = node->getOpCode();

if (opcode.getOpCodeValue() == TR::BBStart)
{
self()->setCurrentBlock(node->getBlock());
}
else if (opcode.isLoadVarOrStore())
{
TR::Symbol * sym = node->getSymbol();
TR::AutomaticSymbol *local = sym->getAutoSymbol();
if (local)
{
local->incReferenceCount();
}
else if (doRefinedAliasing && !_refinedAliasWalkCollector.killsEverything)
{
if (sym->getStaticSymbol())
{
if (sym->getType().isAddress()) _refinedAliasWalkCollector.killsAddressStatics = true;
else if (sym->getType().isInt32()) _refinedAliasWalkCollector.killsIntStatics = true;
else _refinedAliasWalkCollector.killsNonIntPrimitiveStatics = true;
}
else if (sym->isArrayShadowSymbol())
{
if (sym->getType().isAddress()) _refinedAliasWalkCollector.killsAddressArrayShadows = true;
else if (sym->getType().isInt32()) _refinedAliasWalkCollector.killsIntArrayShadows = true;
else _refinedAliasWalkCollector.killsNonIntPrimitiveArrayShadows = true;
}
else if (sym->getShadowSymbol())
{
if (sym->getType().isAddress()) _refinedAliasWalkCollector.killsAddressFields = true;
else if (sym->getType().isInt32()) _refinedAliasWalkCollector.killsIntFields = true;
else _refinedAliasWalkCollector.killsNonIntPrimitiveFields = true;
}
}
}
else if (opcode.isBranch())
{
if (node->getBranchDestination()->getNode()->getLabel() == NULL)
{
// need to get the label type from the target block for RAS
TR::LabelSymbol * label =
TR::LabelSymbol::create(self()->trHeapMemory(),self(),node->getBranchDestination()->getNode()->getBlock());

node->getBranchDestination()->getNode()->setLabel(label);

}
}
else if (opcode.isJumpWithMultipleTargets() && !opcode.isSwitch())
{
for (auto e = self()->getCurrentBlock()->getSuccessors().begin(); e != self()->getCurrentBlock()->getSuccessors().end(); ++e)
{
if (toBlock((*e)->getTo())->getEntry()!=NULL &&
toBlock((*e)->getTo())->getEntry()->getNode()->getLabel() == NULL)
{
TR::LabelSymbol * label = generateLabelSymbol(self());
toBlock((*e)->getTo())->getEntry()->getNode()->setLabel(label);
}
}
}
else if (opcode.isSwitch())
{
uint16_t upperBound = node->getCaseIndexUpperBound();
for (int i = 1; i < upperBound; ++i)
{
if (node->getChild(i)->getBranchDestination()->getNode()->getLabel() == NULL)
{
TR::LabelSymbol *label = generateLabelSymbol(self());
node->getChild(i)->getBranchDestination()->getNode()->setLabel(label);

}
}
}
else if (opcode.isCall() || opcode.getOpCodeValue() == TR::arraycopy)
{
self()->setUpStackSizeForCallNode(node);

if (doRefinedAliasing)
{
TR::ResolvedMethodSymbol * callSymbol = node->getSymbol()->getResolvedMethodSymbol();
TR_PersistentMethodInfo * callInfo;
if (!_refinedAliasWalkCollector.killsEverything && !opcode.isCallIndirect() && callSymbol &&
(callInfo = TR_PersistentMethodInfo::get(callSymbol->getResolvedMethod())) &&
callInfo->hasRefinedAliasSets())
{
if (!callInfo->doesntKillAddressStatics()) _refinedAliasWalkCollector.killsAddressStatics = true;
if (!callInfo->doesntKillIntStatics()) _refinedAliasWalkCollector.killsIntStatics = true;
if (!callInfo->doesntKillNonIntPrimitiveStatics()) _refinedAliasWalkCollector.killsNonIntPrimitiveStatics = true;
if (!callInfo->doesntKillAddressFields()) _refinedAliasWalkCollector.killsAddressFields = true;
if (!callInfo->doesntKillIntFields()) _refinedAliasWalkCollector.killsIntFields = true;
if (!callInfo->doesntKillNonIntPrimitiveFields()) _refinedAliasWalkCollector.killsNonIntPrimitiveFields = true;
if (!callInfo->doesntKillAddressArrayShadows()) _refinedAliasWalkCollector.killsAddressArrayShadows = true;
if (!callInfo->doesntKillIntArrayShadows()) _refinedAliasWalkCollector.killsIntArrayShadows = true;
if (!callInfo->doesntKillNonIntPrimitiveArrayShadows()) _refinedAliasWalkCollector.killsNonIntPrimitiveArrayShadows = true;
}
else
{
_refinedAliasWalkCollector.killsEverything = true;
}
}

}
else if (opcode.getOpCodeValue() == TR::monent)
{
_refinedAliasWalkCollector.killsEverything = true;
}
}

if (doRefinedAliasing && !_refinedAliasWalkCollector.killsEverything)
{
TR_PersistentMethodInfo *methodInfo = _refinedAliasWalkCollector.methodInfo;

methodInfo->setDoesntKillEverything(true);
if (!_refinedAliasWalkCollector.killsAddressStatics) methodInfo->setDoesntKillAddressStatics(true);
if (!_refinedAliasWalkCollector.killsIntStatics) methodInfo->setDoesntKillIntStatics(true);
if (!_refinedAliasWalkCollector.killsNonIntPrimitiveStatics) methodInfo->setDoesntKillNonIntPrimitiveStatics(true);
if (!_refinedAliasWalkCollector.killsAddressFields) methodInfo->setDoesntKillAddressFields(true);
if (!_refinedAliasWalkCollector.killsIntFields) methodInfo->setDoesntKillIntFields(true);
if (!_refinedAliasWalkCollector.killsNonIntPrimitiveFields) methodInfo->setDoesntKillNonIntPrimitiveFields(true);
if (!_refinedAliasWalkCollector.killsAddressArrayShadows) methodInfo->setDoesntKillAddressArrayShadows(true);
if (!_refinedAliasWalkCollector.killsIntArrayShadows) methodInfo->setDoesntKillIntArrayShadows(true);
if (!_refinedAliasWalkCollector.killsNonIntPrimitiveArrayShadows) methodInfo->setDoesntKillNonIntPrimitiveArrayShadows(true);
}

if (self()->comp()->target().cpu.isX86() && self()->getInlinedGetCurrentThreadMethod())
{
TR::RealRegister *ebpReal = self()->getRealVMThreadRegister();

if (ebpReal)
{
ebpReal->setState(TR::RealRegister::Locked);
ebpReal->setAssignedRegister(ebpReal->getRegister());
}
}
}

bool
J9::CodeGenerator::wantToPatchClassPointer(TR::Compilation *comp,
const TR_OpaqueClassBlock *allegedClassPointer,
const uint8_t *inCodeAt)
{
return TR::CodeGenerator::wantToPatchClassPointer(comp, allegedClassPointer, "in code at", inCodeAt);
}

bool
J9::CodeGenerator::wantToPatchClassPointer(const TR_OpaqueClassBlock *allegedClassPointer, const uint8_t *inCodeAt)
{
return TR::CodeGenerator::wantToPatchClassPointer(self()->comp(), allegedClassPointer, inCodeAt);
}

bool
J9::CodeGenerator::wantToPatchClassPointer(const TR_OpaqueClassBlock *allegedClassPointer, const TR::Node *forNode)
{
return TR::CodeGenerator::wantToPatchClassPointer(self()->comp(), allegedClassPointer, "for node", forNode);
}

bool
J9::CodeGenerator::supportsJitMethodEntryAlignment()
{
return self()->fej9()->supportsJitMethodEntryAlignment();
}

bool
J9::CodeGenerator::mustGenerateSwitchToInterpreterPrePrologue()
{
TR::Compilation *comp = self()->comp();

return comp->usesPreexistence() ||
comp->getOption(TR_EnableHCR) ||
!comp->fej9()->isAsyncCompilation() ||
comp->getOption(TR_FullSpeedDebug);
}

extern void VMgenerateCatchBlockBBStartPrologue(TR::Node *node, TR::Instruction *fenceInstruction, TR::CodeGenerator *cg);

void
J9::CodeGenerator::generateCatchBlockBBStartPrologue(
TR::Node *node,
TR::Instruction *fenceInstruction)
{
if (self()->comp()->fej9vm()->getReportByteCodeInfoAtCatchBlock())
{
// Note we should not use `fenceInstruction` here because it is not the first instruction in this BB. The first
// instruction is a label that incoming branches will target. We will use this label (first instruction in the
// block) in `createMethodMetaData` to populate a list of non-mergeable GC maps so as to ensure the GC map at the
// catch block entry is always present if requested.
node->getBlock()->getFirstInstruction()->setNeedsGCMap();
}

VMgenerateCatchBlockBBStartPrologue(node, fenceInstruction, self());
}

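// Register runtime assumptions for the JNI call sites collected during code generation. For a regular
// in-process compilation each site is registered through TR_PatchJNICallSite on the metadata assumption
// list; for an out-of-process (JITServer) compilation the equivalent information is recorded as a
// SerializedRuntimeAssumption of kind RuntimeAssumptionOnRegisterNative, keyed by the offset of the
// instruction within the binary buffer, so the client can recreate the assumption after compilation.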
void
J9::CodeGenerator::registerAssumptions()
{
for(auto it = self()->getJNICallSites().begin();
it != self()->getJNICallSites().end(); ++it)
{
TR_OpaqueMethodBlock *method = (*it)->getKey()->getPersistentIdentifier();
TR::Instruction *i = (*it)->getValue();
#ifdef J9VM_OPT_JITSERVER
if (self()->comp()->isOutOfProcessCompilation())
{
// For JITServer we need to build a list of assumptions that will be sent to the client at the end of the compilation
intptr_t offset = i->getBinaryEncoding() - self()->getBinaryBufferStart();
SerializedRuntimeAssumption* sar =
new (self()->trHeapMemory()) SerializedRuntimeAssumption(RuntimeAssumptionOnRegisterNative, (uintptr_t)method, offset);
self()->comp()->getSerializedRuntimeAssumptions().push_front(sar);
}
else
#endif // J9VM_OPT_JITSERVER
{
TR_PatchJNICallSite::make(self()->fe(), self()->trPersistentMemory(), (uintptr_t) method, i->getBinaryEncoding(), self()->comp()->getMetadataAssumptionList());
}
}
}

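// The jitAdd*PicToPatchOnClassUnload / jitAdd*PicToPatchOnClassRedefinition helpers below register
// patch sites for locations in the generated code that embed a class pointer. The full-width variants
// patch sizeof(uintptr_t) bytes and the "32Bit" variants patch 4 bytes. For in-process compiles they
// create class unload / class redefinition PIC sites on the metadata assumption list; for JITServer
// compiles the same information is captured as SerializedRuntimeAssumptions (offsets relative to the
// binary buffer start) so the client can create the sites itself. Redefinition sites are only
// registered for non-relocatable compiles, and an unresolved site uses (uintptr_t)-1 as its key.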
void
J9::CodeGenerator::jitAddPicToPatchOnClassUnload(void *classPointer, void *addressToBePatched)
{
#ifdef J9VM_OPT_JITSERVER
if (self()->comp()->isOutOfProcessCompilation())
{
intptr_t offset = (uint8_t*)addressToBePatched - self()->getBinaryBufferStart();
SerializedRuntimeAssumption* sar =
new (self()->trHeapMemory()) SerializedRuntimeAssumption(RuntimeAssumptionOnClassUnload, (uintptr_t)classPointer, offset, sizeof(uintptr_t));
self()->comp()->getSerializedRuntimeAssumptions().push_front(sar);
}
else
#endif // J9VM_OPT_JITSERVER
{
createClassUnloadPicSite(classPointer, addressToBePatched, sizeof(uintptr_t), self()->comp()->getMetadataAssumptionList());
self()->comp()->setHasClassUnloadAssumptions();
}
}

void
J9::CodeGenerator::jitAdd32BitPicToPatchOnClassUnload(void *classPointer, void *addressToBePatched)
{
#ifdef J9VM_OPT_JITSERVER
if (self()->comp()->isOutOfProcessCompilation())
{
intptr_t offset = (uint8_t*)addressToBePatched - self()->getBinaryBufferStart();
SerializedRuntimeAssumption* sar =
new (self()->trHeapMemory()) SerializedRuntimeAssumption(RuntimeAssumptionOnClassUnload, (uintptr_t)classPointer, offset, 4);
self()->comp()->getSerializedRuntimeAssumptions().push_front(sar);
}
else
#endif // J9VM_OPT_JITSERVER
{
createClassUnloadPicSite(classPointer, addressToBePatched, 4, self()->comp()->getMetadataAssumptionList());
self()->comp()->setHasClassUnloadAssumptions();
}
}

void
J9::CodeGenerator::jitAddPicToPatchOnClassRedefinition(void *classPointer, void *addressToBePatched, bool unresolved)
{
if (!self()->comp()->compileRelocatableCode())
{
#ifdef J9VM_OPT_JITSERVER
if (self()->comp()->isOutOfProcessCompilation())
{
TR_RuntimeAssumptionKind kind = unresolved ? RuntimeAssumptionOnClassRedefinitionUPIC : RuntimeAssumptionOnClassRedefinitionPIC;
uintptr_t key = unresolved ? (uintptr_t)-1 : (uintptr_t)classPointer;
intptr_t offset = (uint8_t*)addressToBePatched - self()->getBinaryBufferStart();
SerializedRuntimeAssumption* sar =
new (self()->trHeapMemory()) SerializedRuntimeAssumption(kind, key, offset, sizeof(uintptr_t));
self()->comp()->getSerializedRuntimeAssumptions().push_front(sar);
}
else
#endif // J9VM_OPT_JITSERVER
{
createClassRedefinitionPicSite(unresolved ? (void*)-1 : classPointer, addressToBePatched, sizeof(uintptr_t), unresolved, self()->comp()->getMetadataAssumptionList());
self()->comp()->setHasClassRedefinitionAssumptions();
}
}
}

void
J9::CodeGenerator::jitAdd32BitPicToPatchOnClassRedefinition(void *classPointer, void *addressToBePatched, bool unresolved)
{
if (!self()->comp()->compileRelocatableCode())
{
#ifdef J9VM_OPT_JITSERVER
if (self()->comp()->isOutOfProcessCompilation())
{
TR_RuntimeAssumptionKind kind = unresolved ? RuntimeAssumptionOnClassRedefinitionUPIC : RuntimeAssumptionOnClassRedefinitionPIC;
uintptr_t key = unresolved ? (uintptr_t)-1 : (uintptr_t)classPointer;
intptr_t offset = (uint8_t*)addressToBePatched - self()->getBinaryBufferStart();
SerializedRuntimeAssumption* sar =
new (self()->trHeapMemory()) SerializedRuntimeAssumption(kind, key, offset, 4);
self()->comp()->getSerializedRuntimeAssumptions().push_front(sar);
}
else
#endif // J9VM_OPT_JITSERVER
{
createClassRedefinitionPicSite(unresolved ? (void*)-1 : classPointer, addressToBePatched, 4, unresolved, self()->comp()->getMetadataAssumptionList());
self()->comp()->setHasClassRedefinitionAssumptions();
}
}
}


void
J9::CodeGenerator::createHWPRecords()
{
if (self()->comp()->getPersistentInfo()->isRuntimeInstrumentationEnabled() &&
self()->comp()->getOption(TR_EnableHardwareProfileIndirectDispatch))
{
self()->comp()->fej9()->createHWProfilerRecords(self()->comp());
}
}


TR::Linkage *
J9::CodeGenerator::createLinkageForCompilation()
{
return self()->getLinkage(self()->comp()->getJittedMethodSymbol()->getLinkageConvention());
}


TR::TreeTop *
J9::CodeGenerator::lowerTree(TR::Node *root, TR::TreeTop *treeTop)
{
return self()->fej9()->lowerTree(self()->comp(), root, treeTop);
}


bool
J9::CodeGenerator::needClassAndMethodPointerRelocations()
{
return self()->fej9()->needClassAndMethodPointerRelocations();
}

bool
J9::CodeGenerator::needRelocationsForLookupEvaluationData()
{
return self()->fej9()->needRelocationsForLookupEvaluationData();
}

bool
J9::CodeGenerator::needRelocationsForStatics()
{
return self()->fej9()->needRelocationsForStatics();
}

bool
J9::CodeGenerator::needRelocationsForCurrentMethodPC()
{
return self()->fej9()->needRelocationsForCurrentMethodPC();
}

bool
J9::CodeGenerator::needRelocationsForHelpers()
{
return self()->fej9()->needRelocationsForHelpers();
}

#if defined(J9VM_OPT_JITSERVER)
bool
J9::CodeGenerator::needRelocationsForBodyInfoData()
{
return self()->fej9()->needRelocationsForBodyInfoData();
}

bool
J9::CodeGenerator::needRelocationsForPersistentInfoData()
{
return self()->fej9()->needRelocationsForPersistentInfoData();
}
#endif /* defined(J9VM_OPT_JITSERVER) */

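// Recognized java.util.concurrent.atomic AtomicLong / AtomicLongArray methods that the code generator
// treats as a single group (presumably when deciding whether a specialized atomic code sequence applies).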
bool
J9::CodeGenerator::isMethodInAtomicLongGroup(TR::RecognizedMethod rm)
{
switch (rm)
{
case TR::java_util_concurrent_atomic_AtomicLong_addAndGet:
case TR::java_util_concurrent_atomic_AtomicLongArray_addAndGet:
case TR::java_util_concurrent_atomic_AtomicLongArray_decrementAndGet:
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndAdd:
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndDecrement:
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndIncrement:
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndSet:
case TR::java_util_concurrent_atomic_AtomicLongArray_incrementAndGet:
case TR::java_util_concurrent_atomic_AtomicLong_decrementAndGet:
case TR::java_util_concurrent_atomic_AtomicLong_getAndAdd:
case TR::java_util_concurrent_atomic_AtomicLong_getAndDecrement:
case TR::java_util_concurrent_atomic_AtomicLong_getAndIncrement:
case TR::java_util_concurrent_atomic_AtomicLong_getAndSet:
case TR::java_util_concurrent_atomic_AtomicLong_incrementAndGet:
return true;

default:
return false;
}
}


void
J9::CodeGenerator::trimCodeMemoryToActualSize()
{
uint8_t *bufferStart = self()->getBinaryBufferStart();
size_t actualCodeLengthInBytes = self()->getCodeEnd() - bufferStart;

TR::VMAccessCriticalSection trimCodeMemoryAllocation(self()->comp());
self()->getCodeCache()->trimCodeMemoryAllocation(bufferStart, actualCodeLengthInBytes);
}


void
J9::CodeGenerator::reserveCodeCache()
{
self()->setCodeCache(self()->fej9()->getDesignatedCodeCache(self()->comp()));
if (!self()->getCodeCache()) // Cannot reserve a cache; all are used
{
// We may reach this point if all code caches have been used up.
// If some code caches have some space but cannot be used because they are reserved,
// we will throw an exception in the call to getDesignatedCodeCache.

if (self()->comp()->compileRelocatableCode())
{
self()->comp()->failCompilation<TR::RecoverableCodeCacheError>("Cannot reserve code cache");
}

self()->comp()->failCompilation<TR::CodeCacheError>("Cannot reserve code cache");
}
}

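// Allocate warm and cold code memory for the method body from the reserved code cache. VM access is
// acquired (and the class unload monitor released) around the allocation; the code cache manager may
// substitute a different code cache than the one originally reserved, in which case we record the
// relocatable method code start and switch to the new cache; allocation failure is reported through
// failCompilation with either a fatal or a recoverable code cache error depending on whether the
// caches are full.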
uint8_t *
J9::CodeGenerator::allocateCodeMemoryInner(
uint32_t warmCodeSizeInBytes,
uint32_t coldCodeSizeInBytes,
uint8_t **coldCode,
bool isMethodHeaderNeeded)
{
TR::Compilation *comp = self()->comp();

TR::CodeCache * codeCache = self()->getCodeCache();
if (!codeCache)
{
if (comp->compileRelocatableCode())
{
comp->failCompilation<TR::RecoverableCodeCacheError>("Failed to get current code cache");
}

comp->failCompilation<TR::CodeCacheError>("Failed to get current code cache");
}

TR_ASSERT(codeCache->isReserved(), "Code cache should have been reserved.");

bool hadClassUnloadMonitor;
bool hadVMAccess = self()->fej9()->releaseClassUnloadMonitorAndAcquireVMaccessIfNeeded(comp, &hadClassUnloadMonitor);

uint8_t *warmCode = TR::CodeCacheManager::instance()->allocateCodeMemory(
warmCodeSizeInBytes,
coldCodeSizeInBytes,
&codeCache,
coldCode,
self()->fej9()->needsContiguousCodeAndDataCacheAllocation(),
isMethodHeaderNeeded);

self()->fej9()->acquireClassUnloadMonitorAndReleaseVMAccessIfNeeded(comp, hadVMAccess, hadClassUnloadMonitor);

if (codeCache != self()->getCodeCache())
{
TR_ASSERT(!codeCache || codeCache->isReserved(), "Substitute code cache isn't marked as reserved");
comp->setRelocatableMethodCodeStart(warmCode);
self()->switchCodeCacheTo(codeCache);
}

if (!warmCode)
{
if (jitConfig->runtimeFlags & J9JIT_CODE_CACHE_FULL)
{
comp->failCompilation<TR::CodeCacheError>("Failed to allocate code memory");
}

comp->failCompilation<TR::RecoverableCodeCacheError>("Failed to allocate code memory");
}

TR_ASSERT_FATAL(!((warmCodeSizeInBytes && !warmCode) || (coldCodeSizeInBytes && !coldCode)), "Allocation failed but didn't throw an exception");

return warmCode;
}

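// Build a store that poisons a dead stack slot so stale values are easier to spot when
// TR_PoisonDeadSlots is in effect: address-typed slots are overwritten with NULL and 32/64-bit
// integer slots with the recognizable pattern 0xc1aed1e5; slots of other types are left alone and
// NULL is returned (callers presumably anchor the returned store in the block themselves).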
TR::Node *
J9::CodeGenerator::generatePoisonNode(TR::Block *currentBlock, TR::SymbolReference *liveAutoSymRef)
{
bool poisoned = true;
TR::Node *storeNode = NULL;

if (liveAutoSymRef->getSymbol()->getType().isAddress())
storeNode = TR::Node::createStore(liveAutoSymRef, TR::Node::aconst(currentBlock->getEntry()->getNode(), 0x0));
else if (liveAutoSymRef->getSymbol()->getType().isInt64())
storeNode = TR::Node::createStore(liveAutoSymRef, TR::Node::lconst(currentBlock->getEntry()->getNode(), 0xc1aed1e5));
else if (liveAutoSymRef->getSymbol()->getType().isInt32())
storeNode = TR::Node::createStore(liveAutoSymRef, TR::Node::iconst(currentBlock->getEntry()->getNode(), 0xc1aed1e5));
else
poisoned = false;

TR::Compilation *comp = self()->comp();
if (comp->getOption(TR_TraceCG) && comp->getOption(TR_PoisonDeadSlots))
{
if (poisoned)
{
traceMsg(comp, "POISON DEAD SLOTS --- Live local %d from parent block %d going dead .... poisoning slot with node 0x%x .\n", liveAutoSymRef->getReferenceNumber() , currentBlock->getNumber(), storeNode);
}
else
{
traceMsg(comp, "POISON DEAD SLOTS --- Live local %d of unsupported type from parent block %d going dead .... poisoning skipped.\n", liveAutoSymRef->getReferenceNumber() , currentBlock->getNumber());
}
}

return storeNode;
}

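// Fill in the J9::PrivateLinkage::LinkageInfo word for the compiled body: mark it as a sampling or
// counting body when recompilation is possible, record the distance from the code start to the current
// binary buffer cursor in the reserved word, and store the compilation's return info. The packed word
// is returned to the caller.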
uint32_t
J9::CodeGenerator::initializeLinkageInfo(void *linkageInfoPtr)
{
J9::PrivateLinkage::LinkageInfo *linkageInfo = (J9::PrivateLinkage::LinkageInfo *)linkageInfoPtr;

TR::Recompilation * recomp = self()->comp()->getRecompilationInfo();
if (recomp && recomp->couldBeCompiledAgain())
{
if (recomp->useSampling())
linkageInfo->setSamplingMethodBody();
else
linkageInfo->setCountingMethodBody();
}

linkageInfo->setReservedWord((self()->getBinaryBufferCursor() - self()->getCodeStart()));
linkageInfo->setReturnInfo(self()->comp()->getReturnInfo());

return linkageInfo->getWord();
}

// I need to preserve the type information for monitorenter/exit through
// code generation, but the secondChild is being used for other monitor
// optimizations and I can't find anywhere to stick it on the TR::Node.
// Creating the node with more children doesn't seem to help either.
//
void
J9::CodeGenerator::addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz)
{
_monitorMapping[monNode->getGlobalIndex()] = clazz;
}

TR_OpaqueClassBlock *
J9::CodeGenerator::getMonClass(TR::Node* monNode)
{
auto it = _monitorMapping.find(monNode->getGlobalIndex());
return it != _monitorMapping.end() ? it->second : NULL;
}

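// Answer whether the object a monitor node operates on may be a value-based class or a value type
// (which must not be synchronized on). TR_yes/TR_no are returned when the class recorded via
// addMonClass makes the answer definite; TR_maybe is returned when no class is known or the class
// is not concrete, in which case callers presumably fall back to a runtime check.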
TR_YesNoMaybe
J9::CodeGenerator::isMonitorValueBasedOrValueType(TR::Node* monNode)
{
if (TR::Compiler->om.areValueTypesEnabled() || TR::Compiler->om.areValueBasedMonitorChecksEnabled())
{
TR_OpaqueClassBlock *clazz = self()->getMonClass(monNode);

if (!clazz)
return TR_maybe;

// The java.lang.Object class is only recorded when the monitor object is exactly java.lang.Object, not one of its subclasses
if (clazz == self()->comp()->getObjectClassPointer())
return TR_no;

// J9ClassIsValueType is mutually exclusive to J9ClassHasIdentity
if (!TR::Compiler->om.areValueBasedMonitorChecksEnabled() && TR::Compiler->cls.classHasIdentity(clazz))
return TR_no;

if (!TR::Compiler->cls.isConcreteClass(self()->comp(), clazz))
return TR_maybe;

if (TR::Compiler->cls.isValueBasedOrValueTypeClass(clazz))
return TR_yes;
}
return TR_no;
}

bool
J9::CodeGenerator::isProfiledClassAndCallSiteCompatible(TR_OpaqueClassBlock *profiledClass, TR_OpaqueClassBlock *callSiteMethodClass)
{
/* Check if the profiled class should be allowed to be used for a guarded devirtualization of a particular call site.
A call site can end up with an incompatible profiled class in two ways.
1) The inlining context of this compile might allow for type refinement of a callSite class. If the profiledClass
is from a call chain that differs from the inlining of the current compile, then it's possible that the profiledClass is
incompatible with the refined type at the callSite in this compile. Historically the JIT would go as far as
converting an invokeInterface to an invokeVirtual based on this type refinement, which would result in a crash if the
profiledClass was incompatible. Due to correctness issues, interface->virtual conversions were removed, but we can
still refine the class type for an invokevirtual, resulting in the same profiledClass incompatibility, which can result
in an ineffectual guarded devirtualization but not a crash.
2) With shared classes, a J9ROMClass can be shared among classes of different class-loaders. Since profiling data is keyed
by the bytecode address, the profiled data from all classes sharing the same J9ROMClass will be merged. Because of this,
a profiled class can be derived from the profiling of a method in a class that is incompatible with the call site.
So how do we ensure compatibility?
In most cases an isInstanceOf() check is enough to ensure that the profiled class is compatible, but this can fail when the
callSiteMethodClass is an Interface. This happens when the call site is calling a method of an Abstract class which is
not implemented by the class but is required by an Interface that the Abstract class implements. In such a case the Abstract
class's VFT entries for all unimplemented methods will point at the Interface methods. By default the JIT uses the class of
the VFT entry method to populate the callSiteMethodClass. When the Interface is defined in a parent class-loader, it's
possible for an incompatible profiled class to implement the same parent class-loader Interface and as a result pass the
isInstanceOf() test. Therefore we can only use the isInstanceOf() check when the callSiteMethodClass is not an Interface.

*/
if (!fej9()->isInterfaceClass(callSiteMethodClass) && fej9()->isInstanceOf(profiledClass, callSiteMethodClass, true, true) == TR_yes)
{
return true;
}
return false;
}
