Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/optimizer/IdiomRecognition.cpp
6000 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "optimizer/IdiomRecognition.hpp"
24
25
#include <stdint.h>
26
#include <stdio.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include "codegen/CodeGenerator.hpp"
30
#include "env/FrontEnd.hpp"
31
#include "codegen/RecognizedMethods.hpp"
32
#include "compile/Compilation.hpp"
33
#include "compile/CompilationTypes.hpp"
34
#include "control/Options.hpp"
35
#include "control/Options_inlines.hpp"
36
#include "control/Recompilation.hpp"
37
#include "control/RecompilationInfo.hpp"
38
#include "cs2/bitvectr.h"
39
#include "env/CompilerEnv.hpp"
40
#include "env/StackMemoryRegion.hpp"
41
#include "env/TRMemory.hpp"
42
#include "il/Block.hpp"
43
#include "il/DataTypes.hpp"
44
#include "il/ILOpCodes.hpp"
45
#include "il/ILOps.hpp"
46
#include "il/MethodSymbol.hpp"
47
#include "il/Node.hpp"
48
#include "il/Node_inlines.hpp"
49
#include "il/Symbol.hpp"
50
#include "il/SymbolReference.hpp"
51
#include "il/TreeTop.hpp"
52
#include "il/TreeTop_inlines.hpp"
53
#include "infra/Assert.hpp"
54
#include "infra/BitVector.hpp"
55
#include "infra/Cfg.hpp"
56
#include "infra/Flags.hpp"
57
#include "infra/HashTab.hpp"
58
#include "infra/Link.hpp"
59
#include "infra/List.hpp"
60
#include "infra/SimpleRegex.hpp"
61
#include "infra/TRCfgEdge.hpp"
62
#include "infra/TRCfgNode.hpp"
63
#include "optimizer/IdiomRecognitionUtils.hpp"
64
#include "optimizer/LoopCanonicalizer.hpp"
65
#include "optimizer/Optimization_inlines.hpp"
66
#include "optimizer/OptimizationManager.hpp"
67
#include "optimizer/Optimizer.hpp"
68
#include "optimizer/Structure.hpp"
69
#include "optimizer/TransformUtil.hpp"
70
#include "optimizer/UseDefInfo.hpp"
71
#include "ras/Debug.hpp"
72
#include "omrformatconsts.h"
73
#if defined(J9VM_OPT_JITSERVER)
74
#include "env/JITServerAllocationRegion.hpp"
75
#endif
76
77
// Tag string for this pass; its users lie outside this part of the file.
#define OPT_DETAILS "O^O NEWLOOPREDUCER: "
78
// Compile-time switches (0 = off, 1 = on). STRESS_TEST also feeds EXCLUDE_COLD_LOOP and
// MAX_PREPARED_GRAPH below and enables extra idiom graphs in initializeGraphs.
#define VERBOSE 0
79
#define STRESS_TEST 0
80
#define ALLOW_FAST_VERSIONED_LOOP 1
81
// Evaluates to 1 unless STRESS_TEST is enabled.
#define EXCLUDE_COLD_LOOP (!STRESS_TEST)
82
#define SHOW_CANDIDATES 0
83
84
#define IDIOM_SIZE_FACTOR 15
85
// Capacity of preparedCISCGraphs; a STRESS_TEST build reserves five additional slots.
#define MAX_PREPARED_GRAPH (36+STRESS_TEST*5)
86
// Table of prepared idiom graphs; TR_CISCGraph::initializeGraphs stores into it.
static TR_CISCGraph *preparedCISCGraphs[MAX_PREPARED_GRAPH];
87
// NOTE(review): presumably the number of valid entries in preparedCISCGraphs -- it is
// assigned outside this part of the file; confirm there.
static int32_t numPreparedCISCGraphs;
88
static TR_Hotness minimumHotnessPrepared;
89
90
// Further compile-time switches (0 = off, 1 = on); used outside this part of the file.
#define SHOW_BCINDICES 1
91
#define SHOW_STATISTICS 1
92
93
/************************************/
94
/*********** ************/
95
/*********** TR_CISCNode ************/
96
/*********** ************/
97
/************************************/
98
99
void
100
TR_CISCNode::initializeMembers(uint32_t opc, uint16_t id, int16_t dagId, uint16_t ncfgs, uint16_t nchildren)
101
{
102
initializeLists();
103
_flags.clear();
104
setOpcode(opc);
105
_id = id;
106
_dagId = dagId;
107
_numChildren = nchildren;
108
_numSuccs = ncfgs;
109
_latestDest = 0;
110
_otherInfo = 0;
111
if (_ilOpCode.isStoreDirect()) setIsStoreDirect();
112
switch(opc)
113
{
114
case TR_ahconst:
115
setIsLightScreening();
116
// fall through
117
case TR_allconst:
118
case TR_variable:
119
case TR_variableORconst:
120
case TR_quasiConst:
121
case TR_quasiConst2:
122
case TR_arrayindex:
123
case TR_arraybase:
124
setIsNecessaryScreening();
125
break;
126
}
127
}
128
129
//*****************************************************************************************
130
// It analyzes pOpc and tOpc are equivalent.
131
// Note that it handles wildcard nodes as shown below.
132
//*****************************************************************************************
133
bool
134
TR_CISCNode::isEqualOpc(TR_CISCNode *t)
135
{
136
//TR_ASSERT((int)TR::NumIlOps == TR_variable, "assumption for reducing compilation time");
137
static_assert((int)TR::NumIlOps == TR_variable,
138
"assumption for reducing compilation time");
139
140
int32_t pOpc = _opcode;
141
int32_t tOpc = t->_opcode;
142
143
if (pOpc == tOpc) return true;
144
else if (pOpc > TR::NumIlOps) // Please see the above assumption
145
{
146
switch(pOpc)
147
{
148
case TR_booltable:
149
return (tOpc == TR::Case || t->_ilOpCode.isIf()) && !t->isOutsideOfLoop();
150
case TR_variableORconst:
151
return (tOpc == TR_variable || t->_ilOpCode.isLoadConst());
152
case TR_quasiConst2:
153
if (tOpc == TR::iloadi)
154
return !t->getHeadOfTrNode()->getSymbol()->isArrayShadowSymbol(); // true if non-array
155
// else fall through
156
case TR_quasiConst:
157
return (tOpc == TR_variable || t->_ilOpCode.isLoadConst() || tOpc == TR::arraylength);
158
case TR_iaddORisub:
159
return (tOpc == TR::iadd || tOpc == TR::isub);
160
case TR_conversion:
161
return (t->_ilOpCode.isConversion());
162
case TR_ifcmpall:
163
return (t->_ilOpCode.isIf());
164
case TR_ishrall:
165
return (tOpc == TR::ishr || tOpc == TR::iushr);
166
case TR_bitop1:
167
return (t->_ilOpCode.isAnd() || t->_ilOpCode.isOr() || t->_ilOpCode.isXor());
168
case TR_arrayindex:
169
return (tOpc == TR_variable || tOpc == TR::iadd);
170
case TR_arraybase:
171
return (tOpc == TR_variable || tOpc == TR::aloadi);
172
case TR_ahconst:
173
case TR_allconst:
174
return t->_ilOpCode.isLoadConst();
175
case TR_inbload:
176
return (t->_ilOpCode.isLoadIndirect() && !t->_ilOpCode.isByte());
177
case TR_inbstore:
178
return (t->_ilOpCode.isStoreIndirect() && !t->_ilOpCode.isByte());
179
case TR_indload:
180
return (t->_ilOpCode.isLoadIndirect());
181
case TR_indstore:
182
return (t->_ilOpCode.isStoreIndirect() || tOpc == TR::awrtbari);
183
case TR_ibcload:
184
return (t->_ilOpCode.isLoadIndirect() && (t->_ilOpCode.isByte() || (t->_ilOpCode.isShort() && t->_ilOpCode.isUnsigned())));
185
case TR_ibcstore:
186
return (t->_ilOpCode.isStoreIndirect() && (t->_ilOpCode.isByte() || (t->_ilOpCode.isShort() && t->_ilOpCode.isUnsigned())));
187
}
188
}
189
return false;
190
}
191
192
//*****************************************************************************************
193
// convert opcode to its name for debug print
194
//*****************************************************************************************
195
const char *
196
TR_CISCNode::getName(TR_CISCOps op, TR::Compilation * comp)
197
{
198
if (op < (TR_CISCOps)TR::NumIlOps)
199
{
200
TR::ILOpCode opCode;
201
opCode.setOpCodeValue((enum TR::ILOpCodes)op);
202
return opCode.getName();
203
}
204
switch(op)
205
{
206
case TR_variable: return "Var";
207
case TR_booltable: return "booltable";
208
case TR_entrynode: return "entrynode";
209
case TR_exitnode: return "exitnode";
210
case TR_ahconst: return "ahconst";
211
case TR_allconst: return "constall";
212
case TR_inbload: return "inbload";
213
case TR_inbstore: return "inbstore";
214
case TR_indload: return "indload";
215
case TR_indstore: return "indstore";
216
case TR_ibcload: return "ibcload";
217
case TR_ibcstore: return "ibcstore";
218
case TR_variableORconst: return "variableORconst";
219
case TR_quasiConst: return "quasiConst";
220
case TR_quasiConst2: return "quasiConst2";
221
case TR_iaddORisub: return "iaddORisub";
222
case TR_arrayindex: return "arrayindex";
223
case TR_arraybase: return "arraybase";
224
case TR_conversion: return "conversion";
225
case TR_ifcmpall: return "ifcmpall";
226
case TR_ishrall: return "ishrall";
227
case TR_bitop1: return "bitop1";
228
}
229
return "Unknown";
230
}
231
232
233
//*****************************************************************************************
234
// Debug print to file
235
//*****************************************************************************************
236
void
237
TR_CISCNode::dump(TR::FILE *pOutFile, TR::Compilation * comp)
238
{
239
int32_t i;
240
char buf[256];
241
const char *name = getName((TR_CISCOps)_opcode, comp);
242
if (isValidOtherInfo())
243
{
244
sprintf(buf, "%s %d", name, _otherInfo);
245
}
246
else
247
{
248
sprintf(buf, "%s", name);
249
}
250
traceMsg(comp, "[%p] %3d %2d%c %-11s", this, _id, _dagId, isOutsideOfLoop() ? ' ' : 'L', buf);
251
traceMsg(comp, " [");
252
for (i = 0; i < _numSuccs; i++)
253
{
254
traceMsg(comp, "%d",_succs[i]->_id);
255
if (i < _numSuccs-1) traceMsg(comp, " ");
256
}
257
traceMsg(comp, "]");
258
traceMsg(comp, " [");
259
for (i = 0; i < _numChildren; i++)
260
{
261
traceMsg(comp, "%d",_children[i]->_id);
262
if (i < _numChildren-1) traceMsg(comp, " ");
263
}
264
traceMsg(comp, "]");
265
266
ListIterator<TR_CISCNode> ci(&_chains);
267
TR_CISCNode *n;
268
if (!_chains.isEmpty())
269
{
270
traceMsg(comp, " chains[");
271
for (n = ci.getFirst(); n; n = ci.getNext())
272
{
273
traceMsg(comp, "%d ",n->_id);
274
}
275
traceMsg(comp, "]");
276
}
277
278
if (!_dest.isEmpty())
279
{
280
traceMsg(comp, " dest=");
281
ci.set(&_dest);
282
for (n = ci.getFirst(); n; n = ci.getNext())
283
{
284
traceMsg(comp, "%d ",n->_id);
285
}
286
}
287
288
if (!_hintChildren.isEmpty())
289
{
290
traceMsg(comp, " hint=");
291
ci.set(&_hintChildren);
292
for (n = ci.getFirst(); n; n = ci.getNext())
293
{
294
traceMsg(comp, "%d ",n->_id);
295
}
296
}
297
298
#if 0
299
if (!_preds.isEmpty())
300
{
301
ci.set(&_preds);
302
traceMsg(comp, " preds[");
303
for (n = ci.getFirst(); n; n = ci.getNext())
304
{
305
traceMsg(comp, "%d ",n->_id);
306
}
307
traceMsg(comp, "]");
308
}
309
#endif
310
311
if (isCISCNodeModified())
312
{
313
traceMsg(comp, "\t(Modified)");
314
}
315
316
if (isOptionalNode())
317
{
318
traceMsg(comp, "\t(Optional)");
319
}
320
321
// Print out the TR_Nodes that correspond with the given CISCNode.
322
if (getTrNodeInfo())
323
{
324
if (!getTrNodeInfo()->isEmpty())
325
{
326
traceMsg(comp, "\tTR::Node:[");
327
ListIterator<TrNodeInfo> ni(getTrNodeInfo());
328
for (TrNodeInfo *n = ni.getFirst(); n != NULL; n = ni.getNext())
329
{
330
traceMsg(comp, "%s,",comp->getDebug()->getName((TR::Node*)n->_node));
331
}
332
traceMsg(comp, "]");
333
}
334
}
335
traceMsg(comp, "\n");
336
}
337
338
339
//*****************************************************************************************
340
// Debug print to stdout
341
//*****************************************************************************************
342
void
343
TR_CISCNode::printStdout()
344
{
345
int32_t i;
346
char buf[256];
347
if (isValidOtherInfo())
348
{
349
sprintf(buf, "%d %d", _opcode, _otherInfo);
350
}
351
else
352
{
353
sprintf(buf, "%d", _opcode);
354
}
355
printf("[%p] %3d %2d%c %-11s", this, _id, _dagId, isOutsideOfLoop() ? ' ' : 'L', buf);
356
printf(" [");
357
for (i = 0; i < _numSuccs; i++)
358
{
359
printf("%d",_succs[i]->_id);
360
if (i < _numSuccs-1) printf(" ");
361
}
362
printf("]");
363
printf(" [");
364
for (i = 0; i < _numChildren; i++)
365
{
366
printf("%d",_children[i]->_id);
367
if (i < _numChildren-1) printf(" ");
368
}
369
printf("]");
370
371
ListIterator<TR_CISCNode> ci(&_chains);
372
TR_CISCNode *n;
373
if (!_chains.isEmpty())
374
{
375
printf(" chains[");
376
for (n = ci.getFirst(); n; n = ci.getNext())
377
{
378
printf("%d ",n->_id);
379
}
380
printf("]");
381
}
382
383
if (!_dest.isEmpty())
384
{
385
printf(" dest=");
386
ci.set(&_dest);
387
for (n = ci.getFirst(); n; n = ci.getNext())
388
{
389
printf("%d ",n->_id);
390
}
391
}
392
393
if (!_hintChildren.isEmpty())
394
{
395
printf(" hint=");
396
ci.set(&_hintChildren);
397
for (n = ci.getFirst(); n; n = ci.getNext())
398
{
399
printf("%d ",n->_id);
400
}
401
}
402
403
if (isCISCNodeModified())
404
{
405
printf("\t(Modified)");
406
}
407
408
if (isOptionalNode())
409
{
410
printf("\t(Optional)");
411
}
412
413
printf("\n");
414
}
415
416
417
//*****************************************************************************************
418
// It replaces the successor and maintains the predecessor of the original successor.
419
//*****************************************************************************************
420
void
421
TR_CISCNode::replaceSucc(uint32_t index, TR_CISCNode *to)
422
{
423
TR_ASSERT(index < _numSuccs, "TR_CISCNode::replaceSucc index out of range");
424
TR_CISCNode *old = _succs[index];
425
if (old)
426
{
427
old->_preds.remove(this);
428
}
429
setSucc(index, to);
430
}
431
432
433
//*****************************************************************************************
434
// It replaces the child and maintains the parent of the original child.
435
//*****************************************************************************************
436
void
437
TR_CISCNode::replaceChild(uint32_t index, TR_CISCNode *ch)
438
{
439
TR_ASSERT(index < _numChildren, "TR_CISCNode::replaceChild index out of range");
440
TR_CISCNode *org = _children[index];
441
if (org)
442
{
443
org->_parents.remove(this);
444
}
445
setChild(index, ch);
446
}
447
448
449
450
451
//*****************************************************************************************
452
// To sort out leaf nodes, these functions related to checkParent analyze whether ancestors
453
// of the pattern node and those of the target node are equivalent.
454
//*****************************************************************************************
455
456
// Set to 1 to compile in the traceMsg output of the checkParents routines.
#define DEBUG_CHECKPARENTS 0
457
// Depth constant: base of the trace indentation in the recursive version below, and the
// number of frames in the explicit stack of checkParentsNonRec.
#define DEPTH_CHECKPARENTS 7
458
459
#if 0 // recursive version, just for reference
460
// Disabled reference implementation (never compiled).
// Returns true when every parent pn of this node can be paired with an ancestor of o:
//   - a parent tn of o with an equal opcode (and, unless pn is commutative, with o in the
//     same child position), whose own ancestors match with level-1; or
//   - when tn is a direct store whose child 0 is o and pn is not "child directly
//     connected": this node matched against child 1 of tn with level-1; or
//   - a parent of tn (one more depth) satisfying the first rule with tn in place of o; or
//   - pn is optional and either has no parents, skips the parents check, or itself
//     matches o with level-1.
// When level <= 0 no parents are examined and the result is true.
bool
461
TR_CISCNode::checkParents(TR_CISCNode *const o, const int8_t level)
462
{
463
#if DEBUG_CHECKPARENTS
464
int i;
465
for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");
466
traceMsg(comp(), "checkParents %d:%d...\n", _id, o->_id);
467
#endif
468
if (level > 0)
469
{
470
ListElement<TR_CISCNode> *ple;
471
for (ple = _parents.getListHead(); ple; ple = ple->getNextElement())
472
{
473
TR_CISCNode *const pn = ple->getData();
474
// pIndex: position of this node among pn's children; left at 0 for a commutative pn.
uint16_t pIndex = 0;
475
const bool commutative = pn->isCommutative();
476
if (!commutative)
477
{
478
for (; pIndex < pn->getNumChildren(); pIndex++ )
479
{
480
if (pn->getChild(pIndex) == this) break;
481
}
482
}
483
TR_ASSERT(pIndex < pn->getNumChildren(), "error!");
484
ListElement<TR_CISCNode> *tle;
485
const bool isPnParentsEmpty = pn->_parents.isEmpty();
486
const bool isPnOptional = pn->isOptionalNode();
487
// Try each parent tn of o as the counterpart of pn; "goto find" accepts pn.
for (tle = o->_parents.getListHead(); tle; tle = tle->getNextElement())
488
{
489
TR_CISCNode *const tn = tle->getData();
490
if (pn->isEqualOpc(tn))
491
{
492
if ((commutative || tn->getChild(pIndex) == o) &&
493
pn->checkParents(tn, level-1)) goto find;
494
}
495
else
496
{
497
if (tn->getIlOpCode().isStoreDirect())
498
{
499
if (tn->getChild(0) == o && !pn->isChildDirectlyConnected())
500
{
501
if (this->checkParents(tn->getChild(1), level-1)) goto find;
502
}
503
}
504
else
505
{ /* search one more depth */
506
ListElement<TR_CISCNode> *tcle;
507
for (tcle = tn->_parents.getListHead(); tcle; tcle = tcle->getNextElement())
508
{
509
if (pn->isEqualOpc(tcle->getData()) &&
510
(commutative || tcle->getData()->getChild(pIndex) == tn) &&
511
pn->checkParents(tcle->getData(), level-1)) goto find;
512
}
513
}
514
}
515
}
516
TR_ASSERT(!tle, "error");
517
// No parent of o matched pn; an optional pn may still be skipped over.
if (isPnOptional)
518
{
519
if (isPnParentsEmpty ||
520
pn->isSkipParentsCheck() ||
521
pn->checkParents(o, level-1)) goto find;
522
}
523
#if DEBUG_CHECKPARENTS
524
for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");
525
traceMsg(comp(), "checkParents %d:%d failed due to pid %d\n", _id, o->_id, pn->_id);
526
#endif
527
return false;
528
529
// Reached when pn has been matched; continue with the next parent of this node.
find:;
530
}
531
}
532
#if DEBUG_CHECKPARENTS
533
for (i = DEPTH_CHECKPARENTS-level; --i >= 0; ) traceMsg(comp(), " ");
534
traceMsg(comp(), "checkParents %d:%d succeed\n", _id, o->_id);
535
#endif
536
return true;
537
}
538
#endif
539
540
541
542
//*****************************************************************************************
543
// It recursively marks dead (negligible).
544
//*****************************************************************************************
545
void
546
TR_CISCNode::deadAllChildren()
547
{
548
int32_t i;
549
if (!getParents()->isSingleton() || // this node is multi-referenced or
550
_ilOpCode.canRaiseException() || // any side effectable instructions
551
_ilOpCode.isCall() ||
552
_ilOpCode.isReturn() ||
553
_ilOpCode.isStore() ||
554
_ilOpCode.isBranch()) return;
555
556
setIsNegligible();
557
for (i = _numChildren; --i >= 0; )
558
_children[i]->deadAllChildren();
559
}
560
561
562
563
//*****************************************************************************************
564
// This class is for creating non-recursive version of checkParents.
565
//*****************************************************************************************
566
struct TR_StackForCheckParents
567
{
568
TR_StackForCheckParents() {}
569
TR_StackForCheckParents(TR_CISCNode *p, TR_CISCNode *t,
570
ListElement<TR_CISCNode> *ple, ListElement<TR_CISCNode> *tle, ListElement<TR_CISCNode> *tcle,
571
TR_CISCNode *pn, TR_CISCNode *tn,
572
uint16_t pIndex, int8_t level, int8_t label)
573
{ initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level, label); }
574
575
void initialize(TR_CISCNode *p, TR_CISCNode *t,
576
ListElement<TR_CISCNode> *ple, ListElement<TR_CISCNode> *tle, ListElement<TR_CISCNode> *tcle,
577
TR_CISCNode *pn, TR_CISCNode *tn,
578
uint16_t pIndex, int8_t level, int8_t label)
579
{
580
_p = p;
581
_t = t;
582
_ple = ple;
583
_tle = tle;
584
_tcle = tcle;
585
_pn = pn;
586
_tn = tn;
587
_pIndex = pIndex;
588
_level = level;
589
_label = label;
590
}
591
592
TR_CISCNode *_p;
593
TR_CISCNode *_t;
594
ListElement<TR_CISCNode> *_ple;
595
ListElement<TR_CISCNode> *_tle;
596
ListElement<TR_CISCNode> *_tcle;
597
TR_CISCNode *_pn;
598
TR_CISCNode *_tn;
599
uint16_t _pIndex;
600
int8_t _level;
601
int8_t _label;
602
};
603
604
//*****************************************************************************************
605
// It analyzes whether ancestors of p and those of t are equivalent.
606
// The maximum search depth can be given by "level".
607
// Note: There is a recursive version of this function named "checkParents" above, which
608
// is easier to understand.
609
//
// Implementation: each would-be recursive call saves the current state in
// stackParents[level-1] (together with a resume label 0, 1 or 2), replaces p/t by the
// callee's arguments and jumps to "first". When a sub-check finishes ("final"), the saved
// state is restored; on success control continues at "find", on failure at the saved
// resume label inside the loops.
// NOTE(review): the stack holds DEPTH_CHECKPARENTS frames and is indexed by level-1, so
// "level" is expected not to exceed DEPTH_CHECKPARENTS; this is not checked here.
//*****************************************************************************************
610
bool
611
TR_CISCNode::checkParentsNonRec(TR_CISCNode *p, TR_CISCNode *t, int8_t level, TR::Compilation *comp)
612
{
613
ListElement<TR_CISCNode> *ple = NULL;
614
ListElement<TR_CISCNode> *tle = NULL;
615
ListElement<TR_CISCNode> *tcle = NULL;
616
TR_CISCNode *pn = NULL;
617
TR_CISCNode *tn = NULL;
618
uint16_t pIndex;
619
bool ret;
620
TR_StackForCheckParents stackParents[DEPTH_CHECKPARENTS];
621
TR_StackForCheckParents *st;
622
// Level at entry; reaching it again at "final" means the outermost check has finished.
const int8_t initial_level = level;
623
624
while(true)
625
{
626
// Entry point of one (simulated) call: check the ancestors of p against those of t.
first:
627
TR_CISCNode *parm1 = 0;
628
ret = true;
629
#if DEBUG_CHECKPARENTS
630
int32_t i;
631
for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");
632
traceMsg(comp, "checkParents %d:%d...\n", p->_id, t->_id);
633
#endif
634
if (level > 0)
635
{
636
for (ple = p->_parents.getListHead(); ple; ple = ple->getNextElement())
637
{
638
pn = ple->getData();
639
// pIndex: position of p among pn's children; left at 0 for a commutative pn.
pIndex = 0;
640
if (!pn->isCommutative())
641
{
642
for (; pIndex < pn->getNumChildren(); pIndex++ )
643
{
644
if (pn->getChild(pIndex) == p) break;
645
}
646
}
647
TR_ASSERT(pIndex < pn->getNumChildren(), "error!");
648
for (tle = t->_parents.getListHead(); tle; tle = tle->getNextElement())
649
{
650
tn = tle->getData();
651
// parm1/parm2: arguments of the pending sub-check; parm1 == 0 means "none".
TR_CISCNode *parm2;
652
parm1 = 0;
653
if (pn->isEqualOpc(tn))
654
{
655
if (pn->isCommutative() || tn->getChild(pIndex) == t)
656
{
657
parm1 = pn;
658
parm2 = tn;
659
}
660
}
661
else
662
{
663
if (tn->getIlOpCode().isStoreDirect())
664
{
665
if (tn->getChild(0) == t && !pn->isChildDirectlyConnected())
666
{
667
parm1 = p;
668
parm2 = tn->getChild(1);
669
}
670
}
671
else
672
{ /* search one more depth */
673
for (tcle = tn->_parents.getListHead(); tcle; tcle = tcle->getNextElement())
674
{
675
if (pn->isEqualOpc(tcle->getData()) &&
676
(pn->isCommutative() || tcle->getData()->getChild(pIndex) == tn))
677
{
678
// Simulated call (pn, tcle->getData()); resumes at label0 if it fails.
level--;
679
st = stackParents + level;
680
st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 0);
681
p = pn;
682
t = tcle->getData();
683
goto first;
684
685
label0:;
686
}
687
}
688
}
689
}
690
if (parm1)
691
{
692
// Simulated call (parm1, parm2); resumes at label1 if it fails.
level--;
693
st = stackParents + level;
694
st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 1);
695
p = parm1;
696
t = parm2;
697
goto first;
698
699
label1:;
700
}
701
}
702
TR_ASSERT(!tle, "error");
703
// No parent of t matched pn; an optional pn may still be skipped over.
if (pn->isOptionalNode())
704
{
705
if (!pn->_parents.isEmpty() && !pn->isSkipParentsCheck())
706
{
707
// Simulated call (pn, t); resumes at label2 if it fails.
level--;
708
st = stackParents + level;
709
st->initialize(p, t, ple, tle, tcle, pn, tn, pIndex, level+1, 2);
710
p = pn;
711
//t = t;
712
goto first;
713
714
label2:;
715
}
716
else
717
goto find;
718
}
719
#if DEBUG_CHECKPARENTS
720
for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");
721
traceMsg(comp, "checkParents %d:%d failed due to pid %d\n", p->_id, t->_id, pn->_id);
722
#endif
723
ret = false;
724
goto final;
725
726
// Reached when pn has been matched; continue with the next parent of p.
find:;
727
}
728
}
729
ret = true;
730
#if DEBUG_CHECKPARENTS
731
for (i = initial_level-level; --i >= 0; ) traceMsg(comp, " ");
732
traceMsg(comp, "checkParents %d:%d succeed\n", p->_id, t->_id);
733
#endif
734
// Simulated return: "ret" holds the result of the check that just finished.
final:
735
if (level == initial_level) break; /* exit loop */
736
st = stackParents + level;
737
738
// Restore the state saved by the simulated caller.
p = st->_p;
739
t = st->_t;
740
ple = st->_ple;
741
tle = st->_tle;
742
tcle = st->_tcle;
743
pn = st->_pn;
744
tn = st->_tn;
745
pIndex = st->_pIndex;
746
level = st->_level;
747
parm1 = 0;
748
749
// Success accepts the caller's pn; failure resumes the caller's search where it left off.
if (ret) goto find;
750
else
751
{
752
if (st->_label == 0) goto label0;
753
else if (st->_label == 1) goto label1;
754
else goto label2;
755
}
756
}
757
return ret;
758
}
759
760
761
762
//*****************************************************************************************
763
// Swap successors and reverse the opcode of the branch
764
//*****************************************************************************************
765
void
766
TR_CISCNode::reverseBranchOpCodes()
767
{
768
TR_ASSERT(_opcode < TR::NumIlOps && _ilOpCode.isIf(), "error: not isIf");
769
TR_ASSERT(_numSuccs == 2, "error: _numSuccs != 2");
770
TR_CISCNode *swap = _succs[0];
771
_succs[0] = _succs[1];
772
_succs[1] = swap;
773
setOpcode(_ilOpCode.getOpCodeForReverseBranch());
774
}
775
776
777
//*****************************************************************************************
778
// Return whether this node and all nodes in the "_chains" are in the same loop body.
779
//*****************************************************************************************
780
bool
781
TR_CISCNode::checkDagIdInChains()
782
{
783
uint16_t thisDagID = _dagId;
784
ListIterator<TR_CISCNode> ci(&_chains);
785
TR_CISCNode *c;
786
for (c = ci.getFirst(); c; c = ci.getNext())
787
if (c->_dagId != thisDagID) return false;
788
return true;
789
}
790
791
//*****************************************************************************************
792
// Return TreeTop for getBranchDestination or fallthrough
793
//*****************************************************************************************
794
TR::TreeTop *
795
TR_CISCNode::getDestination(bool isFallThrough)
796
{
797
TR::TreeTop *ret;
798
TR::Node *nRepTrNode = getHeadOfTrNode();
799
if (getOpcode() != nRepTrNode->getOpCodeValue())
800
{
801
TR_ASSERT(getOpcode() == nRepTrNode->getOpCode().getOpCodeForReverseBranch(), "error");
802
isFallThrough = !isFallThrough;
803
}
804
805
if (isFallThrough)
806
{
807
for (ret = getHeadOfTreeTop()->getNextTreeTop();
808
ret->getNode()->getOpCodeValue() != TR::BBStart;
809
ret = ret->getNextTreeTop());
810
}
811
else
812
{
813
ret = nRepTrNode->getBranchDestination();
814
}
815
return ret;
816
}
817
818
819
820
821
/************************************/
822
/*********** ************/
823
/*********** TR_CISCHash ************/
824
/*********** ************/
825
/************************************/
826
// Looks up "key" in the bucket selected by key % _numBuckets and returns the node of the
// first entry with that key, or 0 when the bucket holds no such entry.
TR_CISCNode *TR_CISCHash::find(uint64_t key)
   {
   TR_ASSERT(_numBuckets > 0, "error: _numBuckets == 0!");

   const uint32_t slot = key % _numBuckets;
   struct HashTableEntry *entry = _buckets[slot];
   while (entry != 0)
      {
      if (entry->_key == key)
         return entry->_node;
      entry = entry->_next;
      }
   return 0;
   }
837
838
bool
839
TR_CISCHash::add(uint64_t key, TR_CISCNode *node, bool checkExist)
840
{
841
TR_ASSERT(_numBuckets > 0, "error: _numBuckets == 0!");
842
uint32_t index = key % _numBuckets;
843
if (checkExist)
844
{
845
struct HashTableEntry *p;
846
for (p = _buckets[index]; p != 0; p = p->_next)
847
{
848
if (p->_key == key) return false;
849
}
850
}
851
struct HashTableEntry *newEntry = (struct HashTableEntry *)trMemory()->allocateMemory(sizeof(*newEntry), _allocationKind);
852
newEntry->_next = _buckets[index];
853
newEntry->_key = key;
854
newEntry->_node = node;
855
_buckets[index] = newEntry;
856
return true;
857
}
858
859
860
861
862
863
864
865
/********************************************/
866
/*********** ************/
867
/*********** TR_CISCGraphAspects ************/
868
/*********** ************/
869
/********************************************/
870
//*****************************************************************************************
871
// We used this property to exclude those idioms which are unlikely to be matched against
872
// the target loop to limit the extra compilation time.
873
// We can consider the nodes of an idiom, and if a graph is missing any of those nodes,
874
// we already know no topological embedding exists.
875
// For each idiom, we prepared a bit-vector that represents the required IL nodes.
876
// We compare the bit-vector of every idiom graph with that of the target graph to exclude
877
// the unlikely matched idioms.
878
//*****************************************************************************************
879
880
void
881
TR_CISCGraphAspects::setLoadAspects(uint32_t val, bool orExistAccess)
882
{
883
TR_ASSERT(val <= loadMasks, "error!");
884
if (orExistAccess && (val & 0xFF)) val |= existAccess;
885
set(val);
886
}
887
888
void
889
TR_CISCGraphAspects::setStoreAspects(uint32_t val, bool orExistAccess)
890
{
891
TR_ASSERT(val <= loadMasks, "error!");
892
if (orExistAccess && (val & 0xFF)) val |= existAccess;
893
set(val << storeShiftCount);
894
}
895
896
void
897
TR_CISCGraphAspects::modifyAspects()
898
{
899
uint32_t load = getLoadAspects();
900
uint32_t store = getStoreAspects();
901
uint32_t temp = load & store & 0xff;
902
if (temp) set(sameTypeLoadStore);
903
}
904
905
void
906
TR_CISCGraphAspects::print(TR::Compilation *comp, bool noaspects)
907
{
908
traceMsg(comp, "CISCGraph%sAspects is %08x\n",noaspects ? "No" : "", getValue());
909
}
910
911
void
912
TR_CISCGraphAspectsWithCounts::print(TR::Compilation *comp, bool noaspects)
913
{
914
traceMsg(comp, "CISCGraph%sAspects is %08x\n",noaspects ? "No" : "", getValue());
915
traceMsg(comp, "min counts: if=%d, indirectLoad=%d, indirectStore=%d\n",
916
_ifCount, _indirectLoadCount, _indirectStoreCount);
917
}
918
919
void
920
TR_CISCGraphAspectsWithCounts::setAspectsByOpcode(int opc)
921
{
922
switch(opc)
923
{
924
case TR_inbload:
925
setLoadAspects(existAccess | nbValue);
926
incIndirectLoadCount();
927
break;
928
case TR_inbstore:
929
setStoreAspects(existAccess | nbValue);
930
incIndirectStoreCount();
931
break;
932
case TR_ibcload:
933
case TR_indload:
934
setLoadAspects(existAccess);
935
incIndirectLoadCount();
936
break;
937
case TR_ibcstore:
938
case TR_indstore:
939
setStoreAspects(existAccess);
940
incIndirectStoreCount();
941
break;
942
case TR_ifcmpall:
943
incIfCount();
944
break;
945
case TR::ishr:
946
case TR::iushr:
947
case TR::lshr:
948
case TR::lushr:
949
set(shr);
950
break;
951
case TR::lmulh:
952
case TR::imulh:
953
case TR::imul:
954
case TR::lmul:
955
set(mul);
956
break;
957
case TR::irem:
958
case TR::lrem:
959
set(reminder);
960
break;
961
case TR::idiv:
962
case TR::ldiv:
963
set(division);
964
break;
965
case TR::BNDCHK:
966
set(bndchk);
967
break;
968
case TR::isub:
969
set(isub);
970
break;
971
case TR::iadd:
972
set(iadd);
973
break;
974
default:
975
if (opc < TR::NumIlOps)
976
{
977
TR::ILOpCode opCode;
978
opCode.setOpCodeValue((enum TR::ILOpCodes)opc);
979
if (opCode.isLoadIndirect())
980
{
981
setLoadAspects(existAccess | opCode.getSize());
982
incIndirectLoadCount();
983
}
984
else if (opCode.isStoreIndirect())
985
{
986
setStoreAspects(existAccess | opCode.getSize());
987
incIndirectStoreCount();
988
}
989
else if (opCode.isCall())
990
{
991
set(call);
992
}
993
else if (opCode.isIf() || opCode.isSwitch())
994
{
995
incIfCount();
996
}
997
else if (opCode.isAnd() || opCode.isOr() || opCode.isXor())
998
{
999
set(bitop1);
1000
}
1001
}
1002
break;
1003
}
1004
}
1005
1006
1007
/*************************************/
1008
/*********** ************/
1009
/*********** TR_CISCGraph ************/
1010
/*********** ************/
1011
/*************************************/
1012
void
1013
TR_CISCGraph::setEssentialNodes(TR_CISCGraph *tgt)
1014
{
1015
ListIterator<TR_CISCNode> ni(tgt->getNodes());
1016
TR_CISCNode *n;
1017
1018
for (n = ni.getFirst(); n; n = ni.getNext())
1019
{
1020
if (n->getIlOpCode().isStore() ||
1021
n->getIlOpCode().isCall())
1022
{
1023
n->setIsEssentialNode();
1024
}
1025
}
1026
}
1027
1028
static bool graphsInitialized = false;
1029
// Register all idioms at start up.
1030
//
1031
void
1032
TR_CISCGraph::makePreparedCISCGraphs(TR::Compilation *c)
1033
{
1034
if (graphsInitialized)
1035
return;
1036
else
1037
graphsInitialized = true;
1038
1039
#if defined(J9VM_OPT_JITSERVER)
1040
// Prepared CISC graphs are static, i.e. initialized only once.
1041
// Need to use the global allocator here.
1042
if (c->isOutOfProcessCompilation())
1043
{
1044
JITServer::GlobalAllocationRegion globalAllocationRegion(c->fej9()->_compInfoPT);
1045
initializeGraphs(c);
1046
}
1047
else
1048
#endif
1049
{
1050
initializeGraphs(c);
1051
}
1052
}
1053
1054
void
1055
TR_CISCGraph::initializeGraphs(TR::Compilation *c)
1056
{
1057
int32_t num = 0;
1058
bool genTRxx = c->cg()->getSupportsArrayTranslateTRxx();
1059
bool genSIMD = c->cg()->getSupportsVectorRegisters() && !c->getOption(TR_DisableSIMDArrayTranslate);
1060
bool genTRTO255 = c->cg()->getSupportsArrayTranslateTRTO255();
1061
bool genTRTO = c->cg()->getSupportsArrayTranslateTRTO();
1062
bool genTROTNoBreak = c->cg()->getSupportsArrayTranslateTROTNoBreak();
1063
bool genTROT = c->cg()->getSupportsArrayTranslateTROT();
1064
bool genTRT = c->cg()->getSupportsArrayTranslateAndTest();
1065
bool genMemcpy = c->cg()->getSupportsReferenceArrayCopy() || c->cg()->getSupportsPrimitiveArrayCopy();
1066
bool genMemset = c->cg()->getSupportsArraySet();
1067
bool genMemcmp = c->cg()->getSupportsArrayCmp();
1068
bool genIDiv2Mul = c->cg()->getSupportsLoweringConstIDiv();
1069
bool genLDiv2Mul = c->cg()->getSupportsLoweringConstLDiv();
1070
// FIXME: We need getSupportsCountDecimalDigit() like interface
1071
// this idiom is only enabled on 390 for the moment
1072
1073
#if defined(J9VM_OPT_JITSERVER)
1074
// Enabling genDecimal generates the TROT instruction on Z which is currently not
1075
// relocatable for remote compiles. Thus we disable this option for remote compiles for now.
1076
bool genDecimal = c->target().cpu.isZ() && !c->isOutOfProcessCompilation();
1077
#else
1078
bool genDecimal = c->target().cpu.isZ();
1079
#endif /* defined(J9VM_OPT_JITSERVER) */
1080
bool genBitOpMem = c->target().cpu.isZ();
1081
bool is64Bit = c->target().is64Bit();
1082
bool isBig = c->target().cpu.isBigEndian();
1083
int32_t ctrl = (is64Bit ? CISCUtilCtl_64Bit : 0) | (isBig ? CISCUtilCtl_BigEndian : 0);
1084
1085
// THESE ARE NOT GUARANTEED OR TESTED TO WORK ON WCODE.
1086
// Problems encountered include ahSize=0 on WCode leading to hash collision when adding node for make*CISCGraphs.
1087
if (genMemcmp)
1088
{
1089
preparedCISCGraphs[num] = makeMemCmpGraph(c, ctrl);
1090
setEssentialNodes(preparedCISCGraphs[num++]);
1091
preparedCISCGraphs[num] = makeMemCmpIndexOfGraph(c, ctrl);
1092
setEssentialNodes(preparedCISCGraphs[num++]);
1093
preparedCISCGraphs[num] = makeMemCmpSpecialGraph(c, ctrl);
1094
setEssentialNodes(preparedCISCGraphs[num++]);
1095
}
1096
if (genTRT)
1097
{
1098
preparedCISCGraphs[num] = makeTRTGraph(c, ctrl);
1099
setEssentialNodes(preparedCISCGraphs[num++]);
1100
preparedCISCGraphs[num] = makeTRTGraph2(c, ctrl);
1101
setEssentialNodes(preparedCISCGraphs[num++]);
1102
preparedCISCGraphs[num] = makeTRT4NestedArrayGraph(c, ctrl);
1103
setEssentialNodes(preparedCISCGraphs[num++]);
1104
//preparedCISCGraphs[num] = makeTRT4NestedArrayIfGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;
1105
}
1106
if (genMemset)
1107
{
1108
preparedCISCGraphs[num] = makeMemSetGraph(c, ctrl);
1109
setEssentialNodes(preparedCISCGraphs[num++]);
1110
#if STRESS_TEST
1111
preparedCISCGraphs[num] = makeMixedMemSetGraph(c, ctrl);
1112
setEssentialNodes(preparedCISCGraphs[num++]);
1113
#endif
1114
preparedCISCGraphs[num] = makePtrArraySetGraph(c, ctrl);
1115
setEssentialNodes(preparedCISCGraphs[num++]);
1116
// Causes perf degradations on Xalan strlen16 opportunities. SRSTU is only better on long strings.
1117
//preparedCISCGraphs[num] = makeStrlen16Graph(c, ctrl);
1118
//setEssentialNodes(preparedCISCGraphs[num++]);
1119
}
1120
if (genMemcpy)
1121
{
1122
preparedCISCGraphs[num] = makeMemCpyGraph(c, ctrl);
1123
setEssentialNodes(preparedCISCGraphs[num++]);
1124
preparedCISCGraphs[num] = makeMemCpyDecGraph(c, ctrl);
1125
setEssentialNodes(preparedCISCGraphs[num++]);
1126
preparedCISCGraphs[num] = makeMemCpySpecialGraph(c, ctrl);
1127
setEssentialNodes(preparedCISCGraphs[num++]);
1128
preparedCISCGraphs[num] = makeMemCpyByteToCharGraph(c, ctrl);
1129
setEssentialNodes(preparedCISCGraphs[num++]);
1130
preparedCISCGraphs[num] = makeMemCpyByteToCharBndchkGraph(c, ctrl);
1131
setEssentialNodes(preparedCISCGraphs[num++]);
1132
preparedCISCGraphs[num] = makeMemCpyCharToByteGraph(c, ctrl);
1133
setEssentialNodes(preparedCISCGraphs[num++]);
1134
preparedCISCGraphs[num] = makeMEMCPYChar2ByteGraph2(c, ctrl);
1135
setEssentialNodes(preparedCISCGraphs[num++]);
1136
preparedCISCGraphs[num] = makeMEMCPYChar2ByteMixedGraph(c, ctrl);
1137
setEssentialNodes(preparedCISCGraphs[num++]);
1138
// disabled for now
1139
#if STRESS_TEST
1140
preparedCISCGraphs[num] = makeMEMCPYByte2IntGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;
1141
preparedCISCGraphs[num] = makeMEMCPYInt2ByteGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;
1142
#endif
1143
}
1144
1145
if (genTRTO255 || genTRTO || genSIMD || genTRxx)
1146
{
1147
preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 0);
1148
setEssentialNodes(preparedCISCGraphs[num++]);
1149
preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 1);
1150
setEssentialNodes(preparedCISCGraphs[num++]);
1151
preparedCISCGraphs[num] = makeCopyingTRTxGraph(c, ctrl, 2);
1152
setEssentialNodes(preparedCISCGraphs[num++]);
1153
preparedCISCGraphs[num] = makeCopyingTRTxThreeIfsGraph(c, ctrl);
1154
setEssentialNodes(preparedCISCGraphs[num++]);
1155
preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 0);
1156
setEssentialNodes(preparedCISCGraphs[num++]);
1157
preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 1);
1158
setEssentialNodes(preparedCISCGraphs[num++]);
1159
preparedCISCGraphs[num] = makeCopyingTRTOInduction1Graph(c, ctrl, 2);
1160
setEssentialNodes(preparedCISCGraphs[num++]);
1161
1162
}
1163
1164
if (genTROTNoBreak || genTROT || genSIMD || genTRxx)
1165
{
1166
preparedCISCGraphs[num] = makeCopyingTROxGraph(c, ctrl, 0);
1167
setEssentialNodes(preparedCISCGraphs[num++]);
1168
preparedCISCGraphs[num] = makeCopyingTROxGraph(c, ctrl, 1);
1169
setEssentialNodes(preparedCISCGraphs[num++]);
1170
}
1171
1172
if (genTRxx)
1173
{
1174
if (c->getOption(TR_EnableCopyingTROTInduction1Idioms))
1175
{
1176
preparedCISCGraphs[num] = makeCopyingTROTInduction1Graph(c, ctrl, 0);
1177
setEssentialNodes(preparedCISCGraphs[num++]);
1178
preparedCISCGraphs[num] = makeCopyingTROTInduction1Graph(c, ctrl, 1);
1179
setEssentialNodes(preparedCISCGraphs[num++]);
1180
}
1181
preparedCISCGraphs[num] = makeCopyingTROOSpecialGraph(c, ctrl);
1182
setEssentialNodes(preparedCISCGraphs[num++]);
1183
#if STRESS_TEST
1184
preparedCISCGraphs[num] = makeCopyingTRTTSpecialGraph(c, ctrl);
1185
setEssentialNodes(preparedCISCGraphs[num++]);
1186
#endif
1187
if (is64Bit)
1188
{
1189
preparedCISCGraphs[num] = makeCopyingTRTOGraphSpecial(c, ctrl);
1190
setEssentialNodes(preparedCISCGraphs[num++]);
1191
}
1192
preparedCISCGraphs[num] = makeTROTArrayGraph(c, ctrl);
1193
setEssentialNodes(preparedCISCGraphs[num++]);
1194
preparedCISCGraphs[num] = makeTRTOArrayGraph(c, ctrl);
1195
setEssentialNodes(preparedCISCGraphs[num++]);
1196
preparedCISCGraphs[num] = makeTRTOArrayGraphSpecial(c, ctrl);
1197
setEssentialNodes(preparedCISCGraphs[num++]);
1198
}
1199
if (genDecimal)
1200
{
1201
// Needs to be modified
1202
preparedCISCGraphs[num] = makeCountDecimalDigitIntGraph(c, ctrl, genIDiv2Mul);
1203
setEssentialNodes(preparedCISCGraphs[num++]);
1204
preparedCISCGraphs[num] = makeIntToStringGraph(c, ctrl, genIDiv2Mul);
1205
setEssentialNodes(preparedCISCGraphs[num++]);
1206
preparedCISCGraphs[num] = makeCountDecimalDigitLongGraph(c, ctrl, genLDiv2Mul);
1207
setEssentialNodes(preparedCISCGraphs[num++]);
1208
#if STRESS_TEST
1209
preparedCISCGraphs[num] = makeLongToStringGraph(c, ctrl); setEssentialNodes(preparedCISCGraphs[num]); num++;
1210
#endif
1211
}
1212
if (genBitOpMem)
1213
{
1214
preparedCISCGraphs[num] = makeBitOpMemGraph(c, ctrl);
1215
setEssentialNodes(preparedCISCGraphs[num++]);
1216
}
1217
1218
TR_ASSERT(num <= MAX_PREPARED_GRAPH, "incorrect number of graphs!");
1219
numPreparedCISCGraphs = num;
1220
1221
// set minimumHotnessPrepared;
1222
minimumHotnessPrepared = scorching;
1223
for (;--num >= 0;)
1224
{
1225
TR_Hotness hotness = preparedCISCGraphs[num]->getHotness();
1226
if (minimumHotnessPrepared > hotness)
1227
minimumHotnessPrepared = hotness;
1228
}
1229
}
1230
1231
void
1232
TR_CISCGraph::dump(TR::FILE *pOutFile, TR::Compilation * comp)
1233
{
1234
traceMsg(comp, "CISCGraph of %s\n",_titleOfCISC);
1235
_aspects.print(comp, false);
1236
_noaspects.print(comp, true);
1237
ListIterator<TR_CISCNode> ni(getNodes());
1238
TR_CISCNode *n;
1239
1240
#if 1
1241
traceMsg(comp, "!! Note !! Showing reverse order for convenience\n");
1242
TR_ScratchList<TR_CISCNode> reorder(comp->trMemory());
1243
for (n = ni.getFirst(); n; n = ni.getNext())
1244
{
1245
reorder.add(n);
1246
}
1247
ni.set(&reorder);
1248
#endif
1249
traceMsg(comp, " ptr id dagId(L=Loop) succ children (chains) (dest) (hintChildren) (flags) (TRNodeInfo)\n");
1250
for (n = ni.getFirst(); n; n = ni.getNext())
1251
{
1252
n->dump(pOutFile, comp);
1253
}
1254
1255
traceMsg(comp, "\nOrder by Data\n");
1256
ni.set(&_orderByData);
1257
for (n = ni.getFirst(); n; n = ni.getNext())
1258
{
1259
n->dump(pOutFile, comp);
1260
}
1261
}
1262
1263
1264
//*****************************************************************************************
1265
// It registers TR::Block, TR::TreeTop, and TR::Node into TR_CISCNode.
1266
// To find TR_CISCNode by TR_Node, it also adds TR_CISCNode into a hash table.
1267
//*****************************************************************************************
1268
void
1269
TR_CISCGraph::addTrNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)
1270
{
1271
n->addTrNode(block, top, trNode);
1272
bool ret = addTrNode2CISCNode(trNode, n);
1273
TR_ASSERT(ret, "addTrNode2CISCNode returns failure");
1274
}
1275
1276
1277
//*****************************************************************************************
1278
// To find TR_CISCNode by opcode, it adds TR_CISCNode into a hash table.
1279
//*****************************************************************************************
1280
void
1281
TR_CISCGraph::addOpc2CISCNode(TR_CISCNode *n)
1282
{
1283
if (_opc2CISCNode.getNumBuckets() > 0)
1284
{
1285
bool ret;
1286
bool registerNode = false;
1287
switch(n->getOpcode())
1288
{
1289
case TR::lconst:
1290
if (!n->isValidOtherInfo()) break;
1291
// else fall through
1292
case TR_variable:
1293
case TR::sconst:
1294
case TR::iconst:
1295
case TR::bconst:
1296
TR_ASSERT(n->isValidOtherInfo(), "error");
1297
// fall through
1298
case TR_booltable:
1299
case TR_entrynode:
1300
case TR_exitnode:
1301
case TR_ahconst:
1302
case TR_arrayindex:
1303
case TR_arraybase:
1304
registerNode = true;
1305
break;
1306
}
1307
if (registerNode)
1308
{
1309
ret = addOpc2CISCNode(n->getOpcode(), n->isValidOtherInfo(), n->getOtherInfo(), n);
1310
TR_ASSERT(ret, "addOpc2CISCNode returns failure");
1311
}
1312
}
1313
}
1314
1315
1316
//*****************************************************************************************
1317
// Add TR_CISCNode to the graph.
1318
// It also adds its aspects, TR::Block, TR::TreeTop, TR_Node, and hash table.
1319
//*****************************************************************************************
1320
void
1321
TR_CISCGraph::addNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)
1322
{
1323
TR_ASSERT((block == 0 && top == 0 && trNode == 0) ||
1324
(block != 0 && top != 0 && trNode != 0), "error"); // all 0 or all Non-0
1325
_nodes.add(n);
1326
if (isRecordingAspectsByOpcode()) _aspects.setAspectsByOpcode(n);
1327
if (trNode != 0)
1328
{
1329
TR_ASSERT(n->getTrNodeInfo()->isEmpty(), "n->getTrNodeInfo() exists???");
1330
addTrNode(n, block, top, trNode);
1331
}
1332
addOpc2CISCNode(n);
1333
}
1334
1335
1336
//*****************************************************************************************
1337
// Search a store for the node "target" until the node "to"
1338
//*****************************************************************************************
1339
TR_CISCNode *
1340
TR_CISCGraph::searchStore(TR_CISCNode *target, TR_CISCNode *to)
1341
{
1342
TR_CISCNode *v = target;
1343
if (v->isLoadVarDirect()) v = v->getChild(0);
1344
if (v->getOpcode() != TR_variable) return 0;
1345
TR_BitVector visited(getNumNodes(), trMemory());
1346
1347
TR_CISCNode *t = target;
1348
while (true)
1349
{
1350
if (t->isStoreDirect() &&
1351
t->getChild(1) == v) return t;
1352
1353
if (t->getNumSuccs() == 0) break;
1354
visited.set(t->getID());
1355
1356
t = t->getSucc(0);
1357
if (t == target || t == to || t == getExitNode() || visited.isSet(t->getID())) return 0;
1358
}
1359
1360
return 0;
1361
}
1362
1363
1364
/**************************************************/
1365
/*********** ************/
1366
/*********** TR_PCISCGraph ************/
1367
/*********** (PersistentAlloc version) ************/
1368
/*********** ************/
1369
/**************************************************/
1370
void
1371
TR_PCISCGraph::addNode(TR_CISCNode *n, TR::Block *block, TR::TreeTop *top, TR::Node *trNode)
1372
{
1373
TR_ASSERT(block == 0 && top == 0 && trNode == 0, "error"); // all NULL
1374
TR_PersistentList<TR_CISCNode> *p = (TR_PersistentList<TR_CISCNode> *)&_nodes;
1375
p->add(n);
1376
addOpc2CISCNode(n);
1377
}
1378
1379
1380
void
1381
TR_CISCGraph::createDagId2NodesTable()
1382
{
1383
TR_ASSERT(_numDagIds > 0, "TR_CISCGraph::createDagId2NodesTable(), _numDagIds <= 0???");
1384
if (!isNoFragmentDagId()) defragDagId();
1385
uint32_t size = sizeof(*_dagId2Nodes) * _numDagIds;
1386
_dagId2Nodes = (List<TR_CISCNode> *)trMemory()->allocateMemory(size, heapAlloc);
1387
memset(_dagId2Nodes, 0, size);
1388
for (int i = 0; i < _numDagIds; i++) _dagId2Nodes[i].setRegion(trMemory()->heapMemoryRegion());
1389
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1390
TR_CISCNode *n;
1391
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1392
{
1393
int32_t dagId = n->getDagID();
1394
TR_ASSERT(dagId < _numDagIds, "TR_CISCGraph::createDagId2NodesTable(), dagId is out of range");
1395
_dagId2Nodes[dagId].add(n);
1396
}
1397
}
1398
1399
// remove all the BBStart/BBEnd nodes
1400
void
1401
TR_CISCGraph::createOrderByData()
1402
{
1403
_orderByData.init();
1404
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1405
TR_CISCNode *n;
1406
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1407
{
1408
if (n->getNumChildren() > 0 ||
1409
!n->getParents()->isEmpty())
1410
{
1411
_orderByData.add(n);
1412
}
1413
else
1414
{
1415
switch(n->getOpcode())
1416
{
1417
case TR_entrynode:
1418
case TR_exitnode:
1419
_orderByData.add(n);
1420
break;
1421
}
1422
}
1423
}
1424
}
1425
1426
1427
void
1428
TR_PCISCGraph::createDagId2NodesTable()
1429
{
1430
TR_ASSERT(_numDagIds > 0, "TR_PCISCGraph::createDagId2NodesTable(), _numDagIds <= 0???");
1431
if (!isNoFragmentDagId()) defragDagId();
1432
uint32_t size = sizeof(*_dagId2Nodes) * _numDagIds;
1433
_dagId2Nodes = (TR_PersistentList<TR_CISCNode> *)jitPersistentAlloc(size);
1434
memset(_dagId2Nodes, 0, size);
1435
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1436
TR_CISCNode *n;
1437
TR_PersistentList<TR_CISCNode> *list;
1438
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1439
{
1440
int32_t dagId = n->getDagID();
1441
TR_ASSERT(dagId < _numDagIds, "TR_PCISCGraph::createDagId2NodesTable(), dagId is out of range");
1442
list = (TR_PersistentList<TR_CISCNode> *)_dagId2Nodes + dagId;
1443
list->add(n);
1444
}
1445
}
1446
1447
1448
void
1449
TR_PCISCGraph::createOrderByData()
1450
{
1451
TR_PersistentList<TR_CISCNode> *list = (TR_PersistentList<TR_CISCNode> *)&_orderByData;
1452
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1453
TR_CISCNode *n;
1454
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1455
{
1456
if (n->getNumChildren() > 0 ||
1457
!n->getParents()->isEmpty())
1458
{
1459
list->add(n);
1460
}
1461
else
1462
{
1463
switch(n->getOpcode())
1464
{
1465
case TR_entrynode:
1466
case TR_exitnode:
1467
list->add(n);
1468
break;
1469
}
1470
}
1471
}
1472
}
1473
1474
1475
//*****************************************************************************************
1476
// Import UD/DU information of TR::Node to TR_CISCNode._chain
1477
//*****************************************************************************************
1478
void
1479
TR_CISCGraph::importUDchains(TR::Compilation *comp, TR_UseDefInfo *useDefInfo, bool reinitialize)
1480
{
1481
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1482
TR_CISCNode *n;
1483
const int32_t firstUseIndex = useDefInfo->getFirstUseIndex();
1484
1485
if (isSetUDDUchains()) // already done before
1486
{
1487
if (!reinitialize) return;
1488
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1489
{
1490
n->initChains();
1491
}
1492
}
1493
1494
setIsSetUDDUchains();
1495
for (n = nodesLi.getFirst(); n; n = nodesLi.getNext())
1496
{
1497
if (n->isLoadVarDirect())
1498
{
1499
if (n->getHeadOfTrNodeInfo()->_node->getSymbol()->isAutoOrParm())
1500
{
1501
/* set UD-chains to TR_CISCNode._chain */
1502
TR_ASSERT(n->getTrNodeInfo()->isSingleton(), "direct load must correspond to a single TR node");
1503
TR::Node *trNode = n->getTrNodeInfo()->getListHead()->getData()->_node;
1504
int32_t useDefIndex = trNode->getUseDefIndex();
1505
TR_ASSERT(useDefInfo->isUseIndex(useDefIndex), "error!");
1506
TR_UseDefInfo::BitVector info(comp->allocator());
1507
useDefInfo->getUseDef(info, useDefIndex);
1508
TR_ASSERT(!info.IsZero(), "no defs!");
1509
TR_UseDefInfo::BitVector::Cursor cursor(info);
1510
for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
1511
{
1512
int32_t defIndex = cursor;
1513
TR_ASSERT(defIndex < useDefInfo->getNumDefsOnEntry() || useDefInfo->isDefIndex(defIndex), "error!");
1514
TR_CISCNode *defNode = getCISCNode(useDefInfo->getNode(defIndex));
1515
if (!defNode)
1516
{
1517
n->addChain(_entryNode, true);
1518
}
1519
else
1520
{
1521
n->addChain(defNode);
1522
}
1523
}
1524
}
1525
}
1526
else if (n->isStoreDirect())
1527
{
1528
/* set DU-chains to TR_CISCNode._chain */
1529
TR_ASSERT(n->getTrNodeInfo()->isSingleton(), "direct store must correspond to a single TR node");
1530
TR::Node *trNode = n->getTrNodeInfo()->getListHead()->getData()->_node;
1531
int32_t useDefIndex = trNode->getUseDefIndex();
1532
if (useDefIndex == 0) continue;
1533
TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");
1534
TR_UseDefInfo::BitVector info(comp->allocator());
1535
useDefInfo->getUsesFromDef(info, useDefIndex);
1536
if (!info.IsZero())
1537
{
1538
TR_UseDefInfo::BitVector::Cursor cursor(info);
1539
for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
1540
{
1541
int32_t useIndex = (int32_t) cursor + firstUseIndex;
1542
TR_ASSERT(useDefInfo->isUseIndex(useIndex), "error!");
1543
TR_CISCNode *useNode = getCISCNode(useDefInfo->getNode(useIndex));
1544
if (!useNode)
1545
{
1546
n->addChain(_exitNode, true);
1547
}
1548
else
1549
{
1550
n->addChain(useNode);
1551
}
1552
}
1553
}
1554
else
1555
{
1556
n->setIsNegligible();
1557
n->getChild(0)->deadAllChildren();
1558
}
1559
}
1560
else
1561
{
1562
TR_CISCNode *c, *p, *v;
1563
ListIterator<TR_CISCNode> ci(n->getHintChildren());
1564
for (c = ci.getFirst(); c; c = ci.getNext())
1565
{
1566
ListIterator<TR_CISCNode> pi(c->getParents());
1567
for (p = pi.getFirst(); p; p = pi.getNext())
1568
{
1569
if (p->isStoreDirect())
1570
{
1571
v = p->getChild(1); // variable
1572
bool find = false;
1573
for (int i = n->getNumChildren(); --i >= 0; )
1574
if (n->getChild(i) == v)
1575
{
1576
find = true;
1577
break;
1578
}
1579
if (find)
1580
{
1581
// set UD/DU chains
1582
p->addChain(n);
1583
n->addChain(p);
1584
}
1585
}
1586
}
1587
}
1588
}
1589
}
1590
}
1591
1592
1593
1594
//*****************************************************************************************
1595
// Defragment dagIds and set _noFragmentDagId
1596
//*****************************************************************************************
1597
int32_t
1598
TR_CISCGraph::defragDagId()
1599
{
1600
ListIterator<TR_CISCNode> nodesLi(&_nodes);
1601
TR_CISCNode *n;
1602
int32_t curId, newId;
1603
1604
n = nodesLi.getFirst();
1605
newId = 0;
1606
curId = n->getDagID();
1607
for (; n; n = nodesLi.getNext())
1608
{
1609
int32_t nDagId = n->getDagID();
1610
if (curId != nDagId)
1611
{
1612
TR_ASSERT(curId < nDagId, "Error!");
1613
curId = nDagId;
1614
newId++;
1615
}
1616
n->setDagID(newId);
1617
}
1618
newId++;
1619
_numDagIds = newId;
1620
setNoFragmentDagId();
1621
return newId;
1622
}
1623
1624
1625
1626
//*****************************************************************************************
1627
// Set the flag _isOutsideOfLoop to all of the nodes in outside of the loop body
1628
//*****************************************************************************************
1629
void
1630
TR_CISCGraph::setOutsideOfLoopFlag(uint16_t loopBodyDagId)
1631
{
1632
ListIterator<TR_CISCNode> li(&_nodes);
1633
TR_CISCNode *n;
1634
for (n = li.getFirst(); n; n = li.getNext())
1635
{
1636
if (n->getDagID() != loopBodyDagId) n->setOutsideOfLoop();
1637
}
1638
}
1639
1640
1641
1642
/***********************************************/
1643
/*********** ************/
1644
/*********** TR_CFGReversePostOrder ************/
1645
/*********** ************/
1646
/***********************************************/
1647
1648
// do a reverse post-order walk of the CFG
1649
// The result is stored in _revPost.
1650
//
1651
1652
List<TR::CFGNode> *
1653
TR_CFGReversePostOrder::compute(TR::CFG *cfg)
1654
{
1655
createReversePostOrder(cfg, cfg->getEnd());
1656
return &_revPost;
1657
}
1658
1659
void
1660
TR_CFGReversePostOrder::createReversePostOrder(TR::CFG *cfg, TR::CFGNode *n)
1661
{
1662
TR_StackForRevPost *stack;
1663
TR_BitVector *visited = new (cfg->comp()->trStackMemory()) TR_BitVector(cfg->getNextNodeNumber(), cfg->comp()->trMemory(), stackAlloc);
1664
TR_LinkHead<TR_StackForRevPost> stackRevPost;
1665
1666
visited->set(n->getNumber());
1667
auto edge = n->getPredecessors().begin();
1668
while(true)
1669
{
1670
bool alreadyVisited = true;
1671
while(edge != n->getPredecessors().end())
1672
{
1673
TR::CFGNode *predBlock = (*edge)->getFrom();
1674
if (!visited->isSet(predBlock->getNumber()))
1675
{
1676
// push the predBlock onto the stack
1677
//
1678
stack = new (cfg->comp()->trStackMemory()) TR_StackForRevPost();
1679
stack->n = n;
1680
stack->le = ++edge;
1681
stackRevPost.add(stack);
1682
1683
n = predBlock;
1684
visited->set(n->getNumber());
1685
edge = n->getPredecessors().begin();
1686
alreadyVisited = false;
1687
break;
1688
}
1689
++edge;
1690
}
1691
1692
if (alreadyVisited)
1693
{
1694
// add current block to the final list
1695
//
1696
_revPost.append(n);
1697
1698
// pick next block to be processed
1699
//
1700
if (!(stack = stackRevPost.pop()))
1701
break;
1702
n = stack->n;
1703
edge = stack->le;
1704
}
1705
}
1706
}
1707
1708
void
1709
TR_CFGReversePostOrder::dump(TR::Compilation * comp)
1710
{
1711
ListIterator<TR::CFGNode> ni(&_revPost);
1712
TR::CFGNode *n;
1713
traceMsg(comp, "Generated Reverse post order of CFG: ");
1714
for (n = ni.getFirst(); n; n = ni.getNext())
1715
traceMsg(comp, "%d->", n->getNumber());
1716
traceMsg(comp, "\n");
1717
}
1718
1719
#if 0
//*****************************************************************************************
// Recursive versions of the reverse post-order computation.
// They are compiled out for now.
//*****************************************************************************************

// Reset the visited set (sized by the CFG's next node number) and the result list.
void
TR_CFGReversePostOrder::initReversePostOrder()
   {
   _visited.init(_cfg->getNextNodeNumber());
   _revPost.init();
   }


// Remember the CFG to walk, then reset the working state.
void
TR_CFGReversePostOrder::initReversePostOrder(TR::CFG *cfg)
   {
   _cfg = cfg;
   initReversePostOrder();
   }

// Visit n, recurse into every predecessor not visited yet, then append n to _revPost.
void
TR_CFGReversePostOrder::createReversePostOrderRec(TR::CFGNode *n)
   {
   _visited.set(n->getNumber());

   auto predEdge = n->getPredecessors().begin();
   while (predEdge != n->getPredecessors().end())
      {
      TR::CFGNode *pred = (*predEdge)->getFrom();
      if (!_visited.isSet(pred->getNumber()))
         createReversePostOrderRec(pred);
      ++predEdge;
      }

   _revPost.append(n);
   }
#endif
1753
1754
1755
1756
/*********** FOR OPTIMIZATION ************/
1757
1758
/******************************************/
1759
/*********** ************/
1760
/*********** TR_CISCNodeRegion ************/
1761
/*********** ************/
1762
/******************************************/
1763
// Create a new region object in the same memory region, with the same _bvnum and
// flags, and append to it every node held by this region in list order.
TR_CISCNodeRegion *
TR_CISCNodeRegion::clone()
   {
   TR_CISCNodeRegion *copy = new (getRegion()) TR_CISCNodeRegion(_bvnum, getRegion());
   copy->_flags = _flags;

   ListElement<TR_CISCNode> *elem = _pHead;
   while (elem)
      {
      copy->append(elem->getData());
      elem = elem->getNextElement();
      }

   return copy;
   }
1773
1774
1775
/******************************************/
1776
/*********** ************/
1777
/*********** TR_NodeDuplicator ************/
1778
/*********** ************/
1779
/******************************************/
1780
// Walk oldTree and newTree (which must have the same shape) in parallel.
// For each child of oldTree:
//  - if that old child was already paired with a new node, the paired node is
//    installed as the corresponding child of newTree via setAndIncChild();
//  - otherwise the (old child, new child) pair is recorded in _list and the walk
//    recurses into the two children.
// Returns newTree.
TR::Node *
TR_NodeDuplicator::restructureTree(TR::Node *oldTree, TR::Node *newTree)
   {
   TR_ASSERT(oldTree->getNumChildren() == newTree->getNumChildren(), "error");

   for (int childIdx = 0; childIdx < oldTree->getNumChildren(); childIdx++)
      {
      TR::Node *oldChild = oldTree->getChild(childIdx);

      // Search for oldChild among the pairs recorded so far.
      ListElement<TR_Pair<TR::Node,TR::Node> > *hit = _list.getListHead();
      while (hit && hit->getData()->getKey() != oldChild)
         hit = hit->getNextElement();

      if (hit)
         { // found
         newTree->setAndIncChild(childIdx, hit->getData()->getValue());
         continue;
         }

      // Record the pair before recursing so that it is visible to the recursive call.
      TR::Node *newChild = newTree->getChild(childIdx);
      TR_Pair<TR::Node,TR::Node> *fresh = new (trHeapMemory()) TR_Pair<TR::Node,TR::Node>(oldChild, newChild);
      _list.add(fresh);
      restructureTree(oldChild, newChild);
      }

   return newTree;
   }
1811
1812
// Duplicate the tree rooted at org with TR::Node::duplicateTree(), then pass the
// original and the copy through restructureTree(). Returns the copy.
TR::Node *
TR_NodeDuplicator::duplicateTree(TR::Node *org)
   {
   TR::Node *copy = org->duplicateTree();
   restructureTree(org, copy);   // returns its second argument
   return copy;
   }
1818
1819
1820
/******************************************/
1821
/*********** ************/
1822
/*********** TR_UseTreeTopMap ************/
1823
/*********** ************/
1824
/******************************************/
1825
1826
// Remember the compilation and optimizer; the use -> treetop map itself is only
// populated later by buildAllMap().
TR_UseTreeTopMap::TR_UseTreeTopMap(TR::Compilation * comp, TR::Optimizer * optimizer)
   : _useToParentMap(comp->trMemory(), stackAlloc)
   {
   _compilation = comp;
   _optimizer = optimizer;
   _buildAllMap = false;   // map not built yet
   }
1833
1834
int32_t
1835
TR_UseTreeTopMap::buildAllMap()
1836
{
1837
if (_buildAllMap) return 0;
1838
_info = _optimizer->getUseDefInfo();
1839
if (0==_info) return 0;
1840
TR::TreeTop* currentTree = comp()->getStartTree();
1841
_useToParentMap.init(_info->getTotalNodes());
1842
comp()->incVisitCount();
1843
1844
for (; currentTree; currentTree = currentTree->getNextTreeTop())
1845
{
1846
buildUseTreeTopMap(currentTree,currentTree->getNode());
1847
}
1848
1849
_buildAllMap = true;
1850
return 1;
1851
}
1852
1853
/**
1854
* Build a map of use indices to their parent TreeTops (usedef datastructure doesn't have this)
1855
*/
1856
typedef TR_Pair<TR::Node,TR::TreeTop> UseInfo;
1857
void TR_UseTreeTopMap::buildUseTreeTopMap(TR::TreeTop* treeTop,TR::Node *node)
1858
{
1859
vcount_t currCount = comp()->getVisitCount();
1860
if (currCount == node->getVisitCount()) return;
1861
node->setVisitCount(currCount);
1862
1863
for (int32_t childIndex=0;childIndex < node->getNumChildren();++childIndex)
1864
{
1865
TR::Node *childNode = node->getChild(childIndex);
1866
int32_t useIndex = childNode->getUseDefIndex();
1867
if (_info->isUseIndex(useIndex)) // hash it.
1868
{
1869
TR_HashId hashIndex;
1870
TR_ScratchList<UseInfo> *tlist;
1871
if (_useToParentMap.locate(useIndex,hashIndex))
1872
{
1873
tlist = (TR_ScratchList<UseInfo> *)_useToParentMap.getData(hashIndex);
1874
}
1875
else
1876
{
1877
tlist = new (comp()->trStackMemory()) TR_ScratchList<UseInfo>(comp()->trMemory());
1878
_useToParentMap.add(useIndex,hashIndex,tlist);
1879
}
1880
1881
UseInfo * useInfo = new (comp()->trStackMemory()) UseInfo(childNode,treeTop);
1882
tlist->add(useInfo);
1883
}
1884
buildUseTreeTopMap(treeTop,childNode);
1885
}
1886
}
1887
1888
// findParentTreeTop returns the parent TreeTop that contains the given useNode
1889
TR::TreeTop * TR_UseTreeTopMap::findParentTreeTop(TR::Node *useNode)
1890
{
1891
TR_ASSERT(_buildAllMap, "Use Treetop map is not initialized.");
1892
TR_HashId hashId;
1893
int32_t useIndex = useNode->getUseDefIndex();
1894
bool found = _useToParentMap.locate(useIndex,hashId);
1895
TR_ASSERT(found,"No entry exists for %d %x\n",useIndex,hashId);
1896
TR_ScratchList<UseInfo> *list= (TR_ScratchList<UseInfo> *)_useToParentMap.getData(hashId);
1897
ListIterator<UseInfo> listCursor(list);
1898
for (listCursor.getFirst(); listCursor.getCurrent(); listCursor.getNext())
1899
{
1900
UseInfo *useInfoPtr = listCursor.getCurrent();
1901
if (useNode == useInfoPtr->getKey())
1902
return useInfoPtr->getValue();
1903
}
1904
// Parent treetop may not be found if an earlier transformation has removed the useNode.
1905
// In that case, we'll conservatively return NULL.
1906
return NULL;
1907
}
1908
1909
1910
/*******************************************/
1911
/*********** ************/
1912
/*********** TR_CISCTransformer ************/
1913
/*********** ************/
1914
/*******************************************/
1915
// Set up the transformer's lists and flags, allocate the two-element
// _afterInsertionsIdiom array, and make sure the prepared idiom graphs exist.
TR_CISCTransformer::TR_CISCTransformer(TR::OptimizationManager *manager)
   : TR_LoopTransformer(manager),
     _candidatesForShowing(manager->trMemory()),
     _candidateBBStartEnd(0),
     _backPatchList(manager->trMemory()),
     _beforeInsertions(manager->trMemory()),
     _afterInsertions(manager->trMemory()),
     _bblistPred(manager->trMemory()),
     _bblistBody(manager->trMemory()),
     _bblistSucc(manager->trMemory()),
     _candidatesForRegister(manager->trMemory()),
     _useTreeTopMap(manager->comp(), manager->optimizer()),
     _BitsKeepAliveList(manager->trMemory())
   {
   // Two zero-filled ListHeadAndTail objects in heap memory, each then bound to
   // the heap memory region.
   const size_t idiomListBytes = sizeof(ListHeadAndTail<TR::Node>)*2;
   _afterInsertionsIdiom = (ListHeadAndTail<TR::Node> *) trMemory()->allocateHeapMemory(idiomListBytes);
   memset(_afterInsertionsIdiom, 0, idiomListBytes);
   for (int32_t slot = 0; slot < 2; ++slot)
      _afterInsertionsIdiom[slot].setRegion(trMemory()->heapMemoryRegion());

   _lastCFGNode = 0;
   _backPatchList.init();
   _embeddedForData = _embeddedForCFG = 0;
   _flagsForTransformer.clear();
   _loopStructure = NULL;
   if (SHOW_CANDIDATES)
      setShowingCandidates();

   // construct the idiom graphs and register them
   //
   // TO_BE_ENABLED
   TR_CISCGraph::makePreparedCISCGraphs(manager->comp());
   }
1945
1946
//*****************************************************************************************
1947
// return whether the structure is a fast versioned loop
1948
//*****************************************************************************************
1949
bool
1950
TR_CISCTransformer::isInsideOfFastVersionedLoop(TR_RegionStructure *l)
1951
{
1952
TR_RegionStructure *parent = l;
1953
while(true)
1954
{
1955
if (////parent->getVersionedLoop() &&
1956
!parent->getEntryBlock()->isCold()
1957
#if 0 // We should optimize a fail path of value profiling
1958
&& !parent->getEntryBlock()->isRare()
1959
#endif
1960
)
1961
{
1962
return true; // It is inside of the fast versioned loop.
1963
}
1964
TR_Structure *p = parent->getParent();
1965
if (!p || !(parent = p->asRegion())) break;
1966
}
1967
return false;
1968
}
1969
1970
1971
// createLoopCandidates populates the given list with natural loop candidates
1972
// which contains structure information and is not cold. The return value of
1973
// this call dictates whether we found candidates or not.
1974
bool
1975
TR_CISCTransformer::createLoopCandidates(List<TR_RegionStructure> *loopCandidates)
1976
{
1977
bool enableTracing = trace();
1978
1979
1980
loopCandidates->init();
1981
TR_ScratchList<TR_Structure> whileLoops(trMemory());
1982
ListAppender<TR_Structure> whileLoopsInnerFirst(&whileLoops);
1983
TR_ScratchList<TR_Structure> doWhileLoops(trMemory());
1984
ListAppender<TR_Structure> doWhileLoopsInnerFirst(&doWhileLoops);
1985
TR_ScratchList<TR_Structure> *candidate;
1986
comp()->incVisitCount();
1987
1988
detectWhileLoops(whileLoopsInnerFirst, whileLoops, doWhileLoopsInnerFirst, doWhileLoops, _cfg->getStructure(), true);
1989
// join both lists so all loop
1990
// candidates are analyzed
1991
ListElement<TR_Structure> *last = whileLoops.getLastElement();
1992
if (last)
1993
{
1994
last->setNextElement(doWhileLoops.getListHead());
1995
candidate = &whileLoops;
1996
}
1997
else
1998
candidate = &doWhileLoops;
1999
2000
int32_t loopCount = 0;
2001
if (!candidate->isEmpty())
2002
{
2003
if (enableTracing)
2004
traceMsg(comp(), "createLoopCandidates: Evaluating list of loop candidates.\n");
2005
2006
ListIterator<TR_Structure> whileLoopsIt(candidate);
2007
TR_Structure *nextWhileLoop;
2008
2009
for (nextWhileLoop = whileLoopsIt.getFirst(); nextWhileLoop != 0; nextWhileLoop = whileLoopsIt.getNext())
2010
{
2011
TR_RegionStructure *naturalLoop = nextWhileLoop->asRegion();
2012
TR_ASSERT(naturalLoop && naturalLoop->isNaturalLoop(),"CISCGraph, expecting natural loop");
2013
if (!naturalLoop || !naturalLoop->isNaturalLoop())
2014
{
2015
if (trace() && naturalLoop)
2016
traceMsg(comp(), "\tRejected loop %d - not a natural loop?\n", naturalLoop->getNumber());
2017
continue;
2018
}
2019
TR_StructureSubGraphNode *entryGraphNode = naturalLoop->getEntry();
2020
TR_BlockStructure *loopBlockStructure = entryGraphNode->getStructure()->asBlock();
2021
if (!loopBlockStructure)
2022
{
2023
if (enableTracing)
2024
traceMsg(comp(), "\tRejected loop %d - no block structure.\n", naturalLoop->getNumber());
2025
continue;
2026
}
2027
if (!naturalLoop->containsOnlyAcyclicRegions())
2028
{
2029
if (enableTracing)
2030
traceMsg(comp(), "\tRejected loop %d - not inner most loop.\n", naturalLoop->getNumber());
2031
continue; // inner most loop
2032
}
2033
if (loopBlockStructure->getBlock()->isCold())
2034
{
2035
if (enableTracing)
2036
traceMsg(comp(), "\tRejected loop %d - cold loop.\n", naturalLoop->getNumber());
2037
continue; // cold loop
2038
}
2039
loopCount++;
2040
loopCandidates->add(naturalLoop);
2041
2042
if (enableTracing)
2043
traceMsg(comp(), "\tAccepted loop %d as candidate.\n", naturalLoop->getNumber());
2044
}
2045
#if SHOW_STATISTICS
2046
if (showMesssagesStdout() && loopCount)
2047
if (comp()->getMethodHotness() == warm || isAfterVersioning()) printf("!! #Loop=%d\n",loopCount);
2048
#endif
2049
}
2050
2051
if (enableTracing)
2052
traceMsg(comp(), "createLoopCandidates: %d loop candidates found.\n", loopCount);
2053
2054
return !loopCandidates->isEmpty();
2055
}
2056
2057
// prepare the loop for transformation
2058
//
2059
TR::Block *
2060
TR_CISCTransformer::addPreHeaderIfNeeded(TR_RegionStructure *region)
2061
{
2062
TR::Block *loopEntry = region->getEntry()->getStructure()->asBlock()->getBlock();
2063
TR::Block *preHeader = NULL;
2064
for (auto e = loopEntry->getPredecessors().begin(); e != loopEntry->getPredecessors().end(); ++e)
2065
{
2066
TR::Block *predBlock = toBlock((*e)->getFrom());
2067
// ignore backedges
2068
if (region->contains(predBlock->getStructureOf(), region->getParent()))
2069
continue;
2070
2071
if (predBlock->getStructureOf() &&
2072
predBlock->getStructureOf()->isLoopInvariantBlock())
2073
{
2074
preHeader = predBlock;
2075
break;
2076
}
2077
}
2078
2079
if (!preHeader)
2080
{
2081
preHeader = TR::Block::createEmptyBlock(loopEntry->getEntry()->getNode(), comp(), loopEntry->getFrequency(), loopEntry);
2082
_cfg->addNode(preHeader);
2083
TR::Block *prevBlock = loopEntry->getPrevBlock();
2084
if (prevBlock)
2085
prevBlock->getExit()->join(preHeader->getEntry());
2086
preHeader->getExit()->join(loopEntry->getEntry());
2087
_cfg->addEdge(preHeader, loopEntry);
2088
2089
// iterate again and fixup the preds to branch to the
2090
// new pre-header
2091
//
2092
TR_ScratchList<TR::CFGEdge> removedEdges(trMemory());
2093
TR::CFGEdge *e = NULL;
2094
for (auto e = loopEntry->getPredecessors().begin(); e != loopEntry->getPredecessors().end(); ++e)
2095
{
2096
TR::Block *predBlock = toBlock((*e)->getFrom());
2097
// ignore backedges
2098
if (region->contains(predBlock->getStructureOf(), region->getParent()))
2099
continue;
2100
2101
traceMsg(comp(), "fixing predecessor %d\n", predBlock->getNumber());
2102
removedEdges.add(*e);
2103
_cfg->addEdge(predBlock, preHeader);
2104
TR::Node *branchNode = predBlock->getExit()->getPrevRealTreeTop()->getNode();
2105
if (branchNode->getOpCode().isBranch() || branchNode->getOpCode().isBranch())
2106
{
2107
if (branchNode->getBranchDestination()->getNode()->getBlock() == loopEntry)
2108
{
2109
branchNode->setBranchDestination(preHeader->getEntry());
2110
}
2111
}
2112
else if (branchNode->getOpCode().isSwitch())
2113
{
2114
for (int32_t i = branchNode->getCaseIndexUpperBound() - 1; i > 0; --i)
2115
{
2116
if (branchNode->getChild(i)->getBranchDestination()->getNode()->getBlock() == loopEntry)
2117
branchNode->getChild(i)->setBranchDestination(preHeader->getEntry());
2118
}
2119
}
2120
}
2121
ListIterator<TR::CFGEdge> rIt(&removedEdges);
2122
for (e = rIt.getFirst(); e; e = rIt.getNext())
2123
{
2124
_cfg->removeEdge(e);
2125
}
2126
traceMsg(comp(), "added preheader block_%d\n", preHeader->getNumber());
2127
}
2128
2129
return preHeader;
2130
}
2131
2132
2133
// return the predecessor block of the loop entry
2134
//
2135
TR::Block *
2136
TR_CISCTransformer::findPredecessorBlockOfLoopEntry(TR_RegionStructure *loop)
2137
{
2138
TR::Block *loopEntry = loop->getEntry()->getStructure()->asBlock()->getBlock();
2139
if (true /*pred.isDoubleton()*/ )
2140
{
2141
for (auto edge = loopEntry->getPredecessors().begin(); edge != loopEntry->getPredecessors().end(); ++edge)
2142
{
2143
TR::Block *from = toBlock((*edge)->getFrom());
2144
if ((from->getSuccessors().size() == 1) &&
2145
from->getParentStructureIfExists(_cfg) != loop)
2146
return from;
2147
}
2148
}
2149
return 0;
2150
}
2151
2152
2153
//*****************************************************************************************
2154
// analyze whether the loop is frequently iterated.
2155
//*****************************************************************************************
2156
void
2157
TR_CISCTransformer::analyzeHighFrequencyLoop(TR_CISCGraph *graph,
2158
TR_RegionStructure *naturalLoop)
2159
{
2160
if (trace())
2161
traceMsg(comp(), "\tAnalyzing if loop is frequently iterated\n");
2162
bool isInsideOfFastVersioned = isInsideOfFastVersionedLoop(naturalLoop);
2163
bool highFrequency = true;
2164
#if !STRESS_TEST // If STRESS_TEST is true, BB frequency checks are ignored.
2165
TR::Block *loopEntry;
2166
int32_t loopEntryFrequency = -1;
2167
ListIterator<TR::Block> bi(&_bblistBody);
2168
for (loopEntry = bi.getFirst(); loopEntry; loopEntry = bi.getNext())
2169
{
2170
if (loopEntryFrequency < loopEntry->getFrequency())
2171
loopEntryFrequency = loopEntry->getFrequency();
2172
}
2173
if (trace()) traceMsg(comp(), "\t\tLoop Frequency=%d\n",loopEntryFrequency); // the freq of the loop entry
2174
if (loopEntryFrequency <= 0)
2175
{
2176
#if ALLOW_FAST_VERSIONED_LOOP
2177
highFrequency = isInsideOfFastVersioned;
2178
#else
2179
highFrequency = false;
2180
#endif
2181
}
2182
else
2183
{
2184
TR::Block *outer = findPredecessorBlockOfLoopEntry(naturalLoop);
2185
if (!outer || outer->getFrequency() < 0)
2186
{
2187
// If there is no freq information for the predecessor BB of the loop
2188
if (_bblistSucc.isSingleton())
2189
{
2190
// Try the successor block if it is single.
2191
outer = _bblistSucc.getListHead()->getData();
2192
if (outer->getFrequency() > loopEntryFrequency) outer = 0;
2193
}
2194
if (!outer || outer->getFrequency() < 0)
2195
{
2196
// Use the freq of the method entry block for the reference.
2197
outer = _cfg->getStart()->getSuccessors().front()->getTo()->asBlock();
2198
}
2199
}
2200
if (outer)
2201
{
2202
int32_t outerFrequency = outer->getFrequency();
2203
if (outerFrequency < 1) outerFrequency = 1;
2204
if (trace()) traceMsg(comp(), "\t\tOuter block %d: Frequency=%d Inner/Outer Ratio:(%f)\n",outer->getNumber(),outerFrequency, (double)loopEntryFrequency/(double)outerFrequency);
2205
if (loopEntryFrequency < outerFrequency * cg()->arrayTranslateAndTestMinimumNumberOfIterations()) highFrequency = false;
2206
}
2207
}
2208
#endif
2209
if (trace()) traceMsg(comp(), "\t\thighFrequency=%d\n",highFrequency);
2210
graph->setHotness(comp()->getMethodHotness(), highFrequency);
2211
graph->setInsideOfFastVersioned(isInsideOfFastVersioned);
2212
}
2213
2214
2215
2216
2217
//*****************************************************************************************
2218
// Tree Simplification for the Idiom Recognition
2219
//*****************************************************************************************
2220
/*
2221
ificmpxx
2222
isub
2223
iconst A
2224
iload B
2225
iconst C
2226
|
2227
V
2228
ificmpyy (yy is the swapChildrenOpCodes of xx)
2229
iload B
2230
iconst A-C
2231
-------------
2232
ificmpxx
2233
iadd
2234
iconst A
2235
iload B
2236
iconst C
2237
|
2238
V
2239
ificmpxx
2240
iload B
2241
iconst C-A
2242
-------------
2243
ificmplt
2244
isub
2245
iload A
2246
iload B
2247
iconst 1
2248
|
2249
V
2250
ificmple
2251
isub
2252
iload A
2253
iload B
2254
iconst 0
2255
|
2256
V
2257
ificmpge
2258
iload B
2259
iload A
2260
*/
2261
void
2262
TR_CISCTransformer::easyTreeSimplification(TR::Node *const node)
2263
{
2264
bool modified = false;
2265
if (node->getOpCode().isIf())
2266
{
2267
TR::Node *iconstC = node->getChild(1);
2268
if (iconstC->getOpCodeValue() != TR::iconst || iconstC->getReferenceCount() > 1) return;
2269
if (node->getOpCodeValue() == TR::ificmplt &&
2270
iconstC->getInt() == 1)
2271
{
2272
traceMsg(comp(), "\t\teasyTreeSimplification: Node: %p converted from ificmplt with 1 to ifcmple with 0", node);
2273
TR::Node::recreate(node, TR::ificmple);
2274
iconstC->setInt(0);
2275
}
2276
2277
TR::Node *addOrSub = node->getChild(0);
2278
if (!addOrSub->getOpCode().isAdd() && !addOrSub->getOpCode().isSub()) return;
2279
if (addOrSub->getReferenceCount() > 1) return;
2280
2281
TR::Node *iloadB = addOrSub->getChild(1);
2282
if (iloadB->getOpCodeValue() != TR::iload) return;
2283
if (iloadB->getReferenceCount() > 1) return;
2284
2285
TR::Node *iconstA = addOrSub->getChild(0);
2286
if (iconstA->getOpCodeValue() == TR::iconst)
2287
{
2288
if (addOrSub->getOpCode().isSub())
2289
{
2290
TR::Node::recreate(node, node->getOpCode().getOpCodeForSwapChildren());
2291
node->setAndIncChild(0, iloadB);
2292
iconstC->setInt(iconstA->getInt() - iconstC->getInt());
2293
}
2294
else
2295
{
2296
TR_ASSERT(addOrSub->getOpCode().isSub(), "error");
2297
node->setAndIncChild(0, iloadB);
2298
iconstC->setInt(iconstC->getInt() - iconstA->getInt());
2299
}
2300
addOrSub->recursivelyDecReferenceCount();
2301
modified = true;
2302
}
2303
else if (iconstA->getOpCodeValue() == TR::iload)
2304
{
2305
TR::Node *iloadA = iconstA;
2306
if (iloadA->getReferenceCount() > 1 ||
2307
!addOrSub->getOpCode().isSub()) return;
2308
if (node->getOpCodeValue() == TR::ificmple &&
2309
iconstC->getInt() == 0)
2310
{
2311
TR::Node::recreate(node, TR::ificmpge);
2312
node->setChild(0, iloadB);
2313
node->setChild(1, iloadA);
2314
modified = true;
2315
}
2316
}
2317
}
2318
if (modified && trace())
2319
traceMsg(comp(), "\t\teasyTreeSimplification: The tree %p is simplified.\n", node);
2320
}
2321
2322
2323
2324
2325
//*****************************************************************************************
2326
// analyze the tree "top" and add each node
2327
//*****************************************************************************************
2328
TR_CISCNode *
2329
TR_CISCTransformer::addAllSubNodes(TR_CISCGraph *const graph, TR::Block *const block, TR::TreeTop *const top,
2330
TR::Node *const parent, TR::Node *const node, const int32_t dagId)
2331
{
2332
//IdiomRecognition doesn't know how to handle rdbar/wrtbar for now
2333
if (comp()->incompleteOptimizerSupportForReadWriteBarriers() &&
2334
(node->getOpCode().isReadBar() || node->getOpCode().isWrtBar()))
2335
return 0;
2336
int32_t i;
2337
int32_t numChildren = node->getNumChildren();
2338
TR_ScratchList<TR_CISCNode> childList(trMemory());
2339
vcount_t curVisit = comp()->getVisitCount();
2340
2341
if (node->getVisitCount() == curVisit) // already visited
2342
{
2343
TR_CISCNode *findCisc = graph->getCISCNode(node);
2344
return findCisc;
2345
}
2346
else
2347
{
2348
if (isAfterVersioning() || comp()->getMethodHotness() == warm)
2349
easyTreeSimplification(node);
2350
node->setVisitCount(curVisit);
2351
TR_CISCNode *newCisc = NULL;
2352
const int32_t opcode = node->getOpCodeValue();
2353
TR::DataType nodeDataType = node->getDataType();
2354
2355
//bool ret;
2356
bool isReplaceChild = true;
2357
const bool isSwitch = (opcode == TR::lookup || opcode == TR::table);
2358
2359
int32_t numCreateChildren;
2360
if (isSwitch)
2361
{
2362
numCreateChildren = 1;
2363
numChildren = node->getCaseIndexUpperBound();
2364
}
2365
else
2366
{
2367
numCreateChildren = numChildren; // for ops other than switches, process the children first
2368
}
2369
2370
if (parent &&
2371
((parent->getOpCodeValue() == TR::aiadd && node->getOpCodeValue() == TR::isub && node->getChild(0)->getOpCodeValue() != TR::imul) ||
2372
(parent->getOpCodeValue() == TR::aladd && node->getOpCodeValue() == TR::lsub && node->getChild(0)->getOpCodeValue() != TR::lmul)))
2373
{
2374
// In the internal graph representation, I explicitly represent an index as "index * 1"
2375
// for a byte memory access in order to merge byte and non-byte idioms.
2376
// Example:
2377
// aiadd
2378
// aload
2379
// isub
2380
// iload
2381
// iconst xx
2382
// |
2383
// V
2384
// aiadd
2385
// aload
2386
// isub
2387
// imul
2388
// iload
2389
// iconst 1
2390
// iconst xx
2391
2392
TR_ASSERT(numCreateChildren == 2, "error");
2393
TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(0), dagId);
2394
if (!childCisc) return 0;
2395
2396
bool is64bit = node->getOpCodeValue() == TR::lsub;
2397
int opcodeConst = is64bit ? TR::lconst : TR::iconst;
2398
TR::DataType opcodeDataType = is64bit ? TR::Int64 : TR::Int32;
2399
2400
TR_CISCNode *const1 = graph->getCISCNode(opcodeConst, true, 1);
2401
if (!const1)
2402
{
2403
const1 = new (trHeapMemory()) TR_CISCNode(trMemory(), opcodeConst, opcodeDataType, graph->incNumNodes(), 0, 0, 0, 1);
2404
const1->setNewCISCNode();
2405
graph->addNode(const1, 0, 0, 0);
2406
}
2407
2408
TR_CISCNode *mul = new (trHeapMemory()) TR_CISCNode(trMemory(),
2409
is64bit ? TR::lmul : TR::imul,
2410
is64bit ? TR::Int64 : TR::Int32,
2411
graph->incNumNodes(), dagId, 1, 2);
2412
mul->setNewCISCNode();
2413
mul->setChildren(childCisc, const1);
2414
graph->addNode(mul, 0, 0, 0);
2415
childCisc = mul;
2416
childList.add(childCisc);
2417
if (_lastCFGNode)
2418
{
2419
_lastCFGNode->setSucc(0, mul); // _lastCFGNode must be the predecessor of newCisc.
2420
_lastCFGNode = mul;
2421
}
2422
2423
childCisc = addAllSubNodes(graph, block, top, node, node->getChild(1), dagId);
2424
if (!childCisc) return 0;
2425
childList.add(childCisc);
2426
}
2427
else
2428
{
2429
for (i = 0; i < numCreateChildren; i++)
2430
{
2431
TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(i), dagId);
2432
if (!childCisc) return 0;
2433
childList.add(childCisc);
2434
}
2435
}
2436
2437
if (node->getOpCode().isLoadVarDirect()) // direct loads (e.g. iload)
2438
{
2439
TR_ASSERT(childList.isEmpty(), "Loads must have no child");
2440
int32_t refNum = node->getSymbolReference()->getReferenceNumber();
2441
TR_CISCNode *variable = graph->getCISCNode(TR_variable, true, refNum);
2442
if (!variable)
2443
{
2444
variable = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 0,
2445
0, 0, refNum);
2446
variable->addTrNode(block, top, node);
2447
graph->addNode(variable);
2448
}
2449
2450
numChildren = 1;
2451
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2452
1, numChildren);
2453
newCisc->setIsNegligible();
2454
newCisc->setIsLoadVarDirect();
2455
childList.add(variable);
2456
2457
}
2458
else if (node->getOpCode().isLoadConst()) // constants (e.g. iconst)
2459
{
2460
int32_t val;
2461
bool isConst = true;
2462
switch(opcode)
2463
{
2464
case TR::iconst:
2465
val = node->getInt();
2466
break;
2467
case TR::sconst:
2468
val = node->getShortInt();
2469
break;
2470
case TR::bconst:
2471
val = node->getByte();
2472
break;
2473
case TR::lconst:
2474
val = node->getLongIntLow();
2475
if ((int64_t)val != node->getLongInt())
2476
isConst = false;
2477
break;
2478
default:
2479
isConst = false;
2480
break;
2481
}
2482
2483
if (isConst)
2484
{
2485
newCisc = graph->getCISCNode(opcode, true, val);
2486
if (newCisc) // if the same node is already added
2487
graph->addTrNode(newCisc, block, top, node);
2488
else
2489
{
2490
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), 0,
2491
0, 0, val);
2492
graph->addNode(newCisc, block, top, node);
2493
}
2494
return newCisc;
2495
}
2496
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2497
1, numChildren);
2498
}
2499
else if (node->getOpCode().isStoreDirect())
2500
{
2501
if (childList.isSingleton()) // for normal direct store (e.g. istore)
2502
{
2503
isReplaceChild = false;
2504
TR_CISCNode *child = childList.getListHead()->getData();
2505
int refNum = node->getSymbolReference()->getReferenceNumber();
2506
TR_CISCNode *variable = graph->getCISCNode(TR_variable, true, refNum);
2507
if (!variable)
2508
{
2509
variable = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 0,
2510
0, 0, refNum);
2511
variable->addTrNode(block, top, node);
2512
graph->addNode(variable);
2513
}
2514
if (!child->isInterestingConstant()) child->setDest(variable);
2515
2516
numChildren = 2;
2517
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2518
1, numChildren);
2519
newCisc->setIsStoreDirect();
2520
childList.add(variable);
2521
}
2522
else
2523
{
2524
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2525
1, numChildren);
2526
}
2527
}
2528
else
2529
{
2530
if (node->getOpCode().isCall() &&
2531
graph->isRecordingAspectsByOpcode()) return 0;
2532
int32_t nSucc = node->getOpCode().isIf() ? 2 : (isSwitch ? node->getCaseIndexUpperBound()-1 : 1);
2533
if (opcode == TR::Case)
2534
{
2535
TR_ASSERT(numChildren == 0, "TR::Case: numChildren != 0 ???");
2536
numChildren = 1;
2537
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2538
nSucc, numChildren);
2539
childList.add(graph->getCISCNode(top->getNode()->getChild(0)));
2540
}
2541
else
2542
{
2543
newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), dagId,
2544
nSucc, numChildren);
2545
}
2546
switch(newCisc->getOpcode())
2547
{
2548
case TR::BBStart:
2549
case TR::BBEnd:
2550
newCisc->setOtherInfo(block->getNumber());
2551
// fall through
2552
case TR::Goto:
2553
case TR::asynccheck:
2554
case TR::allocationFence:
2555
case TR::treetop:
2556
case TR::table:
2557
case TR::lookup:
2558
case TR::PassThrough:
2559
newCisc->setIsNegligible(); // These opcodes are negligible.
2560
break;
2561
case TR::compressedRefs:
2562
// compressed refs are really just another kind of treetop so treat them accordingly
2563
{
2564
static const bool anchorIsNegligible = feGetEnv("TR_disableIRNegligibleCompressedRefs") == NULL;
2565
if (anchorIsNegligible)
2566
newCisc->setIsNegligible();
2567
}
2568
break;
2569
}
2570
2571
if (node->getOpCode().isBranch())
2572
{
2573
_backPatchList.add(newCisc); // The branch destination will be set later.
2574
}
2575
}
2576
2577
if (_lastCFGNode)
2578
{
2579
_lastCFGNode->setSucc(0, newCisc); // _lastCFGNode must be the predecessor of newCisc.
2580
}
2581
2582
if (newCisc->getNumSuccs() > 0) _lastCFGNode = newCisc;
2583
graph->addNode(newCisc, block, top, node);// add newCisc
2584
2585
if (isSwitch) // When the node is a switch, children will be added later for analyzing easier.
2586
{
2587
for (i = 1; i < numChildren; i++)
2588
{
2589
TR_CISCNode *childCisc = addAllSubNodes(graph, block, top, node, node->getChild(i), dagId);
2590
if (!childCisc) return 0;
2591
childList.add(childCisc);
2592
switch (opcode)
2593
{
2594
case TR::lookup:
2595
if (i >= 1)
2596
{
2597
newCisc->setSucc(i-1, childCisc);
2598
if (i >= 2)
2599
childCisc->setOtherInfo(node->getChild(i)->getCaseConstant());
2600
}
2601
break;
2602
case TR::table:
2603
if (i >= 1)
2604
{
2605
newCisc->setSucc(i-1, childCisc);
2606
if (i >= 2)
2607
childCisc->setOtherInfo(i-2);
2608
}
2609
break;
2610
}
2611
}
2612
}
2613
2614
ListIterator<TR_CISCNode> ciscLi(&childList);
2615
TR_CISCNode *child = ciscLi.getFirst();
2616
if (isReplaceChild)
2617
{
2618
for (i = numChildren; --i >= 0; child = ciscLi.getNext())
2619
{
2620
if (child->getFirstDest())
2621
{
2622
newCisc->addHint(child);
2623
child = child->getFirstDest();
2624
}
2625
newCisc->setChild(i, child);
2626
}
2627
}
2628
else
2629
{
2630
for (i = numChildren; --i >= 0; child = ciscLi.getNext()) newCisc->setChild(i, child);
2631
}
2632
return newCisc;
2633
}
2634
}
2635
2636
2637
2638
//*****************************************************************************************
2639
// Convert TR::Block to TR_CISCGraph
2640
//*****************************************************************************************
2641
bool
2642
TR_CISCTransformer::makeCISCGraphForBlock(TR_CISCGraph *graph, TR::Block *const block, int32_t dagId)
2643
{
2644
if (trace())
2645
traceMsg(comp(), "\t\tmakeCISCGraphForBlock: Building CISCGraph for block %d.\n", block->getNumber());
2646
2647
TR::TreeTop *top = block->getEntry();
2648
TR::TreeTop *end = block->getExit();
2649
2650
if (!top) return true;
2651
while(true)
2652
{
2653
if (!addAllSubNodes(graph, block, top, NULL, top->getNode(), dagId))
2654
{
2655
if (trace())
2656
traceMsg(comp(), "\t\tFailed to create CISCNode for Node %p in block %d : %p\n", top->getNode(), block->getNumber(), block);
2657
return false;
2658
}
2659
2660
if (top == end) break;
2661
top = top->getNextTreeTop();
2662
}
2663
if (_lastCFGNode)
2664
{
2665
if (!_backPatchList.find(_lastCFGNode))
2666
_backPatchList.add(_lastCFGNode);
2667
_lastCFGNode = 0;
2668
}
2669
return true;
2670
}
2671
2672
2673
//*****************************************************************************************
2674
// Resolve unknown destinations of branches
2675
// If the destination is not included in the region, it will set "exitNode" to the destination.
2676
//*****************************************************************************************
2677
void
2678
TR_CISCTransformer::resolveBranchTargets(TR_CISCGraph *graph, TR_CISCNode *exitNode)
2679
{
2680
ListIterator<TR_CISCNode> ci(&_backPatchList);
2681
TR_CISCNode *p;
2682
for (p = ci.getFirst(); p != 0; p = ci.getNext()) // each element of _backPatchList
2683
{
2684
TR_ASSERT(p->getTrNodeInfo()->isSingleton(), "each branch node must correspond to a single TR node");
2685
TR::Node *trNode = p->getHeadOfTrNodeInfo()->_node;
2686
TR_CISCNode *destCisc;
2687
TR::Node *destNode;
2688
2689
if (trNode->getOpCode().isBranch())
2690
{
2691
destNode = trNode->getBranchDestination()->getNode();
2692
destCisc = graph->getCISCNode(destNode);
2693
if (!destCisc) destCisc = exitNode; // set exitNode if the destination is outside
2694
p->setSucc(p->getNumSuccs()-1, destCisc);
2695
}
2696
else
2697
{
2698
// set fallthrough block
2699
destCisc = exitNode; // set exitNode as default
2700
if (trNode->getOpCodeValue() == TR::BBEnd)
2701
{
2702
TR::TreeTop *nextTreeTop = trNode->getBlock()->getExit()->getNextTreeTop();
2703
if (nextTreeTop)
2704
{
2705
destNode = nextTreeTop->getNode();
2706
destCisc = graph->getCISCNode(destNode);
2707
if (!destCisc) destCisc = exitNode; // set exitNode if the destination is outside
2708
}
2709
}
2710
p->setSucc(0, destCisc);
2711
}
2712
}
2713
2714
for (p = ci.getFirst(); p != 0; p = ci.getNext())
2715
{
2716
uint32_t numSuccs = p->getNumSuccs();
2717
if (numSuccs >= 2) // To exclude 0 and 1 quickly (reduce compilation time)
2718
{
2719
if (numSuccs == 2) // Typical case in "numSuccs >= 2" (reduce compilation time)
2720
{
2721
TR_CISCNode *succ0 = p->getSucc(0);
2722
TR_CISCNode *succ1 = p->getSucc(1);
2723
if (succ0->getOpcode() == TR::BBEnd) p->setSucc(0, (succ0 = succ0->getSucc(0)));
2724
if (succ1->getOpcode() == TR::BBEnd) p->setSucc(1, (succ1 = succ1->getSucc(0)));
2725
if (p->getHeadOfTrNodeInfo()->_node->getOpCode().isIf())
2726
{
2727
if (succ0->getOpcode() == TR_exitnode || // if the fallthrough edge goes to exitNode, or
2728
(p->getDagID() == succ1->getDagID() && // if the fallthrough edge goes to another dagId
2729
p->getDagID() != succ0->getDagID())) // and the branch edge goes to the same dagid
2730
{
2731
p->reverseBranchOpCodes(); // swap the fallthrough and branch edges for canonicalization
2732
}
2733
}
2734
}
2735
else
2736
{
2737
uint32_t idx;
2738
for (idx = 0; idx < numSuccs; idx++)
2739
{
2740
TR_CISCNode *s = p->getSucc(idx);
2741
if (s->getOpcode() == TR::BBEnd) p->setSucc(idx, s->getSucc(0));
2742
}
2743
TR_ASSERT(!p->getHeadOfTrNodeInfo()->_node->getOpCode().isIf(), "error");
2744
}
2745
}
2746
}
2747
}
2748
2749
2750
2751
2752
// Renumber the dagIds of all nodes in the graph and reorder the graph's node list.
//
// Old dagIds are processed from tempMaxDagId down to 0. Every node carrying the old id
// currently being processed is unlinked from the graph's node list, given its new id and
// appended to a new list, so the resulting list is ordered by new dagId. All nodes of
// the loop body (old id == bodyDagId) share a single new id; every other node gets a
// unique new id.
//
// With the temporary ids assigned by makeCISCGraph (entry node, body nodes and exit node
// are created with ids 1, 2 and 3; constants and variables with id 0), the node list may
// look like:
//    0L  iconst -16
//    2L  isub
//    2L  aiadd
//    0L  bconst 1
//    2L  ibstore
//    2L  ificmpge
//    2L  BBEnd
//    3L  exitnode
// Afterwards the exit node has id 0, all body nodes have id 1, the entry node has id 2
// and the remaining nodes follow with one id each.
//
// Returns the new dagId of the loop body.
uint16_t
TR_CISCTransformer::renumberDagId(TR_CISCGraph *graph, int32_t tempMaxDagId, int32_t bodyDagId)
   {
   int32_t newDagId = 0;
   int32_t newBodyDagId = -1;
   List<TR_CISCNode> *orgNodes = graph->getNodes();
   ListElement<TR_CISCNode> *newListTop = NULL;   // head of the reordered list
   ListElement<TR_CISCNode> *appendCursor = NULL; // tail of the reordered list

   for (int32_t oldId = tempMaxDagId; oldId >= 0; oldId--)
      {
      const bool isBody = (oldId == bodyDagId);

      // Step 1: peel matching elements off the head of the original list.
      ListElement<TR_CISCNode> *cur = orgNodes->getListHead();
      while (cur && cur->getData()->getDagID() == oldId)
         {
         orgNodes->setListHead(cur->getNextElement());
         cur->getData()->setDagID(newDagId);
         if (isBody)
            newBodyDagId = newDagId; // inside of the loop body
         else
            newDagId++;              // outside of the loop body
         cur->setNextElement(0);
         if (appendCursor)
            appendCursor->setNextElement(cur);
         else
            newListTop = cur;
         appendCursor = cur;

         cur = orgNodes->getListHead();
         }

      // Step 2: "cur" is now the first element that stays (or NULL). Unlink
      // every matching element that follows it; "cur" only advances over
      // elements that stay in the original list.
      if (cur)
         {
         ListElement<TR_CISCNode> *next;
         while ((next = cur->getNextElement()) != NULL)
            {
            if (next->getData()->getDagID() != oldId)
               {
               cur = next;
               continue;
               }

            cur->setNextElement(next->getNextElement());
            next->getData()->setDagID(newDagId);
            if (isBody)
               newBodyDagId = newDagId; // inside of the loop body
            else
               newDagId++;              // outside of the loop body
            next->setNextElement(0);
            if (appendCursor)
               appendCursor->setNextElement(next);
            else
               newListTop = next;
            appendCursor = next;
            }
         }

      // The body consumes exactly one new id, whether or not any node carried it.
      if (isBody)
         {
         newBodyDagId = newDagId;
         newDagId++;
         }
      }

   TR_ASSERT(orgNodes->isEmpty(), "there are elements in orgNodes");
   orgNodes->setListHead(newListTop);
   graph->setNoFragmentDagId();
   graph->setNumDagIds(newDagId);
   return (uint16_t)newBodyDagId;
   }
2890
2891
2892
2893
// make our internal graph representation from the input IL code.
2894
//
2895
TR_CISCGraph *
2896
TR_CISCTransformer::makeCISCGraph(List<TR::Block> *pred,
2897
List<TR::Block> *body,
2898
List<TR::Block> *succ)
2899
{
2900
int32_t dagId = 1, bodyDagId;
2901
TR_CISCGraph *graph = new (trHeapMemory()) TR_CISCGraph(trMemory(), comp()->signature());
2902
TR::Block *block;
2903
ListIterator<TR::Block> bi(pred);
2904
//ListElement<TR_CISCNode> *head;
2905
2906
graph->setRecordingAspectsByOpcode(false); // Stop recording aspects outside of the loop
2907
_backPatchList.init();
2908
comp()->incVisitCount();
2909
2910
// make entry node
2911
TR_CISCNode *newCisc = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), dagId, 1, 0);
2912
graph->setEntryNode(newCisc);
2913
graph->addNode(newCisc);
2914
_lastCFGNode = newCisc;
2915
2916
static const bool includePreds = feGetEnv("TR_idiomIncludePreds") != NULL;
2917
if (includePreds)
2918
{
2919
if (trace())
2920
traceMsg(comp(), "\tmakeCISCGraph: Building CISCGraph for Predecessor Blocks.\n");
2921
for (block = bi.getFirst(); block != 0; block = bi.getNext())
2922
{
2923
if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;
2924
}
2925
}
2926
2927
dagId++;
2928
bodyDagId = dagId;
2929
bi.set(body);
2930
if (trace())
2931
traceMsg(comp(), "\tmakeCISCGraph: Building CISCGraph for Loop Body Blocks.\n");
2932
graph->setRecordingAspectsByOpcode(true); // Start recording aspects inside of the loop
2933
for (block = bi.getFirst(); block != 0; block = bi.getNext())
2934
{
2935
if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;
2936
}
2937
graph->setRecordingAspectsByOpcode(false); // Stop recording aspects outside of the loop
2938
2939
2940
dagId++;
2941
#if 0
2942
bi.set(succ);
2943
for (block = bi.getFirst(); block != 0; block = bi.getNext())
2944
{
2945
if (!makeCISCGraphForBlock(graph, block, dagId)) return 0;
2946
}
2947
#endif
2948
2949
TR_CISCNode *exitNode = new (trHeapMemory()) TR_CISCNode(trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), dagId, 0, 0);
2950
graph->addNode(exitNode);
2951
graph->setExitNode(exitNode);
2952
if (_lastCFGNode)
2953
{
2954
_lastCFGNode->setSucc(0, exitNode);
2955
_lastCFGNode = 0;
2956
}
2957
2958
// add "iconst -ahsize" if nonexistent
2959
int32_t ahsize = -(int32_t)TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
2960
uint32_t opcode = comp()->target().is64Bit() ? TR::lconst : TR::iconst;
2961
TR::DataType nodeDataType = comp()->target().is64Bit() ? TR::Int64 : TR::Int32;
2962
2963
if (!graph->getCISCNode(opcode, true, ahsize))
2964
{
2965
graph->addNode(new (trHeapMemory()) TR_CISCNode(trMemory(), opcode, nodeDataType, graph->incNumNodes(), 0, 0, 0, ahsize));
2966
}
2967
2968
bodyDagId = renumberDagId(graph, dagId, bodyDagId);
2969
resolveBranchTargets(graph, exitNode);
2970
// setup the dagIds2Nodes table & a list containing
2971
// all the nodes except BBStarts and BBEnds
2972
graph->createInternalData(bodyDagId);
2973
graph->modifyTargetGraphAspects();
2974
2975
return graph;
2976
}
2977
2978
2979
2980
//***************************************************************************************
2981
// Main driver for TR_CISCTransformer
2982
//***************************************************************************************
2983
int32_t TR_CISCTransformer::perform()
2984
{
2985
2986
//TO_BE_ENABLED
2987
///return 0;
2988
2989
static int enable = -1;
2990
if (enable < 0)
2991
{
2992
char *p = feGetEnv("DISABLE_CISC");
2993
enable = p ? 0 : 1;
2994
}
2995
static int disableLoopNumber = -1;
2996
if (disableLoopNumber == -1)
2997
{
2998
char *p = feGetEnv("DISABLE_LOOP_NUMBER");
2999
disableLoopNumber = p ? atoi(p) : -2;
3000
}
3001
static int showStdout = -1;
3002
if (showStdout == -1)
3003
{
3004
char *p = feGetEnv("traceCISCVerbose");
3005
showStdout = p ? 1 : 0;
3006
}
3007
3008
//FIXME: add TR_EnableIdiomRecognitionWarm to options
3009
int32_t methodHotness = scorching;
3010
//_compilation->getOption(TR_EnableIdiomRecognitionWarm) ? scorching :
3011
//_compilation->getMethodHotness();
3012
//FIXME: remove this code if it works
3013
3014
TR::Recompilation *recompInfo = comp()->getRecompilationInfo();
3015
if (!comp()->mayHaveLoops() ||
3016
methodHotness < minimumHotnessPrepared ||
3017
comp()->getProfilingMode() == JitProfiling || // if this method is now profiled
3018
!enable)
3019
return 0;
3020
3021
_useDefInfo = optimizer()->getUseDefInfo();
3022
if (_useDefInfo == 0)
3023
return 0;
3024
3025
// Required beyond the scope of the stack memory region
3026
int32_t cost = 0;
3027
3028
{
3029
TR::StackMemoryRegion stackMemoryRegion(*trMemory());
3030
3031
_cfg = comp()->getFlowGraph();
3032
_rootStructure = _cfg->getStructure();
3033
_nodesInCycle = new (trStackMemory()) TR_BitVector(_cfg->getNextNodeNumber(), trMemory(), stackAlloc);
3034
_isGenerateI2L = comp()->target().is64Bit();
3035
_showMesssagesStdout = (VERBOSE || showStdout);
3036
3037
// make loop candidates
3038
List<TR_RegionStructure> loopCandidates(trMemory());
3039
3040
if (createLoopCandidates(&loopCandidates))
3041
{
3042
TR_CFGReversePostOrder revPost(trMemory());
3043
ListIterator<TR::CFGNode> revPostIterator(revPost.compute(_cfg));
3044
if (trace())
3045
revPost.dump(comp());
3046
3047
bool modified = false;
3048
if (showMesssagesStdout()) printf("\nStarting CISCTransformer %s, %s\n",
3049
comp()->getHotnessName(comp()->getMethodHotness()),
3050
comp()->signature());
3051
if (trace())
3052
{
3053
traceMsg(comp(), "Starting CISCTransformer\n");
3054
comp()->dumpMethodTrees("Trees before transforming CISC instructions");
3055
}
3056
3057
ListIterator<TR_RegionStructure> loopIt(&loopCandidates);
3058
3059
TR_RegionStructure *nextLoop;
3060
3061
List<TR::Block> bblistPred(comp()->trMemory());
3062
List<TR::Block> bblistBody(comp()->trMemory());
3063
List<TR::Block> bblistSucc(comp()->trMemory());
3064
List<TR_BitsKeepAliveInfo> BitsKeepAliveList(comp()->trMemory());
3065
_BitsKeepAliveList = BitsKeepAliveList;
3066
3067
int32_t numNodes = _cfg->getNextNodeNumber();
3068
TR_BitVector cfgBV(numNodes, trMemory(), stackAlloc);
3069
TR::CFGNode *cfgnode;
3070
int loopNumber = 0;
3071
3072
for (nextLoop = loopIt.getFirst(); nextLoop != 0; nextLoop = loopIt.getNext())
3073
{
3074
// make bb list for each loop
3075
TR::Block *block;
3076
bblistPred.init();
3077
bblistBody.init();
3078
bblistSucc.init();
3079
3080
// add predecessors of loop header as long as there is no merge point. (i.e. extended BB)
3081
TR::Block *predBlock = findPredecessorBlockOfLoopEntry(nextLoop);
3082
if (predBlock && predBlock->getEntry())
3083
{
3084
TR_RegionStructure *region = NULL;
3085
3086
// Get the parent of the region the outer region
3087
TR_Structure* blockStructure = predBlock->getStructureOf();
3088
if (blockStructure)
3089
region = (TR_RegionStructure*) blockStructure->getParent();
3090
3091
if (region != NULL)
3092
{
3093
TR::CFGNode* cfgStart = _cfg->getStart();
3094
while(predBlock != cfgStart)
3095
{
3096
if (predBlock->getNumberOfRealTreeTops() > 300)
3097
{
3098
if (trace())
3099
traceMsg(comp(), "Skip the predecessor %d, because it has too many TreeTops (%d).\n", predBlock->getNumber(), predBlock->getNumberOfRealTreeTops());
3100
break; // To reduce unnecessary compilation time
3101
}
3102
3103
bblistPred.add(predBlock);
3104
3105
TR::CFGEdgeList pred = predBlock->getPredecessors();
3106
3107
if (!(pred.size() == 1)) // Check if there exists more than one predecessor ==> merge point.
3108
break;
3109
3110
predBlock = toBlock(pred.front()->getFrom());
3111
3112
// Get the parent of the region the outer region
3113
TR_RegionStructure* predBlockRegion = NULL;
3114
blockStructure = predBlock->getStructureOf();
3115
if (blockStructure)
3116
{
3117
predBlockRegion = (TR_RegionStructure*) blockStructure->getParent();
3118
}
3119
3120
// Check to see if predBlock is the same.
3121
if (region != predBlockRegion)
3122
{
3123
if (trace())
3124
traceMsg(comp(), "Skip the predecessor block_%d, because it is within another region/loop.\n",predBlock->getNumber());
3125
break;
3126
}
3127
}
3128
}
3129
}
3130
3131
3132
// add BBs of loop body
3133
TR_RegionStructure::Cursor si(*nextLoop);
3134
TR_StructureSubGraphNode *node;
3135
cfgBV.empty();
3136
for (node = si.getFirst(); node != 0; node = si.getNext())
3137
{
3138
cfgBV.set(node->getNumber());
3139
}
3140
3141
ListAppender<TR::Block> appender(&bblistBody);
3142
block = nextLoop->getEntry()->getStructure()->asBlock()->getBlock();
3143
for (cfgnode = revPostIterator.getFirst(); cfgnode; cfgnode = revPostIterator.getNext())
3144
{
3145
if (block == cfgnode->asBlock())
3146
break;
3147
}
3148
3149
if (cfgnode == 0)
3150
continue;
3151
3152
for (; cfgnode; cfgnode = revPostIterator.getNext())
3153
{
3154
int32_t bbnum = cfgnode->getNumber();
3155
if (cfgBV.isSet(bbnum))
3156
{
3157
block = cfgnode->asBlock();
3158
TR_ASSERT(block->getEntry() != 0, "assuming block->getEntry() != 0");
3159
appender.add(block);
3160
cfgBV.reset(bbnum);
3161
if (cfgBV.isEmpty())
3162
break;
3163
}
3164
}
3165
3166
if (!cfgBV.isEmpty())
3167
{
3168
for (cfgnode = revPostIterator.getFirst(); cfgnode; cfgnode = revPostIterator.getNext())
3169
{
3170
int32_t bbnum = cfgnode->getNumber();
3171
if (cfgBV.isSet(bbnum))
3172
{
3173
block = cfgnode->asBlock();
3174
TR_ASSERT(block->getEntry() != 0, "assuming block->getEntry() != 0");
3175
appender.add(block);
3176
cfgBV.reset(bbnum);
3177
if (cfgBV.isEmpty())
3178
break;
3179
}
3180
}
3181
}
3182
3183
// add exit blocks of the loop
3184
ListIterator<TR::CFGEdge> ei(&nextLoop->getExitEdges());
3185
TR::CFGEdge *edge;
3186
for (edge = ei.getFirst(); edge != 0; edge = ei.getNext())
3187
{
3188
TR::CFGNode *from = edge->getFrom();
3189
TR::CFGNode *to = edge->getTo();
3190
int32_t toNum = to->getNumber();
3191
if (!toStructureSubGraphNode(from)->getStructure()->asBlock()) continue;
3192
from = toStructureSubGraphNode(from)->getStructure()->asBlock()->getBlock();
3193
// get the corresponding cfg edge
3194
//
3195
auto edgeFrom = from->getSuccessors().begin();
3196
for (; edgeFrom != from->getSuccessors().end(); ++edgeFrom)
3197
{
3198
block = toBlock((*edgeFrom)->getTo());
3199
if (block->getNumber() == toNum)
3200
break;
3201
}
3202
if (edgeFrom != from->getSuccessors().end())
3203
{
3204
TR_ASSERT(block->getNumber() == toNum, "error block->getNumber() != toNum");
3205
if (block->getEntry() &&
3206
!bblistSucc.find(block) &&
3207
!bblistPred.find(block))
3208
bblistSucc.add(block);
3209
}
3210
}
3211
3212
_bblistPred = bblistPred;
3213
_bblistBody = bblistBody;
3214
_bblistSucc = bblistSucc;
3215
3216
if (trace())
3217
{
3218
traceMsg(comp(), "Loop %d.\n\tAnalyzed predecessor, body and successor blocks.\n", nextLoop->getNumber());
3219
traceMsg(comp(), "\t\tPredecessors blocks:");
3220
ListIterator<TR::Block> bi(&bblistPred);
3221
for (block = bi.getFirst(); block != 0; block = bi.getNext())
3222
traceMsg(comp(), " %d:[%p]",block->getNumber(),block);
3223
traceMsg(comp(), "\n");
3224
3225
traceMsg(comp(), "\t\tBody blocks:");
3226
bi.set(&bblistBody);
3227
for (block = bi.getFirst(); block != 0; block = bi.getNext())
3228
traceMsg(comp(), " %d:[%p]",block->getNumber(),block);
3229
traceMsg(comp(), "\n");
3230
3231
traceMsg(comp(), "\t\tSuccessors blocks:");
3232
bi.set(&bblistSucc);
3233
for (block = bi.getFirst(); block != 0; block = bi.getNext())
3234
traceMsg(comp(), " %d:[%p]",block->getNumber(),block);
3235
traceMsg(comp(), "\n");
3236
}
3237
3238
// Iterate through the loop body blocks to remove Bits.keepAlive() or Reference.reachabilityFence() calls.
3239
// This is a NOP function inserted into NIO libraries to keep the NIO object and its native
3240
// ptr alive until after the native pointer accesses.
3241
removeBitsKeepAliveCalls(&bblistBody);
3242
3243
3244
// make our internal graph representation
3245
TR_CISCGraph *graph;
3246
graph = makeCISCGraph(&bblistPred, &bblistBody, &bblistSucc);
3247
if (!graph)
3248
{
3249
if (trace()) traceMsg(comp(), "Loop %d. Failed to make CISC Graph.\n", nextLoop->getNumber());
3250
restoreBitsKeepAliveCalls();
3251
continue;
3252
}
3253
if (loopNumber++ == disableLoopNumber)
3254
{
3255
restoreBitsKeepAliveCalls();
3256
continue; // for debug purpose
3257
}
3258
3259
analyzeHighFrequencyLoop(graph, nextLoop); // Analyze if frequently iterated loop.
3260
3261
setCurrentLoop(nextLoop);
3262
3263
if (trace())
3264
graph->dump(comp()->getOutFile(), comp());
3265
3266
bool modifiedThisLoop = false;
3267
_candidatesForShowing.init();
3268
for (int32_t i = 0; i < numPreparedCISCGraphs; i++) // for each idiom
3269
{
3270
TR_CISCGraph *prepared = preparedCISCGraphs[i];
3271
if (prepared)
3272
{
3273
if (computeTopologicalEmbedding(prepared, graph))// Analyze matching for the idiom
3274
{
3275
modified = true; // IL is modified
3276
modifiedThisLoop = true;
3277
if (trace())
3278
{
3279
traceMsg(comp(), "Transformed %s\n", prepared->getTitle());
3280
comp()->dumpMethodTrees("Trees after transforming CISC instruction");
3281
}
3282
break;
3283
}
3284
}
3285
}
3286
// Restore the keepAliveCalls
3287
restoreBitsKeepAliveCalls();
3288
3289
if (!modifiedThisLoop) showCandidates();
3290
}
3291
3292
3293
if (modified)
3294
{
3295
_cfg->setStructure(0);
3296
// Use/def info and value number info are now bad.
3297
//
3298
optimizer()->setUseDefInfo(NULL);
3299
optimizer()->setValueNumberInfo(0);
3300
3301
if (trace())
3302
{
3303
traceMsg(comp(), "Ending CISCTransformer\n");
3304
comp()->dumpFlowGraph();
3305
comp()->dumpMethodTrees("Trees after transforming CISC instructions");
3306
}
3307
}
3308
3309
if (showMesssagesStdout()) printf("Exiting CISCTransformer\n");
3310
cost = 1;
3311
}
3312
} // scope for stack memory region
3313
3314
manager()->incNumPassesCompleted();
3315
return cost;
3316
}
3317
3318
const char *
3319
TR_CISCTransformer::optDetailString() const throw()
3320
{
3321
return "O^O IDIOM RECOGNITION: ";
3322
}
3323
3324
// check if the block is really in the loop body
3325
//
3326
bool
3327
TR_CISCTransformer::isBlockInLoopBody(TR::Block *block)
3328
{
3329
ListIterator<TR::Block> bi(&_bblistBody);
3330
for (TR::Block *b = bi.getFirst(); b; b = bi.getNext())
3331
{
3332
if (block->getNumber() == b->getNumber())
3333
return true;
3334
}
3335
return false;
3336
}
3337
3338
3339
void
3340
TR_CISCTransformer::showEmbeddedData(char *title, uint8_t *data)
3341
{
3342
int32_t i, j;
3343
traceMsg(comp(), "%s\n ",title);
3344
for (j = 0; j < _numPNodes; j++)
3345
{
3346
traceMsg(comp(), "%3d",j);
3347
}
3348
traceMsg(comp(), "\n --");
3349
for (j = 0; j < _numPNodes; j++)
3350
{
3351
traceMsg(comp(), "---");
3352
}
3353
traceMsg(comp(), "\n");
3354
for (i = 0; i < _numTNodes; i++)
3355
{
3356
traceMsg(comp(), "%3d:",i);
3357
for (j = 0; j < _numPNodes; j++)
3358
{
3359
uint8_t this_result = data[idx(j, i)];
3360
if (this_result == _Unknown || this_result == _NotEmbed)
3361
traceMsg(comp(), "| ");
3362
else
3363
traceMsg(comp(), "| %X",data[idx(j, i)]);
3364
}
3365
traceMsg(comp(), "\n");
3366
}
3367
}
3368
3369
3370
//***************************************************************************************
3371
// It returns the result whether parents of p correspond to those of t.
3372
// It also checks whether the expression is commutative.
3373
//***************************************************************************************
3374
bool
3375
TR_CISCTransformer::checkParents(TR_CISCNode *p, TR_CISCNode *t, uint8_t *result, bool *inLoop, bool *optionalParents)
3376
{
3377
ListIterator<TR_CISCNode> pi(p->getParents());
3378
ListIterator<TR_CISCNode> ti(t->getParents());
3379
TR_CISCNode *pn, *tn;
3380
bool isTargetInsideOfLoop = false;
3381
bool allOptionalParents = true;
3382
for (pn = pi.getFirst(); pn; pn = pi.getNext()) // for each parent of p
3383
{
3384
uint32_t pnOpc = pn->getOpcode();
3385
uint32_t tmpIdx = idx(pn->getID(), 0);
3386
int32_t pIndex = 0;
3387
const bool commutative = pn->isCommutative();
3388
const bool isPnInsideOfLoop = !pn->isOutsideOfLoop();
3389
if (!commutative)
3390
{
3391
for (pIndex = pn->getNumChildren(); --pIndex >= 0; )
3392
{
3393
if (pn->getChild(pIndex) == p) break;
3394
}
3395
}
3396
TR_ASSERT(pIndex >= 0, "error!");
3397
for (tn = ti.getFirst(); tn; tn = ti.getNext()) // for each parent of t
3398
{
3399
if (isPnInsideOfLoop && tn->isOutsideOfLoop())
3400
continue;
3401
if (pn->isEqualOpc(tn))
3402
{
3403
if (result[tmpIdx + tn->getID()] == _Embed &&
3404
(commutative || tn->getChild(pIndex) == t)) break;
3405
}
3406
else
3407
{
3408
if (tn->getIlOpCode().isLoadVarDirect()) /* search one more depth */
3409
{
3410
ListIterator<TR_CISCNode> tci(tn->getParents());
3411
TR_CISCNode *tcn;
3412
for (tcn = tci.getFirst(); tcn; tcn = tci.getNext())
3413
{
3414
if (pn->isEqualOpc(tcn) &&
3415
result[tmpIdx + tcn->getID()] == _Embed &&
3416
(commutative || tcn->getChild(pIndex) == tn)) break;
3417
}
3418
if (tcn) break;
3419
}
3420
}
3421
}
3422
if (!tn)
3423
{
3424
if (!pn->isOptionalNode())
3425
{
3426
return false;
3427
}
3428
else
3429
{
3430
if (!pn->getParents()->isEmpty() && !pn->isSkipParentsCheck())
3431
{
3432
bool nextInLoop = false;
3433
bool nextOptionalParents = false;
3434
if (checkParents(pn, t, result, &nextInLoop, &nextOptionalParents))
3435
{
3436
if (!nextOptionalParents) allOptionalParents = false;
3437
if (nextInLoop) isTargetInsideOfLoop = true;
3438
}
3439
else
3440
{
3441
return false;
3442
}
3443
}
3444
}
3445
}
3446
else
3447
{
3448
if (!pn->isOptionalNode()) allOptionalParents = false;
3449
if (!tn->isOutsideOfLoop()) isTargetInsideOfLoop = true;
3450
}
3451
}
3452
*optionalParents = allOptionalParents;
3453
*inLoop = isTargetInsideOfLoop;
3454
return true;
3455
}
3456
3457
3458
//***************************************************************************************
3459
// It computes embedding information for an input data dependence graph.
3460
// Because this graph consists of a directed acyclic graph (DAG), it handles only a DAG.
3461
// It uses the topological embedding algorithm (dag_embed) by walking data dependence graph
3462
// from leaf to root and find the candidate nodes.
3463
// Note that we need to find candidates among the leaf nodes in the data dependence graph.
3464
// The function checkParentsNonRec() finds candidates of leaf nodes by analyzing their ancestors.
3465
// When there are multiple candidates of a leaf, we will exclude those candidates
3466
// which are unlikely to be matched to the leaf.
3467
// In this case, we will perform the topological embedding again.
3468
//***************************************************************************************
3469
bool
3470
TR_CISCTransformer::computeEmbeddedForData()
3471
{
3472
uint8_t *const result = _embeddedForData;
3473
bool ret = false;
3474
bool skipScreening = false;
3475
const bool enableWriteBarrierConversion = TR::Compiler->om.writeBarrierType() == gc_modron_wrtbar_none;
3476
3477
memset(result, 0, _sizeResult);
3478
TR_CISCNode *p, *t;
3479
ListElement<TR_CISCNode> *const plistHead = _P->getOrderByData()->getListHead();
3480
ListElement<TR_CISCNode> *const tlistHead = _T->getOrderByData()->getListHead();
3481
ListElement<TR_CISCNode> *ple, *tle;
3482
uint32_t i;
3483
3484
while(true) // This loop is for narrowing down candidates of a leaf.
3485
{
3486
// Perform topological embedding algorithm (dagEmbed)
3487
for (ple = plistHead; ple; ple = ple->getNextElement())
3488
{
3489
p = ple->getData();
3490
const uint32_t pOpc = p->getOpcode();
3491
const uint32_t tmpIdx = idx(p->getID(), 0);
3492
const int32_t pOtherInfo = p->getOtherInfo();
3493
const uint16_t numPChi = p->getNumChildren();
3494
const bool isVariable = (pOpc == TR_variable);
3495
const bool isBoolTable = (pOpc == TR_booltable);
3496
const bool isAllowWrtbar = enableWriteBarrierConversion && (pOpc == TR_inbstore || pOpc == TR_indstore);
3497
const bool isCheckOtherInfo = p->isInterestingConstant();
3498
const bool isChildDirectlyConnected = p->isChildDirectlyConnected();
3499
const bool isNecessaryScreening = p->isNecessaryScreening();
3500
const bool commutative = p->isCommutative();
3501
const bool checkLoopInvariant =
3502
pOpc == TR_variableORconst
3503
|| pOpc == TR_quasiConst
3504
|| pOpc == TR_quasiConst2
3505
|| pOpc == TR_arraybase;
3506
const bool isOptionalNode = p->isOptionalNode();
3507
bool existEmbed = false;
3508
tle = tlistHead;
3509
while(true)
3510
{
3511
t = tle->getData();
3512
const uint16_t numTChi = t->getNumChildren();
3513
const uint32_t tOpc = t->getOpcode();
3514
const uint32_t index = tmpIdx + t->getID();
3515
tle = tle->getNextElement();
3516
bool isEmbed = true;
3517
3518
// check degree
3519
if (numPChi != numTChi && numPChi != 0)
3520
{
3521
if ((!isBoolTable || numTChi < 1) &&
3522
(!isAllowWrtbar || numTChi < 2))
3523
isEmbed = false;
3524
}
3525
3526
if (isEmbed)
3527
{
3528
if (skipScreening &&
3529
isNecessaryScreening)
3530
{
3531
existEmbed = true;
3532
isEmbed = (result[index] == _Embed);
3533
}
3534
else if ((isEmbed = p->isEqualOpc(t)) == true)
3535
{
3536
// if its a constant, check if values match
3537
if (isCheckOtherInfo &&
3538
pOtherInfo != t->getOtherInfo())
3539
{
3540
isEmbed = false;
3541
}
3542
else if (isNecessaryScreening)
3543
{
3544
if (checkLoopInvariant)
3545
{
3546
if (tOpc == TR_variable)
3547
{
3548
ListIterator<TR_CISCNode> parenti(t->getParents());
3549
TR_CISCNode *parent;
3550
for (parent = parenti.getFirst(); parent; parent = parenti.getNext())
3551
{
3552
if (!parent->isOutsideOfLoop() &&
3553
parent->getIlOpCode().isStoreDirect())
3554
{
3555
if (trace()) traceMsg(comp(), "pID%d: tID%d isn't loop invariant because of %d\n", p->getID(), t->getID(), parent->getID());
3556
isEmbed = false;
3557
break;
3558
}
3559
}
3560
}
3561
}
3562
3563
// finds candidates of leaf nodes by analyzing their ancestors.
3564
//isEmbed = p->checkParents(t, DEPTH_CHECKPARENTS);
3565
//FIXME: enable the recursive routine
3566
//
3567
if (isEmbed) isEmbed = TR_CISCNode::checkParentsNonRec(p, t, DEPTH_CHECKPARENTS, comp());
3568
}
3569
}
3570
3571
if (isEmbed)
3572
{
3573
if (p->isStoreDirect())
3574
{
3575
isEmbed = false;
3576
TR_ASSERT(numPChi == 2, "error");
3577
uint8_t chiData = result[idx(p->getChild(0)->getID(), t->getChild(0)->getID())];
3578
if (chiData == _Embed || (chiData == _Desc && !isChildDirectlyConnected))
3579
{
3580
chiData = result[idx(p->getChild(1)->getID(), t->getChild(1)->getID())];
3581
if (chiData == _Embed) // allow only _Embed
3582
isEmbed = true;
3583
}
3584
}
3585
else
3586
{
3587
// For commutative expressions, we try to swap operands for the comparison.
3588
if (commutative && numPChi == 2)
3589
{
3590
TR_CISCNode *pch0 = p->getChild(0);
3591
TR_CISCNode *tch0 = t->getChild(0);
3592
TR_CISCNode *tch1 = t->getChild(1);
3593
uint32_t pIdx0 = idx(pch0->getID(), 0);
3594
uint8_t chiData00 = result[pIdx0 + tch0->getID()];
3595
uint8_t chiData01 = result[pIdx0 + tch1->getID()];
3596
if ((chiData00 == _Embed || chiData01 == _Embed) ||
3597
((chiData00 == _Desc) &&
3598
(!isChildDirectlyConnected ||
3599
(tch0->isLoadVarDirect() &&
3600
_Embed == result[pIdx0 + tch0->getChild(0)->getID()]))) ||
3601
((chiData01 == _Desc) &&
3602
(!isChildDirectlyConnected ||
3603
(tch1->isLoadVarDirect() &&
3604
_Embed == result[pIdx0 + tch1->getChild(0)->getID()]))))
3605
{
3606
// OK!
3607
TR_CISCNode *pch1 = p->getChild(1);
3608
uint32_t pIdx1 = idx(pch1->getID(), 0);
3609
uint8_t chiData10 = result[pIdx1 + tch0->getID()];
3610
uint8_t chiData11 = result[pIdx1 + tch1->getID()];
3611
if ((chiData10 == _Embed || chiData11 == _Embed) ||
3612
((chiData10 == _Desc) &&
3613
(!isChildDirectlyConnected ||
3614
(tch0->isLoadVarDirect() &&
3615
_Embed == result[pIdx1 + tch0->getChild(0)->getID()]))) ||
3616
((chiData11 == _Desc) &&
3617
(!isChildDirectlyConnected ||
3618
(tch1->isLoadVarDirect() &&
3619
_Embed == result[pIdx1 + tch1->getChild(0)->getID()]))))
3620
{
3621
// OK!
3622
}
3623
else
3624
{
3625
isEmbed = false;
3626
}
3627
}
3628
else
3629
{
3630
isEmbed = false;
3631
}
3632
}
3633
else
3634
{
3635
for (i = 0; i < numPChi; i++)
3636
{
3637
TR_CISCNode *pch = p->getChild(i);
3638
TR_CISCNode *tch = t->getChild(i);
3639
uint8_t chiData = result[idx(pch->getID(), tch->getID())];
3640
if (chiData == _Embed ||
3641
((chiData == _Desc) &&
3642
(!isChildDirectlyConnected ||
3643
(tch->isLoadVarDirect() &&
3644
_Embed == result[idx(pch->getID(), tch->getChild(0)->getID())]))))
3645
{
3646
// OK!
3647
}
3648
else
3649
{
3650
isEmbed = false;
3651
break;
3652
}
3653
}
3654
}
3655
}
3656
}
3657
}
3658
3659
if (isEmbed)
3660
{
3661
TR_ASSERT(index == idx(p->getID(), t->getID()), "error");
3662
result[index] = _Embed;
3663
existEmbed = true;
3664
}
3665
else
3666
{
3667
if (numTChi == 1) // for reducing compilation time
3668
{
3669
if (tOpc != TR::arraylength) // The type of the child (ref) is different from the type of destination (int).
3670
{
3671
uint8_t chiData = result[tmpIdx + t->getChild(0)->getID()];
3672
isEmbed = isDescOrEmbed(chiData);
3673
}
3674
}
3675
else if (numTChi == 2) // for reducing compilation time
3676
{
3677
uint16_t idTChi0 = t->getChild(0)->getID();
3678
uint16_t idTChi1 = t->getChild(1)->getID();
3679
uint8_t chiData0 = result[tmpIdx + idTChi0];
3680
uint8_t chiData1 = result[tmpIdx + idTChi1];
3681
isEmbed = isDescOrEmbed(chiData0) | isDescOrEmbed(chiData1);
3682
}
3683
else // Natural code
3684
{
3685
for (i = 0; i < numTChi; i++)
3686
{
3687
uint8_t chiData = result[tmpIdx + t->getChild(i)->getID()];
3688
if (isDescOrEmbed(chiData))
3689
{
3690
isEmbed = true;
3691
break;
3692
}
3693
}
3694
}
3695
3696
if (isOptionalNode && !isEmbed)
3697
{
3698
for (i = 0; i < numPChi; i++)
3699
{
3700
uint8_t chiData = result[idx(p->getChild(i)->getID(), t->getID())];
3701
if (chiData == _Desc || chiData == _Embed)
3702
{
3703
isEmbed = true;
3704
break;
3705
}
3706
}
3707
}
3708
TR_ASSERT(index == idx(p->getID(), t->getID()), "error");
3709
if (isEmbed)
3710
{
3711
result[index] = _Desc;
3712
if (t->isStoreDirect())
3713
{
3714
int32_t childIdx = tmpIdx + t->getChild(1)->getID();
3715
result[childIdx] |= _Desc;
3716
}
3717
}
3718
else
3719
result[index] = _NotEmbed;
3720
}
3721
if (!tle) break;
3722
}
3723
if (!existEmbed && !isOptionalNode) // cannot find any nodes corresponding to p
3724
{
3725
if (trace())
3726
{
3727
traceMsg(comp(), "data dag embedding failed for node %d.\n", p->getID());
3728
showEmbeddedData("Result of _embeddedForData", result);
3729
}
3730
return false;
3731
}
3732
}
3733
// Finish topological embedding algorithm (dagEmbed)
3734
3735
3736
// From here, I'd like to exclude those candidates
3737
// which are unlikely to be matched to each leaf.
3738
3739
//showEmbeddedData("before screening1", result);
3740
skipScreening = true;
3741
bool modifyEmbeddedResult = false;
3742
TR_ScratchList<TR_CISCNode> singleList(comp()->trMemory()), multiList(comp()->trMemory());
3743
3744
// This loop tries to exclude candidates by analyzing parents
3745
// It also creates two lists singleList and multiList.
3746
// * singleList has a pattern leaf node corresponding to a single target node
3747
// * multiList has a pattern leaf node corresponding to multiple target nodes
3748
for (ple = plistHead; ple; ple = ple->getNextElement())
3749
{
3750
p = ple->getData();
3751
if (!p->isNecessaryScreening()) continue;
3752
const bool lightScreening = p->isLightScreening();
3753
const uint32_t tmpIdx = idx(p->getID(), 0);
3754
int32_t count = 0;
3755
for (tle = tlistHead; tle; tle = tle->getNextElement())
3756
{
3757
t = tle->getData();
3758
if (result[tmpIdx + t->getID()] == _Embed)
3759
{
3760
bool inLoop = false;
3761
bool allOptionalParents = false;
3762
if (!(!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening)))
3763
{
3764
count ++;
3765
}
3766
}
3767
}
3768
bool checkOptionalParents = (count == 0);
3769
if (trace() && count == 0) traceMsg(comp(), "screening1: count=%d for p:%d\n",count,p->getID());
3770
count = 0;
3771
for (tle = tlistHead; tle; tle = tle->getNextElement())
3772
{
3773
t = tle->getData();
3774
if (result[tmpIdx + t->getID()] == _Embed)
3775
{
3776
bool inLoop = false;
3777
bool allOptionalParents = false;
3778
if (!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening ||
3779
(checkOptionalParents && allOptionalParents)))
3780
{
3781
modifyEmbeddedResult = true;
3782
result[tmpIdx + t->getID()] = _NotEmbed;
3783
if (trace()) traceMsg(comp(), "screening1: set _NotEmbed to (%d, %d)\n",p->getID(),t->getID());
3784
}
3785
else
3786
{
3787
count ++;
3788
}
3789
}
3790
}
3791
if (count == 0)
3792
{
3793
if (!p->isOptionalNode())
3794
{
3795
if (trace())
3796
{
3797
traceMsg(comp(), "fail!! pID=%d.\n", p->getID());
3798
showEmbeddedData("Result of _embeddedForData", result);
3799
}
3800
return false;
3801
}
3802
}
3803
else if (count == 1)
3804
{
3805
singleList.add(p);
3806
}
3807
else if (count >= 2)
3808
{
3809
multiList.add(p);
3810
}
3811
}
3812
3813
3814
// This loop tries to exclude those candidates that already included in singleList.
3815
//showEmbeddedData("before screening2", result);
3816
if (!multiList.isEmpty())
3817
{
3818
ListIterator<TR_CISCNode> mi(&multiList);
3819
ListIterator<TR_CISCNode> si(&singleList);
3820
TR_CISCNode *s, *m;
3821
for (m = mi.getFirst(); m; m = mi.getNext())
3822
{
3823
TR_ASSERT(m->isNecessaryScreening(), "error!");
3824
if (m->isLightScreening()) continue;
3825
const uint32_t tmpMIdx = idx(m->getID(), 0);
3826
const int32_t mOpcode = m->getOpcode();
3827
bool thisScreening = false;
3828
// Try a set of the same opcode
3829
for (s = si.getFirst(); s; s = si.getNext())
3830
{
3831
if (mOpcode == s->getOpcode())
3832
{
3833
const uint32_t tmpSIdx = idx(s->getID(), 0);
3834
uint32_t tID;
3835
for (tID = 0; tID < _numTNodes; tID++ )
3836
{
3837
if (result[tmpSIdx + tID] == _Embed) break;
3838
}
3839
if (result[tmpMIdx + tID] == _Embed)
3840
{
3841
modifyEmbeddedResult = true;
3842
thisScreening = true;
3843
result[tmpMIdx + tID] = _NotEmbed;
3844
if (trace()) traceMsg(comp(), "screening2, sameOpcode: set _NotEmbed to (%d, %d)\n",m->getID(),tID);
3845
}
3846
}
3847
}
3848
bool changeToSingle = false;
3849
if (thisScreening)
3850
{
3851
uint32_t tID;
3852
int32_t count = 0;
3853
for (tID = 0; tID < _numTNodes; tID++ )
3854
{
3855
if (result[tmpMIdx + tID] == _Embed) count++;
3856
}
3857
if (count <= 1) changeToSingle = true;
3858
}
3859
// Try a set of different opcodes
3860
if (!changeToSingle)
3861
{
3862
for (s = si.getFirst(); s; s = si.getNext())
3863
{
3864
if (mOpcode != s->getOpcode())
3865
{
3866
const uint32_t tmpSIdx = idx(s->getID(), 0);
3867
uint32_t tID;
3868
for (tID = 0; tID < _numTNodes; tID++ )
3869
{
3870
if (result[tmpSIdx + tID] == _Embed) break;
3871
}
3872
if (result[tmpMIdx + tID] == _Embed)
3873
{
3874
modifyEmbeddedResult = true;
3875
thisScreening = true;
3876
result[tmpMIdx + tID] = _NotEmbed;
3877
if (trace()) traceMsg(comp(), "screening2, others: set _NotEmbed to (%d, %d)\n",m->getID(),tID);
3878
}
3879
}
3880
}
3881
}
3882
}
3883
}
3884
if (!modifyEmbeddedResult) break;
3885
}
3886
3887
for (ple = plistHead; ple; ple = ple->getNextElement())
3888
{
3889
p = ple->getData();
3890
if (!p->isNecessaryScreening()) continue;
3891
const bool lightScreening = p->isLightScreening();
3892
const uint32_t tmpIdx = idx(p->getID(), 0);
3893
for (tle = tlistHead; tle; tle = tle->getNextElement())
3894
{
3895
t = tle->getData();
3896
if (result[tmpIdx + t->getID()] == _Embed)
3897
{
3898
bool inLoop = false;
3899
bool allOptionalParents = false;
3900
if (!checkParents(p, t, result, &inLoop, &allOptionalParents) || !(inLoop || lightScreening))
3901
{
3902
result[tmpIdx + t->getID()] = _NotEmbed;
3903
if (trace()) traceMsg(comp(), "screening3: set _NotEmbed to (%d, %d)\n",p->getID(),t->getID());
3904
}
3905
}
3906
}
3907
}
3908
3909
if (trace())
3910
showEmbeddedData("Result of _embeddedForData", result);
3911
return true;
3912
}
3913
3914
3915
3916
3917
3918
//***************************************************************************************
3919
// It corresponds to the algorithm "dag_embed()" in Fu's paper. (p.385)
3920
// I relaxed degree checking for TR_booltable to find switch-case statements.
3921
//***************************************************************************************
3922
bool
3923
TR_CISCTransformer::dagEmbed(TR_CISCNode *np, TR_CISCNode *nt)
3924
{
3925
uint8_t *const result = _embeddedForCFG;
3926
const uint32_t numPSucc = np->getNumSuccs();
3927
const uint32_t numTSucc = nt->getNumSuccs();
3928
bool isEmbed = false;
3929
const uint32_t tmpIdx = idx(np->getID(), 0);
3930
const uint32_t index = tmpIdx + nt->getID();
3931
uint32_t i;
3932
3933
if (_embeddedForData[index] == _Embed &&
3934
((numPSucc == numTSucc) || (numPSucc == 0)))
3935
{
3936
const bool isSuccDirectlyConnected = np->isSuccDirectlyConnected();
3937
isEmbed = true;
3938
uint8_t *const result = _embeddedForCFG;
3939
if (np->getOpcode() == TR_booltable)
3940
{
3941
TR_ASSERT(numPSucc == 2, "error!!");
3942
uint16_t idPSucc0 = np->getSucc(0)->getID();
3943
uint16_t idTSucc0 = nt->getSucc(0)->getID();
3944
uint16_t idPSucc1 = np->getSucc(1)->getID();
3945
uint16_t idTSucc1 = nt->getSucc(1)->getID();
3946
uint8_t succData01 = result[idx(idPSucc0, idTSucc1)];
3947
uint8_t succData10 = result[idx(idPSucc1, idTSucc0)];
3948
if (isDescOrEmbed(succData01) & isDescOrEmbed(succData10))
3949
{
3950
nt->reverseBranchOpCodes();
3951
}
3952
}
3953
for (i = 0; i < numPSucc; i++)
3954
{
3955
uint16_t idPSucc = np->getSucc(i)->getID();
3956
uint16_t idTSucc = nt->getSucc(i)->getID();
3957
uint8_t succData = result[idx(idPSucc, idTSucc)];
3958
if ((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed)
3959
{
3960
isEmbed = false;
3961
break;
3962
}
3963
}
3964
}
3965
if (isEmbed)
3966
{
3967
result[index] = _Embed;
3968
return true;
3969
}
3970
else
3971
{
3972
TR_ASSERT(index == idx(np->getID(), nt->getID()), "error");
3973
if (numTSucc == 1) // for reducing compilation time
3974
{
3975
uint8_t succData = result[tmpIdx + nt->getSucc(0)->getID()];
3976
result[index] = isDescOrEmbed(succData) ? _Desc : _NotEmbed;
3977
}
3978
else if (numTSucc == 0) // for reducing compilation time
3979
{
3980
result[index] = _NotEmbed;
3981
}
3982
else // Natural code
3983
{
3984
for (i = 0; i < numTSucc; i++)
3985
{
3986
uint16_t idTSucc = nt->getSucc(i)->getID();
3987
uint8_t succData = result[tmpIdx + idTSucc];
3988
if (isDescOrEmbed(succData))
3989
{
3990
isEmbed = true;
3991
break;
3992
}
3993
}
3994
result[index] = isEmbed ? _Desc : _NotEmbed;
3995
}
3996
}
3997
return false;
3998
}
3999
4000
4001
//***************************************************************************************
4002
// It corresponds to the algorithm "cycle_embed()" in Fu's paper. (pp.387-388)
4003
// I relaxed degree checking for TR_booltable to find switch-case statements.
4004
//***************************************************************************************
4005
bool
4006
TR_CISCTransformer::cycleEmbed(uint16_t dagP, uint16_t dagT)
4007
{
4008
const List<TR_CISCNode> *dagId2NodesP = _P->getDagId2Nodes();
4009
const List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();
4010
List<TR_CISCNode> dagPList = dagId2NodesP[dagP];
4011
List<TR_CISCNode> dagTList = dagId2NodesT[dagT];
4012
ListIterator<TR_CISCNode> pi(&dagPList);
4013
ListIterator<TR_CISCNode> ti(&dagTList);
4014
uint8_t *result = _embeddedForCFG;
4015
uint8_t *const embeddedForData = _embeddedForData;
4016
uint32_t i;
4017
4018
memset(_EM, 0, _sizeResult);
4019
memset(_DE, 0, _sizeDE);
4020
TR_CISCNode *np, *nt;
4021
bool isEmbed;
4022
for (np = pi.getFirst(); np; np = pi.getNext())
4023
{
4024
const uint16_t pId = np->getID();
4025
const uint32_t tmpIdx = idx(pId, 0);
4026
const uint32_t numPSucc = np->getNumSuccs();
4027
const bool isSuccDirectlyConnected = np->isSuccDirectlyConnected();
4028
const bool isBoolTable = (np->getOpcode() == TR_booltable);
4029
for (nt = ti.getFirst(); nt; nt = ti.getNext())
4030
{
4031
const uint32_t numTSucc = nt->getNumSuccs();
4032
const uint32_t index = tmpIdx + nt->getID();
4033
isEmbed = false;
4034
bool isLabelSame = (embeddedForData[index] == _Embed);
4035
uint32_t tOpc = nt->getOpcode();
4036
if (isLabelSame)
4037
{
4038
if ((numPSucc == numTSucc) || (numPSucc == 0))
4039
{
4040
isEmbed = true;
4041
if (isBoolTable)
4042
{
4043
TR_ASSERT(numPSucc == 2, "error!!");
4044
uint16_t idPSucc0 = np->getSucc(0)->getID();
4045
uint16_t idTSucc0 = nt->getSucc(0)->getID();
4046
uint16_t idPSucc1 = np->getSucc(1)->getID();
4047
uint16_t idTSucc1 = nt->getSucc(1)->getID();
4048
uint8_t succData01 = result[idx(idPSucc0, idTSucc1)];
4049
uint8_t succData10 = result[idx(idPSucc1, idTSucc0)];
4050
if (isDescOrEmbed(succData01) & isDescOrEmbed(succData10))
4051
{
4052
nt->reverseBranchOpCodes();
4053
}
4054
}
4055
for (i = 0; i < numPSucc; i++)
4056
{
4057
uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(i)->getID())];
4058
if ((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed)
4059
{
4060
isEmbed = false;
4061
break;
4062
}
4063
}
4064
}
4065
else if (isBoolTable)
4066
{
4067
if (tOpc == TR::Case)
4068
{
4069
i = (nt->isValidOtherInfo() ? 1 : 0);
4070
uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(0)->getID())];
4071
isEmbed = !((succData != _Desc || isSuccDirectlyConnected) && succData != _Embed);
4072
}
4073
}
4074
}
4075
uint8_t chkEmbed;
4076
if (isEmbed)
4077
chkEmbed = _Embed;
4078
else
4079
{
4080
if (!isLabelSame)
4081
chkEmbed = _NotEmbed;
4082
else if (numPSucc != numTSucc)
4083
{
4084
chkEmbed = _NotEmbed;
4085
if (isBoolTable)
4086
{
4087
chkEmbed = _Cond;
4088
if (tOpc == TR::Case)
4089
{
4090
i = (nt->isValidOtherInfo() ? 1 : 0);
4091
uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(0)->getID())];
4092
if (succData == _NotEmbed) chkEmbed = _NotEmbed;
4093
}
4094
}
4095
}
4096
else
4097
{
4098
chkEmbed = _Cond;
4099
for (i = 0; i < numPSucc; i++)
4100
{
4101
uint8_t succData = result[idx(np->getSucc(i)->getID(), nt->getSucc(i)->getID())];
4102
if (succData == _NotEmbed)
4103
{
4104
chkEmbed = _NotEmbed;
4105
break;
4106
}
4107
}
4108
}
4109
}
4110
_EM[index] = chkEmbed;
4111
if (chkEmbed == _Embed || chkEmbed == _Cond)
4112
_DE[pId] = 1;
4113
else
4114
{
4115
for (i = 0; i < numTSucc; i++)
4116
{
4117
uint8_t succData = result[tmpIdx + nt->getSucc(i)->getID()];
4118
if (isDescOrEmbed(succData))
4119
{
4120
_DE[pId] = 1;
4121
break;
4122
}
4123
}
4124
}
4125
}
4126
}
4127
4128
for (np = pi.getFirst(); np; np = pi.getNext())
4129
{
4130
if (_DE[np->getID()] == 0)
4131
{
4132
for (np = pi.getFirst(); np; np = pi.getNext())
4133
{
4134
const uint32_t tmpIdx = idx(np->getID(), 0);
4135
for (nt = ti.getFirst(); nt; nt = ti.getNext())
4136
{
4137
result[tmpIdx + nt->getID()] = _NotEmbed; // set NotEmbed to all elements
4138
}
4139
}
4140
}
4141
}
4142
4143
bool ret = true;
4144
for (np = pi.getFirst(); np; np = pi.getNext())
4145
{
4146
const uint32_t tmpIdx = idx(np->getID(), 0);
4147
bool existEmbed = false;
4148
for (nt = ti.getFirst(); nt; nt = ti.getNext())
4149
{
4150
const uint32_t index = tmpIdx + nt->getID();
4151
uint8_t chkEmbed = _EM[index];
4152
if (chkEmbed == _Embed || chkEmbed == _Cond)
4153
{
4154
result[index] = _Embed;
4155
existEmbed = true;
4156
}
4157
else
4158
{
4159
result[index] = _Desc;
4160
}
4161
}
4162
if (!existEmbed && !np->isOptionalNode())
4163
{
4164
ret = false;
4165
}
4166
}
4167
return ret;
4168
}
4169
4170
4171
4172
//***************************************************************************************
4173
// It computes embedding information for an input CFG.
4174
// Because CFG consists of DAGs and a cycle, it will handle them.
4175
// It uses the result of computeEmbeddedForData() to find candidate nodes
4176
// whose label is the same as that of each node in the idiom graph.
4177
// It uses the topological embedding algorithm by walking the CFG edges from exit to entry.
4178
// At the time, we traverse nodes based on the order of the DagIDs,
4179
// which basically represent a post order of basic blocks.
4180
//***************************************************************************************
4181
bool
4182
TR_CISCTransformer::computeEmbeddedForCFG()
4183
{
4184
TR_ASSERT(_embeddedForData != 0, "error");
4185
uint8_t *const result = _embeddedForCFG;
4186
memset(result, 0, _sizeResult);
4187
uint16_t dagP, dagT;
4188
uint16_t numDagIdsP = _P->getNumDagIds();
4189
uint16_t numDagIdsT = _T->getNumDagIds();
4190
const List<TR_CISCNode> *dagId2NodesP = _P->getDagId2Nodes();
4191
const List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();
4192
TR_CISCNode *np, *nt;
4193
4194
for (dagP = 0; dagP < numDagIdsP; dagP++)
4195
{
4196
List<TR_CISCNode> dagPList = dagId2NodesP[dagP];
4197
ListIterator<TR_CISCNode> pi(&dagPList);
4198
bool existEmbed = false;
4199
for (dagT = 0; dagT < numDagIdsT; dagT++)
4200
{
4201
List<TR_CISCNode> dagTList = dagId2NodesT[dagT];
4202
TR_ASSERT(!dagTList.isEmpty(), "empty dagId");
4203
if (dagTList.isSingleton())
4204
{
4205
nt = dagTList.getListHead()->getData();
4206
for (np = pi.getFirst(); np; np = pi.getNext())
4207
if (dagEmbed(np, nt)) existEmbed = true;
4208
}
4209
else
4210
{
4211
if (cycleEmbed(dagP, dagT)) existEmbed = true;
4212
}
4213
}
4214
if (!existEmbed)
4215
{
4216
if (trace())
4217
{
4218
traceMsg(comp(), "computeEmbeddedForCFG: Cannot find embedded nodes for dagP:%d\n",dagP);
4219
showEmbeddedData("Result of _embeddedForCFG", result);
4220
}
4221
return false;
4222
}
4223
}
4224
if (trace())
4225
showEmbeddedData("Result of _embeddedForCFG", result);
4226
return true;
4227
}
4228
4229
4230
//***************************************************************************************
4231
// It creates P2T and T2P tables from embedding information.
4232
// (P and T denote Pattern and Target, respectively.)
4233
// We can use them to find target nodes from pattern nodes, and vice versa.
4234
//***************************************************************************************
4235
bool
4236
TR_CISCTransformer::makeLists()
4237
{
4238
TR_CISCNode *p, *t;
4239
ListIterator<TR_CISCNode> pi(_P->getNodes());
4240
ListIterator<TR_CISCNode> ti(_T->getOrderByData());
4241
uint8_t *const result = _embeddedForCFG;
4242
uint8_t *const embeddedForData = _embeddedForData;
4243
bool modify = false;
4244
4245
memset(_P2T, 0, _sizeP2T);
4246
memset(_T2P, 0, _sizeT2P);
4247
4248
int i;
4249
for (i = 0; i < _numPNodes; i++) _P2T[i].setRegion(trMemory()->heapMemoryRegion());
4250
for (i = 0; i < _numTNodes; i++) _T2P[i].setRegion(trMemory()->heapMemoryRegion());
4251
4252
for (p = pi.getFirst(); p; p = pi.getNext())
4253
{
4254
const bool isEssential = p->isEssentialNode();
4255
const bool isSuccDirectlyConnected = p->isSuccDirectlyConnected();
4256
const uint32_t numPSucc = p->getNumSuccs();
4257
uint32_t pID = p->getID();
4258
List<TR_CISCNode> *pList = _P2T + pID;
4259
const uint32_t tmpIdx = idx(pID, 0);
4260
for (t = ti.getFirst(); t; t = ti.getNext())
4261
{
4262
uint32_t tID = t->getID();
4263
if (result[tmpIdx+tID] == _Embed)
4264
{
4265
bool isEmbed = true;
4266
if (isSuccDirectlyConnected)
4267
{
4268
for (uint32_t i = 0; i < numPSucc; i++)
4269
{
4270
if (result[idx(p->getSucc(i)->getID(), t->getSucc(i)->getID())] != _Embed)
4271
{
4272
isEmbed = false;
4273
break;
4274
}
4275
}
4276
}
4277
if (isEmbed)
4278
{
4279
if (trace() &&
4280
!_T2P[tID].isEmpty())
4281
{
4282
traceMsg(comp(), "makeLists: tID:%d corresponds to multiple nodes\n",tID);
4283
}
4284
if (isEssential) t->setIsEssentialNode();
4285
pList->add(t);
4286
if (numPSucc == 0) t->setIsNegligible();
4287
_T2P[tID].add(p);
4288
}
4289
else
4290
{
4291
modify = true;
4292
result[tmpIdx+tID] = _Desc;
4293
embeddedForData[tmpIdx+tID] = _Desc;
4294
}
4295
}
4296
}
4297
if (pList->isMultipleEntry() &&
4298
p->getOpcode() == TR_variable)
4299
{
4300
if (!p->isOptionalNode())
4301
{
4302
if (trace()) traceMsg(comp(), "makeLists: pid:%d a variable corresponds to multiple nodes\n",p->getID());
4303
return false; /* a variable corresponds to multiple nodes */
4304
}
4305
}
4306
}
4307
if (modify)
4308
{
4309
if (trace())
4310
showEmbeddedData("Result of _embeddedForCFG after makeLists", result);
4311
}
4312
return true;
4313
}
4314
4315
4316
4317
//*****************************************************************************
4318
// Analyze relationships for parents and children
4319
//*****************************************************************************
4320
int32_t
4321
TR_CISCTransformer::analyzeConnectionOnePairChild(TR_CISCNode *const p, TR_CISCNode *const t,
4322
TR_CISCNode *const pn, TR_CISCNode *tn)
4323
{
4324
uint8_t *result = _embeddedForData;
4325
const uint32_t tmpIdx = idx(pn->getID(), 0);
4326
int32_t successCount = 0;
4327
TR_CISCNode *tnBefore = t;
4328
while(true) // we may need to analyze descendant for tn because of negligible nodes (e.g. iload)
4329
{
4330
uint8_t chiData = result[tmpIdx + tn->getID()];
4331
if (chiData == _Embed)
4332
{
4333
// the connectivity of p and pn (child) is the same as that of t and tn (child)
4334
successCount++;
4335
tn->setIsParentSimplyConnected();
4336
break;
4337
}
4338
else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)
4339
{
4340
bool success = false;
4341
if (tnBefore->isLoadVarDirect())
4342
{
4343
ListIterator<TR_CISCNode> defI;
4344
ListIterator<TR_CISCNode> useI;
4345
defI.set(tnBefore->getChains());
4346
TR_ASSERT(defI.getFirst(), "error");
4347
TR_CISCNode *d;
4348
success = true;
4349
// Check if t and tn are connected via a variable
4350
for (d = defI.getFirst(); d; d = defI.getNext())
4351
{
4352
if (d->getOpcode() == TR_entrynode)
4353
{
4354
success = false;
4355
continue;
4356
}
4357
TR_ASSERT(d->isStoreDirect(), "error");
4358
TR_CISCNode *childOfStore = d->getChild(0);
4359
if (_Embed != result[tmpIdx + childOfStore->getID()])
4360
success = false; // Need to handle this store
4361
else
4362
{
4363
// Check if all uses are appropriate.
4364
bool validThisStore = true;
4365
bool includeExitNode = false;
4366
if (!d->isNegligible())
4367
{
4368
useI.set(d->getChains());
4369
TR_ASSERT(useI.getFirst(), "error");
4370
TR_CISCNode *u;
4371
List<TR_CISCNode> *pParents = p->getChild(0)->getParents();
4372
ListIterator<TR_CISCNode> pParentsI(pParents);
4373
for (u = useI.getFirst(); u; u = useI.getNext())
4374
{
4375
if (tnBefore == u) continue; // short-cut path
4376
if (u->getDagID() != d->getDagID())
4377
{
4378
includeExitNode = true;
4379
continue;
4380
}
4381
List<TR_CISCNode> *uParents = u->getParents();
4382
TR_CISCNode *uParent;
4383
TR_CISCNode *pParent;
4384
4385
// check all stored data are valid.
4386
ListIterator<TR_CISCNode> uParentsI(uParents);
4387
bool isEmbed = true;
4388
for (uParent = uParentsI.getFirst(); uParent; uParent = uParentsI.getNext())
4389
{
4390
isEmbed = false;
4391
for (pParent = pParentsI.getFirst(); pParent; pParent = pParentsI.getNext())
4392
{
4393
if (_Embed == result[idx(pParent->getID(), uParent->getID())])
4394
{
4395
isEmbed = true;
4396
break;
4397
}
4398
}
4399
if (!isEmbed) break;
4400
}
4401
if (!isEmbed)
4402
{
4403
success = validThisStore = false;
4404
break;
4405
}
4406
}
4407
}
4408
if (validThisStore)
4409
{
4410
if (!includeExitNode) d->setIsNegligible();
4411
childOfStore->setIsParentSimplyConnected();
4412
}
4413
}
4414
}
4415
}
4416
else if (tn->getOpcode() == TR_variable)
4417
{
4418
success = false;
4419
ListIterator<TR_CISCNode> hi(t->getHintChildren());
4420
TR_CISCNode *n;
4421
for (n = hi.getFirst(); n; n = hi.getNext()) // n is a right-hand side expression of a store
4422
{
4423
if (_Embed == result[tmpIdx + n->getID()])
4424
{
4425
n->setIsParentSimplyConnected();
4426
success = true;
4427
break;
4428
}
4429
}
4430
4431
// If we cannot use any hint, we'll look at a neighbor store instruction.
4432
if (!success)
4433
{
4434
List<TR_CISCNode> *preds = tnBefore->getPreds();
4435
while(preds->isSingleton())
4436
{
4437
n = preds->getListHead()->getData();
4438
if (n->isStoreDirect() &&
4439
n->getChild(1) == tnBefore &&
4440
_Embed == result[tmpIdx + n->getChild(0)->getID()])
4441
{
4442
n->getChild(0)->setIsParentSimplyConnected();
4443
success = true;
4444
break;
4445
}
4446
preds = n->getPreds();
4447
}
4448
}
4449
}
4450
if (success)
4451
{
4452
successCount++;
4453
}
4454
break;
4455
}
4456
if (tn->getNumChildren() == 0) break;
4457
tnBefore = tn;
4458
tn = tn->getChild(0);
4459
}
4460
return successCount;
4461
}
4462
4463
4464
4465
//*****************************************************************************
4466
// Analyze relationships for parents, children, predecessors, successors.
4467
// They are represented to four flags.
4468
//*****************************************************************************
4469
void
4470
TR_CISCTransformer::analyzeConnectionOnePair(TR_CISCNode *const p, TR_CISCNode *const t)
4471
{
4472
int32_t i, /*j,*/ num;
4473
uint8_t *result;
4474
TR_CISCNode *tn, *pn;
4475
int32_t successCount;
4476
const bool isBoolTable = (p->getOpcode() == TR_booltable);
4477
const bool isCmpAll = (p->getOpcode() == TR_ifcmpall);
4478
4479
result = _embeddedForData;
4480
num = p->getNumChildren();
4481
TR_ASSERT(t->getNumChildren() == num ||
4482
p->getOpcode() == TR_arrayindex ||
4483
p->getOpcode() == TR_arraybase ||
4484
p->getOpcode() == TR_quasiConst ||
4485
p->getOpcode() == TR_quasiConst2 ||
4486
p->getOpcode() == TR_booltable ||
4487
p->getOpcode() == TR_inbstore ||
4488
p->getOpcode() == TR_indstore, "error");
4489
if (p->getParents()->isEmpty() ||
4490
t->getParents()->isEmpty() ||
4491
t->getOpcode() == TR::Case ||
4492
t->getOpcode() == TR::awrtbari) t->setIsParentSimplyConnected();
4493
4494
// Analyze connectivities for children and parents
4495
if (num == 0)
4496
{
4497
t->setIsChildSimplyConnected();
4498
}
4499
else
4500
{
4501
successCount = 0;
4502
for (i = 0; i < num; i++) // for each child
4503
{
4504
const bool commutative = p->isCommutative();
4505
pn = p->getChild(i);
4506
while (pn->isOptionalNode() &&
4507
_P2T[pn->getID()].isEmpty() &&
4508
pn->getNumChildren() > 0)
4509
{
4510
pn = pn->getChild(0);
4511
}
4512
const uint32_t tmpIdx = idx(pn->getID(), 0);
4513
4514
while(true)
4515
{
4516
int32_t thisCount;
4517
if (commutative && num == 2)
4518
{
4519
if ((thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(i))) ||
4520
(thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(1-i))))
4521
{
4522
successCount += thisCount;
4523
break;
4524
}
4525
}
4526
else
4527
{
4528
thisCount = analyzeConnectionOnePairChild(p, t, pn, t->getChild(i));
4529
if (thisCount)
4530
{
4531
successCount += thisCount;
4532
break;
4533
}
4534
}
4535
if (pn->isOptionalNode() &&
4536
pn->getNumChildren() > 0)
4537
{
4538
pn = pn->getChild(0); // Try a child
4539
}
4540
else
4541
{
4542
break; // fail
4543
}
4544
}
4545
}
4546
if (successCount == num)
4547
{
4548
t->setIsChildSimplyConnected();
4549
}
4550
}
4551
4552
4553
// Analyzing Predecessors and Successors
4554
result = _embeddedForCFG;
4555
num = t->getNumSuccs();
4556
if (t->getPreds()->isEmpty() || p->getPreds()->isEmpty()) t->setIsPredSimplyConnected();
4557
if (num == 0 || p->getNumSuccs() == 0)
4558
{
4559
t->setIsSuccSimplyConnected();
4560
}
4561
else if (p->getNumSuccs() == num) // typical case
4562
{
4563
successCount = 0;
4564
for (i = 0; i < num; i++) // for each successor
4565
{
4566
pn = p->getSucc(i);
4567
while (pn->isOptionalNode() &&
4568
_P2T[pn->getID()].isEmpty() &&
4569
pn->getNumSuccs() > 0)
4570
{
4571
pn = pn->getSucc(0); // skip optional nodes
4572
}
4573
4574
while(true)
4575
{
4576
tn = t->getSucc(i);
4577
const uint32_t tmpIdx = idx(pn->getID(), 0);
4578
4579
while(true)
4580
{
4581
uint8_t chiData = result[tmpIdx + tn->getID()];
4582
if (chiData == _Embed)
4583
{
4584
// the connectivity of p and pn (succ) is the same as that of t and tn (succ)
4585
successCount++;
4586
tn->setIsPredSimplyConnected();
4587
break;
4588
}
4589
else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)
4590
{
4591
if (isBoolTable || isCmpAll)
4592
{
4593
if (_Embed == result[idx(p->getID(), tn->getID())])
4594
{
4595
successCount++;
4596
tn->setIsPredSimplyConnected();
4597
}
4598
}
4599
break;
4600
}
4601
tn = tn->getSucc(0);
4602
}
4603
4604
if (!tn->isPredSimplyConnected() &&
4605
pn->isOptionalNode())
4606
{
4607
while (pn->isOptionalNode() &&
4608
pn->getNumSuccs() > 0)
4609
{
4610
pn = pn->getSucc(0); // skip optional nodes
4611
}
4612
continue; // retry!
4613
}
4614
break; // Usually, this loop doesn't iterate.
4615
}
4616
}
4617
if (successCount == num)
4618
{
4619
t->setIsSuccSimplyConnected();
4620
}
4621
}
4622
else if (isBoolTable)
4623
{
4624
if (t->getOpcode() == TR::Case)
4625
{
4626
int32_t i = (t->isValidOtherInfo() ? 1 : 0);
4627
pn = p->getSucc(i);
4628
tn = t->getSucc(0);
4629
while(true)
4630
{
4631
uint8_t chiData = result[idx(pn->getID(), tn->getID())];
4632
if (chiData == _Embed)
4633
{
4634
tn->setIsPredSimplyConnected();
4635
t->setIsSuccSimplyConnected();
4636
break;
4637
}
4638
else if (chiData != _Desc || !tn->isNegligible() || tn->getNumSuccs() != 1)
4639
{
4640
if (_Embed == result[idx(p->getID(), tn->getID())])
4641
{
4642
tn->setIsPredSimplyConnected();
4643
t->setIsSuccSimplyConnected();
4644
}
4645
break;
4646
}
4647
tn = tn->getSucc(0);
4648
}
4649
}
4650
}
4651
}
4652
4653
4654
void
4655
TR_CISCTransformer::showT2P()
4656
{
4657
if (trace())
4658
{
4659
TR_CISCNode *p, *t;
4660
int32_t dagT, numDagIdsT = _T->getNumDagIds();
4661
List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();
4662
for (dagT = numDagIdsT; --dagT >= 0;)
4663
{
4664
ListIterator<TR_CISCNode> ti(dagId2NodesT + dagT);
4665
for (t = ti.getFirst(); t; t = ti.getNext())
4666
{
4667
uint32_t tID = t->getID();
4668
traceMsg(comp(), "%3d:",tID);
4669
if (!_T2P[tID].isEmpty())
4670
{
4671
//TR_ASSERT(_T2P[tID].isSingleton(), "it may be error (not sure).");
4672
ListIterator<TR_CISCNode> pi(_T2P + tID);
4673
for (p = pi.getFirst(); p; p = pi.getNext())
4674
{
4675
uint32_t pID = p->getID();
4676
traceMsg(comp(), " %2d",pID);
4677
}
4678
traceMsg(comp(), " %c%c%c%c", t->isSuccSimplyConnected() ? 'S' : 'x',
4679
t->isPredSimplyConnected() ? 'P' : 'x',
4680
t->isParentSimplyConnected() ? 'B' : 'x',
4681
t->isChildSimplyConnected() ? 'C' : 'x'
4682
);
4683
if (t->isNegligible()) traceMsg(comp(), "\t(negligible)");
4684
traceMsg(comp(), "\n");
4685
}
4686
else
4687
{
4688
if (t->isNegligible())
4689
{
4690
traceMsg(comp(), " negligible\n"); // negligible
4691
}
4692
else
4693
{
4694
t->dump(comp()->getOutFile(), comp());
4695
}
4696
}
4697
}
4698
}
4699
}
4700
}
4701
4702
4703
//*****************************************************************************
4704
// Analyze connectivities between pattern nodes and target nodes
4705
//*****************************************************************************
4706
void
4707
TR_CISCTransformer::analyzeConnection()
4708
{
4709
TR_CISCNode *p, *t;
4710
ListIterator<TR_CISCNode> pi(_P->getNodes());
4711
SpecialNodeTransformerPtr specialTransformer = _P->getSpecialNodeTransformer();
4712
int count = 0;
4713
4714
_T->setListsDuplicator();
4715
while(true)
4716
{
4717
for (p = pi.getFirst(); p; p = pi.getNext()) // for each pattern node
4718
{
4719
uint32_t pID = p->getID();
4720
ListIterator<TR_CISCNode> ti(_P2T + pID);
4721
for (t = ti.getFirst(); t; t = ti.getNext()) // for each target node corresponding to p
4722
{
4723
analyzeConnectionOnePair(p, t);
4724
}
4725
}
4726
4727
if (!specialTransformer ||
4728
!specialTransformer(this)) break;
4729
if (++count > 10) break;
4730
}
4731
4732
showT2P();
4733
}
4734
4735
4736
//*****************************************************************************
4737
// Analyze whether each candidate of array header constant is appropriate compared to the idiom.
4738
// Because the array header size is sometimes modified by constant folding
4739
// e.g. When AH is -24 for a[i], AH is modified to -25 for a[i+1]
4740
// If the analysis fails, it'll invalidate that node.
4741
//*****************************************************************************
4742
void
4743
TR_CISCTransformer::analyzeArrayHeaderConst()
4744
{
4745
int32_t i = 0;
4746
while(true) // check all TR_ahconst
4747
{
4748
TR_CISCNode *p = _P->getCISCNode(TR_ahconst, true, i);
4749
if (!p) break; // no more TR_ahconst
4750
4751
TR_ASSERT(p->getOpcode() == TR_ahconst, "error");
4752
int32_t pid = p->getID();
4753
ListIterator<TR_CISCNode> p2ti(_P2T + pid);
4754
TR_CISCNode *t;
4755
int32_t ahsize = -(int32_t)TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
4756
uint8_t *const embeddedForCFG = _embeddedForCFG;
4757
uint8_t *const embeddedForData = _embeddedForData;
4758
const uint32_t tmpIdx = idx(pid, 0);
4759
bool modify = false;
4760
for (t = p2ti.getFirst(); t; t = p2ti.getNext()) // for each target node
4761
{
4762
TR_ASSERT(t->isValidOtherInfo(), "error");
4763
int32_t val = t->getOtherInfo();
4764
bool invalidate = false;
4765
if (val != ahsize)
4766
{
4767
ListIterator<TR_CISCNode> parentTi(t->getParents());
4768
TR_CISCNode *parent;
4769
for (parent = parentTi.getFirst(); parent; parent = parentTi.getNext())
4770
{
4771
// Condition 1:
4772
// sub - parent
4773
// load - loadNode
4774
// variable - variableNode
4775
// ahconst - t
4776
if (!parent->getIlOpCode().isSub())
4777
{
4778
invalidate = true;
4779
break;
4780
}
4781
else
4782
{
4783
invalidate = true;
4784
TR_CISCNode *loadNode;
4785
TR_CISCNode *variableNode;
4786
TR_CISCNode *subNode = 0;
4787
TR_CISCNode *constNode = 0;
4788
TR_CISCNode *storeNode = 0;
4789
TR_CISCNode *i2lNode = 0;
4790
4791
loadNode = parent->getChild(0);
4792
if (loadNode->getOpcode() == TR::i2l)
4793
{
4794
i2lNode = loadNode;
4795
loadNode = loadNode->getChild(0);
4796
}
4797
if (loadNode->getOpcode() == TR_variable)
4798
{
4799
// Fail. Not implemeted yet.
4800
}
4801
else
4802
{
4803
// OK, Condition 1 is satisfied
4804
4805
// Next, check Condition 2
4806
// istore v
4807
// isub
4808
// iload v
4809
// iconst b
4810
variableNode = loadNode->getChild(0);
4811
ListIterator<TR_CISCNode> loadPi(loadNode->getParents());
4812
bool found = false;
4813
for (subNode = loadPi.getFirst(); subNode; subNode = loadPi.getNext())
4814
{
4815
if (parent != subNode && subNode->getIlOpCode().isSub())
4816
{
4817
constNode = subNode->getChild(1);
4818
if (constNode->isValidOtherInfo() &&
4819
constNode->getIlOpCode().isLoadConst() &&
4820
constNode->getOtherInfo() + ahsize == val)
4821
{
4822
ListIterator<TR_CISCNode> subPi(subNode->getParents());
4823
for (storeNode = subPi.getFirst(); storeNode; storeNode = subPi.getNext())
4824
{
4825
if (storeNode->getChild(1) == variableNode)
4826
{
4827
// Condition 2 is satisfied
4828
invalidate = false;
4829
found = true;
4830
break;
4831
////goto find;
4832
}
4833
}
4834
if (found)
4835
break;
4836
}
4837
}
4838
}
4839
}
4840
////find:;
4841
4842
if (invalidate) break;
4843
else
4844
{
4845
TR_CISCNode *newConstNode = _T->getCISCNode(t->getOpcode(), true, ahsize);
4846
if (newConstNode)
4847
{
4848
if (i2lNode)
4849
{
4850
parent->replaceChild(0, i2lNode);
4851
i2lNode->replaceChild(0, variableNode);
4852
i2lNode->setCISCNodeModified();
4853
}
4854
else
4855
{
4856
parent->replaceChild(0, variableNode);
4857
}
4858
parent->replaceChild(1, newConstNode);
4859
parent->setCISCNodeModified();
4860
const int32_t index = tmpIdx + newConstNode->getID();
4861
embeddedForCFG[index] = _Embed;
4862
embeddedForData[index] = _Embed;
4863
modify = true;
4864
}
4865
}
4866
}
4867
}
4868
}
4869
4870
if (invalidate)
4871
{
4872
const int32_t tid = t->getID();
4873
const int32_t index = tmpIdx + tid;
4874
if (trace())
4875
{
4876
traceMsg(comp(), "tid:%d (pid:%d) is invalidated because of failure of analyzeArrayHeaderConst\n",
4877
tid,pid);
4878
}
4879
embeddedForCFG[index] = _NotEmbed;
4880
embeddedForData[index] = _NotEmbed;
4881
}
4882
}
4883
if (modify && trace())
4884
{
4885
_T->dump(comp()->getOutFile(), comp());
4886
}
4887
i++;
4888
}
4889
}
4890
4891
4892
void
4893
TR_CISCTransformer::showCISCNodeRegion(TR_CISCNodeRegion *r, TR::Compilation * comp)
4894
{
4895
ListIterator<TR_CISCNode> ni;
4896
TR_CISCNode *n;
4897
4898
if (r->isIncludeEssentialNode()) traceMsg(comp, "(E) ");
4899
ni.set((ListHeadAndTail<TR_CISCNode>*)r);
4900
for (n = ni.getFirst(); n; n = ni.getNext())
4901
{
4902
traceMsg(comp, "%d->",n->getID());
4903
}
4904
traceMsg(comp, "\n");
4905
}
4906
4907
4908
void
4909
TR_CISCTransformer::showCISCNodeRegions(List<TR_CISCNodeRegion> *regions, TR::Compilation * comp)
4910
{
4911
ListIterator<TR_CISCNodeRegion> ri(regions);
4912
TR_CISCNodeRegion *r;
4913
4914
for (r = ri.getFirst(); r; r = ri.getNext())
4915
{
4916
showCISCNodeRegion(r, comp);
4917
}
4918
}
4919
4920
4921
4922
//*****************************************************************************
4923
// It removes first several nodes from the region r to correct the alignment
4924
//*****************************************************************************
4925
bool
4926
TR_CISCTransformer::alignTopOfRegion(TR_CISCNodeRegion *r)
4927
{
4928
ListElement<TR_CISCNode> *le;
4929
ListElement<TR_CISCNode> *firstNegligible = 0;
4930
TR_CISCNode *pTop = _P->getEntryNode()->getSucc(0);
4931
TR_CISCNode *t;
4932
4933
// determine the top node pTop of the idiom (skip optional nodes)
4934
while (true)
4935
{
4936
t = getP2TRep(pTop);
4937
if (t == 0)
4938
{
4939
if (!pTop->isOptionalNode())
4940
{
4941
if (trace()) traceMsg(comp(), "alignTopOfRegion failed. There is no target node corresponding to %d. Check for nodes in broken region listings above and x in SPBC listing.\n",pTop->getID());
4942
return false;
4943
}
4944
}
4945
else
4946
{
4947
if (!pTop->isOptionalNode() || r->isIncluded(t)) break;
4948
ListIterator<TR_CISCNode> ci(_P2T + pTop->getID());
4949
for (t = ci.getFirst(); t; t = ci.getNext())
4950
{
4951
if (r->isIncluded(t)) break;
4952
}
4953
if (t) break;
4954
}
4955
pTop = pTop->getSucc(0);
4956
}
4957
4958
if (trace()) traceMsg(comp(), "alignTopOfRegion: (pTop, t) is (%d, %d)\n", pTop->getID(), t->getID());
4959
4960
// remove nodes (from start to pTop) from the region r
4961
for (le = r->getListHead(); le; le = le->getNextElement())
4962
{
4963
t = le->getData();
4964
ListIterator<TR_CISCNode> ci(_T2P + t->getID());
4965
TR_CISCNode *p;
4966
bool pHasCFG = false;
4967
for (p = ci.getFirst(); p; p = ci.getNext())
4968
{
4969
if (p == pTop)
4970
{
4971
r->setListHead(firstNegligible ? firstNegligible : le);
4972
return true;
4973
}
4974
if (p->getNumSuccs() > 0 || !p->getPreds()->isEmpty())
4975
pHasCFG = true;
4976
}
4977
if (t->isNegligible() || !pHasCFG)
4978
{
4979
if (!firstNegligible && t->getOpcode() != TR::BBEnd) firstNegligible = le;
4980
}
4981
else
4982
{
4983
firstNegligible = 0;
4984
}
4985
}
4986
if (trace()) traceMsg(comp(), "alignTopOfRegion failed. Cannot find pTop:%d in the region.\n",pTop->getID());
4987
return false;
4988
}
4989
4990
4991
//*****************************************************************************
4992
// Check whether all nodes in the idiom _P are included in the region r.
4993
// It uses a bit vector to analyze above.
4994
//*****************************************************************************
4995
bool
4996
TR_CISCTransformer::areAllNodesIncluded(TR_CISCNodeRegion *r)
4997
{
4998
ListIterator<TR_CISCNode> ni;
4999
TR_CISCNode *t;
5000
TR_BitVector bv(_P->getNumNodes(), trMemory(), stackAlloc);
5001
ni.set(_P->getNodes());
5002
// Set the IDs of required nodes in the idiom _P to the bit vector bv.
5003
for (t = ni.getFirst(); t; t = ni.getNext())
5004
{
5005
if ((t->getNumSuccs() > 0 || !t->getPreds()->isEmpty()) && !t->isOptionalNode())
5006
{
5007
switch(t->getOpcode())
5008
{
5009
case TR_entrynode:
5010
case TR_exitnode:
5011
break;
5012
default:
5013
bv.set(t->getID());
5014
break;
5015
}
5016
}
5017
}
5018
5019
// Reset the ID of the idiom nodes corresponding to each target node in the region r.
5020
ni.set((ListHeadAndTail<TR_CISCNode>*)r);
5021
for (t = ni.getFirst(); t; t = ni.getNext())
5022
{
5023
ListIterator<TR_CISCNode> ci(_T2P + t->getID());
5024
TR_CISCNode *p;
5025
for (p = ci.getFirst(); p; p = ci.getNext())
5026
bv.reset(p->getID());
5027
}
5028
5029
// If the bit vector bv is empty, alll nodes are included in the region r.
5030
if (trace())
5031
{
5032
if (!bv.isEmpty())
5033
{
5034
traceMsg(comp(), "Cannot find pNodes: ");
5035
bv.print(comp(), comp()->getOutFile());
5036
traceMsg(comp(), "\n");
5037
}
5038
}
5039
return bv.isEmpty();
5040
}
5041
5042
5043
//*****************************************************************************
5044
// If moveTo is 0, the region ("from" through "to") in the list l will be moved to the last.
5045
// Otherwise, the region will be moved to before moveTo.
5046
//*****************************************************************************
5047
void
5048
TR_CISCTransformer::moveCISCNodesInList(List<TR_CISCNode> *l, TR_CISCNode *from, TR_CISCNode *to, TR_CISCNode *moveTo)
5049
{
5050
#if 0
5051
if (showMesssagesStdout())
5052
{
5053
printf("moveCISCNodesInList: %s\n",_T->getTitle());
5054
}
5055
#endif
5056
if (trace())
5057
{
5058
traceMsg(comp(), "moveCISCNodesInList: r_from:%p(%d) r_to:%p(%d) moveTo:%p(%d)\n",from,from->getID(),to,to->getID(),moveTo,moveTo->getID());
5059
}
5060
5061
ListElement<TR_CISCNode> *before = 0, *beforeFrom = 0, *beforeMoveTo = 0, *fromLe = 0, *toLe = 0, *moveToLe = 0, *le;
5062
for (le = l->getListHead(); le; le = le->getNextElement())
5063
{
5064
TR_CISCNode *n = le->getData();
5065
if (n == from)
5066
{
5067
beforeFrom = before;
5068
fromLe = le;
5069
}
5070
if (n == to)
5071
{
5072
TR_ASSERT(fromLe != 0, "error! fromLe must be found first.");
5073
toLe = le;
5074
}
5075
if (n == moveTo)
5076
{
5077
beforeMoveTo = before;
5078
moveToLe = le;
5079
}
5080
before = le;
5081
}
5082
if (moveTo == 0)
5083
{
5084
beforeMoveTo = before;
5085
}
5086
else
5087
{
5088
TR_ASSERT(moveToLe != 0, "error");
5089
if (moveToLe == 0) return; // the case if the assertion failed
5090
}
5091
TR_ASSERT(fromLe != 0 && toLe != 0, "error");
5092
if (fromLe == 0 || toLe == 0) return; // the case if the assertion failed
5093
if (toLe == beforeMoveTo) return; // Already moved
5094
5095
if (!beforeFrom)
5096
l->setListHead(toLe->getNextElement());
5097
else
5098
beforeFrom->setNextElement(toLe->getNextElement());
5099
5100
toLe->setNextElement(moveToLe);
5101
5102
if (!beforeMoveTo)
5103
l->setListHead(fromLe);
5104
else
5105
beforeMoveTo->setNextElement(fromLe);
5106
}
5107
5108
5109
//*****************************************************************************
// If moveTo is 0, the region ("from" through "to") will be moved to the last
// of the dagId2Nodes[from->getDagID()].
// Otherwise, the region will be moved to before moveTo.
// It assumes that all three nodes "from", "to", and "moveTo" (if non-null)
// have the same dagId.
// It maintains the following lists:
//   * _T->_dagId2Nodes[from->getDagID()]
//   * _T->_nodes
//   * _T->_orderByData
//
// Besides the lists, successor 0 of three nodes is rewired: the single
// predecessor of "from", the node that will precede the region at its new
// position, and "to" itself.
// "from" is asserted to have exactly one predecessor.
// debugStr is only echoed in the optional stdout message (may be null).
//*****************************************************************************
void
TR_CISCTransformer::moveCISCNodes(TR_CISCNode *from, TR_CISCNode *to, TR_CISCNode *moveTo, char *debugStr)
   {
   if (showMesssagesStdout())
      {
      printf("moveCISCNodes: %s %s\n",_T->getTitle(), debugStr ? debugStr : "");
      }

   int32_t dagId = from->getDagID();
   TR_ASSERT(dagId == to->getDagID(), "from->getDagID() and to->getDagID() must be same!");
   TR_ASSERT(!moveTo || dagId == moveTo->getDagID(), "from->getDagID() and moveTo->getDagID() must be same!");
   List<TR_CISCNode> *dagList = _T->getDagId2Nodes()+dagId;

   // prevOrg/nextOrg: neighbours of the region at its current position.
   // prevDst/nextDst: neighbours of the region at its new position.
   // succDst: successor 0 of prevDst, sampled before any rewiring.
   TR_CISCNode *prevOrg, *nextOrg;
   TR_CISCNode *prevDst, *nextDst, *succDst;

   TR_ASSERT(from->getPreds()->isSingleton(), "assumption error!");
   prevOrg = from->getHeadOfPredecessors();
   nextOrg = to->getSucc(0);

   ListElement<TR_CISCNode> *beforeLastDagIdElement = 0;
   ListElement<TR_CISCNode> *lastDagIdElement = dagList->getListHead();
   if (!moveTo)
      {
      // No explicit destination: walk to the last element of the dag list.
      while(lastDagIdElement->getNextElement())
         {
         beforeLastDagIdElement = lastDagIdElement;
         lastDagIdElement = lastDagIdElement->getNextElement();
         }
      prevDst = lastDagIdElement->getData();
      if (prevDst->getOpcode() == TR::BBEnd)
         {
         // The list ends with a BBEnd: insert the region before that BBEnd
         // instead of after it, so the BBEnd becomes the effective moveTo.
         moveTo = nextDst = prevDst;
         TR_ASSERT(beforeLastDagIdElement, "error!");
         prevDst = beforeLastDagIdElement->getData();
         }
      else
         {
         nextDst = prevDst->getSucc(0);
         }
      }
   else
      {
      // Explicit destination: find moveTo and the element just before it.
      while(lastDagIdElement)
         {
         if (lastDagIdElement->getData() == moveTo) break;
         beforeLastDagIdElement = lastDagIdElement;
         lastDagIdElement = lastDagIdElement->getNextElement();
         }
      nextDst = moveTo;
      // NOTE(review): the assert is the only guard; if moveTo is the head of
      // dagList, beforeLastDagIdElement is still 0 when dereferenced below.
      TR_ASSERT(beforeLastDagIdElement, "error!");
      prevDst = beforeLastDagIdElement->getData();
      }
   succDst = prevDst->getSucc(0);

   // Modify the successor of each node
   prevOrg->replaceSucc(0, nextOrg);   // bypass the region at its old position
   prevDst->replaceSucc(0, from);      // enter the region from its new predecessor
   to->replaceSucc(0, succDst);        // leave the region to prevDst's old successor

   // Modify three lists
   if (to->getNumChildren() != 0 || !to->getParents()->isEmpty()) // if "to" has a child or a parent.
      {
      // _orderByData is updated using the first node, starting at "from" /
      // "nextDst" and following successor 0, that has a child or a parent;
      // the nextDst walk also stops at the exit node.
      TR_CISCNode *fromData = from, *nextDstData = nextDst;
      while(fromData->getNumChildren() == 0 && fromData->getParents()->isEmpty()) fromData = fromData->getSucc(0);
      while(nextDstData->getNumChildren() == 0 && nextDstData->getParents()->isEmpty() && nextDstData->getOpcode() != TR_exitnode) nextDstData = nextDstData->getSucc(0);
      moveCISCNodesInList(_T->getOrderByData(), fromData, to, nextDstData);
      }

   moveCISCNodesInList(dagList, from, to, moveTo);
   // For _nodes the region end points are swapped and the destination is prevDst.
   moveCISCNodesInList(_T->getNodes(), to, from, prevDst);  // Note: _nodes is the reverse post order.
   }
5192
5193
5194
//*****************************************************************************
5195
// Based on four relationships (parents, children, predecessors, successors),
5196
// we extract the region that matches the idiom graph.
5197
// Return the region in which all nodes in the idiom graph are included
5198
//*****************************************************************************
5199
TR_CISCNodeRegion *
5200
TR_CISCTransformer::extractMatchingRegion()
5201
{
5202
TR_CISCNodeRegion *lists = new (trHeapMemory()) TR_CISCNodeRegion(_numTNodes, comp()->trMemory()->heapMemoryRegion());
5203
TR_ScratchList<TR_CISCNodeRegion> regions(comp()->trMemory());
5204
TR_CISCNode *t;
5205
ListElement<TR_CISCNode> *firstNegligible = 0;
5206
int32_t dagT, numDagIdsT = _T->getEntryNode()->getDagID() + 1;
5207
List<TR_CISCNode> *dagId2NodesT = _T->getDagId2Nodes();
5208
bool empty = true;
5209
5210
bool isSingleLoopBody = _bblistBody.isSingleton();
5211
5212
// Collect regions in which data dependence of nodes are equivalent to the idiom and
5213
// there is no additional node.
5214
for (dagT = numDagIdsT; --dagT >= 0;) // From entry to exit
5215
{
5216
List<TR_CISCNode> *TList = dagId2NodesT + dagT;
5217
ListElement<TR_CISCNode> *element;
5218
for (element = TList->getListHead(); element; element = element->getNextElement())
5219
{
5220
t = element->getData();
5221
uint32_t tID = t->getID();
5222
bool isEmbed = false;
5223
if (!_T2P[tID].isEmpty() && (t->isDataConnected() || t->isNegligible()))
5224
{
5225
// TODO: We may need more checks !!!
5226
isEmbed = true;
5227
if (t->getIlOpCode().isIf() && !t->isOutsideOfLoop())
5228
if (!t->isPredSimplyConnected() && !isSingleLoopBody)
5229
{
5230
isEmbed = false;
5231
if (showMesssagesStdout())
5232
{
5233
printf("!!!!!!!!!!!!!! Predecessor of tID %" OMR_PRIu32 " is different from that of idiom.\n", tID);
5234
}
5235
if (trace())
5236
{
5237
traceMsg(comp(), "Predecessor of tID %" OMR_PRIu32 " is different from that of idiom.\n", tID);
5238
}
5239
}
5240
}
5241
if (isEmbed)
5242
{
5243
// The node t is an appropriate node!
5244
if (empty && firstNegligible)
5245
{
5246
// Add all of the negligible nodes before the node corresponding to a pattern node.
5247
ListElement<TR_CISCNode> *tmp_ele;
5248
TR_CISCNode *neg;
5249
for (tmp_ele = firstNegligible; true; tmp_ele = tmp_ele->getNextElement())
5250
{
5251
neg = tmp_ele->getData();
5252
if (!neg->isNegligible() || !_T2P[neg->getID()].isEmpty()) break;
5253
lists->append(neg);
5254
}
5255
TR_ASSERT(neg == t, "error!");
5256
}
5257
empty = false;
5258
lists->append(t);
5259
}
5260
else
5261
{
5262
// The node t is an inappropriate node!
5263
if (!empty)
5264
{
5265
if (!t->isNegligible() || !_T2P[t->getID()].isEmpty())
5266
{
5267
// add "lists" to "regions" and clear it
5268
empty = true;
5269
firstNegligible = 0;
5270
regions.add(lists);
5271
lists = new (trHeapMemory()) TR_CISCNodeRegion(_numTNodes, comp()->trMemory()->heapMemoryRegion());
5272
}
5273
else
5274
{
5275
// It can be added
5276
lists->append(t);
5277
}
5278
}
5279
else
5280
{
5281
if (t->isNegligible() && _T2P[t->getID()].isEmpty())
5282
{
5283
// Register the first negligible node after the inappropriate node.
5284
if (!firstNegligible) firstNegligible = element;
5285
}
5286
else
5287
{
5288
// Clear the first negligible node
5289
firstNegligible = 0;
5290
}
5291
}
5292
}
5293
}
5294
firstNegligible = 0;
5295
}
5296
if (!empty)
5297
{
5298
regions.add(lists);
5299
}
5300
if (trace())
5301
{
5302
traceMsg(comp(), "Before alignTopOfRegion\n");
5303
showCISCNodeRegions(&regions, comp());
5304
}
5305
5306
ListIterator<TR_CISCNodeRegion> ri(&regions);
5307
ListIterator<TR_CISCNode> ni;
5308
TR_CISCNodeRegion *r, *ret = 0;
5309
const bool showingCandidates = isShowingCandidates();
5310
for (r = ri.getFirst(); r; r = ri.getNext())
5311
{
5312
if (r->isIncludeEssentialNode())
5313
{
5314
if (showingCandidates) _candidatesForRegister.add(r->clone());
5315
if (alignTopOfRegion(r)) // Remove nodes from the region r to correct the alignment
5316
{
5317
if (areAllNodesIncluded(r)) // If all idiom nodes are included in r
5318
{
5319
ret = r;
5320
break;
5321
}
5322
}
5323
}
5324
}
5325
if (trace())
5326
{
5327
traceMsg(comp(), "After alignTopOfRegion\n");
5328
showCISCNodeRegions(&regions, comp());
5329
traceMsg(comp(), "extractMatchingRegion ret=0x%x\n",ret);
5330
}
5331
5332
return ret;
5333
}
5334
5335
5336
//*****************************************************************************
5337
// It analyzes whether all blocks of the loop body are included in the _candidateRegion
5338
// It also creates _candidateBBStartEnd, which has all of TR::BBStart and TR::BBEnd nodes in the region.
5339
//*****************************************************************************
5340
bool
5341
TR_CISCTransformer::verifyCandidate()
5342
{
5343
ListIterator<TR_CISCNode> ci(_candidateRegion);
5344
TR_CISCNode *cn;
5345
ListHeadAndTail<TR_CISCNode> *listBB = new (trHeapMemory()) ListHeadAndTail<TR_CISCNode>(trMemory());
5346
ListElement <TR_CISCNode> *le;
5347
5348
// Create the list of TR::BBStart and TR::BBEnd nodes in the region.
5349
for (cn = ci.getFirst(); cn; cn = ci.getNext())
5350
{
5351
switch(cn->getOpcode())
5352
{
5353
case TR::BBStart:
5354
case TR::BBEnd:
5355
listBB->append(cn);
5356
break;
5357
}
5358
}
5359
5360
le = listBB->getListHead();
5361
ListIterator<TR::Block> bi(&_bblistBody);
5362
TR::Block *b;
5363
for (b = bi.getFirst(); b; b = bi.getNext())
5364
{
5365
while (true)
5366
{
5367
if (!le)
5368
{
5369
if (trace()) traceMsg(comp(), "Cannot find TR::BBStart of block_%d in the region\n",b->getNumber());
5370
return false; // Cannot find b in listBB
5371
}
5372
cn = le->getData();
5373
if (cn->getOpcode() == TR::BBStart && cn->getHeadOfTrNodeInfo()->_node->getBlock() == b)
5374
{
5375
le = le->getNextElement();
5376
if (!le) return false; // Cannot find TR::BBEnd
5377
cn = le->getData();
5378
if (cn->getOpcode() != TR::BBEnd || cn->getHeadOfTrNodeInfo()->_node->getBlock() != b)
5379
return false; // Cannot find TR::BBEnd
5380
le = le->getNextElement();
5381
break;
5382
}
5383
le = le->getNextElement();
5384
}
5385
}
5386
5387
_candidateBBStartEnd = listBB;
5388
return true;
5389
}
5390
5391
5392
5393
//*****************************************************************************************
5394
// Return the number of target nodes corresponding to p
5395
// Count only within a loop if inLoop is true (default is false).
5396
//*****************************************************************************************
5397
int
5398
TR_CISCTransformer::countP2T(TR_CISCNode *p, bool inLoop)
5399
{
5400
uint32_t pID = p->getID();
5401
List<TR_CISCNode> *list = _P2T + pID;
5402
if (list->isEmpty())
5403
{
5404
return 0;
5405
}
5406
else
5407
{
5408
ListIterator<TR_CISCNode> ni(list);
5409
TR_CISCNode *t;
5410
int count = 0;
5411
if (inLoop)
5412
{
5413
for (t = ni.getFirst(); t; t = ni.getNext()) if (!t->isOutsideOfLoop()) count++;
5414
}
5415
else
5416
{
5417
for (t = ni.getFirst(); t; t = ni.getNext()) count++;
5418
}
5419
return count;
5420
}
5421
}
5422
5423
5424
5425
//*****************************************************************************************
5426
// Return a representative target node corresponding to p
5427
// 0 for no-existence
5428
//*****************************************************************************************
5429
TR_CISCNode *
5430
TR_CISCTransformer::getP2TRep(TR_CISCNode *p)
5431
{
5432
uint32_t pID = p->getID();
5433
List<TR_CISCNode> *list = _P2T + pID;
5434
if (list->isEmpty())
5435
{
5436
return 0;
5437
}
5438
else
5439
{
5440
return list->getListHead()->getData();
5441
}
5442
}
5443
5444
5445
5446
//*****************************************************************************************
5447
// Return a representative target node *in the cycle* corresponding to p
5448
// 0 for no-existence
5449
//*****************************************************************************************
5450
TR_CISCNode *
5451
TR_CISCTransformer::getP2TRepInLoop(TR_CISCNode *p, TR_CISCNode *exclude)
5452
{
5453
uint32_t pID = p->getID();
5454
List<TR_CISCNode> *list = _P2T + pID;
5455
if (list->isEmpty())
5456
{
5457
return 0;
5458
}
5459
else
5460
{
5461
ListIterator<TR_CISCNode> ni(list);
5462
TR_CISCNode *t;
5463
for (t = ni.getFirst(); t; t = ni.getNext())
5464
{
5465
if (!t->isOutsideOfLoop() && t != exclude) return t;
5466
}
5467
return 0;
5468
}
5469
}
5470
5471
5472
5473
//*****************************************************************************************
5474
// Return a target node *in the cycle* corresponding to p if the target node is only one.
5475
// 0 for others
5476
//*****************************************************************************************
5477
TR_CISCNode *
5478
TR_CISCTransformer::getP2TInLoopIfSingle(TR_CISCNode *p)
5479
{
5480
uint32_t pID = p->getID();
5481
List<TR_CISCNode> *list = _P2T + pID;
5482
if (list->isEmpty())
5483
{
5484
return 0;
5485
}
5486
else
5487
{
5488
ListIterator<TR_CISCNode> ni(list);
5489
TR_CISCNode *t;
5490
TR_CISCNode *ret = 0;
5491
for (t = ni.getFirst(); t; t = ni.getNext())
5492
{
5493
if (!t->isOutsideOfLoop())
5494
{
5495
if (ret) return 0;
5496
ret = t;
5497
}
5498
}
5499
return ret;
5500
}
5501
}
5502
5503
5504
5505
//*****************************************************************************************
5506
// It is similar to getP2TInLoopIfSingle.
5507
// If the given pattern node is "optional", we can skip it.
5508
//*****************************************************************************************
5509
TR_CISCNode *
5510
TR_CISCTransformer::getP2TInLoopAllowOptionalIfSingle(TR_CISCNode *p)
5511
{
5512
TR_CISCNode *t;
5513
while(true)
5514
{
5515
t = getP2TInLoopIfSingle(p);
5516
if (t) return t;
5517
if (!p->isOptionalNode()) return 0;
5518
p = p->getChild(0);
5519
if (!p) return 0;
5520
}
5521
}
5522
5523
5524
//*****************************************************************************************
5525
// Return TR::TreeTop, TR::Node, and TR::Block corresponding to the first node of the region _candidateRegion
5526
//*****************************************************************************************
5527
bool
5528
TR_CISCTransformer::findFirstNode(TR::TreeTop **retTree, TR::Node **retNode, TR::Block **retBlock)
5529
{
5530
ListIterator<TR_CISCNode> ci(_candidateRegion);
5531
TR_CISCNode *cn = NULL;
5532
TR::Node *trNode = NULL;
5533
TR::TreeTop *trTreeTop = NULL;
5534
TR::Block *block = NULL;
5535
5536
for (cn = ci.getFirst(); cn; cn = ci.getNext())
5537
{
5538
if (cn->getOpcode() == TR_entrynode) continue;
5539
if (cn->isNewCISCNode()) continue;
5540
if (trace() && !cn->getTrNodeInfo()->isSingleton())
5541
traceMsg(comp(), "!cn->getTrNodeInfo()->isSingleton(): %d\n",cn->getID());
5542
TR_ASSERT(cn->getTrNodeInfo()->isSingleton(), "it must correspond to a single TR node");
5543
struct TrNodeInfo *info = cn->getHeadOfTrNodeInfo();
5544
trNode = info->_node;
5545
if (trNode->getOpCodeValue() == TR::BBEnd) continue;
5546
if(cn->getOpcode() == TR::BBStart)
5547
{
5548
block = trNode->getBlock();
5549
5550
trTreeTop = info->_treeTop;
5551
trTreeTop = trTreeTop->getNextTreeTop();
5552
trNode = trTreeTop->getNode();
5553
if (trNode->getOpCodeValue() != TR::BBEnd)
5554
break;
5555
}
5556
else
5557
{
5558
trTreeTop = info->_treeTop;
5559
if (trTreeTop->getNode() == trNode)
5560
{
5561
if (!block)
5562
{
5563
cn = _candidateBBStartEnd->getHeadData();
5564
if(cn->getOpcode() == TR::BBEnd)
5565
{
5566
block = cn->getHeadOfTrNodeInfo()->_node->getBlock();
5567
}
5568
}
5569
break;
5570
}
5571
}
5572
}
5573
5574
TR_ASSERT(trNode->getOpCodeValue() != TR::BBEnd, "Assumption failed!");
5575
*retTree = trTreeTop;
5576
*retNode = trNode;
5577
*retBlock = block;
5578
if (trace()) traceMsg(comp(), "First node in candidate region - node: %p block_%d: %p\n",trNode, block->getNumber(), block);
5579
return true;
5580
}
5581
5582
5583
//*****************************************************************************************
5584
// It adds the edge from srcBlock to the destBlock, only if succList does not have it.
5585
//*****************************************************************************************
5586
void
5587
TR_CISCTransformer::addEdge(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock)
5588
{
5589
for (auto edge = succList->begin(); edge != succList->end(); ++edge)
5590
{
5591
TR::Block * dest = toBlock((*edge)->getTo());
5592
TR::Block * src = toBlock((*edge)->getFrom());
5593
if (src == srcBlock && dest == destBlock)
5594
{
5595
return; // already exists!
5596
}
5597
}
5598
_cfg->addEdge(TR::CFGEdge::createEdge(srcBlock, destBlock, trMemory()));
5599
return;
5600
}
5601
5602
5603
//*****************************************************************************************
5604
// It removes the edge (from srcBlock to the destBlock) from the CFG.
5605
//*****************************************************************************************
5606
void
5607
TR_CISCTransformer::removeEdge(List<TR::CFGEdge> *succList, TR::Block *srcBlock, TR::Block *destBlock)
5608
{
5609
ListIterator<TR::CFGEdge> succIt(succList);
5610
TR::CFGEdge * edge;
5611
5612
for (edge = succIt.getCurrent(); edge != 0; edge = succIt.getNext())
5613
{
5614
TR::Block * dest = toBlock(edge->getTo());
5615
TR::Block * src = toBlock(edge->getFrom());
5616
if (src == srcBlock && dest == destBlock)
5617
{
5618
_cfg->removeEdge(edge);
5619
}
5620
}
5621
return;
5622
}
5623
5624
5625
//*****************************************************************************************
5626
// It removes the edges (from srcBlock to all blocks except for exceptDestBlock) from the CFG.
5627
//*****************************************************************************************
5628
void
5629
TR_CISCTransformer::removeEdgesExceptFor(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *exceptDestBlock)
5630
{
5631
for (auto edge = succList->begin(); edge != succList->end();)
5632
{
5633
TR::Block * dest = toBlock((*edge)->getTo());
5634
TR::Block * src = toBlock((*edge)->getFrom());
5635
if (src == srcBlock && dest != exceptDestBlock)
5636
{
5637
_cfg->removeEdge(*(edge++));
5638
}
5639
else
5640
++edge;
5641
}
5642
return;
5643
}
5644
5645
5646
//*****************************************************************************************
5647
// Set the edge from srcBlock to destBlock into the CFG.
5648
//*****************************************************************************************
5649
void
5650
TR_CISCTransformer::setEdge(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock)
5651
{
5652
addEdge(succList, srcBlock, destBlock);
5653
removeEdgesExceptFor(succList, srcBlock, destBlock);
5654
}
5655
5656
5657
//*****************************************************************************************
5658
// Set two edges from srcBlock to destBlock0 and destBlock1 into the CFG.
5659
//*****************************************************************************************
5660
void
5661
TR_CISCTransformer::setEdges(TR::CFGEdgeList *succList, TR::Block *srcBlock, TR::Block *destBlock0, TR::Block *destBlock1)
5662
{
5663
bool existEdge0, existEdge1;
5664
existEdge0 = existEdge1 = false;
5665
int32_t count0, count1;
5666
5667
for (auto edge = succList->begin(); edge != succList->end(); ++edge)
5668
{
5669
TR::Block * dest = toBlock((*edge)->getTo());
5670
TR::Block * src = toBlock((*edge)->getFrom());
5671
if (src == srcBlock)
5672
{
5673
if (dest == destBlock0) existEdge0 = true;
5674
else if (dest == destBlock1) existEdge1 = true;
5675
}
5676
}
5677
5678
if (!existEdge1) addEdge(succList, srcBlock, destBlock1);
5679
if (!existEdge0) addEdge(succList, srcBlock, destBlock0);
5680
5681
count0 = count1 = 0;
5682
for (auto edge = succList->begin(); edge != succList->end();)
5683
{
5684
TR::Block * dest = toBlock((*edge)->getTo());
5685
TR::Block * src = toBlock((*edge)->getFrom());
5686
if (src == srcBlock)
5687
{
5688
if (dest == destBlock0)
5689
{
5690
if (++count0 >= 2) _cfg->removeEdge(*(edge++));
5691
else ++edge;
5692
}
5693
else if (dest == destBlock1)
5694
{
5695
if (++count1 >= 2) _cfg->removeEdge(*(edge++));
5696
else ++edge;
5697
}
5698
else
5699
{
5700
_cfg->removeEdge(*(edge++));
5701
}
5702
}
5703
else
5704
++edge;
5705
}
5706
}
5707
5708
5709
//*****************************************************************************
5710
// It analyzes whether the successor of the target loop is single.
5711
// Even if there are multiple successors for the loop, it tries to analyze
5712
// whether they can be merged to a single successor.
5713
// It returns:
5714
// * non-null: the single successor block
5715
// * null: multiple successors
5716
//*****************************************************************************
5717
TR::Block *
5718
TR_CISCTransformer::analyzeSuccessorBlock(TR::Node *ignoreTree)
5719
{
5720
TR::Block *target = 0;
5721
if (_bblistSucc.isSingleton())
5722
{
5723
target = _bblistSucc.getListHead()->getData(); // obvious
5724
}
5725
else
5726
{
5727
ListIterator<TR::Block> bbi1(&_bblistSucc), bbi2(&_bblistSucc);
5728
TR::Block *b1, *b2;
5729
5730
// Analyze successors will be merged to a single block without any additional instruction
5731
for (b1 = bbi1.getFirst(); b1; b1 = bbi1.getNext())
5732
{
5733
target = 0;
5734
for (b2 = bbi2.getFirst(); b2; b2 = bbi2.getNext())
5735
{
5736
if (b1 != b2)
5737
{
5738
TR::Node *gotonode = b2->getFirstRealTreeTop()->getNode();
5739
if (gotonode->getOpCodeValue() == TR::Goto &&
5740
gotonode->getBranchDestination()->getNode()->getBlock() == b1)
5741
{
5742
target = b1;
5743
}
5744
else if (gotonode->getOpCodeValue() == TR::BBEnd &&
5745
b2->getNextBlock() == b1)
5746
{
5747
target = b2;
5748
}
5749
else
5750
{
5751
target = 0;
5752
break;
5753
}
5754
}
5755
}
5756
if (target) break;
5757
}
5758
if (!target)
5759
{
5760
for (b1 = bbi1.getFirst(); b1; b1 = bbi1.getNext())
5761
{
5762
TR::Block *gotoTarget = skipGoto(b1, ignoreTree);
5763
if (!target)
5764
{
5765
target = gotoTarget;
5766
}
5767
else
5768
{
5769
if (target != gotoTarget)
5770
{
5771
target = 0;
5772
break;
5773
}
5774
}
5775
}
5776
}
5777
5778
// Especially for two successors, I'll analyze more deeply.
5779
/*
5780
if (!target && _bblistSucc.isDoubleton())
5781
{
5782
b1 = bbi1.getFirst();
5783
b2 = bbi1.getNext();
5784
TR::Block *cur1 = b1, *cur2 = b2;
5785
while(true)
5786
{
5787
cur1 = skipGoto(cur1, ignoreTree);
5788
cur2 = skipGoto(cur2, ignoreTree);
5789
if (cur1 == cur2)
5790
{
5791
target = b1;
5792
break;
5793
}
5794
if (!compareBlockTrNodeTree(cur1, cur2)) break;
5795
if (!(cur1 = getSuccBlockIfSingle(cur1))) break;
5796
if (!(cur2 = getSuccBlockIfSingle(cur2))) break;
5797
}
5798
}
5799
*/
5800
}
5801
5802
if (trace())
5803
{
5804
if (target == 0)
5805
traceMsg(comp(), "!! TR_CISCTransformer::analyzeSuccessorBlock returns 0!\n");
5806
}
5807
5808
return target;
5809
}
5810
5811
5812
//*****************************************************************************************
5813
// It sets one successor "target" to the block.
5814
//*****************************************************************************************
5815
void
5816
TR_CISCTransformer::setSuccessorEdge(TR::Block *block, TR::Block *target)
5817
{
5818
if (target == 0)
5819
target = analyzeSuccessorBlock();
5820
5821
TR_ASSERT(target != 0, "target must be non-null!!");
5822
5823
TR::Node *gotonode = block->getLastRealTreeTop()->getNode();
5824
if (gotonode->getOpCodeValue() != TR::Goto)
5825
{
5826
TR::TreeTop * branchAroundTreeTop = TR::TreeTop::create(comp(), TR::Node::create(gotonode, TR::Goto, 0, target->getEntry()));
5827
block->getLastRealTreeTop()->join(branchAroundTreeTop);
5828
branchAroundTreeTop->join(block->getExit());
5829
}
5830
setEdge(&block->getSuccessors(), block, target);
5831
}
5832
5833
5834
//*****************************************************************************************
5835
// Search for the TR::Block in _bblistSucc except for target0 and target1
5836
//*****************************************************************************************
5837
TR::Block *
5838
TR_CISCTransformer::searchOtherBlockInSuccBlocks(TR::Block *target0, TR::Block *target1)
5839
{
5840
ListIterator<TR::Block> bbi1(&_bblistSucc);
5841
TR::Block *b;
5842
TR::Block *ret = 0;
5843
for (b = bbi1.getFirst(); b; b = bbi1.getNext())
5844
{
5845
if (b == target0 || b == target1)
5846
continue;
5847
if (ret)
5848
return 0; // Failure - Found two non-target0/target1 blocks.
5849
ret = b;
5850
}
5851
return ret;
5852
}
5853
5854
5855
//*****************************************************************************************
5856
// Search for the TR::Block in _bblistSucc except for target0
5857
//*****************************************************************************************
5858
TR::Block *
5859
TR_CISCTransformer::searchOtherBlockInSuccBlocks(TR::Block *target0)
5860
{
5861
if (_bblistSucc.isDoubleton())
5862
{
5863
ListIterator<TR::Block> bbi1(&_bblistSucc);
5864
TR::Block *first = bbi1.getFirst();
5865
TR::Block *second = bbi1.getNext();
5866
if (first == target0)
5867
return second;
5868
else if (second == target0)
5869
return first;
5870
}
5871
return 0;
5872
}
5873
5874
5875
//*****************************************************************************************
5876
// It sets two successors "target0" and "target1" to the block.
5877
//*****************************************************************************************
5878
TR::Block *
5879
TR_CISCTransformer::setSuccessorEdges(TR::Block *block, TR::Block *target0, TR::Block *target1)
5880
{
5881
TR::TreeTop * oldNext = block->getExit()->getNextTreeTop();
5882
if (target0 == 0 || target1 == 0) // automatic detection
5883
{
5884
if (target0 == 0)
5885
target0 = searchOtherBlockInSuccBlocks(target1);
5886
else
5887
target1 = searchOtherBlockInSuccBlocks(target0);
5888
TR_ASSERT(target0 && target1, "error");
5889
}
5890
if (trace())
5891
{
5892
traceMsg(comp(), "setSuccessorEdges for block_%d [%p]: tgt0=%d tgt1=%d\n", block->getNumber(), block, target0->getNumber(),target1->getNumber());
5893
}
5894
5895
if (!oldNext ||
5896
oldNext->getNode()->getBlock() != target0)
5897
{
5898
TR::Node *lastnode = block->getLastRealTreeTop()->getNode();
5899
TR::Block * gotoBlock = TR::Block::createEmptyBlock(lastnode, comp(), block->getFrequency(), block);
5900
_cfg->addNode(gotoBlock);
5901
TR::TreeTop * gotoEntry = gotoBlock->getEntry();
5902
TR::TreeTop * gotoExit = gotoBlock->getExit();
5903
TR::TreeTop * branchTreeTop = TR::TreeTop::create(comp(), TR::Node::create(lastnode, TR::Goto, 0, target0->getEntry()));
5904
gotoEntry->insertAfter(branchTreeTop);
5905
5906
block->getExit()->join(gotoEntry);
5907
gotoExit->join(oldNext);
5908
5909
_cfg->setStructure(0);
5910
_cfg->addEdge(TR::CFGEdge::createEdge(gotoBlock, target0, trMemory()));
5911
setEdges(&block->getSuccessors(), block, gotoBlock, target1);
5912
return gotoBlock;
5913
}
5914
else
5915
{
5916
setEdges(&block->getSuccessors(), block, target0, target1);
5917
return block;
5918
}
5919
}
5920
5921
5922
//*****************************************************************************************
5923
// It returns:
5924
// * 0: if successor is not single
5925
// * non-null: the successor block
5926
//*****************************************************************************************
5927
TR::Block *
5928
TR_CISCTransformer::getSuccBlockIfSingle(TR::Block *block)
5929
{
5930
if (!(block->getSuccessors().size() == 1)) return 0;
5931
TR::CFGEdge *edge = block->getSuccessors().front();
5932
return toBlock(edge->getTo());
5933
}
5934
5935
5936
5937
//*****************************************************************************************
5938
// It searches for a combination of predecessors of "block" and _bblistPred.
5939
//*****************************************************************************************
5940
TR::Block *
5941
TR_CISCTransformer::searchPredecessorOfBlock(TR::Block *block)
5942
{
5943
for (auto edge = block->getPredecessors().begin(); edge != block->getPredecessors().end(); ++edge)
5944
{
5945
TR::Block *from = toBlock((*edge)->getFrom());
5946
if (_bblistPred.find(from))
5947
{
5948
return from; // find
5949
}
5950
}
5951
return NULL;
5952
}
5953
5954
5955
//*****************************************************************************************
5956
// It decides whether we generate versioning code and modifies the target blocks.
5957
// It returns the block that fast code will be appended.
5958
//*****************************************************************************************
5959
TR::Block *
5960
TR_CISCTransformer::modifyBlockByVersioningCheck(TR::Block *block, TR::TreeTop *startTop, TR::Node *lengthNode, List<TR::Node> *guardList)
5961
{
5962
uint16_t versionLength = _P->getVersionLength();
5963
List<TR::Node> guardListLocal(trMemory());
5964
// Create versioning if necessary
5965
if (versionLength >= 1) // Skip if versionLength is less than 1.
5966
{
5967
if (guardList == 0) guardList = &guardListLocal;
5968
ListAppender<TR::Node> appender(guardList);
5969
if (lengthNode->getOpCodeValue() == TR::i2l) lengthNode = lengthNode->getAndDecChild(0);
5970
if (lengthNode->getOpCode().isLong())
5971
{
5972
TR::Node *lconst = TR::Node::create(lengthNode, TR::lconst, 0, 0);
5973
lconst->setLongInt(versionLength);
5974
appender.add(TR::Node::createif(TR::iflcmple, lengthNode, lconst));
5975
}
5976
else
5977
{
5978
TR_ASSERT(lengthNode->getOpCode().isInt(), "Error");
5979
appender.add(TR::Node::createif(TR::ificmple, lengthNode,
5980
TR::Node::create(lengthNode, TR::iconst, 0, versionLength)));
5981
}
5982
}
5983
return modifyBlockByVersioningCheck(block, startTop, guardList);
5984
}
5985
5986
5987
5988
//*****************************************************************************************
5989
// It decides whether we generate versioning code and modifies the target blocks.
5990
// It returns the block that fast code will be appended.
5991
//*****************************************************************************************
5992
TR::Block *
5993
TR_CISCTransformer::modifyBlockByVersioningCheck(TR::Block *block, TR::TreeTop *startTop, List<TR::Node> *guardList)
5994
{
5995
TR::CFG *cfg = comp()->getFlowGraph();
5996
TR::Block *fastpath;
5997
5998
// Create versioning if necessary
5999
if (guardList && !guardList->isEmpty())
6000
{
6001
cfg->setStructure(0);
6002
fastpath = TR::Block::createEmptyBlock(startTop->getNode(), comp(), block->getFrequency(), block);
6003
TR::Block *slowpad;
6004
TR::Node *cmp;
6005
TR::Block *orgPrevBlock = 0;
6006
ListIterator<TR::Node> guardI(guardList);
6007
TR::Block *firstBlock = 0;
6008
TR::Block *lastBlock = 0;
6009
6010
// Append versioning check
6011
// Result: orgPrevBlock->block->fastpath->slowpad
6012
6013
if (block->getFirstRealTreeTop() == startTop)
6014
{
6015
// search the entry pad
6016
orgPrevBlock = searchPredecessorOfBlock(block);
6017
}
6018
6019
// Insert the fastpath + versioning tress in between the previous block and current block, unless:
6020
// 1. We do not find a previous block.
6021
// 2. The previous block does not fall-through into current block (i.e. reached by taken branch).
6022
// In these two cases, we will just split the current block, and insert the fastpath + versioning trees
6023
// in between.
6024
if (!orgPrevBlock || orgPrevBlock->getNextBlock() != block)
6025
{
6026
orgPrevBlock = block;
6027
slowpad = block->split(startTop, cfg, true);
6028
}
6029
else
6030
{
6031
slowpad = block;
6032
}
6033
6034
TR::TreeTop * orgPrevTreeTop = orgPrevBlock->getExit();
6035
TR::Node *lastOrgPrevRealNode = orgPrevBlock->getLastRealTreeTop()->getNode();
6036
TR::TreeTop * orgNextTreeTop = orgPrevTreeTop->getNextTreeTop();
6037
if (orgNextTreeTop)
6038
{
6039
TR::Block * orgNextBlock = orgNextTreeTop->getNode()->getBlock();
6040
cfg->insertBefore(fastpath, orgNextBlock);
6041
}
6042
else
6043
{
6044
cfg->addNode(fastpath);
6045
}
6046
6047
firstBlock = fastpath;
6048
for (cmp = guardI.getFirst(); cmp; cmp = guardI.getNext())
6049
{
6050
block = TR::Block::createEmptyBlock(startTop->getNode(), comp(), block->getFrequency(), block);
6051
if (!lastBlock) lastBlock = block;
6052
TR_ASSERT(cmp->getOpCode().isIf(), "Not implemeted yet");
6053
cmp->setBranchDestination(slowpad->getEntry());
6054
block->append(TR::TreeTop::create(comp(), cmp));
6055
cfg->insertBefore(block, firstBlock);
6056
firstBlock = block;
6057
}
6058
6059
orgPrevTreeTop->join(firstBlock->getEntry());
6060
cfg->addEdge(orgPrevBlock, firstBlock);
6061
cfg->removeEdge(orgPrevBlock, slowpad);
6062
6063
if (trace()) traceMsg(comp(), "modifyBlockByVersioningCheck: orgPrevBlock=%d firstBlock=%d lastBlock=%d fastpath=%d slowpad=%d orgNextTreeTop=%x\n",
6064
orgPrevBlock->getNumber(), firstBlock->getNumber(), lastBlock->getNumber(), fastpath->getNumber(), slowpad->getNumber(), orgNextTreeTop);
6065
6066
if (lastOrgPrevRealNode->getOpCode().getOpCodeValue() == TR::Goto)
6067
{
6068
TR_ASSERT(lastOrgPrevRealNode->getBranchDestination() == slowpad->getEntry(), "Error");
6069
lastOrgPrevRealNode->setBranchDestination(firstBlock->getEntry());
6070
}
6071
}
6072
else
6073
{
6074
// Generate no versioning code
6075
TR::TreeTop *lastRealTT = block->getLastRealTreeTop();
6076
if (lastRealTT->getNode()->getOpCodeValue() == TR::Goto)
6077
{
6078
if (startTop != lastRealTT)
6079
{
6080
TR::TreeTop *last = removeAllNodes(startTop, lastRealTT);
6081
last->join(lastRealTT);
6082
}
6083
block->split(lastRealTT, cfg);
6084
}
6085
else
6086
{
6087
TR::TreeTop *last = removeAllNodes(startTop, block->getExit());
6088
last->join(block->getExit());
6089
}
6090
fastpath = block;
6091
}
6092
return fastpath;
6093
}
6094
6095
6096
//*****************************************************************************************
6097
// It clones the loop body.
6098
//*****************************************************************************************
6099
TR::Block *
6100
TR_CISCTransformer::cloneLoopBodyForPeel(TR::Block **firstBlock, TR::Block **lastBlock, TR_CISCNode *cmpifCISCNode)
6101
{
6102
TR::CFG *cfg = comp()->getFlowGraph();
6103
cfg->setStructure(0);
6104
TR_BlockCloner cloner(cfg);
6105
*firstBlock = cloner.cloneBlocks(_bblistBody.getListHead()->getData(),_bblistBody.getLastElement()->getData());
6106
*lastBlock = cloner.getLastClonedBlock();
6107
if (cmpifCISCNode)
6108
{
6109
struct TrNodeInfo *repNode = cmpifCISCNode->getHeadOfTrNodeInfo();
6110
TR::Block *modifyBlock = cloner.getToBlock(repNode->_block);
6111
TR_ASSERT(modifyBlock != repNode->_block, "error");
6112
TR::Node *modifyNode = modifyBlock->getLastRealTreeTop()->getNode();
6113
TR_ASSERT(modifyNode->getOpCode().isIf(), "error");
6114
TR::Node::recreate(modifyNode, (TR::ILOpCodes)cmpifCISCNode->getOpcode());
6115
modifyNode->setBranchDestination(cmpifCISCNode->getDestination());
6116
}
6117
return *firstBlock;
6118
}
6119
6120
//*****************************************************************************************
6121
// It appends blocks after "block".
6122
//*****************************************************************************************
6123
TR::Block *
6124
TR_CISCTransformer::appendBlocks(TR::Block *block, TR::Block *firstBlock, TR::Block *lastBlock)
6125
{
6126
TR::CFG *cfg = comp()->getFlowGraph();
6127
cfg->setStructure(0);
6128
TR::Block *ret;
6129
6130
TR::TreeTop *orgNextTreeTop = block->getExit()->getNextTreeTop();
6131
if (orgNextTreeTop)
6132
{
6133
TR::Block * orgNextBlock = orgNextTreeTop->getEnclosingBlock();
6134
ret = TR::Block::createEmptyBlock(block->getExit()->getNode(), comp(), block->getFrequency(), block);
6135
cfg->insertBefore(ret, orgNextBlock);
6136
}
6137
else
6138
{
6139
TR_ASSERT(block->getLastRealTreeTop()->getNode()->getOpCode().isBranch(), "error");
6140
ret = block->split(block->getLastRealTreeTop(), cfg);
6141
}
6142
cfg->join(block, firstBlock);
6143
cfg->join(lastBlock, ret);
6144
setSuccessorEdge(block, firstBlock);
6145
return ret;
6146
}
6147
6148
6149
//*****************************************************************************************
6150
// Check whether the node is a dead store by using useDefInfo
6151
//*****************************************************************************************
6152
bool
6153
TR_CISCTransformer::isDeadStore(TR::Node *node)
6154
{
6155
if (node->getOpCode().isStoreDirect())
6156
{
6157
if (!node->getSymbol()->isAutoOrParm())
6158
return false;
6159
6160
TR_UseDefInfo *useDefInfo = _useDefInfo;
6161
const int32_t firstUseIndex = useDefInfo->getFirstUseIndex();
6162
int32_t useDefIndex = node->getUseDefIndex();
6163
//TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");
6164
if (!useDefInfo->isDefIndex(useDefIndex)) return false;
6165
if (useDefInfo->getUsesFromDefIsZero(useDefIndex)) return true;
6166
}
6167
return false;
6168
}
6169
6170
6171
//*****************************************************************************************
6172
// It basically skips blocks containing only a goto statement.
6173
// It can additionally skip nodes for dead stores and the node specified by "ignoreTree"
6174
//*****************************************************************************************
6175
TR::Block *
6176
TR_CISCTransformer::skipGoto(TR::Block *block, TR::Node *ignoreTree)
6177
{
6178
while(true)
6179
{
6180
TR::TreeTop *treeTop = block->getFirstRealTreeTop();
6181
TR::Node *gotonode;
6182
while(true)
6183
{
6184
gotonode = treeTop->getNode();
6185
if (!isDeadStore(gotonode) &&
6186
(ignoreTree == 0 || !compareTrNodeTree(gotonode, ignoreTree))) break;
6187
treeTop = treeTop->getNextRealTreeTop();
6188
}
6189
TR::ILOpCodes opcode = gotonode->getOpCodeValue();
6190
if (opcode == TR::Goto)
6191
{
6192
block = gotonode->getBranchDestination()->getNode()->getBlock();
6193
}
6194
else if (opcode == TR::BBEnd)
6195
{
6196
treeTop = treeTop->getNextRealTreeTop();
6197
block = treeTop->getNode()->getBlock();
6198
}
6199
else
6200
{
6201
break;
6202
}
6203
}
6204
return block;
6205
}
6206
6207
6208
//*****************************************************************************************
6209
// It searches the "target" node in the tree from "top".
6210
// Return values are stored into *retParent and *retChildNum.
6211
//*****************************************************************************************
6212
bool
6213
TR_CISCTransformer::searchNodeInTrees(TR::Node *top, TR::Node *target, TR::Node **retParent, int *retChildNum)
6214
{
6215
int i;
6216
for (i = top->getNumChildren(); --i >= 0; )
6217
{
6218
if (compareTrNodeTree(top->getChild(i), target))
6219
{
6220
if (retParent) *retParent = top;
6221
if (retChildNum) *retChildNum = i;
6222
return true;
6223
}
6224
}
6225
for (i = top->getNumChildren(); --i >= 0; )
6226
{
6227
if (searchNodeInTrees(top->getChild(i), target, retParent, retChildNum)) return true;
6228
}
6229
return false;
6230
}
6231
6232
6233
//*****************************************************************************************
6234
// Analyze whether node trees a and b are equivalent.
6235
//*****************************************************************************************
6236
bool
6237
TR_CISCTransformer::compareTrNodeTree(TR::Node *a, TR::Node *b)
6238
{
6239
if (a == b) return true;
6240
if (a->getOpCodeValue() != b->getOpCodeValue()) return false;
6241
if (a->getOpCode().hasSymbolReference() &&
6242
(a->getSymbolReference()->getReferenceNumber() != b->getSymbolReference()->getReferenceNumber()))
6243
return false;
6244
6245
if (a->getOpCode().isLoadConst())
6246
{
6247
switch(a->getOpCodeValue())
6248
{
6249
case TR::iconst:
6250
if (a->getUnsignedInt() != b->getUnsignedInt()) return false;
6251
break;
6252
case TR::lconst:
6253
if (a->getUnsignedLongInt() != b->getUnsignedLongInt()) return false;
6254
break;
6255
case TR::aconst:
6256
if (a->getAddress() != b->getAddress()) return false;
6257
break;
6258
case TR::fconst:
6259
if (a->getFloat() != b->getFloat()) return false;
6260
break;
6261
case TR::dconst:
6262
if (a->getDouble() != b->getDouble()) return false;
6263
break;
6264
case TR::bconst:
6265
if (a->getUnsignedByte() != b->getUnsignedByte()) return false;
6266
break;
6267
case TR::sconst:
6268
if (a->getShortInt() != b->getShortInt()) return false;
6269
break;
6270
default:
6271
return false;
6272
}
6273
}
6274
int32_t numChild = a->getNumChildren();
6275
if (numChild != b->getNumChildren()) return false;
6276
if (numChild == 2 && a->getOpCode().isCommutative())
6277
{
6278
if ((!compareTrNodeTree(a->getChild(0), b->getChild(0)) ||
6279
!compareTrNodeTree(a->getChild(1), b->getChild(1))) &&
6280
(!compareTrNodeTree(a->getChild(0), b->getChild(1)) ||
6281
!compareTrNodeTree(a->getChild(1), b->getChild(0))))
6282
return false;
6283
}
6284
else
6285
{
6286
int32_t i;
6287
for (i = 0; i < numChild; i++)
6288
{
6289
if (!compareTrNodeTree(a->getChild(i), b->getChild(i))) return false;
6290
}
6291
}
6292
return true;
6293
}
6294
6295
6296
//*****************************************************************************************
6297
// Analyze whether node trees in blocks a and b are equivalent.
6298
//*****************************************************************************************
6299
bool
6300
TR_CISCTransformer::compareBlockTrNodeTree(TR::Block *a, TR::Block *b)
6301
{
6302
if (a == b) return true;
6303
TR::TreeTop *ttA = a->getFirstRealTreeTop();
6304
TR::TreeTop *ttB = b->getFirstRealTreeTop();
6305
TR::TreeTop *lastA = a->getLastRealTreeTop();
6306
while(true)
6307
{
6308
if (!compareTrNodeTree(ttA->getNode(), ttB->getNode())) return false;
6309
if (ttA == lastA) break;
6310
ttA = ttA->getNextRealTreeTop();
6311
if (ttA->getNode()->getOpCodeValue() == TR::Goto) break;
6312
ttB = ttB->getNextRealTreeTop();
6313
if (ttB->getNode()->getOpCodeValue() == TR::Goto) break;
6314
}
6315
return true;
6316
}
6317
6318
6319
//*****************************************************************************************
6320
// Append nodes in the list _beforeInsertions into the block.
6321
// Restriction: It can create a single new block automatically, but it doesn't create multiple ones.
6322
//*****************************************************************************************
6323
TR::Block *
6324
TR_CISCTransformer::insertBeforeNodes(TR::Block *block)
6325
{
6326
ListIterator<TR::Node> ni(&_beforeInsertions);
6327
TR::Node *n, *last = 0;
6328
int32_t count = 0;
6329
for (n = ni.getFirst(); n; n = ni.getNext())
6330
{
6331
// Insert nodes into given block
6332
TR::TreeTop *top = TR::TreeTop::create(comp(), n);
6333
block->getLastRealTreeTop()->join(top);
6334
top->join(block->getExit());
6335
last = n;
6336
count++;
6337
}
6338
if (trace()) traceMsg(comp(), "insertBeforeNodes added %d node(s) to block_%d [%p]\n", count, block->getNumber(), block);
6339
if (last && last->getOpCode().isBranch())
6340
{
6341
TR::CFG *cfg = comp()->getFlowGraph();
6342
TR::TreeTop *orgNext = block->getExit()->getNextTreeTop();
6343
TR::Block *newBlock = TR::Block::createEmptyBlock(last, comp(), block->getFrequency(), block);
6344
cfg->setStructure(0);
6345
cfg->addNode(newBlock);
6346
newBlock->getExit()->join(orgNext);
6347
block->getExit()->join(newBlock->getEntry());
6348
6349
cfg->addSuccessorEdges(newBlock);
6350
bool isRemove = true;
6351
6352
TR::Block * orgNextBlock = orgNext->getNode()->getBlock();
6353
TR::Block * branchDestinationBlock = 0;
6354
if (last->getOpCode().isIf())
6355
branchDestinationBlock = last->getBranchDestination()->getEnclosingBlock();
6356
// Copy edges to avoid removing necessary blocks
6357
for (auto edge = block->getSuccessors().begin(); edge != block->getSuccessors().end(); ++edge)
6358
{
6359
TR::Block *to = toBlock((*edge)->getTo());
6360
if (to != branchDestinationBlock &&
6361
to != orgNextBlock)
6362
{
6363
if (trace()) traceMsg(comp(), "insertBeforeNodes added the edge (%d, %d).\n",newBlock->getNumber(),to->getNumber());
6364
addEdge(&newBlock->getSuccessors(), newBlock, to);
6365
}
6366
}
6367
6368
if (last->getOpCode().isIf())
6369
{
6370
setSuccessorEdges(block, newBlock, branchDestinationBlock);
6371
if (orgNext->getNode()->getBlock() == branchDestinationBlock)
6372
isRemove = false;
6373
}
6374
else
6375
{
6376
setSuccessorEdge(block, newBlock);
6377
}
6378
6379
if (isRemove)
6380
cfg->removeEdge(block, orgNext->getNode()->getBlock());
6381
if (trace()) traceMsg(comp(), "insertBeforeNodes created block_%d [%p]\n", newBlock->getNumber(), newBlock);
6382
block = newBlock;
6383
}
6384
return block;
6385
}
6386
6387
//*****************************************************************************************
6388
// Search for store to a given symref in the insert before list
6389
// @param symRefNumberToBeMatched The symbol reference number to be matched.
6390
// @return The TR::Node for the store with same symbol reference number. NULL if not found.
6391
//*****************************************************************************************
6392
TR::Node *
6393
TR_CISCTransformer::findStoreToSymRefInInsertBeforeNodes(int32_t symRefNumberToBeMatched)
6394
{
6395
ListIterator<TR::Node> ni(&_beforeInsertions);
6396
TR::Node *n = NULL;
6397
6398
for (n = ni.getFirst(); n; n = ni.getNext())
6399
{
6400
if (n->getOpCode().isStore() && n->getOpCode().hasSymbolReference() && n->getSymbolReference()->getReferenceNumber() == symRefNumberToBeMatched)
6401
return n;
6402
}
6403
6404
return NULL;
6405
}
6406
6407
//*****************************************************************************************
6408
// Prepend/Append nodes in the list l into the block.
6409
// Restriction: It creates no new block automatically.
6410
//*****************************************************************************************
6411
TR::Block *
6412
TR_CISCTransformer::insertAfterNodes(TR::Block *block, List<TR::Node> *l, bool prepend)
6413
{
6414
ListIterator<TR::Node> ni(l);
6415
TR::Node *n;
6416
int32_t count = 0;
6417
if (prepend)
6418
{
6419
TR::TreeTop *last, *orgNext;
6420
last = block->getEntry();
6421
orgNext = last->getNextTreeTop();
6422
for (n = ni.getFirst(); n; n = ni.getNext())
6423
{
6424
TR::TreeTop *top = TR::TreeTop::create(comp(), n);
6425
last->join(top);
6426
last = top;
6427
count++;
6428
}
6429
last->join(orgNext);
6430
}
6431
else
6432
{
6433
for (n = ni.getFirst(); n; n = ni.getNext())
6434
{
6435
TR::TreeTop *top = TR::TreeTop::create(comp(), n);
6436
block->append(top);
6437
count++;
6438
}
6439
}
6440
if (trace()) traceMsg(comp(), "insertAfterNodes adds %d node(s)\n", count);
6441
return block;
6442
}
6443
6444
//*****************************************************************************************
6445
// Add nodes for idiom independent transformations
6446
//*****************************************************************************************
6447
TR::Block *
6448
TR_CISCTransformer::insertAfterNodes(TR::Block *block, bool prepend)
6449
{
6450
return insertAfterNodes(block, &_afterInsertions, prepend);
6451
}
6452
6453
// Add nodes for idiom specific transformations
6454
TR::Block *
6455
TR_CISCTransformer::insertAfterNodesIdiom(TR::Block *block, int32_t pos, bool prepend)
6456
{
6457
return insertAfterNodes(block, _afterInsertionsIdiom + pos, prepend);
6458
}
6459
6460
6461
// Remove all nodes from TreeTops from "start" to "end"
6462
TR::TreeTop *
6463
TR_CISCTransformer::removeAllNodes(TR::TreeTop *start, TR::TreeTop *end)
6464
{
6465
TR::TreeTop *ret = start->getPrevTreeTop();
6466
#if 0
6467
for (; start != end; start = start->getNextTreeTop())
6468
{
6469
start->getNode()->removeAllChildren();
6470
start->setNode(0);
6471
}
6472
#endif
6473
TR::TreeTop *next;
6474
for (; start != end; start = next)
6475
{
6476
next = start->getNextTreeTop();
6477
TR::TransformUtil::removeTree(comp(), start);
6478
if (next == end)
6479
break;
6480
}
6481
return ret;
6482
}
6483
6484
6485
//*****************************************************************************
6486
// These functions create function tables for TRT or TRxx instructions.
6487
//*****************************************************************************
6488
bool
6489
TR_CISCTransformer::analyzeBoolTable(TR_BitVector **bv, TR::TreeTop **retSameExit, TR_CISCNode *boolTable, TR_BitVector *defBV, TR_CISCNode *defNode, TR_CISCNode *ignoreNode, int32_t bvoffset, int32_t allocBVSize)
6490
{
6491
List<TR_CISCNode> *P2T = _P2T;
6492
List<TR_CISCNode> *T2P = _T2P;
6493
TR_CISCGraph *P = _P;
6494
TR_CISCGraph *T = _T;
6495
ListIterator<TR_CISCNode> ni(_candidateRegion);
6496
TR::TreeTop *exitTreeTop;
6497
bool initExitTreeTop;
6498
TR_CISCNode * exitnode;
6499
TR_CISCNode *n;
6500
int32_t i;
6501
TR_BitVector takenBV(allocBVSize, trMemory(), stackAlloc), ntakenBV(allocBVSize, trMemory(), stackAlloc), tmpBV(allocBVSize, trMemory(), stackAlloc);
6502
TR_BitVector orgBV(allocBVSize, trMemory(), stackAlloc);
6503
6504
//
6505
// Perform a forward dataflow analysis to compute exit conditions of the loop
6506
//
6507
for (i = T->getNumNodes(); --i >= 0; )
6508
bv[i] = new (trStackMemory()) TR_BitVector(allocBVSize, trMemory(), stackAlloc);
6509
exitnode = T->getExitNode();
6510
exitTreeTop = 0;
6511
initExitTreeTop = false;
6512
int loopCount = 0;
6513
bool doAgain = true;
6514
while(doAgain)
6515
{
6516
if (loopCount++ > 10)
6517
{
6518
TR_ASSERT(false, "analyzeBoolTable: infinite loop!\n");
6519
return false;
6520
}
6521
doAgain = false;
6522
for (n = ni.getFirst(); n; n = ni.getNext())
6523
{
6524
int32_t pos;
6525
uint32_t tID = n->getID();
6526
TR_CISCNode *p = getT2PheadRep(tID);
6527
bool doPropagateSuccs = true;
6528
if (analyzeT2P(n, defNode) & _T2P_MatchMask)
6529
{
6530
p = defNode;
6531
if (bv[tID]->isEmpty()) *bv[tID] = *defBV;
6532
}
6533
else if (p == boolTable)
6534
{
6535
if (n->getOpcode() == TR::Case)
6536
{
6537
if (!n->isValidOtherInfo())
6538
{
6539
// default case
6540
TR_ASSERT(n->getParents()->isSingleton(), "error!!!");
6541
TR_CISCNode *swbody = n->getHeadOfParents();
6542
takenBV = *bv[tID];
6543
for (i = swbody->getNumChildren(); --i >= 2; )
6544
{
6545
pos = swbody->getChild(i)->getOtherInfo()+bvoffset;
6546
takenBV.reset(pos);
6547
}
6548
}
6549
else
6550
{
6551
pos = n->getOtherInfo()+bvoffset;
6552
takenBV.empty();
6553
takenBV.set(pos);
6554
if (!n->isOutsideOfLoop())
6555
{
6556
TR::TreeTop *thisDestination = n->getDestination();
6557
if (!initExitTreeTop)
6558
{
6559
initExitTreeTop = true;
6560
exitTreeTop = thisDestination;
6561
}
6562
if (exitTreeTop != thisDestination)
6563
{
6564
if (trace() && exitTreeTop)
6565
{
6566
traceMsg(comp(), "Succ(0) is not exit node. ID:%d (TR::Case)\n", n->getID());
6567
}
6568
exitTreeTop = 0;
6569
}
6570
}
6571
}
6572
6573
if (doAgain)
6574
{
6575
*bv[n->getSucc(0)->getID()] |= takenBV;
6576
}
6577
else
6578
{
6579
int id;
6580
id = n->getSucc(0)->getID();
6581
orgBV = *bv[id];
6582
*bv[id] |= takenBV;
6583
if (!(orgBV == *bv[id])) doAgain = true;
6584
}
6585
6586
doPropagateSuccs = false; // already done
6587
}
6588
else
6589
{
6590
TR_CISCNode *child1 = n->getChild(1);
6591
while (!child1->isInterestingConstant())
6592
{
6593
child1 = child1->getNodeIfSingleChain();
6594
if (!child1)
6595
{
6596
if (trace()) traceMsg(comp(), "analyzeBoolTable failed for %p. (no single chain)\n", n->getChild(1));
6597
return false;
6598
}
6599
if (!child1->isStoreDirect())
6600
{
6601
if (trace()) traceMsg(comp(), "analyzeBoolTable failed for %p. (%p is not store)\n", n->getChild(1), child1);
6602
return false;
6603
}
6604
child1 = child1->getChild(0);
6605
}
6606
pos = child1->getOtherInfo()+bvoffset;
6607
6608
switch(n->getOpcode())
6609
{
6610
case TR::ifbcmpeq:
6611
case TR::ificmpeq:
6612
takenBV.empty();
6613
ntakenBV = *bv[tID];
6614
if (bv[tID]->isSet(pos))
6615
{
6616
takenBV.set(pos);
6617
ntakenBV.reset(pos);
6618
}
6619
break;
6620
case TR::ifbcmpne:
6621
case TR::ificmpne:
6622
takenBV = *bv[tID];
6623
ntakenBV.empty();
6624
if (bv[tID]->isSet(pos))
6625
{
6626
takenBV.reset(pos);
6627
ntakenBV.set(pos);
6628
}
6629
break;
6630
case TR::ifbcmplt:
6631
case TR::ifsucmplt:
6632
case TR::ificmplt:
6633
takenBV = *bv[tID];
6634
ntakenBV = takenBV;
6635
tmpBV.empty();
6636
tmpBV.setAll(0, pos-1);
6637
takenBV &= tmpBV;
6638
ntakenBV -= tmpBV;
6639
break;
6640
case TR::ifbcmpge:
6641
case TR::ifsucmpge:
6642
case TR::ificmpge:
6643
takenBV = *bv[tID];
6644
ntakenBV = takenBV;
6645
tmpBV.empty();
6646
tmpBV.setAll(0, pos-1);
6647
ntakenBV &= tmpBV;
6648
takenBV -= tmpBV;
6649
break;
6650
case TR::ifbcmpgt:
6651
case TR::ifsucmpgt:
6652
case TR::ificmpgt:
6653
takenBV = *bv[tID];
6654
ntakenBV = takenBV;
6655
tmpBV.empty();
6656
tmpBV.setAll(0, pos);
6657
ntakenBV &= tmpBV;
6658
takenBV -= tmpBV;
6659
break;
6660
case TR::ifbcmple:
6661
case TR::ifsucmple:
6662
case TR::ificmple:
6663
takenBV = *bv[tID];
6664
ntakenBV = takenBV;
6665
tmpBV.empty();
6666
tmpBV.setAll(0, pos);
6667
takenBV &= tmpBV;
6668
ntakenBV -= tmpBV;
6669
break;
6670
default:
6671
TR_ASSERT(false, "not implemented yet");
6672
// not implemented yet
6673
return false;
6674
}
6675
6676
if (doAgain)
6677
{
6678
*bv[n->getSucc(0)->getID()] |= ntakenBV;
6679
*bv[n->getSucc(1)->getID()] |= takenBV;
6680
}
6681
else
6682
{
6683
int id;
6684
id = n->getSucc(0)->getID();
6685
orgBV = *bv[id];
6686
*bv[id] |= ntakenBV;
6687
if (!(orgBV == *bv[id])) doAgain = true;
6688
6689
id = n->getSucc(1)->getID();
6690
orgBV = *bv[id];
6691
*bv[id] |= takenBV;
6692
if (!(orgBV == *bv[id])) doAgain = true;
6693
}
6694
6695
doPropagateSuccs = false; // already done
6696
6697
if (!n->isOutsideOfLoop())
6698
{
6699
TR::TreeTop *thisDestination = n->getDestination();
6700
if (!initExitTreeTop)
6701
{
6702
if (trace())
6703
traceMsg(comp(), "analyzeBoolTable - Delimiter checking node %d targets treetop: %p block_%d: %p\n",
6704
n->getID(), thisDestination, thisDestination->getEnclosingBlock()->getNumber(), thisDestination->getEnclosingBlock());
6705
initExitTreeTop = true;
6706
exitTreeTop = thisDestination;
6707
}
6708
if (exitTreeTop != thisDestination)
6709
{
6710
if (trace() && exitTreeTop)
6711
traceMsg(comp(), "analyzeBoolTable - found conflicting successors. Delimiter checking node %d targets treetop: %p (!= %p) block_%d: %p\n",
6712
n->getID(), thisDestination, exitTreeTop, thisDestination->getEnclosingBlock()->getNumber(), thisDestination->getEnclosingBlock());
6713
6714
exitTreeTop = NULL;
6715
}
6716
}
6717
}
6718
}
6719
else if (p == ignoreNode)
6720
{
6721
doPropagateSuccs = false; // ignore
6722
}
6723
else
6724
{
6725
if (p &&
6726
p->getNumSuccs() >= 2)
6727
{
6728
for (i = p->getNumSuccs(); --i >= 0; )
6729
{
6730
if (p->getSucc(i)->getOpcode() == TR_exitnode)
6731
{
6732
doPropagateSuccs = false; // ignore
6733
break;
6734
}
6735
}
6736
}
6737
}
6738
6739
if (doPropagateSuccs)
6740
{
6741
if (doAgain)
6742
{
6743
for (i = n->getNumSuccs(); --i >= 0; )
6744
*bv[n->getSucc(i)->getID()] |= *bv[tID];
6745
}
6746
else
6747
{
6748
for (i = n->getNumSuccs(); --i >= 0; )
6749
{
6750
int id = n->getSucc(i)->getID();
6751
orgBV = *bv[id];
6752
*bv[id] |= *bv[tID];
6753
if (!(orgBV == *bv[id])) doAgain = true;
6754
}
6755
}
6756
}
6757
}
6758
}
6759
6760
if (retSameExit)
6761
{
6762
*retSameExit = exitTreeTop;
6763
}
6764
return true;
6765
}
6766
6767
6768
#define BYTEBVOFFSET (128)
6769
#define ALLOCBYTEBVSIZE (128+256)
6770
int32_t
6771
TR_CISCTransformer::analyzeByteBoolTable(TR_CISCNode *boolTable, uint8_t *table256, TR_CISCNode *ignoreNode, TR::TreeTop **retSameExit)
6772
{
6773
TR::StackMemoryRegion stackMemoryRegion(*trMemory());
6774
6775
List<TR_CISCNode> *P2T = _P2T;
6776
List<TR_CISCNode> *T2P = _T2P;
6777
TR_CISCGraph *P = _P;
6778
TR_CISCGraph *T = _T;
6779
//int32_t i;
6780
//TR::TreeTop *exitTreeTop;
6781
6782
//
6783
// initialize
6784
//
6785
memset(table256, 0, 256);
6786
if (!boolTable || !getP2TRepInLoop(boolTable)) return 0; // # of delimiter is zero
6787
6788
TR_BitVector **bv, defBV(ALLOCBYTEBVSIZE, trMemory(), stackAlloc);
6789
uint32_t size = sizeof(*bv) * T->getNumNodes();
6790
TR_ASSERT(boolTable->getOpcode() == TR_booltable, "error!");
6791
TR_CISCNode *defNode = boolTable->getChild(0);
6792
TR_CISCNode *defTargetNode = getP2TRepInLoop(defNode);
6793
bv = (TR_BitVector **)trMemory()->allocateMemory(size, stackAlloc);
6794
memset(bv, 0, size);
6795
6796
switch((defTargetNode ? defTargetNode : defNode)->getOpcode())
6797
{
6798
case TR::b2i:
6799
if (defNode->isOptionalNode()) defNode = defNode->getChild(0);
6800
// fall through
6801
case TR::bloadi:
6802
defBV.setAll(-128+BYTEBVOFFSET, 127+BYTEBVOFFSET);
6803
break;
6804
case TR::bu2i:
6805
defBV.setAll( 0+BYTEBVOFFSET, 255+BYTEBVOFFSET);
6806
break;
6807
default:
6808
TR_ASSERT(false, "not implemented yet");
6809
// not implemented yet
6810
return -1; // error
6811
}
6812
6813
if (!analyzeBoolTable(bv, retSameExit, boolTable, &defBV, defNode, ignoreNode, BYTEBVOFFSET, ALLOCBYTEBVSIZE)) return -1; // error
6814
6815
TR_BitVectorIterator bvi(*bv[T->getExitNode()->getID()]);
6816
int32_t count = 0;
6817
6818
// Create the function table from the BitVector of the exit node
6819
while (bvi.hasMoreElements())
6820
{
6821
int32_t nextElement = bvi.getNextElement() - BYTEBVOFFSET;
6822
if (nextElement < 0) nextElement += 256;
6823
TR_ASSERT(0 <= nextElement && nextElement < 256, "error!!!");
6824
table256[nextElement] = nextElement ? nextElement : 1;
6825
count ++;
6826
}
6827
if (trace())
6828
{
6829
static int traceByteBoolTable = -1;
6830
if (traceByteBoolTable < 0)
6831
{
6832
char *p = feGetEnv("traceBoolTable");
6833
traceByteBoolTable = p ? 1 : 0;
6834
}
6835
if (1 > count || count > 255 || traceByteBoolTable)
6836
{
6837
traceMsg(comp(), "analyzeByteBoolTable: count is %d\n",count);
6838
ListIterator<TR_CISCNode> pi(_candidateRegion);
6839
TR_CISCNode *pn;
6840
traceMsg(comp(), "Predecessors of the exit node:\n ID:count\n");
6841
for (pn = pi.getFirst(); pn; pn = pi.getNext())
6842
{
6843
int32_t id = pn->getID();
6844
if (getT2PheadRep(id) == boolTable)
6845
{
6846
traceMsg(comp(), "%3d:%3d:",id,bv[id]->elementCount());
6847
bv[id]->print(comp());
6848
traceMsg(comp(), "\n");
6849
}
6850
}
6851
}
6852
}
6853
//TR_ASSERT(1 <= count && count <= 255, "maybe error!!");
6854
return count;
6855
}
6856
6857
6858
#define CHARBVOFFSET (0)
6859
#define ALLOCCHARBVSIZE (65536)
6860
int32_t
6861
TR_CISCTransformer::analyzeCharBoolTable(TR_CISCNode *boolTable, uint8_t *table65536, TR_CISCNode *ignoreNode, TR::TreeTop **retSameExit)
6862
{
6863
TR::StackMemoryRegion stackMemoryRegion(*trMemory());
6864
6865
List<TR_CISCNode> *P2T = _P2T;
6866
List<TR_CISCNode> *T2P = _T2P;
6867
TR_CISCGraph *P = _P;
6868
TR_CISCGraph *T = _T;
6869
//int32_t i;
6870
6871
//
6872
// initialize
6873
//
6874
memset(table65536, 0, 65536);
6875
if (!boolTable || !getP2TRepInLoop(boolTable)) return 0; // # of delimiter is zero
6876
6877
TR_BitVector **bv, defBV(ALLOCCHARBVSIZE, trMemory(), stackAlloc);
6878
uint32_t size = sizeof(*bv) * T->getNumNodes();
6879
TR_ASSERT(boolTable->getOpcode() == TR_booltable, "error!");
6880
TR_CISCNode *defNode = boolTable->getChild(0);
6881
TR_CISCNode *defTargetNode = getP2TRepInLoop(defNode);
6882
bv = (TR_BitVector **)trMemory()->allocateMemory(size, stackAlloc);
6883
memset(bv, 0, size);
6884
6885
switch((defTargetNode ? defTargetNode : defNode)->getOpcode())
6886
{
6887
case TR::su2i:
6888
if (defNode->isOptionalNode()) defNode = defNode->getChild(0);
6889
// fall through
6890
case TR::sloadi:
6891
defBV.setAll(0, 65535);
6892
break;
6893
default:
6894
TR_ASSERT(false, "not implemented yet");
6895
// not implemented yet
6896
return -1; // error
6897
}
6898
6899
if (!analyzeBoolTable(bv, retSameExit, boolTable, &defBV, defNode, ignoreNode, CHARBVOFFSET, ALLOCCHARBVSIZE)) return -1; // error
6900
6901
TR_BitVectorIterator bvi(*bv[T->getExitNode()->getID()]);
6902
int32_t count = 0;
6903
6904
// Create the function table from the BitVector of the exit node
6905
while (bvi.hasMoreElements())
6906
{
6907
int32_t nextElement = bvi.getNextElement() - CHARBVOFFSET;
6908
TR_ASSERT(0 <= nextElement && nextElement < 65536, "error!!!");
6909
table65536[nextElement] = 1;
6910
count ++;
6911
}
6912
if (trace())
6913
{
6914
static char *traceCharBoolTable = feGetEnv("traceBoolTable");
6915
6916
if (count < 1 || count > 65535 || traceCharBoolTable)
6917
{
6918
traceMsg(comp(), "analyzeByteBoolTable: count is %d\n",count);
6919
ListIterator<TR_CISCNode> pi(_candidateRegion);
6920
TR_CISCNode *pn;
6921
traceMsg(comp(), "Predecessors of the exit node:\n ID:count\n");
6922
for (pn = pi.getFirst(); pn; pn = pi.getNext())
6923
{
6924
int32_t id = pn->getID();
6925
if (getT2PheadRep(id) == boolTable)
6926
{
6927
traceMsg(comp(), "%3d:%3d:", id, bv[id]->elementCount());
6928
bv[id]->print(comp());
6929
traceMsg(comp(), "\n");
6930
}
6931
}
6932
}
6933
}
6934
6935
return count;
6936
}
6937
6938
6939
//*****************************************************************************
6940
// It sets the cold flags to the all blocks in _bblistBody.
6941
//*****************************************************************************
6942
void
6943
TR_CISCTransformer::setColdLoopBody()
6944
{
6945
ListIterator<TR::Block> bi(&_bblistBody);
6946
TR::Block *b;
6947
for (b = bi.getFirst(); b; b = bi.getNext())
6948
{
6949
b->setIsCold();
6950
b->setFrequency(-1);
6951
}
6952
}
6953
6954
6955
//*****************************************************************************
6956
// It get minimum and maximum of byte code index within the list l.
6957
// Note: Please initialize minIndex and maxIndex in caller!!
6958
// The return value means whether the result includes inlined region.
6959
//*****************************************************************************
6960
bool
6961
TR_CISCTransformer::getBCIndexMinMax(List<TR_CISCNode> *l, int32_t *_minIndex, int32_t *_maxIndex, int32_t *_minLN, int32_t *_maxLN, bool allowInlined)
6962
{
6963
int32_t minIndex = *_minIndex;
6964
int32_t maxIndex = *_maxIndex;
6965
int32_t minLN = *_minLN;
6966
int32_t maxLN = *_maxLN;
6967
ListIterator<TR_CISCNode> ni(l);
6968
TR_CISCNode *n;
6969
TR::Node *tn;
6970
bool includeInline = false;
6971
for (n = ni.getFirst(); n; n = ni.getNext())
6972
{
6973
ListElement<TrNodeInfo> *le = n->getTrNodeInfo()->getListHead();
6974
if (le)
6975
{
6976
tn = le->getData()->_node;
6977
bool go = true;
6978
if (tn->getInlinedSiteIndex() != -1)
6979
{
6980
if (allowInlined)
6981
includeInline = true;
6982
else
6983
go = false;
6984
}
6985
if (go)
6986
{
6987
int bcIndex = tn->getByteCodeIndex();
6988
if (minIndex > bcIndex) minIndex = bcIndex;
6989
if (maxIndex < bcIndex) maxIndex = bcIndex;
6990
int LN = comp()->getLineNumber(tn);
6991
if (minLN > LN) minLN = LN;
6992
if (maxLN < LN) maxLN = LN;
6993
}
6994
}
6995
}
6996
*_minIndex = minIndex;
6997
*_maxIndex = maxIndex;
6998
*_minLN = minLN;
6999
*_maxLN = maxLN;
7000
return includeInline;
7001
}
7002
7003
7004
//*****************************************************************************
7005
// It shows candidates of input code that cannot be transformed to idioms.
7006
//*****************************************************************************
7007
void
7008
TR_CISCTransformer::showCandidates()
7009
{
7010
if (!isShowingCandidates()) return;
7011
FILE *fp = stderr;
7012
int32_t minIndex = _candidatesForShowing.getMinBCIndex();
7013
int32_t maxIndex = _candidatesForShowing.getMaxBCIndex();
7014
int32_t minLN = _candidatesForShowing.getMinLineNumber();
7015
int32_t maxLN = _candidatesForShowing.getMaxLineNumber();
7016
if (minIndex <= maxIndex)
7017
{
7018
ListIterator<TR_CISCGraph> idiomI(_candidatesForShowing.getListIdioms());
7019
TR_CISCGraph *p;
7020
fprintf(fp, "!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
7021
fprintf(fp, "Candidate is found for ");
7022
int count = 0;
7023
for (p = idiomI.getFirst(); p; p = idiomI.getNext())
7024
{
7025
if (count != 0) fprintf(fp, ",");
7026
fprintf(fp, "%s", p->getTitle());
7027
count ++;
7028
}
7029
fprintf(fp, " (%s) in %s",
7030
comp()->getHotnessName(comp()->getMethodHotness()),
7031
_T->getTitle());
7032
#if SHOW_BCINDICES
7033
fprintf(fp, "\t bcindex is %d - %d, linenumber is %d - %d.", minIndex, maxIndex, minLN, maxLN);
7034
#endif
7035
fprintf(fp, "\n");
7036
}
7037
}
7038
7039
7040
7041
//*****************************************************************************
7042
// It registers candidates of input code that cannot be transformed to idioms.
7043
//*****************************************************************************
7044
void
7045
TR_CISCTransformer::registerCandidates()
7046
{
7047
if (!isShowingCandidates()) return;
7048
ListIterator<TR_CISCNodeRegion> ri(&_candidatesForRegister);
7049
TR_CISCNodeRegion *r;
7050
int32_t minIndex, maxIndex;
7051
int32_t minLN, maxLN;
7052
minIndex = 0x7fffffff;
7053
maxIndex = -minIndex;
7054
minLN = 0x7fffffff;
7055
maxLN = -minIndex;
7056
for (r = ri.getFirst(); r; r = ri.getNext())
7057
{
7058
getBCIndexMinMax(r, &minIndex, &maxIndex, &minLN, &maxLN, false);
7059
}
7060
if (minIndex <= maxIndex) _candidatesForShowing.add(_P, minIndex, maxIndex, minLN, maxLN);
7061
}
7062
7063
7064
//*****************************************************************************
7065
// It returns the following conditions:
7066
// * _T2P_NULL: There is no pattern nodes corresponding to the target node t.
7067
// (regardless whether p is null or non-null)
7068
//
7069
// If p is non-null,
7070
// * _T2P_NotMatch: Cannot find any relationships between t and p
7071
// * _T2P_MatchAndSingle: The node t corresponds to ONLY the node p. (= _T2P_MatchMask | _T2P_Single)
7072
// * _T2P_MatchAndMultiple: The node t corresponds to the node p, (= _T2P_MatchMask | _T2P_Multiple)
7073
// but there is another candidate.
7074
// If p is null,
7075
// * _T2P_Single: There is a single pattern node corresponding to the node t.
7076
// * _T2P_Multiple: There are multiple pattern nodes corresponding to the node t.
7077
//*****************************************************************************
7078
CISCT2PCond
7079
TR_CISCTransformer::analyzeT2P(TR_CISCNode *t, TR_CISCNode *p)
7080
{
7081
//CISCT2PCond ret;
7082
int32_t tid = t->getID();
7083
List<TR_CISCNode> *l = _T2P + tid;
7084
TR_CISCNode *t2p;
7085
if (l->isEmpty())
7086
{
7087
return _T2P_NULL;
7088
}
7089
else if (l->isSingleton())
7090
{
7091
t2p = l->getListHead()->getData();
7092
if (!p) return _T2P_Single;
7093
return (p == t2p) ? _T2P_MatchAndSingle : _T2P_NotMatch;
7094
}
7095
else
7096
{
7097
if (!p) return _T2P_Multiple;
7098
ListIterator<TR_CISCNode> t2pi(l);
7099
for (t2p = t2pi.getFirst(); t2p; t2p = t2pi.getNext())
7100
{
7101
if (p == t2p) return _T2P_MatchAndMultiple;
7102
}
7103
return _T2P_NotMatch;
7104
}
7105
}
7106
7107
7108
//*****************************************************************************
7109
// It analyzes whether the pattern node TR_arrayindex corresponds to
7110
// an induction variable V or V + something in an input graph.
7111
//*****************************************************************************
7112
bool
7113
TR_CISCTransformer::analyzeOneArrayIndex(TR_CISCNode *arrayindex, TR::SymbolReference *inductionVariableSymRef)
7114
{
7115
List<TR_CISCNode> *l = getP2T() + arrayindex->getID();
7116
if (l->isEmpty())
7117
{
7118
if (arrayindex->isOptionalNode()) return true;
7119
else return false;
7120
}
7121
else if (!l->isSingleton()) return false;
7122
TR_CISCNode *t = l->getListHead()->getData();
7123
if (t->getOpcode() == TR::iadd) // check induction variable + something
7124
{
7125
bool ret = false;
7126
TR_CISCNode *c;
7127
c = t->getChild(0);
7128
if (c->getOpcode() == TR::iload)
7129
{
7130
TR::SymbolReference *symref = c->getHeadOfTrNodeInfo()->_node->getSymbolReference();
7131
if (symref == inductionVariableSymRef) ret = true;
7132
}
7133
if (!ret)
7134
{
7135
c = t->getChild(1);
7136
if (c->getOpcode() == TR::iload)
7137
{
7138
TR::SymbolReference *symref = c->getHeadOfTrNodeInfo()->_node->getSymbolReference();
7139
if (symref == inductionVariableSymRef) ret = true;
7140
}
7141
}
7142
if (!ret) return false;
7143
}
7144
else if (t->getOpcode() != TR_variable)
7145
{
7146
return false;
7147
}
7148
return true;
7149
}
7150
7151
7152
// Check if all of the node TR_arrayindex correspond to V or V+something
7153
bool
7154
TR_CISCTransformer::analyzeArrayIndex(TR::SymbolReference *inductionVariableSymRef)
7155
{
7156
// check each array index
7157
for (int32_t i = 0; ; i++)
7158
{
7159
TR_CISCNode *arrayindex = _P->getCISCNode(TR_arrayindex, true, i);
7160
if (!arrayindex) break; // end
7161
7162
if (!analyzeOneArrayIndex(arrayindex, inductionVariableSymRef)) return false;
7163
}
7164
return true;
7165
}
7166
7167
7168
// Count valid nodes where the node TR_arrayindex corresponds to V or V+something
7169
int32_t
7170
TR_CISCTransformer::countGoodArrayIndex(TR::SymbolReference *inductionVariableSymRef)
7171
{
7172
int32_t ret = 0;
7173
// check each array index
7174
for (int32_t i = 0; ; i++)
7175
{
7176
TR_CISCNode *arrayindex = _P->getCISCNode(TR_arrayindex, true, i);
7177
if (!arrayindex)
7178
{
7179
if (i == 0) ret = -1; // no TR_arrayindex in the pattern
7180
break; // end
7181
}
7182
7183
if (analyzeOneArrayIndex(arrayindex, inductionVariableSymRef)) ret ++;
7184
}
7185
return ret;
7186
}
7187
7188
7189
//*****************************************************************************
7190
// It performs very simple optimizations using UD/DU chains.
7191
// Currently, it performs:
7192
// (1) redundant BNDCHK elimination. necessary for very early phase, such as in the earlyGlobalOpts phase
7193
//*****************************************************************************
7194
bool
7195
TR_CISCTransformer::simpleOptimization()
7196
{
7197
TR_ASSERT(_T->isSetUDDUchains(), "please call simpleOptimization() after executing importUDchains()!");
7198
ListIterator<TR_CISCNode> ni(_T->getOrderByData());
7199
TR_CISCNode *n, *ch, *def;
7200
List<TR_CISCNode> *l;
7201
TR_CISCNode quasiConst2(trMemory(), TR_quasiConst2, TR::NoType, 0, 0, 0, 0);
7202
7203
for (n = ni.getFirst(); n; n = ni.getNext())
7204
{
7205
if (!n->isNegligible())
7206
{
7207
switch(n->getOpcode())
7208
{
7209
case TR::BNDCHK:
7210
// check whether the size is greater or equal to 256
7211
// and the array index comes from TR::bu2i.
7212
ch = n->getChild(0);
7213
if (ch->getOpcode() == TR::iconst)
7214
{
7215
if (ch->getOtherInfo() >= 256)
7216
{
7217
ch = n->getChild(1);
7218
switch(ch->getOpcode())
7219
{
7220
case TR::iload:
7221
def = ch->getNodeIfSingleChain();
7222
if (def)
7223
{
7224
if ((def->getNumChildren() > 0) && def->getChild(0)) // There is a bug where def is a TR_entrynode, so getChild(0) is null.
7225
{
7226
switch(def->getChild(0)->getOpcode())
7227
{
7228
case TR::bu2i:
7229
n->setIsNegligible(); // because the range is 0 - 255
7230
break;
7231
}
7232
}
7233
}
7234
break;
7235
case TR::bu2i:
7236
n->setIsNegligible(); // because the range is 0 - 255
7237
break;
7238
}
7239
}
7240
}
7241
break;
7242
7243
default:
7244
if (!n->isOutsideOfLoop())
7245
{
7246
if (n->isStoreDirect())
7247
{
7248
ListIterator<TR_CISCNode> useI(n->getChains());
7249
bool includeExitNode = false;
7250
for (ch = useI.getFirst(); ch; ch = useI.getNext())
7251
{
7252
if (ch->getDagID() != n->getDagID())
7253
{
7254
includeExitNode = true;
7255
break;
7256
}
7257
}
7258
if (!includeExitNode) n->setIsNegligible();
7259
}
7260
}
7261
if (!n->isNegligible()) // Still need to analysis?
7262
{
7263
if (quasiConst2.isEqualOpc(n) &&
7264
n->getParents()->isSingleton())
7265
{
7266
TR_CISCNode *parent = n->getHeadOfParents();
7267
if (parent->getOpcode() == TR::iadd)
7268
{
7269
l = _T2P + parent->getID();
7270
if (l->isSingleton() &&
7271
l->getListHead()->getData()->getOpcode() == TR_arrayindex)
7272
{
7273
n->setIsNegligible();
7274
}
7275
}
7276
}
7277
}
7278
break;
7279
}
7280
}
7281
}
7282
return true;
7283
}
7284
7285
7286
//*****************************************************************************
7287
// get the hash value of TR_CISCNodeRegion
7288
//*****************************************************************************
7289
uint64_t
7290
TR_CISCTransformer::getHashValue(TR_CISCNodeRegion *r)
7291
{
7292
uint64_t ret = 0;
7293
int count = 0;
7294
ListIterator<TR_CISCNode> ri(r);
7295
TR_CISCNode *n;
7296
for (n = ri.getFirst(); n; n = ri.getNext())
7297
{
7298
int i = count % 74;
7299
int bigshift = i % 5;
7300
int smallshift = i / 5;
7301
int shiftcount = bigshift * 10 + smallshift;
7302
ret += (uint64_t)n->getOpcode() << (uint64_t)shiftcount;
7303
count++;
7304
}
7305
return ret;
7306
}
7307
7308
7309
//*****************************************************************************
7310
// It analyzes whether we can convert the loop ArrayCmpLen in String.compareTo
7311
// to either ArrayCmpSign or ArrayCmp.
7312
//*****************************************************************************
7313
bool
7314
TR_CISCTransformer::canConvertArrayCmpSign(TR::Node *storeNode, List<TR::TreeTop> *compareIfs, bool *canConvertToArrayCmp)
7315
{
7316
static int disable = -1;
7317
if (disable < 0)
7318
{
7319
char *p = feGetEnv("DISABLE_CONVERTCMPSIGN");
7320
disable = p ? 1 : 0;
7321
}
7322
if (disable) return false;
7323
static int disableCMP = -1;
7324
if (disableCMP < 0)
7325
{
7326
char *p = feGetEnv("DISABLE_CONVERTCMP");
7327
disableCMP = p ? 1 : 0;
7328
}
7329
7330
TR_ASSERT(storeNode->getOpCode().isStoreDirect(), "error");
7331
TR_UseDefInfo *useDefInfo = getUseDefInfo();
7332
int32_t useDefIndex = storeNode->getUseDefIndex();
7333
if (useDefIndex == 0) return true;
7334
TR_ASSERT(useDefInfo->isDefIndex(useDefIndex), "error!");
7335
TR_UseDefInfo::BitVector info(comp()->allocator());
7336
useDefInfo->getUsesFromDef(info, useDefIndex);
7337
if (!info.IsZero())
7338
{
7339
TR_UseDefInfo::BitVector::Cursor cursor(info);
7340
bool convertToArrayCmp = true;
7341
for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
7342
{
7343
int32_t useIndex = (int32_t) cursor + useDefInfo->getFirstUseIndex();
7344
TR_ASSERT(useDefInfo->isUseIndex(useIndex), "error!");
7345
TR::Node *useNode = useDefInfo->getNode(useIndex);
7346
if (useNode->getReferenceCount() > 1)
7347
{
7348
if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because ReferenceCount > 1. %p\n",useNode);
7349
return false;
7350
}
7351
TR::Node *parentNode = NULL;
7352
int32_t retChildNum = -1;
7353
TR_CISCNode *useCISCNode = _T->getCISCNode(useNode);
7354
TR::TreeTop *foundTT = NULL;
7355
if (useCISCNode)
7356
{
7357
if (useCISCNode->getParents()->isSingleton())
7358
{
7359
TR_CISCNode *parentCISCNode = useCISCNode->getHeadOfParents();
7360
if (parentCISCNode->getTrNodeInfo()->isSingleton())
7361
{
7362
parentNode = parentCISCNode->getHeadOfTrNode();
7363
foundTT = parentCISCNode->getHeadOfTreeTop();
7364
if (parentNode->getChild(0) == useNode)
7365
retChildNum = 0;
7366
else if (parentNode->getChild(1) == useNode)
7367
retChildNum = 1;
7368
else
7369
parentNode = 0;
7370
}
7371
}
7372
}
7373
else
7374
{
7375
_useTreeTopMap.buildAllMap();
7376
foundTT = _useTreeTopMap.findParentTreeTop(useNode);
7377
if (NULL == foundTT || !searchNodeInTrees(foundTT->getNode(), useNode, &parentNode, &retChildNum))
7378
{
7379
if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because searchNodeInTrees failed. UseNode: %p with corresponding TreeTop: %p\n",useNode, foundTT);
7380
return false;
7381
}
7382
}
7383
if (!parentNode)
7384
{
7385
if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because parentNode is NULL. %p\n",useNode);
7386
return false;
7387
}
7388
TR_ASSERT(foundTT, "error!");
7389
if (parentNode->getOpCode().isStoreDirect())
7390
{
7391
if (!canConvertArrayCmpSign(parentNode, compareIfs, &convertToArrayCmp))
7392
{
7393
if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because canConvertArrayCmpSign(p) failed. %p\n",useNode);
7394
return false;
7395
}
7396
}
7397
else if (parentNode->getOpCode().isBooleanCompare())
7398
{
7399
TR_ASSERT(retChildNum == 0 || retChildNum == 1, "error");
7400
TR::Node *theOtherChild = parentNode->getChild(1-retChildNum);
7401
if (theOtherChild->getInt() != 0 ||
7402
theOtherChild->getOpCodeValue() != TR::iconst)
7403
{
7404
if (trace()) traceMsg(comp(), "canConvertArrayCmpSign failed because theOtherChild is not iconst 0. %p\n",useNode);
7405
return false;
7406
}
7407
if (compareIfs) compareIfs->add(foundTT);
7408
switch(parentNode->getOpCodeValue())
7409
{
7410
case TR::icmpeq:
7411
case TR::icmpne:
7412
case TR::ificmpeq:
7413
case TR::ificmpne:
7414
break; // OK!
7415
default:
7416
if (trace())
7417
{
7418
traceMsg(comp(), "convertArrayCmp failed because parentNode is %s. %x\n",
7419
parentNode->getOpCode().getName(),
7420
useNode,parentNode);
7421
}
7422
convertToArrayCmp = false;
7423
break;
7424
}
7425
}
7426
else
7427
{
7428
if (trace())
7429
{
7430
traceMsg(comp(), "canConvertArrayCmpSign failed because unhandled opcode %s. %x %x\n",
7431
parentNode->getOpCode().getName(),useNode,parentNode);
7432
}
7433
return false;
7434
}
7435
}
7436
if (canConvertToArrayCmp) *canConvertToArrayCmp = convertToArrayCmp;
7437
}
7438
if (disableCMP) *canConvertToArrayCmp = false;
7439
#if VERBOSE
7440
if (*canConvertToArrayCmp)
7441
printf("!!! canConvertToArrayCmp %s\n",_T->getTitle());
7442
else
7443
printf("!!! canConvertToArrayCmpSign %s\n",_T->getTitle());
7444
#endif
7445
return true;
7446
}
7447
7448
7449
7450
bool
7451
TR_CISCTransformer::computeTopologicalEmbedding(TR_CISCGraph *P, TR_CISCGraph *T)
7452
{
7453
TR::SimpleRegex *disabledPatterns = comp()->getOptions()->getDisabledIdiomPatterns();
7454
if (disabledPatterns && TR::SimpleRegex::match(disabledPatterns, P->getTitle()))
7455
{
7456
if (trace())
7457
traceMsg(comp(), "%s is disabled by disabledIdiomPatterns={}\n", P->getTitle());
7458
return false;
7459
}
7460
7461
//FIXME: improve this
7462
if (!T->testAllAspects(P))
7463
{
7464
if (trace())
7465
traceMsg(comp(), "%s is skipped since graph properties do not match (%08x)\n", P->getTitle(),P->getAspectsValue());
7466
return false; // No need to analyze
7467
}
7468
if (T->testAnyNoAspects(P))
7469
{
7470
if (trace())
7471
traceMsg(comp(), "%s is skipped due to existence of testAnyNoAspects (%08x)\n",P->getTitle(),P->getNoAspectsValue());
7472
return false; // No need to analyze
7473
}
7474
if (!T->meetMinCounts(P))
7475
{
7476
if (trace())
7477
traceMsg(comp(), "%s is skipped due to failure of meetMinCounts (%d %d %d)\n",P->getTitle(),
7478
P->getAspects()->getIfCount(), P->getAspects()->getIndirectLoadCount(), P->getAspects()->getIndirectStoreCount());
7479
return false; // No need to analyze
7480
}
7481
7482
// avoid analyzing very large graphs
7483
if (T->getNumNodes() >= IDIOM_SIZE_FACTOR*P->getNumNodes())
7484
{
7485
if (trace())
7486
traceMsg(comp(), "%s is skipped due to loop being very large\n", P->getTitle());
7487
return false; // No need to analyze
7488
}
7489
7490
#if !STRESS_TEST
7491
//FIXME: add TR_EnableIdiomRecognitionWarm to options
7492
if (1)//!_compilation->getOption(TR_EnableIdiomRecognitionWarm))
7493
{
7494
if (T->getHotness() < P->getHotness())
7495
{
7496
if (trace())
7497
traceMsg(comp(), "%s is skipped due to hotness\n",P->getTitle());
7498
return false; // No need to analyze
7499
}
7500
}
7501
bool incorrectBBFreqLevel = (T->getHotness() == veryHot);
7502
if (!incorrectBBFreqLevel)
7503
{
7504
if (P->isHighFrequency() && !T->isHighFrequency())
7505
{
7506
if (trace())
7507
traceMsg(comp(), "%s is skipped due to the rarely iterated loop (!isHighFrequency)\n",P->getTitle());
7508
return false; // No need to analyze
7509
}
7510
}
7511
if (T->getHotness() > warm) // Because IR is called only one time at the warm level, skip this check.
7512
{
7513
if (isAfterVersioning() ? P->isInhibitAfterVersioning() :
7514
P->isInhibitBeforeVersioning())
7515
{
7516
if (trace())
7517
traceMsg(comp(), "%s is skipped due to loop versioning check\n",P->getTitle());
7518
return false; // No need to analyze
7519
}
7520
}
7521
#endif
7522
7523
7524
7525
if (trace())
7526
{
7527
traceMsg(comp(), "loopid %d: ", _bblistBody.getListHead()->getData()->getNumber());
7528
P->dump(comp()->getOutFile(), comp());
7529
}
7530
_P = P;
7531
_T = T;
7532
_numPNodes = P->getNumNodes();
7533
_numTNodes = T->getNumNodes();
7534
initTopologicalEmbedding();
7535
7536
// Step 1 computes embedding information for an input data dependence graph.
7537
//
7538
if (trace())
7539
traceMsg(comp(), "Computing embedding info for idiom %s in loop %d\n", P->getTitle(), _bblistBody.getListHead()->getData()->getNumber());
7540
if (showMesssagesStdout()) printf("Idiom: loop %d, %s\n",_bblistBody.getListHead()->getData()->getNumber(),
7541
P->getTitle());
7542
_sizeResult = _numPNodes * _numTNodes * sizeof(*_embeddedForData);
7543
_embeddedForData = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);
7544
if (!computeEmbeddedForData()) return false; // It cannot find all of the idiom nodes.
7545
if (showMesssagesStdout()) printf("find1 %s\n", P->getTitle());
7546
if (trace())
7547
traceMsg(comp(), "Detected IL nodes in loop for idiom %s\n", P->getTitle());
7548
7549
// Step 2 computes embedding information for an input control flow graph.
7550
//
7551
_embeddedForCFG = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);
7552
_sizeDE = _numPNodes * sizeof(*_DE);
7553
_EM = (uint8_t*)trMemory()->allocateMemory(_sizeResult, stackAlloc);
7554
_DE = (uint8_t*)trMemory()->allocateMemory(_sizeDE, stackAlloc);
7555
if (!computeEmbeddedForCFG()) return false; // It cannot find all of the idiom nodes.
7556
if (showMesssagesStdout()) printf("find2 %s\n", P->getTitle());
7557
if (trace())
7558
traceMsg(comp(), "finished topological embedding for idiom %s\n", P->getTitle());
7559
7560
// Step 3 creates P2T and T2P tables from embedding information.
7561
// P and T denote Pattern and Target, respectively.
7562
// We can use them to find target nodes from pattern nodes, and vice versa.
7563
//
7564
_sizeP2T = _numPNodes * sizeof(*_P2T);
7565
_P2T = (List<TR_CISCNode> *)trMemory()->allocateMemory(_sizeP2T, stackAlloc);
7566
_sizeT2P = _numTNodes * sizeof(*_T2P);
7567
_T2P = (List<TR_CISCNode> *)trMemory()->allocateMemory(_sizeT2P, stackAlloc);
7568
if (!makeLists()) return false; // a variable corresponds to multiple nodes
7569
if (showMesssagesStdout()) printf("find3 %s\n", P->getTitle());
7570
7571
// Step 4 transforms the target graph if necessary and
7572
// checks that both graphs are exactly matched.
7573
//
7574
_candidatesForShowing.init();
7575
7576
// Import UD/DU information of TR::Node to TR_CISCNode._chain
7577
//
7578
T->importUDchains(comp(), _useDefInfo);
7579
7580
// It performs very simple optimizations using UD/DU chains.
7581
// Currently, it performs:
7582
// (1) redundant BNDCHK elimination.
7583
simpleOptimization();
7584
if (trace())
7585
{
7586
T->dump(comp()->getOutFile(), comp());
7587
}
7588
7589
// Analyze whether each candidate of array header constant is appropriate compared to the idiom.
7590
// Because the array header size is sometimes modified by constant folding
7591
// e.g. When AH is -24 for a[i], AH is modified to -25 for a[i+1]
7592
// If the analysis fails, it'll invalidate that node.
7593
//
7594
if (P->isRequireAHconst()) analyzeArrayHeaderConst();
7595
7596
// Analyze relationships for parents, children, predecessors, successors.
7597
// They are represented to four flags.
7598
//
7599
analyzeConnection();
7600
7601
// Based on above four relationships, we extract the region that matches the idiom graph.
7602
// If all nodes in the idiom graph are not included in the region, it returns 0.
7603
//
7604
_candidateRegion = extractMatchingRegion();
7605
if (!_candidateRegion ||
7606
!verifyCandidate() || // all blocks of the loop body are not included in the _candidateRegion
7607
embeddingHasConflictingBranches())
7608
{
7609
if (trace()) traceMsg(comp(), "computeTopologicalEmbedding: Graph transformations failed. (step 3)\n\n");
7610
registerCandidates();
7611
_T->restoreListsDuplicator();
7612
return false;
7613
}
7614
if (showMesssagesStdout()) printf("find4 %s\n", P->getTitle());
7615
7616
//***************************************************************************************
7617
// Start to transform actual code (TR::Block, TR::TreeTop, TR::Node, and so on...)
7618
//***************************************************************************************
7619
resetFlags();
7620
TransformerPtr transformer = P->getTransformer();
7621
if (performTransformation(comp(), "%sReducing loop %d to %s\n", OPT_DETAILS, _bblistBody.getListHead()->getData()->getNumber(),
7622
P->getTitle()) && !transformer(this))
7623
{
7624
if (trace()) traceMsg(comp(), "computeTopologicalEmbedding: IL Transformer failed. (step 4)\n\n");
7625
registerCandidates();
7626
_T->restoreListsDuplicator();
7627
return false; // The transformation fails
7628
}
7629
7630
if (trace() || showMesssagesStdout())
7631
{
7632
char *bcinfo = "";
7633
#if SHOW_BCINDICES
7634
char tmpbuf[256];
7635
int32_t minIndex, maxIndex;
7636
int32_t minLN, maxLN;
7637
minIndex = 0x7fffffff;
7638
maxIndex = -minIndex;
7639
minLN = 0x7fffffff;
7640
maxLN = -minIndex;
7641
bool inlined = getBCIndexMinMax(_candidateRegion, &minIndex, &maxIndex, &minLN, &maxLN, true);
7642
if (minIndex <= maxIndex)
7643
{
7644
sprintf(tmpbuf, ", bcindex %" OMR_PRIu32 " - %" OMR_PRIu32 " linenumber %" OMR_PRIu32 " - %" OMR_PRIu32 "%s.", minIndex, maxIndex, minLN, maxLN, inlined ? " (inlined)" : "");
7645
bcinfo = tmpbuf;
7646
}
7647
#endif
7648
#if SHOW_STATISTICS
7649
if (showMesssagesStdout())
7650
printf("!! Hash=0x%" OMR_PRIx64 " %s %s\n", getHashValue(_candidateRegion), P->getTitle(), T->getTitle());
7651
#endif
7652
7653
if (trace()) traceMsg(comp(), "***** Transformed *****, %s, %s, %s, loop:%d%s\n",
7654
comp()->getHotnessName(comp()->getMethodHotness()),
7655
P->getTitle(), T->getTitle(),
7656
_bblistBody.getListHead()->getData()->getNumber(),
7657
bcinfo);
7658
if (showMesssagesStdout()) printf("== Transformed == %s, %s, %s, loop:%d%s\n",
7659
comp()->getHotnessName(comp()->getMethodHotness()),
7660
P->getTitle(), T->getTitle(),
7661
_bblistBody.getListHead()->getData()->getNumber(),
7662
bcinfo);
7663
}
7664
7665
TR::DebugCounter::incStaticDebugCounter(comp(),
7666
TR::DebugCounter::debugCounterName(comp(),
7667
"idiomRecognition.matched/%s/(%s)/%s/loop=%d",
7668
P->getTitle(),
7669
comp()->signature(),
7670
comp()->getHotnessName(comp()->getMethodHotness()),
7671
_bblistBody.getListHead()->getData()->getNumber()));
7672
7673
return true;
7674
}
7675
7676
static void
7677
traceConflictingBranches(
7678
TR_CISCTransformer *opt,
7679
TR_CISCNode *pn,
7680
List<TR_CISCNode> *matches)
7681
{
7682
if (!opt->trace())
7683
return;
7684
7685
TR::Compilation *comp = opt->comp();
7686
traceMsg(comp, "Pattern node %d (%s) has conflicting branches:",
7687
pn->getID(),
7688
TR_CISCNode::getName((TR_CISCOps)pn->getOpcode(), comp));
7689
7690
bool first = true;
7691
ListIterator<TR_CISCNode> ti(matches);
7692
for (TR_CISCNode *tn = ti.getFirst(); tn != NULL; tn = ti.getNext())
7693
{
7694
traceMsg(comp, "%s %d (%s)",
7695
first ? "" : ",",
7696
tn->getID(),
7697
TR_CISCNode::getName((TR_CISCOps)tn->getOpcode(), comp));
7698
first = false;
7699
}
7700
7701
traceMsg(comp, "\n");
7702
}
7703
7704
/**
7705
* Determine whether there is a branch in the pattern with multiple matches.
7706
*
7707
* Such branches obscure the control flow of the target loop, which otherwise
7708
* should have to closely match the control flow seen in the pattern.
7709
*
7710
* Booltable nodes are exempt because the condition may be expressed via a
7711
* series of conditionals.
7712
*
7713
* Additionally, a branch may have matches ahead of the loop, but since these
7714
* have no bearing on control flow within the loop, they are allowed. An
7715
* in-loop match is moved to the front of _P2T, ahead of any outside matches.
7716
*
7717
* \return true if there is a troublesome branch
7718
*/
7719
bool
7720
TR_CISCTransformer::embeddingHasConflictingBranches()
7721
{
7722
static const char * const disableEnv =
7723
feGetEnv("TR_disableIdiomRecognitionConflictingBranchTest");
7724
static bool disable = disableEnv != NULL && disableEnv[0] != '\0';
7725
if (disable)
7726
return false;
7727
7728
List<TR_CISCNode> * const dagNodes = _P->getDagId2Nodes();
7729
const int32_t dagCount = _P->getNumDagIds();
7730
for (int32_t dag = 0; dag < dagCount; dag++)
7731
{
7732
ListIterator<TR_CISCNode> pi(&dagNodes[dag]);
7733
for (TR_CISCNode *pn = pi.getFirst(); pn != NULL; pn = pi.getNext())
7734
{
7735
uint32_t op = pn->getOpcode();
7736
bool isIf =
7737
op == (uint32_t)TR_ifcmpall
7738
|| (op < (uint32_t)TR::NumIlOps && pn->getIlOpCode().isIf());
7739
7740
if (!isIf)
7741
continue;
7742
7743
TR_CISCNode *inLoopMatch = NULL;
7744
List<TR_CISCNode> *matches = &_P2T[pn->getID()];
7745
ListIterator<TR_CISCNode> ti(matches);
7746
for (TR_CISCNode *tn = ti.getFirst(); tn != NULL; tn = ti.getNext())
7747
{
7748
if (getCandidateRegion()->isIncluded(tn))
7749
{
7750
if (inLoopMatch != NULL)
7751
{
7752
traceConflictingBranches(this, pn, matches);
7753
TR::DebugCounter::incStaticDebugCounter(comp(),
7754
TR::DebugCounter::debugCounterName(comp(),
7755
"idiomRecognition.rejected/branchConflict/%s/(%s)/%s/loop=%d",
7756
_P->getTitle(),
7757
comp()->signature(),
7758
comp()->getHotnessName(comp()->getMethodHotness()),
7759
_bblistBody.getListHead()->getData()->getNumber()));
7760
return true;
7761
}
7762
inLoopMatch = tn;
7763
}
7764
}
7765
if (inLoopMatch != NULL && matches->getHeadData() != inLoopMatch)
7766
{
7767
// move it to the front
7768
matches->remove(inLoopMatch);
7769
matches->addAfter(inLoopMatch, NULL);
7770
}
7771
}
7772
}
7773
7774
return false;
7775
}
7776
7777
// Iterate through the loop body blocks to remove Bits.keepAlive() and Reference.reachabilityFence() calls.
7778
// The keepAlive() and Reference.reachabilityFence() calls are NOP functions inserted into NIO libraries to keep
7779
// the NIO object and its native ptr alive until after the native pointer accesses.
7780
// Reference.reachabilityFence is a newly introduced Java 9 public API, we will remove this call only if the caller comes
7781
// from the java.nio package, i.e. only for Reference.reachabilityFence calls replacing existing Bits.keepAlive calls
7782
// in the java.nio package. For all other non-nio callers, we will conservatively not remove the call to not break existing
7783
// Idiom Recognition code.
7784
7785
bool
7786
TR_CISCTransformer::removeBitsKeepAliveCalls(List<TR::Block> *body)
7787
{
7788
if (trace())
7789
traceMsg(comp(), "\tScanning for java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls.\n");
7790
ListIterator<TR::Block> bi(body);
7791
TR::Block *block = NULL;
7792
bool foundCall = false;
7793
7794
_BitsKeepAliveList.init();
7795
7796
// Iterate through loop body blcoks
7797
for (block = bi.getFirst(); block != 0; block = bi.getNext())
7798
{
7799
for (TR::TreeTop *tt = block->getEntry(); tt != block->getExit(); tt = tt->getNextTreeTop())
7800
{
7801
TR::Node* node = tt->getNode();
7802
7803
// Look for the following tree:
7804
// treetop
7805
// vcall #481[0x74fd1dc0] static Method[java/nio/Bits.keepAlive(Ljava/lang/Object;)V]
7806
// aload #423[0x00694234] Parm[<parm 1 Ljava/nio/ByteBuffer;>] <flags:"0x4" (X!=0 )/>
7807
if (node->getOpCodeValue() == TR::treetop)
7808
{
7809
node = node->getChild(0);
7810
if (node->getOpCode().isCall())
7811
{
7812
TR::MethodSymbol *methodSym = node->getSymbol()->getMethodSymbol();
7813
// If the call is Bits.keepAlive() which is package private hence can only be called from the nio package,
7814
// or the call is Reference.reachabilityFence() which is public _and_ provided the caller is from the nio package,
7815
// only then we will remove the call node.
7816
if (methodSym->getRecognizedMethod() == TR::java_nio_Bits_keepAlive
7817
|| ((methodSym->getRecognizedMethod() == TR::java_lang_ref_Reference_reachabilityFence)
7818
&& (!strncmp(comp()->fe()->sampleSignature(node->getOwningMethod(), 0, 0, comp()->trMemory()), "java/nio/", 9))))
7819
{
7820
if (trace())
7821
traceMsg(comp(), "\t\tRemoving KeepAlive call found in block %d [%p] @ Node: %p\n",block->getNumber(), block, node);
7822
foundCall = true;
7823
7824
TR_BitsKeepAliveInfo *info = new (comp()->trStackMemory()) TR_CISCTransformer::TR_BitsKeepAliveInfo(block, tt, tt->getPrevTreeTop());
7825
_BitsKeepAliveList.add(info);
7826
7827
// Disconnect treetop from list.
7828
tt->getPrevTreeTop()->setNextTreeTop(tt->getNextTreeTop());
7829
tt->getNextTreeTop()->setPrevTreeTop(tt->getPrevTreeTop());
7830
}
7831
}
7832
}
7833
}
7834
}
7835
return foundCall;
7836
}
7837
7838
// Insert cloned copies of Bits.keepAlive() call into the fast path code of the reduced loop.
7839
void
7840
TR_CISCTransformer::insertBitsKeepAliveCalls(TR::Block * block)
7841
{
7842
if (trace())
7843
traceMsg(comp(), "\tInserting java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls into reduced loop.\n");
7844
7845
ListIterator<TR_BitsKeepAliveInfo> bi(&_BitsKeepAliveList);
7846
7847
for (TR_BitsKeepAliveInfo *info = bi.getFirst(); info != NULL; info = bi.getNext())
7848
{
7849
TR::TreeTop * tt = info->_treeTop;
7850
7851
// Clone the call node
7852
TR::Node *callNode = TR::Node::copy(tt->getNode()->getChild(0));
7853
callNode->decReferenceCount();
7854
callNode->getChild(0)->incReferenceCount();
7855
7856
// Uncommon the child
7857
callNode->setChild(0,callNode->getChild(0)->uncommon());
7858
TR::Node *treetopNode = TR::Node::create(TR::treetop, 1, callNode);
7859
block->append(TR::TreeTop::create(comp(), treetopNode));
7860
7861
if (trace())
7862
{
7863
TR::TreeTop * prev = info->_prevTreeTop;
7864
TR::Block * keepAliveBlock = info->_block;
7865
traceMsg(comp(), "\t\tInserting KeepAlive call clone node: %p from block %d [%p] node: %p into block: %d %p\n", callNode, keepAliveBlock->getNumber(), keepAliveBlock, tt->getNode(), block->getNumber(), block);
7866
}
7867
}
7868
}
7869
7870
// Restore any Bits.keepAlive() calls to their original locations
7871
void
7872
TR_CISCTransformer::restoreBitsKeepAliveCalls()
7873
{
7874
if (trace())
7875
traceMsg(comp(), "\tRestoring for java/nio/Bits.keepAlive(Ljava/lang/Object;)V calls.\n");
7876
ListIterator<TR_BitsKeepAliveInfo> bi(&_BitsKeepAliveList);
7877
7878
for (TR_BitsKeepAliveInfo *info = bi.getFirst(); info != NULL; info = bi.getNext())
7879
{
7880
TR::TreeTop * tt = info->_treeTop;
7881
TR::TreeTop * prev = info->_prevTreeTop;
7882
TR::Block * block = info->_block;
7883
7884
if (trace())
7885
traceMsg(comp(), "\t\tInserting KeepAlive call found in block %d [%p] @ Node: %p\n",block->getNumber(), block, tt->getNode());
7886
prev->insertAfter(tt);
7887
}
7888
}
7889
7890