Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/optimizer/IdiomTransformations.cpp
6000 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include <stdint.h>
24
#include <stdio.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include "codegen/CodeGenerator.hpp"
28
#include "env/FrontEnd.hpp"
29
#include "compile/Compilation.hpp"
30
#include "compile/SymbolReferenceTable.hpp"
31
#include "control/Options.hpp"
32
#include "control/Options_inlines.hpp"
33
#include "cs2/bitvectr.h"
34
#include "env/CompilerEnv.hpp"
35
#include "env/TRMemory.hpp"
36
#include "env/jittypes.h"
37
#include "il/AutomaticSymbol.hpp"
38
#include "il/Block.hpp"
39
#include "il/DataTypes.hpp"
40
#include "il/ILOpCodes.hpp"
41
#include "il/ILOps.hpp"
42
#include "il/ILProps.hpp"
43
#include "il/Node.hpp"
44
#include "il/Node_inlines.hpp"
45
#include "il/Symbol.hpp"
46
#include "il/SymbolReference.hpp"
47
#include "il/TreeTop.hpp"
48
#include "il/TreeTop_inlines.hpp"
49
#include "infra/Assert.hpp"
50
#include "infra/BitVector.hpp"
51
#include "infra/Cfg.hpp"
52
#include "infra/List.hpp"
53
#include "optimizer/IdiomRecognition.hpp"
54
#include "optimizer/IdiomRecognitionUtils.hpp"
55
#include "optimizer/Optimization_inlines.hpp"
56
#include "optimizer/Optimizer.hpp"
57
#include "optimizer/UseDefInfo.hpp"
58
#include "ras/Debug.hpp"
59
// Tag string for this optimization pass (not referenced in the visible part of this file;
// presumably used as a prefix for optimization-detail messages -- confirm at use sites).
#define OPT_DETAILS     "O^O NEWLOOPREDUCER: "
// Expands to a call of trace() on OBJ; used below to guard traceMsg() output.
#define DISPTRACE(OBJ)  ((OBJ)->trace())
// Expands to a call of showMesssagesStdout() on OBJ; used below to guard printf() output.
#define VERBOSE(OBJ)    ((OBJ)->showMesssagesStdout())
// Shorthand for a placement-new expression that passes PERSISTENT_NEW.
#define PNEW            new (PERSISTENT_NEW)
64
65
/** \brief
66
* Determines whether we should avoid transforming loops in java/lang/String due to functional issues when String
67
* compression is enabled.
68
*
69
* \param comp
70
* The compilation object.
71
*
72
* \return
73
* <c>true</c> if the transformation should be avoided, <c>false</c> otherwise.
74
*/
75
static bool avoidTransformingStringLoops(TR::Compilation* comp)
76
{
77
static bool cacheInitialized = false;
78
static bool cacheValue = false;
79
80
if (!cacheInitialized)
81
{
82
// TODO: This is a workaround for Java 829 functionality as we switched to using a byte[] backing array in String*.
83
// Remove this workaround once obsolete. Idiom recognition currently does not handle idioms involving char[] in a
84
// compressed string. String compression is technically a Java 9 feature, but for the sake of evaluating performance
85
// we need to be able to run our standard set of benchmarks, most of which do not work under Java 9 at the moment.
86
// This leaves us with the only option to run the respective benchmarks on Java 8 SR5, however in Java 8 SR5 the
87
// java/lang/String.value is of type char[] which will cause functional problems. To avoid these issues we will
88
// disable idiom recognition on Java 8 SR5 if String compression is enabled.
89
TR_OpaqueClassBlock* stringClass = comp->cg()->fej9()->getSystemClassFromClassName("java/lang/String", strlen("java/lang/String"), true);
90
91
if (stringClass != NULL)
92
{
93
// Only initialize the cache after we are certain java/lang/String has been resolved
94
cacheInitialized = true;
95
96
if (comp->cg()->fej9()->getInstanceFieldOffset(stringClass, "value", "[C") != ~0)
97
{
98
cacheValue = IS_STRING_COMPRESSION_ENABLED_VM(static_cast<TR_J9VMBase*>(comp->fe())->getJ9JITConfig()->javaVM);
99
}
100
}
101
}
102
103
return cacheValue;
104
}
105
106
//*****************************************************************************************
107
// It partially peels the loop body to align the top of the region
108
//*****************************************************************************************
109
bool
110
ChangeAlignmentOfRegion(TR_CISCTransformer *trans)
111
{
112
const bool disptrace = DISPTRACE(trans);
113
TR_CISCGraph *P = trans->getP();
114
TR_CISCGraph *T = trans->getT();
115
TR_CISCNode *pTop = P->getEntryNode()->getSucc(0);
116
TR_CISCNode *t;
117
TR_CISCNode *beforeLoop = NULL;
118
bool changed = false;
119
120
TR::Compilation * comp = trans->comp();
121
122
// Find actual pTop. Skip an optional node if there is no corresponding target node.
123
while (trans->getP2TRep(pTop) == NULL)
124
{
125
if (!pTop->isOptionalNode()) return changed;
126
pTop = pTop->getSucc(0);
127
}
128
129
// Try to find pTop in the predecessors of the loop body
130
for (t = T->getEntryNode(); t->isOutsideOfLoop();)
131
{
132
if (trans->analyzeT2P(t, pTop) & _T2P_MatchMask)
133
{
134
TR_CISCNode *chk;
135
for (chk = t->getSucc(0); chk->isOutsideOfLoop(); chk=chk->getSucc(0))
136
{
137
if (!chk->isNegligible() && trans->analyzeT2P(chk) == _T2P_NULL) break; // t is still invalid.
138
}
139
if (!chk->isOutsideOfLoop())
140
{
141
if (disptrace) traceMsg(comp, "ChangeAlignmentOfRegion : (t:%d p:%d) no need to change alignment\n",t->getID(),pTop->getID());
142
return changed; // Find pTop! Already aligned correctly
143
}
144
}
145
if (t->getNumSuccs() < 1)
146
{
147
if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : #succs of tID:%d is 0\n", t->getID());
148
return changed; // cannot find either a loop body or pTop in the fallthrough path
149
}
150
beforeLoop = t;
151
switch(t->getOpcode())
152
{
153
case TR::lookup:
154
case TR::table:
155
int i;
156
for (i = t->getNumSuccs(); --i >= 0; )
157
{
158
TR_CISCNode *next_t = t->getSucc(i);
159
if (next_t->getOpcode() == TR::Case &&
160
next_t->getSucc(0) != T->getExitNode())
161
{
162
t = next_t->getSucc(0);
163
goto exit_switch;
164
}
165
}
166
// fall through
167
default:
168
t = t->getSucc(0);
169
break;
170
}
171
exit_switch:;
172
}
173
TR_ASSERT(beforeLoop, "error");
174
if (t->getOpcode() != TR::BBStart) return changed; // already aligned by this transformation before
175
t = t->getSucc(0); // Skip BBStart
176
177
int condT2P = trans->analyzeT2P(t, pTop);
178
if (condT2P & _T2P_MatchMask) return changed; // no need to change alignment
179
180
if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : tTop %d, pTop %d\n",t->getID(),pTop->getID());
181
TR_CISCNodeRegion r(T->getNumNodes(), trans->trMemory()->heapMemoryRegion());
182
TR_CISCNode *firstNode = t;
183
TR_CISCNode *lastNode = NULL;
184
// Find the target node corresponding to pTop
185
int branchCount = 0;
186
for (;;)
187
{
188
if (condT2P != _T2P_NULL || !t->isNegligible()) lastNode = t;
189
t = t->getSucc(0);
190
if (t->getOpcode() == TR::BBEnd || t->getOpcode() == TR_exitnode || t == firstNode) return changed; // current limitation. peeling can be performed within the first BB of the body
191
if (t->getIlOpCode().isBranch())
192
if (++branchCount >= 2) return changed; // allow a single branch
193
condT2P = trans->analyzeT2P(t, pTop);
194
if (condT2P & _T2P_MatchMask)
195
break; // the target node corresponding to pTop is found
196
}
197
if (!lastNode) return changed; // the last node of the peeling region
198
TR_CISCNode *foundNode = lastNode->getSucc(0);
199
200
// Find the last non-negligible node
201
if (lastNode->isNegligible())
202
{
203
TR_CISCNode *lastNonNegligble = NULL;
204
for (t = firstNode; ;t = t->getSucc(0))
205
{
206
if (!t->isNegligible()) lastNonNegligble = t;
207
if (t == lastNode) break;
208
}
209
if (!lastNonNegligble) return changed;
210
lastNode = lastNonNegligble;
211
}
212
213
// Add nodes from firstNode to lastNode into the region r
214
if (disptrace) traceMsg(comp, "ChangeAlignmentOfRegion : foundNode %d, lastNode %d\n",foundNode->getID(),lastNode->getID());
215
if (branchCount > 0 &&
216
!lastNode->getIlOpCode().isBranch())
217
{
218
if (disptrace) traceMsg(comp, "Fail: there is a branch in the region. lastNode must be a branch node.\n");
219
return changed;
220
}
221
for (t = firstNode; ;t = t->getSucc(0))
222
{
223
r.append(t);
224
if (t == lastNode) break;
225
}
226
227
// analyze that all parents of every node in the region r are included in the region r.
228
ListIterator<TR_CISCNode> ri(&r);
229
for (t = ri.getFirst(); t; t = ri.getNext()) // each node in the region r
230
{
231
if (t->getOpcode() == TR::aload || t->getOpcode() == TR::iload)
232
{
233
bool noDefInR = true;
234
ListIterator<TR_CISCNode> chain(t->getChains());
235
TR_CISCNode *def;
236
for (def = chain.getFirst(); def; def = chain.getNext())
237
{
238
if (r.isIncluded(def))
239
{
240
noDefInR = false;
241
break;
242
}
243
}
244
if (noDefInR) continue; // If there is no def in r, it ignores this load node.
245
}
246
ListIterator<TR_CISCNode> pi(t->getParents());
247
TR_CISCNode *pn;
248
for (pn = pi.getFirst(); pn; pn = pi.getNext()) // each parent of t
249
{
250
if (!r.isIncluded(pn))
251
{
252
if (disptrace) traceMsg(comp,"ChangeAlignmentOfRegion : There is a parent(%d) of %d in the outside of the region\n", pn->getID(), t->getID());
253
return changed; // fail
254
}
255
}
256
}
257
258
/////////////////////////////
259
// From here, success path //
260
/////////////////////////////
261
T->duplicateListsDuplicator();
262
changed = true;
263
TR_CISCNode *from = r.getListHead()->getData();
264
TR_CISCNode *to = r.getListTail()->getData();
265
if (disptrace)
266
{
267
traceMsg(comp,"ChangeAlignmentOfRegion: Succ[0] of %d will be changed from %d to %d.\n",
268
beforeLoop->getID(),
269
beforeLoop->getSucc(0)->getID(),
270
foundNode->getID());
271
traceMsg(comp,"\tNodes from %d to %d will be added to BeforeInsertionList.\n",
272
from->getID(),to->getID());
273
}
274
TR_ASSERT(r.getListTail()->getData()->getIlOpCode().isTreeTop(), "error");
275
beforeLoop->replaceSucc(0, foundNode); // replace the loop entry with foundNode
276
TR_NodeDuplicator duplicator(comp);
277
for (t = ri.getFirst(); t; t = ri.getNext())
278
{
279
if (t->getIlOpCode().isTreeTop())
280
{
281
TR::Node *rep = t->getHeadOfTrNodeInfo()->_node;
282
if (disptrace)
283
{
284
traceMsg(comp,"add TR::Node 0x%p (tid:%d) to BeforeInsertionList.\n", rep, t->getID());
285
}
286
rep = duplicator.duplicateTree(rep);
287
if (t->getIlOpCode().isIf())
288
{
289
if (t->getOpcode() != rep->getOpCodeValue())
290
{
291
TR::TreeTop *ret;
292
for (ret = t->getHeadOfTreeTop()->getNextTreeTop();
293
ret->getNode()->getOpCodeValue() != TR::BBStart;
294
ret = ret->getNextTreeTop());
295
TR::Node::recreate(rep, (TR::ILOpCodes)t->getOpcode());
296
rep->setBranchDestination(ret);
297
}
298
}
299
trans->getBeforeInsertionList()->append(rep);
300
}
301
}
302
// Move the region ("from" - "to") to the last
303
trans->moveCISCNodes(from, to, NULL);
304
305
if (disptrace && changed)
306
{
307
traceMsg(comp,"After ChangeAlignmentOfRegion\n");
308
T->dump(comp->getOutFile(), comp);
309
}
310
return changed;
311
}
312
313
314
//*****************************************************************************************
315
// Analyze whether we can move the node n to immediately before the nodes in tgt.
316
// Both the node n and a node in tgt must be included in the list l.
317
// If the analysis fails, it will return NULL.
318
// Otherwise, it will return the target node, which must be included in the list tgt.
319
//*****************************************************************************************
320
TR_CISCNode *
321
analyzeMoveNodeForward(TR_CISCTransformer *trans, List<TR_CISCNode> *l, TR_CISCNode *n, List<TR_CISCNode> *tgt)
322
{
323
const bool disptrace = DISPTRACE(trans);
324
ListIterator<TR_CISCNode> ti(l);
325
TR_CISCNode *t;
326
TR_CISCNode *ret = NULL;
327
328
TR::Compilation * comp = trans->comp();
329
330
for (t = ti.getFirst(); t; t = ti.getNext())
331
{
332
if (t == n) break;
333
}
334
TR_ASSERT(t != NULL, "cannot find the node n in the list l!");
335
336
t = ti.getNext();
337
TR_ASSERT(t != NULL, "cannot find any node in tgt in the list l!");
338
if (tgt->find(t)) return NULL; // already moved
339
340
bool go = false;
341
if (n->isStoreDirect())
342
{
343
go = true;
344
}
345
else if (n->getNumChildren() == 2)
346
{
347
if (n->getIlOpCode().isAdd() ||
348
n->getIlOpCode().isSub() ||
349
n->getIlOpCode().isMul() ||
350
n->getIlOpCode().isLeftShift() ||
351
n->getIlOpCode().isRightShift() ||
352
n->getIlOpCode().isShiftLogical() ||
353
n->getIlOpCode().isAnd() ||
354
n->getIlOpCode().isOr() ||
355
n->getIlOpCode().isXor()) // Safe expressions
356
{
357
go = true;
358
if (n->getChild(0)->getOpcode() == TR_variable ||
359
n->getChild(1)->getOpcode() == TR_variable)
360
go = false; // not implemented yet.
361
}
362
}
363
else if (n->getNumChildren() == 1)
364
{
365
if (n->getIlOpCode().isConversion() ||
366
n->getIlOpCode().isNeg()) // Safe expressions
367
{
368
go = true;
369
if (n->getChild(0)->getOpcode() == TR_variable)
370
go = false; // not implemented yet.
371
}
372
}
373
else
374
{
375
if (n->getIlOpCode().isLoadConst())
376
{
377
go = true;
378
}
379
}
380
381
if (go)
382
{
383
List<TR_CISCNode> *chains = n->getChains();
384
List<TR_CISCNode> *parents = n->getParents();
385
TR_CISCNode *specialCareIf = trans->getP()->getSpecialCareNode(0);
386
bool generateCompensation0 = false;
387
while(true)
388
{
389
if (chains->find(t)) break; // it cannot be moved beyond its use/def.
390
if (parents->find(t)) break; // it cannot be moved beyond its parent.
391
392
if (t->getOpcode() == TR::BBStart)
393
{
394
TR::Block *block = t->getHeadOfTrNode()->getBlock();
395
if (block->getPredecessors().size() > 1) return NULL; // It currently analyzes within this BB.
396
}
397
if (t->getNumSuccs() >= 2 && specialCareIf)
398
{
399
bool fail = true;
400
TR_CISCNode *p = trans->getT2Phead(t);
401
if (p &&
402
p == specialCareIf &&
403
t->getSucc(1) == trans->getT()->getExitNode())
404
{
405
// add compensation code into AfterInsertionIdiomList and go ahead
406
TR::Node *trNode = n->getHeadOfTrNode();
407
if (trNode->getOpCode().isTreeTop())
408
{
409
if (trNode->getOpCode().isStoreDirect())
410
{
411
if (!generateCompensation0)
412
{
413
trans->getT()->duplicateListsDuplicator();
414
if (disptrace) traceMsg(comp,"analyzeMoveNodeForward: append the tree of 0x%p into AfterInsertionIdiomList\n", trNode);
415
trans->getAfterInsertionIdiomList(0)->append(trNode->duplicateTree());
416
}
417
fail = false;
418
generateCompensation0 = true;
419
}
420
// else, fail to move
421
}
422
else
423
{
424
fail = false;
425
}
426
}
427
if (fail) break; // It currently analyzes within this BB.
428
}
429
t = ti.getNext();
430
if (t == NULL) break; // cannot find any node in tgt in the list l.
431
ret = t;
432
if (tgt->find(t)) break; // find goal!
433
}
434
}
435
return ret;
436
}
437
438
439
//*****************************************************************************************
440
// It tries to reorder target nodes to match idiom nodes within each BB.
441
//*****************************************************************************************
442
bool
443
reorderTargetNodesInBB(TR_CISCTransformer *trans)
444
{
445
TR_CISCGraph *P = trans->getP();
446
TR_CISCGraph *T = trans->getT();
447
List<TR_CISCNode> *T2P = trans->getT2P(), *P2T = trans->getP2T(), *l;
448
TR_CISCNode *t, *p;
449
bool changed = false;
450
const bool disptrace = DISPTRACE(trans);
451
452
TR::Compilation * comp = trans->comp();
453
454
static int enable = -1;
455
if (enable < 0)
456
{
457
char *p = feGetEnv("DISABLE_REORDER");
458
enable = p ? 0 : 1;
459
}
460
if (!enable) return false;
461
462
TR_BitVector visited(T->getNumNodes(), comp->trMemory());
463
while(true)
464
{
465
ListIterator<TR_CISCNode> ti(T->getNodes());
466
int currentPID = 0x10000;
467
bool anyChanged = false;
468
469
for (t = ti.getFirst(); t; t = ti.getNext())
470
{
471
int tID = t->getID();
472
if (visited.isSet(tID)) continue;
473
visited.set(tID);
474
l = T2P + tID;
475
if (l->isEmpty()) // There is no idiom nodes corresponding to the node t
476
{
477
if (t->isNegligible())
478
{
479
continue; // skip the node t
480
}
481
else
482
{
483
break; // finish this analysis
484
}
485
}
486
int maxPid = -1;
487
ListIterator<TR_CISCNode> pi(l);
488
for (p = pi.getFirst(); p; p = pi.getNext())
489
{
490
if (p->getID() > maxPid) maxPid = p->getID();
491
}
492
if (maxPid >= 0)
493
{
494
if (maxPid <= currentPID)
495
{
496
currentPID = maxPid; // no problem
497
}
498
else
499
{
500
if (t->isOutsideOfLoop()) break; // reordering is currently supported only inside of the loop
501
502
// Try moving the node t forward
503
List<TR_CISCNode> *nextPlist = P2T+maxPid+1;
504
if (disptrace)
505
{
506
ListIterator<TR_CISCNode> nextTi(nextPlist);
507
TR_CISCNode *nextT;
508
traceMsg(comp,"reorderTargetNodesInBB: Try moving the tgt node %d forward until",tID);
509
for (nextT = nextTi.getFirst(); nextT; nextT = nextTi.getNext())
510
{
511
traceMsg(comp," %p(%d)",nextT,nextT->getID());
512
}
513
traceMsg(comp,"\n");
514
}
515
516
// Analyze whether we can move the node t to immediately before the nodes in nextPlist
517
List<TR_CISCNode> *dagList = T->getDagId2Nodes()+t->getDagID();
518
TR_CISCNode *tgt = analyzeMoveNodeForward(trans, dagList, t, nextPlist);
519
if (tgt)
520
{
521
T->duplicateListsDuplicator();
522
// OK, we can move the node t!
523
if (disptrace) traceMsg(comp,"We can move the node %d to %p(%d)\n",tID,tgt,tgt->getID());
524
anyChanged = changed = true;
525
526
trans->moveCISCNodes(t, t, tgt, "reorderTargetNodesInBB");
527
break;
528
}
529
}
530
}
531
}
532
if (!anyChanged) break;
533
}
534
if (disptrace && changed)
535
{
536
traceMsg(comp,"After reorderTargetNodesInBB\n");
537
T->dump(comp->getOutFile(), comp);
538
}
539
return changed;
540
}
541
542
543
//*****************************************************************************************
544
// It replicates a store instruction outside of the loop.
545
// It is specialized to those idioms that include TR_booltable
546
// Input: SpecialCareNode(0) - the TR_booltable in the idiom
547
// ImportantNode(1) - ificmpge for exiting the loop (optional)
548
//*****************************************************************************************
549
bool
550
moveStoreOutOfLoopForward(TR_CISCTransformer *trans)
551
{
552
TR_CISCGraph *P = trans->getP();
553
List<TR_CISCNode> *P2T = trans->getP2T();
554
TR_CISCNode *ixload, *aload, *iload;
555
TR::Compilation *comp = trans->comp();
556
557
TR_CISCNode *boolTable = P->getSpecialCareNode(0); // Note: The opcode isn't always TR_booltable.
558
TR_CISCNode *p = boolTable->getChild(0); // just before TR_booltable, such as b2i
559
560
TR_BitVector findBV(P->getNumNodes(), trans->trMemory(), stackAlloc);
561
findBV.set(boolTable->getID());
562
563
TR_CISCNode *optionalCmp = P->getImportantNode(1); // ificmpge
564
if (optionalCmp && (optionalCmp->getOpcode() == TR::ificmpge || optionalCmp->getOpcode() == TR_ifcmpall))
565
findBV.set(optionalCmp->getID());
566
567
ListIterator<TR_CISCNode> ti(P2T + p->getID());
568
TR_CISCNode *t;
569
TR_CISCNode *storedVariable = NULL;
570
bool success0 = false;
571
TR_ScratchList<TR_CISCNode> targetList(comp->trMemory());
572
for (t = ti.getFirst(); t; t = ti.getNext()) // for each target node corresponding to p
573
{
574
// t is a target node corresponding to p (just before TR_booltable)
575
ListIterator<TR_CISCNode> tParentIter(t->getParents());
576
TR_CISCNode *tParent;
577
for (tParent = tParentIter.getFirst(); tParent; tParent = tParentIter.getNext())
578
{
579
// checking whether tParent is a store instruction
580
if (tParent->isStoreDirect() &&
581
!tParent->isNegligible())
582
{
583
// checking whether all variables of stores are same.
584
if (!storedVariable) storedVariable = tParent->getChild(1);
585
else if (storedVariable != tParent->getChild(1))
586
{
587
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because all variables of stores are not same.\n");
588
success0 = false;
589
goto endSpecial0; // FAIL!
590
}
591
592
// checking whether tParent will reach either boolTable or optionalCmp
593
if (checkSuccsSet(trans, tParent, &findBV))
594
{
595
success0 = true; // success for this t
596
break;
597
}
598
else
599
{
600
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because tParent will not reach either boolTable or optionalCmp.\n");
601
success0 = false;
602
goto endSpecial0; // FAIL!
603
}
604
}
605
}
606
if (tParent) targetList.add(tParent); // add a store instruction
607
}
608
endSpecial0:
609
610
if (targetList.isEmpty())
611
{
612
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because targetList is empty.\n");
613
success0 = false;
614
}
615
// check if descendants of p include an array load
616
if (!getThreeNodesForArray(p, &ixload, &aload, &iload, true))
617
{
618
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward failed because decendents of pid:%d don't include an array load.\n", p->getID());
619
success0 = false;
620
}
621
622
if (success0)
623
{
624
ixload = trans->getP2TRep(ixload);
625
aload = trans->getP2TRep(aload);
626
iload = trans->getP2TRep(iload);
627
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward: Target nodes ixload=%d, aload=%d, iload=%d\n",
628
ixload ? ixload->getID() : -1, aload ? aload->getID() : -1, iload ? iload->getID() : -1);
629
trans->getT()->duplicateListsDuplicator();
630
if (ixload && aload && iload && (iload->isLoadVarDirect() || iload->getOpcode() == TR_variable))
631
{
632
TR::Node *store;
633
TR::Node *conv;
634
TR::Node *storeDup0;
635
TR::Node *storeDup1;
636
TR::Node *convDup;
637
TR::Node *ixloadNode = ixload->getHeadOfTrNodeInfo()->_node;
638
TR::Node *iloadNode = iload->getHeadOfTrNodeInfo()->_node; // index
639
TR::Node *iloadm1Node = createOP2(comp, TR::isub,
640
TR::Node::createLoad(iloadNode, iloadNode->getSymbolReference()),
641
TR::Node::create(iloadNode, TR::iconst, 0, 1));
642
643
// prepare base[index]
644
TR::Node *arrayLoad0 = createArrayLoad(comp, trans->isGenerateI2L(),
645
ixloadNode,
646
aload->getHeadOfTrNodeInfo()->_node,
647
iloadNode,
648
ixloadNode->getSize());
649
650
// prepare base[index-1] (it may not be used.)
651
TR::Node *arrayLoad1 = createArrayLoad(comp, trans->isGenerateI2L(),
652
ixloadNode,
653
aload->getHeadOfTrNodeInfo()->_node,
654
iloadm1Node,
655
ixloadNode->getSize());
656
ti.set(&targetList);
657
t = ti.getFirst();
658
store = t->getHeadOfTrNodeInfo()->_node;
659
conv = store->getChild(0);
660
if (conv->getOpCode().isConversion())
661
{
662
convDup = TR::Node::create(conv->getOpCodeValue(), 1, arrayLoad0);
663
storeDup0 = TR::Node::createStore(store->getSymbolReference(), convDup);
664
convDup = TR::Node::create(conv->getOpCodeValue(), 1, arrayLoad1);
665
storeDup1 = TR::Node::createStore(store->getSymbolReference(), convDup);
666
}
667
else
668
{
669
storeDup0 = TR::Node::createStore(store->getSymbolReference(), arrayLoad0);
670
storeDup1 = TR::Node::createStore(store->getSymbolReference(), arrayLoad1);
671
}
672
trans->getAfterInsertionIdiomList(0)->append(storeDup0); // base[index]
673
trans->getAfterInsertionIdiomList(1)->append(storeDup1); // base[index-1] (it may not be used.)
674
if (VERBOSE(trans)) printf("%s moveStoreOutOfLoopForward\n", trans->getT()->getTitle());
675
if (DISPTRACE(trans)) traceMsg(comp, "moveStoreOutOfLoopForward adds %d into compensation code [0] and [1]\n", t->getID());
676
for (; t; t = ti.getNext()) t->setIsNegligible(); // set negligible to all stores
677
}
678
else
679
success0 = false;
680
}
681
682
return success0;
683
}
684
685
686
//*****************************************************************************************
687
// It analyzes redundant IAND. It is specialized to MEMCPYxxx2Byte, such as MEMCPYChar2Byte.
688
// Input: SpecialCareNode(*) - a set of conversions, such as i2b
689
//*****************************************************************************************
690
bool
691
IANDSpecialNodeTransformer(TR_CISCTransformer *trans)
692
{
693
TR_CISCGraph *P = trans->getP();
694
List<TR_CISCNode> *P2T = trans->getP2T();
695
TR::Compilation *comp = trans->comp();
696
int idx;
697
bool ret = false;
698
699
for (idx = 0; idx < MAX_SPECIALCARE_NODES; idx++)
700
{
701
TR_CISCNode *p = P->getSpecialCareNode(idx);
702
if (!p) break;
703
ListIterator<TR_CISCNode> ti(P2T + p->getID());
704
TR_CISCNode *t;
705
for (t = ti.getFirst(); t; t = ti.getNext())
706
{
707
if (t->getOpcode() != TR::i2b) continue; // not implemented yet for other OPs
708
TR_CISCNode *ch = t->getChild(0);
709
if (ch->isNegligible()) continue;
710
711
// example: the following two IANDs are redundant.
712
// dst = (byte)(((ch & 0xFF00) >> 8) & 0xFF)
713
// ^^^^^^^^ ^^^^^^
714
switch(ch->getOpcode())
715
{
716
case TR::iand:
717
if (!ch->getParents()->isSingleton() ||
718
!testIConst(ch, 1, 0xFF)) return false; // child(1) is "iconst 0xff"
719
ch->setIsNegligible(); // this IAND can be negligible!
720
ret = true;
721
722
ch = ch->getChild(0);
723
if (ch->getOpcode() != TR::ishr && ch->getOpcode() != TR::iushr) break;
724
// fall through if TR::ishr
725
case TR::ishr:
726
case TR::iushr:
727
if (!testIConst(ch, 1, 0x8)) break; // child(1) is "iconst 0x8"
728
729
ch = ch->getChild(0);
730
if (ch->getOpcode() != TR::iand) break;
731
if (!ch->getParents()->isSingleton() ||
732
!testIConst(ch, 1, 0xFF00)) return false; // child(1) is "iconst 0xFF00"
733
ch->setIsNegligible(); // this SHR can be negligible!
734
ret = true;
735
break;
736
}
737
}
738
}
739
return ret;
740
}
741
742
//////////////////////////////////////////////////////////////////////////
743
// utility routines
744
745
static void
746
findIndexLoad(TR::Node *aiaddNode, TR::Node *&index1, TR::Node *&index2, TR::Node *&topLevelIndex)
747
{
748
// iiload
749
// aiadd <-- aiaddNode
750
// aload
751
// isub
752
// imul
753
// iload <-- looking for the index
754
// iconst 4
755
// iconst -16
756
//
757
// -or-
758
// iiload
759
// aiadd
760
// aload
761
// isub
762
// iload
763
// iconst
764
//
765
// -or-
766
// iiload
767
// aiadd <-- aiaddNode
768
// aload
769
// isub
770
// imul
771
// iadd
772
// iload <-- looking for the index
773
// iload <-- looking for the index
774
// iconst 4
775
// iconst -16
776
//
777
// -or-
778
// iiload
779
// aiadd
780
// aload
781
// isub
782
// iadd
783
// iload <-- looking for the index
784
// iload <-- looking for the index
785
// iconst
786
//
787
index1 = NULL;
788
index2 = NULL;
789
topLevelIndex = NULL;
790
TR::Node *addOrSubNode = aiaddNode->getSecondChild();
791
if (addOrSubNode->getOpCode().isAdd() || addOrSubNode->getOpCode().isSub())
792
{
793
TR::Node *grandChild = NULL;
794
if (addOrSubNode->getFirstChild()->getOpCode().isMul())
795
grandChild = addOrSubNode->getFirstChild()->getFirstChild();
796
else
797
grandChild = addOrSubNode->getFirstChild();
798
799
if (grandChild->getOpCodeValue() == TR::i2l)
800
grandChild = grandChild->getFirstChild();
801
802
topLevelIndex = grandChild;
803
804
if (grandChild->getOpCode().hasSymbolReference())
805
{
806
index1 = grandChild;
807
}
808
else if (grandChild->getOpCode().isAdd() || grandChild->getOpCode().isSub())
809
{
810
TR::Node *grandGrandChild1 = grandChild->getFirstChild();
811
TR::Node *grandGrandChild2 = grandChild->getSecondChild();
812
while(grandGrandChild1->getOpCode().isAdd() || grandGrandChild1->getOpCode().isSub())
813
{
814
grandGrandChild2 = grandGrandChild1->getSecondChild();
815
grandGrandChild1 = grandGrandChild1->getFirstChild();
816
}
817
if (grandGrandChild1->getOpCode().hasSymbolReference())
818
{
819
index1 = grandGrandChild1;
820
}
821
if (grandGrandChild2->getOpCode().hasSymbolReference())
822
{
823
index2 = grandGrandChild2;
824
}
825
}
826
}
827
}
828
829
830
// get the iv thats involved in the looptest
831
//
832
static bool
833
usedInLoopTest(TR::Compilation *comp, TR::Node *loopTestNode, TR::SymbolReference *srcSymRef)
834
{
835
TR::Node *ivNode = loopTestNode->getFirstChild();
836
if (ivNode->getOpCode().isAdd() || ivNode->getOpCode().isSub())
837
ivNode = ivNode->getFirstChild();
838
839
if (ivNode->getOpCode().hasSymbolReference())
840
{
841
if (ivNode->getSymbolReference()->getReferenceNumber() == srcSymRef->getReferenceNumber())
842
return true;
843
}
844
else dumpOptDetails(comp, "iv %p in the loop test %p has no symRef?\n", ivNode, loopTestNode);
845
return false;
846
}
847
848
static bool
849
indexContainsArray(TR::Compilation *comp, TR::Node *index, vcount_t visitCount)
850
{
851
if (index->getVisitCount() == visitCount)
852
return false;
853
854
index->setVisitCount(visitCount);
855
856
if (comp->trace(OMR::idiomRecognition))
857
traceMsg(comp, "analyzing node %p\n", index);
858
859
if (index->getOpCode().hasSymbolReference() &&
860
index->getSymbolReference()->getSymbol()->isArrayShadowSymbol())
861
{
862
if (comp->trace(OMR::idiomRecognition))
863
traceMsg(comp, "found array node %p\n", index);
864
return true;
865
}
866
867
for (int32_t i = 0; i < index->getNumChildren(); i++)
868
if (indexContainsArray(comp, index->getChild(i), visitCount))
869
return true;
870
871
return false;
872
}
873
874
875
static bool
876
indexContainsArrayAccess(TR::Compilation *comp, TR::Node *aXaddNode)
877
{
878
if (comp->trace(OMR::idiomRecognition))
879
traceMsg(comp, "axaddnode %p\n", aXaddNode);
880
881
TR::Node *loadNode1, *loadNode2, *topLevelIndex;
882
findIndexLoad(aXaddNode, loadNode1, loadNode2, topLevelIndex);
883
// topLevelIndex now contains the actual expression q in a[q]
884
// if q contains another array access, then we cannot reduce
885
// this loop into an arraycopy
886
// ie. a[b[i]] do not represent linear array accesses
887
//
888
if (comp->trace(OMR::idiomRecognition))
889
traceMsg(comp, "aXaddNode %p topLevelIndex %p\n", aXaddNode, topLevelIndex);
890
vcount_t visitCount = comp->incOrResetVisitCount();
891
if (topLevelIndex)
892
return indexContainsArray(comp, topLevelIndex, visitCount);
893
return false;
894
}
895
896
// isIndexVariableInList checks whether the induction (index) variable symbol(s)
897
// from the given 'node' subtree is found inside 'nodeList'.
898
//
899
// Returns true if
900
// 1. one induction variable symbol is found in the list.
901
// Returns false if
902
// 1. no induction variables are found.
903
// 2. two induction variables found in 'node' tree are both in the list.
904
// i.e. a[i+j]
905
// i++;
906
// j++;
907
// In this case, the access pattern of the array would skip every
908
// other element.
909
static bool
910
isIndexVariableInList(TR::Node *node, List<TR::Node> *nodeList)
911
{
912
TR::Symbol *indexSymbol1 = NULL, *indexSymbol2 = NULL;
913
TR::Node *loadNode1, *loadNode2, *topLevelIndex;
914
915
findIndexLoad(node->getOpCode().isAdd() ? node : node->getFirstChild(),
916
loadNode1, loadNode2, topLevelIndex);
917
if (loadNode1)
918
indexSymbol1 = loadNode1->getSymbolReference()->getSymbol();
919
if (loadNode2)
920
indexSymbol2 = loadNode2->getSymbolReference()->getSymbol();
921
922
bool foundSymbol1 = false, foundSymbol2 = false;
923
924
if (indexSymbol1 || indexSymbol2)
925
{
926
// Search the node list for the index symbol(s).
927
ListIterator<TR::Node> li(nodeList);
928
TR::Node *store;
929
for (store = li.getFirst(); store; store = li.getNext())
930
{
931
TR::Symbol *storeSymbol = store->getSymbolReference()->getSymbol();
932
if (indexSymbol1 == storeSymbol)
933
foundSymbol1 = true;
934
if (indexSymbol2 && indexSymbol2 == storeSymbol)
935
foundSymbol2 = true;
936
}
937
}
938
939
// Return true only if either one symbol is found, but not both.
940
return foundSymbol1 ^ foundSymbol2;
941
}
942
943
944
// for the memCmp transformer
945
//
946
static bool
947
indicesAndStoresAreConsistent(TR::Compilation *comp, TR::Node *lhsSrcNode, TR::Node *rhsSrcNode, TR_CISCNode *lhsNode, TR_CISCNode *rhsNode)
948
{
949
// lhs and rhs indicate the two arrays involved in the comparison test
950
//
951
//
952
TR_ScratchList<TR::Node> variableList(comp->trMemory());
953
if (lhsNode)
954
variableList.add(lhsNode->getHeadOfTrNode());
955
if (rhsNode && rhsNode != lhsNode)
956
variableList.add(rhsNode->getHeadOfTrNode());
957
return (isIndexVariableInList(lhsSrcNode, &variableList) &&
958
isIndexVariableInList(rhsSrcNode, &variableList));
959
}
960
961
// Walks from an array-shadow access down towards the node supplying the array
// base: takes the first child of the access, then steps through one array
// reference node and one indirect node (each via its first child) if present.
//
// Returns NULL when 'node' has no symbol reference or its symbol is not an
// array shadow symbol.
static TR::Node* getArrayBase(TR::Node *node)
   {
   const bool isArrayShadowAccess =
      node->getOpCode().hasSymbolReference() &&
      node->getSymbolReference()->getSymbol()->isArrayShadowSymbol();
   if (!isArrayShadowAccess)
      return NULL;

   TR::Node *base = node->getFirstChild();
   if (base->getOpCode().isArrayRef())
      base = base->getFirstChild();
   if (base->getOpCode().isIndirect())
      base = base->getFirstChild();
   return base;
   }
973
974
static bool
975
areArraysInvariant(TR::Compilation *comp, TR::Node *inputNode, TR::Node *outputNode, TR_CISCGraph *T)
976
{
977
if (T)
978
{
979
TR::Node *aNode = getArrayBase(inputNode);
980
TR::Node *bNode = getArrayBase(outputNode);
981
982
if (comp->trace(OMR::idiomRecognition))
983
traceMsg(comp, "aNode = %p bNode = %p\n", aNode, bNode);
984
if (aNode && aNode->getOpCode().isLoadDirect() &&
985
bNode && bNode->getOpCode().isLoadDirect())
986
{
987
TR_CISCNode *aCNode = T->getCISCNode(aNode);
988
TR_CISCNode *bCNode = T->getCISCNode(bNode);
989
990
if (comp->trace(OMR::idiomRecognition))
991
traceMsg(comp, "aC = %p %d bC = %p %d\n", aCNode, aCNode->getID(), bCNode, bCNode->getID());
992
if (aCNode && bCNode)
993
{
994
ListIterator<TR_CISCNode> aDefI(aCNode->getChains());
995
ListIterator<TR_CISCNode> bDefI(bCNode->getChains());
996
TR_CISCNode *ch;
997
for (ch = aDefI.getFirst(); ch; ch = aDefI.getNext())
998
{
999
if (ch->getDagID() == aCNode->getDagID())
1000
{
1001
traceMsg(comp, "def %d found inside loop for %d\n", ch->getID(), aCNode->getID());
1002
return false;
1003
}
1004
}
1005
for (ch = bDefI.getFirst(); ch; ch = bDefI.getNext())
1006
{
1007
if (ch->getDagID() == bCNode->getDagID())
1008
{
1009
traceMsg(comp, "def %d found inside loop for %d\n", ch->getID(), bCNode->getID());
1010
return false;
1011
}
1012
}
1013
}
1014
}
1015
}
1016
return true;
1017
}
1018
1019
1020
// used for a TRTO reduction in java/io/DataOutputStream.writeUTF(String)
1021
//
1022
static TR::Node *
1023
areDefsOnlyInsideLoop(TR::Compilation *comp, TR_CISCTransformer *trans, TR::Node *outputNode)
1024
{
1025
bool extraTrace = DISPTRACE(trans);
1026
1027
if (extraTrace)
1028
traceMsg(trans->comp(), "finding defs for index used in tree %p\n", outputNode);
1029
1030
TR_UseDefInfo *info = trans->optimizer()->getUseDefInfo();
1031
if (info)
1032
{
1033
TR::Node *loadNode = NULL, *loadNode1, *loadNode2, *topLevelIndex;
1034
findIndexLoad(outputNode, loadNode1, loadNode2, topLevelIndex);
1035
1036
if (loadNode1 && loadNode2) return NULL; // Try to keep the original semantics, but it may be too strict.
1037
loadNode = loadNode1 ? loadNode1 : loadNode2;
1038
1039
if (loadNode)
1040
{
1041
uint16_t useDefIndex = loadNode->getUseDefIndex();
1042
TR_UseDefInfo::BitVector defs(comp->allocator());
1043
info->getUseDef(defs, useDefIndex);
1044
if (!defs.IsZero())
1045
{
1046
TR_UseDefInfo::BitVector::Cursor cursor(defs);
1047
int32_t numDefs = 0;
1048
TR::TreeTop *defTT = NULL;
1049
for (cursor.SetToFirstOne(); cursor.Valid(); cursor.SetToNextOne())
1050
{
1051
int32_t defIndex = cursor;
1052
if (defIndex < info->getFirstRealDefIndex())
1053
continue; // method entry is def
1054
defTT = info->getTreeTop(defIndex);
1055
numDefs++;
1056
}
1057
// if the only def is one inside the loop, then
1058
// insert the def before the translation node
1059
//
1060
if (numDefs == 1)
1061
{
1062
TR::Block *defBlock = defTT->getEnclosingBlock();
1063
if (extraTrace)
1064
traceMsg(trans->comp(), "found single def %p for load %p\n", defTT->getNode(), loadNode);
1065
if (trans->isBlockInLoopBody(defBlock))
1066
return (defTT->getNode()->duplicateTree(trans->comp()));
1067
}
1068
}
1069
}
1070
}
1071
return NULL;
1072
}
1073
1074
1075
static void
1076
findIndVarLoads(TR::Node *node, TR::Node *indVarStoreNode, bool &storeFound,
1077
List<TR::Node> *ivLoads, TR::Symbol *ivSym, vcount_t visitCount)
1078
{
1079
if (node->getVisitCount() == visitCount)
1080
return;
1081
node->setVisitCount(visitCount);
1082
1083
if (node == indVarStoreNode)
1084
storeFound = true;
1085
1086
if (node->getOpCodeValue() == TR::iload &&
1087
node->getSymbolReference()->getSymbol() == ivSym)
1088
{
1089
if (!ivLoads->find(node))
1090
ivLoads->add(node);
1091
}
1092
1093
for (int32_t i = 0; i < node->getNumChildren(); i++)
1094
findIndVarLoads(node->getChild(i), indVarStoreNode, storeFound, ivLoads, ivSym, visitCount);
1095
}
1096
1097
static int32_t
1098
checkForPostIncrement(TR::Compilation *comp, TR::Block *loopHeader, TR::Node *loopCmpNode, TR::Symbol *ivSym)
1099
{
1100
TR::TreeTop *startTree = loopHeader->getFirstRealTreeTop();
1101
TR::Node *indVarStoreNode = NULL;
1102
TR::TreeTop *tt;
1103
for (tt = startTree; tt != loopHeader->getExit(); tt = tt->getNextTreeTop())
1104
{
1105
TR::Node *n = tt->getNode();
1106
if (n->getOpCode().isStoreDirect() &&
1107
(n->getSymbolReference()->getSymbol() == ivSym) /*&&
1108
n->getFirstChild()->getSecondChild()->getOpCode().isLoadConst()*/)
1109
{
1110
indVarStoreNode = n;
1111
break;
1112
}
1113
}
1114
if (!indVarStoreNode)
1115
return 0;
1116
1117
bool storeFound = false;
1118
vcount_t visitCount = comp->incOrResetVisitCount();
1119
TR_ScratchList<TR::Node> ivLoads(comp->trMemory());
1120
for (tt = startTree; !storeFound && tt != loopHeader->getExit(); tt = tt->getNextTreeTop())
1121
findIndVarLoads(tt->getNode(), indVarStoreNode, storeFound, &ivLoads, ivSym, visitCount);
1122
1123
TR::Node *cmpFirstChild = loopCmpNode->getFirstChild();
1124
1125
TR::Node *storeIvLoad = indVarStoreNode->getFirstChild();
1126
if (storeIvLoad->getOpCode().isAdd() || storeIvLoad->getOpCode().isSub())
1127
storeIvLoad = storeIvLoad->getFirstChild();
1128
1129
if(comp->trace(OMR::idiomRecognition))
1130
traceMsg(comp, "found storeIvload %p cmpFirstChild %p\n", storeIvLoad, cmpFirstChild);
1131
// simple case
1132
// the loopCmp uses the un-incremented value
1133
// of the iv
1134
//
1135
if (storeIvLoad == cmpFirstChild)
1136
return 1;
1137
1138
// the loopCmp uses some load of the iv that
1139
// was commoned
1140
//
1141
if (ivLoads.find(cmpFirstChild))
1142
return 1;
1143
1144
// uses a brand new load of the iv
1145
return 0;
1146
}
1147
1148
static bool
1149
checkByteToChar(TR::Compilation *comp, TR::Node *iorNode, TR::Node *&inputNode, bool bigEndian)
1150
{
1151
// this is the pattern thats being reduced
1152
//
1153
// ior
1154
// imul
1155
// bu2i
1156
// ibload #261 Shadow[<array-shadow>]
1157
// aiadd <flags:"0x8000" (internalPtr )/>
1158
// aload #523 Auto[<temp slot 10>]
1159
// isub
1160
// ==>iload i
1161
// iconst -17
1162
// iconst 256
1163
// bu2i
1164
// ibload #261 Shadow[<array-shadow>]
1165
// aiadd <flags:"0x8000" (internalPtr )/>
1166
// ==>aload at #523
1167
// isub
1168
// ==>iload i
1169
// iconst -16
1170
//
1171
// for little-endian platforms,
1172
// char = byte[i+1] << 8 | byte[i] (ie. lower index is in the lsb)
1173
//
1174
// for big-endian platforms,
1175
// char = byte[i] << 8 | byte[i+1] (ie. lower index is in the msb)
1176
//
1177
// in either case, if the incoming user code is swapped, then the transformation
1178
// is illegal.
1179
//
1180
if (!iorNode) return false;
1181
1182
TR::Node *imulNode = iorNode->getFirstChild();
1183
if ((imulNode->getOpCodeValue() != TR::imul) &&
1184
(imulNode->getOpCodeValue() != TR::ishl))
1185
imulNode = iorNode->getSecondChild();
1186
1187
if ((imulNode->getOpCodeValue() == TR::imul) ||
1188
(imulNode->getOpCodeValue() == TR::ishl))
1189
{
1190
// find the index to be either i, i+1
1191
// if (le)
1192
// if index is i+1 then inputNode = other ibload of the ior
1193
// else fail
1194
// if (be)
1195
// if index is i then inputNode = ibload child of imul
1196
// else fail
1197
//
1198
TR::Node *ibloadNode = imulNode->getFirstChild()->skipConversions();
1199
bool plusOne = false;
1200
bool matchPattern = false;
1201
if (ibloadNode->getOpCodeValue() == TR::bloadi)
1202
{
1203
TR::Node *subNode = ibloadNode->getFirstChild()->getSecondChild();
1204
int32_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 1;
1205
if (subNode->getOpCode().isSub() &&
1206
subNode->getSecondChild()->getOpCode().isLoadConst())
1207
{
1208
int32_t constVal;
1209
if (subNode->getSecondChild()->getOpCodeValue() == TR::lconst)
1210
constVal = (int32_t) subNode->getSecondChild()->getLongInt();
1211
else
1212
constVal = subNode->getSecondChild()->getInt();
1213
1214
if (constVal < 0) constVal = -constVal;
1215
1216
if (constVal == hdrSize)
1217
{
1218
matchPattern = true;
1219
plusOne = true;
1220
}
1221
else if (constVal == hdrSize-1)
1222
{
1223
matchPattern = true;
1224
plusOne = false;
1225
}
1226
1227
if (matchPattern)
1228
{
1229
if (bigEndian)
1230
{
1231
if (!plusOne)
1232
{
1233
inputNode = ibloadNode->getFirstChild();
1234
return true;
1235
}
1236
else
1237
return false;
1238
}
1239
else
1240
{
1241
if (plusOne)
1242
{
1243
inputNode = iorNode->getSecondChild()->skipConversions();
1244
if (inputNode->getOpCodeValue() == TR::bloadi)
1245
{
1246
inputNode = inputNode->getFirstChild();
1247
return true;
1248
}
1249
else
1250
return false;
1251
}
1252
else
1253
return false;
1254
}
1255
}
1256
}
1257
}
1258
}
1259
1260
return false;
1261
}
1262
1263
static bool
1264
ivIncrementedBeforeBoolTableExit(TR::Compilation *comp, TR::Node *boolTableExit,
1265
TR::Block *entryBlock,
1266
TR::SymbolReference *ivSymRef)
1267
{
1268
TR::TreeTop *startTree = entryBlock->getFirstRealTreeTop();
1269
TR::Node *ivStore = NULL;
1270
bool foundBoolTable = false;
1271
for (TR::TreeTop *tt = startTree; tt != entryBlock->getExit(); tt = tt->getNextTreeTop())
1272
{
1273
TR::Node *n = tt->getNode();
1274
if (n == boolTableExit)
1275
{
1276
foundBoolTable = true;
1277
break;
1278
}
1279
if (n->getOpCode().isStoreDirect() &&
1280
(n->getSymbolReference()->getSymbol() == ivSymRef->getSymbol()))
1281
ivStore = n;
1282
}
1283
1284
if (foundBoolTable && ivStore)
1285
return true;
1286
return false;
1287
}
1288
1289
1290
1291
1292
//*****************************************************************************************
1293
// default graph transformer
1294
// currently, it has:
1295
// (1) partial peeling of the loop body
1296
//*****************************************************************************************
1297
bool
1298
defaultSpecialNodeTransformer(TR_CISCTransformer *trans)
1299
{
1300
bool success = ChangeAlignmentOfRegion(trans);
1301
success |= reorderTargetNodesInBB(trans);
1302
return success;
1303
}
1304
1305
1306
//*****************************************************************************************
1307
// graph transformer for MEMCPY
1308
// default + IANDSpecialNodeTransformer
1309
//*****************************************************************************************
1310
bool
1311
MEMCPYSpecialNodeTransformer(TR_CISCTransformer *trans)
1312
{
1313
bool success = defaultSpecialNodeTransformer(trans);
1314
success |= IANDSpecialNodeTransformer(trans);
1315
return success;
1316
}
1317
1318
1319
//*****************************************************************************************
1320
// graph transformer for TRT
1321
// default + moveStoreOutOfLoopForward
1322
//*****************************************************************************************
1323
bool
1324
TRTSpecialNodeTransformer(TR_CISCTransformer *trans)
1325
{
1326
bool success = moveStoreOutOfLoopForward(trans);
1327
success |= defaultSpecialNodeTransformer(trans);
1328
return success;
1329
}
1330
1331
1332
//*****************************************************************************************
1333
// IL code generation for exploiting the TRT (or SRST) instruction
1334
// Input: ImportantNode(0) - booltable
1335
// ImportantNode(1) - ificmpge
1336
// ImportantNode(2) - NULLCHK
1337
// ImportantNode(3) - array load
1338
//*****************************************************************************************
1339
// Possible parameters of TR::arraytranslateAndTest
1340
// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, uint8_t *table, int arrayLen)
1341
// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, uint8_t *table, int arrayLen, int endLen)
1342
// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, int findByte, int arrayLen)
1343
// retIndex = findbytes(uint8_t *arrayBase, int arrayIndex, int findByte, int arrayLen, int endLen)
1344
1345
// If the flag charArrayTRT is set, the type of the array is "char".
1346
1347
bool
1348
CISCTransform2FindBytes(TR_CISCTransformer *trans)
1349
{
1350
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
1351
// the arraytranslateAndTest opcode is overloaded
1352
// with a flag
1353
const bool disptrace = DISPTRACE(trans);
1354
TR::Node *trNode;
1355
TR::TreeTop *trTreeTop;
1356
TR::Block *block;
1357
TR_CISCGraph *P = trans->getP();
1358
TR_CISCGraph *T = trans->getT();
1359
List<TR_CISCNode> *P2T = trans->getP2T();
1360
TR::Compilation * comp = trans->comp();
1361
bool isTRT2Char = false;
1362
TR::CFG *cfg = comp->getFlowGraph();
1363
1364
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
1365
1366
// find the first node of the region _candidateRegion
1367
trans->findFirstNode(&trTreeTop, &trNode, &block);
1368
if (!block) return false; // cannot find
1369
1370
if (isLoopPreheaderLastBlockInMethod(comp, block))
1371
{
1372
traceMsg(comp, "Bailing CISCTransform2FindBytes due to null TT - might be a preheader in last block of method\n");
1373
return false;
1374
}
1375
1376
List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID(); // ificmpge
1377
TR_CISCNode *exitIfRep = trans->getP2TRepInLoop(P->getImportantNode(1));
1378
int32_t modLength = 0;
1379
if (exitIfRep)
1380
{
1381
if (exitIfRep != trans->getP2TInLoopIfSingle(P->getImportantNode(1)))
1382
{
1383
if (disptrace) traceMsg(comp, "Give up because of multiple candidates of ificmpge.\n");
1384
return false;
1385
}
1386
bool isDecrement;
1387
if (!testExitIF(exitIfRep->getOpcode(), &isDecrement, &modLength)) return false;
1388
if (isDecrement) return false;
1389
}
1390
1391
TR::Block *target = trans->analyzeSuccessorBlock();
1392
if (!target) // multiple successors
1393
{
1394
// current restrictions. allow only the case where there is an ificmpge node and successor is 2.
1395
if (listT->isEmpty() ||
1396
trans->getNumOfBBlistSucc() != 2)
1397
{
1398
if (disptrace) traceMsg(comp, "Currently, CISCTransform2FindBytes allows only the case where there is an ificmpge node and successor is 2.\n");
1399
return false;
1400
}
1401
}
1402
1403
// Check if there is idiom specific node insertion.
1404
// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()
1405
bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);
1406
1407
// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)
1408
bool isNeedGenIcmpge = !listT->isEmpty() && (!target || isCompensateCode);
1409
1410
TR::Node *baseRepNode, *indexRepNode, *ahConstNode = NULL;
1411
// get each target node corresponding to p0 and p1
1412
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode);
1413
// get the node corresponding to
1414
// aiadd
1415
// aload
1416
// isub <---
1417
// index
1418
// headerConst
1419
//
1420
TR_CISCNode *ahConstCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0)->getChild(1));
1421
1422
if (ahConstCISCNode)
1423
{
1424
ahConstNode = ahConstCISCNode->getHeadOfTrNodeInfo()->_node;
1425
if (ahConstNode->getOpCode().isAdd() || ahConstNode->getOpCode().isSub())
1426
ahConstNode = ahConstNode->getSecondChild();
1427
else
1428
ahConstNode = NULL;
1429
}
1430
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
1431
1432
// Prepare the function table
1433
TR::Node *tableNode;
1434
uint8_t *tmpTable = (uint8_t *)comp->trMemory()->allocateStackMemory(65536 * sizeof(uint8_t));
1435
int32_t count;
1436
TR::TreeTop *retSameExit = NULL;
1437
TR_CISCNode *pBoolTable = P->getImportantNode(0);
1438
TR_CISCNode *tBoolTable = NULL;
1439
1440
TR_ASSERT(trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isByte() ||
1441
trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned(), "Error");
1442
isTRT2Char = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();
1443
1444
if (!isTRT2Char)
1445
{
1446
if ((count = trans->analyzeByteBoolTable(pBoolTable, tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)
1447
{
1448
bool go = false;
1449
if ((tBoolTable = trans->getP2TInLoopIfSingle(pBoolTable)) != 0 &&
1450
(tBoolTable->getOpcode() == TR::ificmpeq))
1451
{
1452
retSameExit = tBoolTable->getDestination();
1453
go = true;
1454
}
1455
if (!go)
1456
{
1457
if (disptrace) traceMsg(comp, "analyzeByteBoolTable failed.\n");
1458
return false; // fail to analyze
1459
}
1460
}
1461
}
1462
else
1463
{
1464
bool supportsSRSTU = comp->cg()->getSupportsSearchCharString();
1465
if ((count = trans->analyzeCharBoolTable(pBoolTable, tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)
1466
{
1467
// Case where we have a single, non-constant delimiter. With SRSTU, we can handle this situation.
1468
if (supportsSRSTU && // Confirm that the processor has the SRSTU instruction.
1469
(tBoolTable = trans->getP2TInLoopIfSingle(pBoolTable)) != NULL &&
1470
(tBoolTable->getOpcode() == TR::ificmpeq))
1471
{
1472
retSameExit = tBoolTable->getDestination();
1473
}
1474
else
1475
{
1476
if (disptrace) traceMsg(comp, "analyzeCharBoolTable failed.\n");
1477
return false; // fail to analyze
1478
}
1479
}
1480
else
1481
{
1482
if (!supportsSRSTU || // If we don't have SRSTU support, we can use SRST/TRT for single byte searches if delimiters are within 1-255.
1483
count != 1) // If we only have 1 constant delimiter and have SRSTU support, we can search for any 2-byte delimiter.
1484
{
1485
if (disptrace && count > 1)
1486
traceMsg(comp, "Multiple exit conditions for a char array. We can implement this case using the TRTE instruction on z6.\n");
1487
1488
if (tmpTable[0])
1489
{
1490
traceMsg(comp, "Char array has '0' as an exit condition, loop will not be reduced TRT/SRST (single-byte) instruction.\n");
1491
return false; // if zero is a delimiter, give up.
1492
}
1493
for (int32_t i = 256; i < 65536; i++)
1494
{
1495
if (tmpTable[i])
1496
{
1497
traceMsg(comp, "Char array has one of 256 through 65535 (%d) as an exit condition, loop cannot be reduced to TRT/SRST (single-byte) instruction.\n", i);
1498
return false; // if any value between 256 and 65535 is a delimiter, give up.
1499
}
1500
}
1501
}
1502
}
1503
}
1504
1505
if (count != 0 && !retSameExit) // there is a booltable check and all destinations of booltable are not same
1506
{
1507
traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");
1508
return false;
1509
}
1510
1511
// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.
1512
if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)
1513
{
1514
traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. Abandoning reduction.\n",
1515
retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),
1516
target->getNumber(), target);
1517
return false;
1518
}
1519
1520
// FIXME: this test is needed because in the TRT2Byte
1521
// and TRT2 idioms, the aHeader const is not the 4th node
1522
//
1523
bool indexRequiresAdjustment = false;
1524
int32_t ahValue = 0;
1525
if (ahConstNode && ahConstNode->getOpCode().isLoadConst())
1526
{
1527
ahValue = (ahConstNode->getType().isInt64() ?
1528
(int32_t)ahConstNode->getLongInt() : ahConstNode->getInt());
1529
if (ahValue < 0)
1530
ahValue = -ahValue;
1531
1532
if (ahValue != TR::Compiler->om.contiguousArrayHeaderSizeInBytes())
1533
indexRequiresAdjustment = true;
1534
}
1535
// We currently don't distinguish between case when starting index is in form of index = index + offset
1536
// aiadd
1537
// aload
1538
// lsub <--- ahConstCISCNode->getHeadOfTrNode()
1539
// lmul <--- indexLoadNode
1540
// iload
1541
// lconst 2
1542
// lconst -10 <--- headerConst
1543
//
1544
// vs index' = index; index++; (See: PR: 82148)
1545
//
1546
// istore <--- index++;
1547
// isub
1548
// iload
1549
// iconst -1
1550
//..
1551
// aiadd
1552
// aload
1553
// lsub <--- ahConstCISCNode->getHeadOfTrNode()
1554
// lmul <--- indexLoadNode
1555
// ==>iload <--- index'
1556
// lconst 2
1557
// lconst -10 <--- headerConst
1558
//
1559
// for now disable cases when ahConstNode doesn't equal contiguousArrayHeaderSizeInBytes
1560
if (indexRequiresAdjustment)
1561
{
1562
traceMsg(comp, "headerConst node value doesn't equal contiguous array header size %p. Abandoning reduction.\n", ahConstNode);
1563
return false;
1564
}
1565
1566
if (avoidTransformingStringLoops(comp))
1567
{
1568
traceMsg(comp, "Abandoning reduction because of functional problems when String compression is enabled in Java 8 SR5\n");
1569
return false;
1570
}
1571
1572
if (count == -1) // single delimiter which is not constant value
1573
{
1574
TR_CISCNode *tableCISCNode = tBoolTable->getChild(1);
1575
tableNode = createLoad(tableCISCNode->getHeadOfTrNodeInfo()->_node);
1576
if (disptrace) traceMsg(comp, "Single non-constant delimiter found. Setting %p as tableNode.\n", comp->getDebug()->getName(tableCISCNode->getHeadOfTrNodeInfo()->_node));
1577
}
1578
else if (count == 1) // single delimiter
1579
{
1580
tableNode = NULL;
1581
int32_t i = 0;
1582
for (i = 0; i < 65536; i++)
1583
{
1584
if (tmpTable[i])
1585
{
1586
tableNode = TR::Node::create( baseRepNode, TR::iconst, 0, i); // prepare for SRST / SRSTU
1587
break;
1588
}
1589
}
1590
TR_ASSERT(tableNode, "error!!!");
1591
if (disptrace) traceMsg(comp, "Single delimiter found. Setting 'iconst %d' [%p] as tableNode.\n", i, comp->getDebug()->getName(tableNode));
1592
}
1593
else
1594
{
1595
// the static table currently cannot be relocated
1596
if (comp->compileRelocatableCode())
1597
{
1598
if (disptrace) traceMsg(comp, "Abandoning reduction since we can't relocate the static table\n");
1599
return false;
1600
}
1601
tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace); // function table for TRT
1602
}
1603
1604
// prepare the TR::arraytranslateAndTest node
1605
TR::Node *findBytesNode = TR::Node::create(trNode, TR::arraytranslateAndTest, 5);
1606
findBytesNode->setArrayTRT(true);
1607
TR::Node *baseNode = createLoad(baseRepNode);
1608
1609
TR::Node *indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
1610
if (indexRequiresAdjustment)
1611
{
1612
// if refCount > 1, then this means that an old
1613
// value of the iv is being used in the array index
1614
//
1615
if (ahConstCISCNode)
1616
{
1617
// aiadd
1618
// aload
1619
// isub <--- ahConstCISCNode->getHeadOfTrNode()
1620
// index <--- indexLoadNode
1621
// headerConst
1622
//
1623
TR::Node *indexParentNode=0;
1624
int32_t childNum=0;
1625
if (trans->searchNodeInTrees(ahConstCISCNode->getHeadOfTrNode(), indexNode, &indexParentNode, &childNum))
1626
{
1627
TR::Node *indexLoadNode = indexParentNode->getChild(childNum);
1628
if (indexLoadNode->getOpCode().isLoadVar() &&
1629
indexLoadNode->getReferenceCount() > 1)
1630
indexNode = indexLoadNode;
1631
}
1632
}
1633
1634
int32_t width = isTRT2Char ? 2 : 1;
1635
indexNode = TR::Node::create(TR::isub, 2,
1636
indexNode,
1637
TR::Node::create(indexRepNode, TR::iconst, 0,
1638
((TR::Compiler->om.contiguousArrayHeaderSizeInBytes() - ahValue)/width))
1639
);
1640
}
1641
1642
TR::Node *alenNode = TR::Node::create( baseRepNode, TR::arraylength, 1);
1643
alenNode->setAndIncChild(0, baseNode);
1644
////findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateFindBytesSymbol());
1645
findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateAndTestSymbol());
1646
findBytesNode->setAndIncChild(0, baseNode);
1647
findBytesNode->setAndIncChild(1, createI2LIfNecessary(comp, trans->isGenerateI2L(), indexNode));
1648
findBytesNode->setAndIncChild(2, tableNode);
1649
findBytesNode->setAndIncChild(3, createI2LIfNecessary(comp, trans->isGenerateI2L(), alenNode));
1650
////findBytesNode->setElementChar(isTRT2Byte);
1651
findBytesNode->setCharArrayTRT(isTRT2Char);
1652
1653
TR_CISCNode *icmpgeCISCnode = NULL;
1654
TrNodeInfo *icmpgeRepInfo = NULL;
1655
TR::Node *lenRepNode = NULL;
1656
1657
// There is no ificmpge node.
1658
if (listT->isEmpty())
1659
{
1660
findBytesNode->setNumChildren(4); // we don't need to prepare the fifth parameter "endLen"
1661
}
1662
else
1663
{
1664
if (disptrace) traceMsg(comp,"Loop has TR::ificmpge for comparing the index.\n");
1665
TR_CISCNode *lenNode;
1666
if (listT->isSingleton())
1667
{
1668
icmpgeCISCnode = listT->getListHead()->getData();
1669
lenNode = icmpgeCISCnode->getChild(1);
1670
}
1671
else
1672
{
1673
ListIterator<TR_CISCNode> li(listT);
1674
TR_CISCNode *n;
1675
lenNode = NULL;
1676
// find icmpge in the candidate region
1677
for (n = li.getFirst(); n; n = li.getNext())
1678
{
1679
if (trans->getCandidateRegion()->isIncluded(n))
1680
{
1681
icmpgeCISCnode = n;
1682
lenNode = n->getChild(1);
1683
break;
1684
}
1685
}
1686
TR_ASSERT(lenNode != NULL, "error!");
1687
}
1688
// set the fifth parameter "endLen"
1689
icmpgeRepInfo = icmpgeCISCnode->getHeadOfTrNodeInfo();
1690
lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);
1691
if (modLength) lenRepNode = createOP2(comp, TR::isub, lenRepNode,
1692
TR::Node::create( baseRepNode, TR::iconst, 0, -modLength));
1693
findBytesNode->setAndIncChild(4, createI2LIfNecessary(comp, trans->isGenerateI2L(), lenRepNode));
1694
}
1695
TR::Node * top = TR::Node::create(TR::treetop, 1, findBytesNode);
1696
TR::Node * storeToIndVar = TR::Node::createStore(indexVarSymRef, findBytesNode);
1697
1698
// create Nodes if there are multiple exit points.
1699
TR::Node *icmpgeNode = NULL;
1700
TR::TreeTop *failDest = NULL;
1701
TR::TreeTop *okDest = NULL;
1702
TR::Block *compensateBlock0 = NULL;
1703
TR::Block *compensateBlock1 = NULL;
1704
if (isNeedGenIcmpge)
1705
{
1706
if (disptrace) traceMsg(comp,"Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");
1707
TR_ASSERT(icmpgeRepInfo, "Not implemented yet"); // current restriction
1708
okDest = retSameExit;
1709
failDest = icmpgeCISCnode->getDestination();
1710
// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()
1711
if (isCompensateCode)
1712
{
1713
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
1714
compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
1715
compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
1716
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));
1717
okDest = compensateBlock0->getEntry();
1718
failDest = compensateBlock1->getEntry();
1719
}
1720
TR_ASSERT(okDest != NULL, "error! okDest == NULL");
1721
TR_ASSERT(failDest != NULL, "error! failDest == NULL");
1722
if (disptrace) traceMsg(comp, "Block: okDest=%d failDest=%d\n", okDest->getEnclosingBlock()->getNumber(),
1723
failDest->getEnclosingBlock()->getNumber());
1724
TR_ASSERT(okDest != failDest, "error! okDest == failDest");
1725
1726
// It actually generates "ificmplt" (NOT ificmpge!) in order to suppress a redundant goto block.
1727
icmpgeNode = TR::Node::createif(TR::ificmplt,
1728
TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),
1729
lenRepNode,
1730
okDest);
1731
}
1732
1733
// Check existence of nullchk
1734
// Insert (nullchk), findbytes, and result store instructions
1735
listT = P2T + P->getImportantNode(2)->getID();
1736
TR::TreeTop *last;
1737
TR::TreeTop *nextTreeTop1 = trTreeTop->getNextTreeTop();
1738
if (nextTreeTop1 == block->getExit())
1739
{
1740
nextTreeTop1 = TR::TreeTop::create(comp); // need to create
1741
}
1742
if (listT->isEmpty()) // no NULLCHK
1743
{
1744
last = trans->removeAllNodes(trTreeTop, block->getExit());
1745
last->join(block->getExit());
1746
block = trans->insertBeforeNodes(block);
1747
last = block->getLastRealTreeTop();
1748
last->join(trTreeTop);
1749
trTreeTop->setNode(top);
1750
trTreeTop->join(nextTreeTop1);
1751
nextTreeTop1->setNode(storeToIndVar);
1752
nextTreeTop1->join(block->getExit());
1753
}
1754
else
1755
{
1756
if (disptrace) traceMsg(comp,"NULLCHK is found!\n");
1757
// a NULLCHK was found, so just create a NULLCHK on
1758
// the arraybase
1759
// NULLCHK
1760
// PassThrough
1761
// baseNode
1762
//
1763
///TR_CISCNode *nullNode = listT->getListHead()->getData();
1764
///TR::Node *nullRepNode = nullNode->getHeadOfTrNodeInfo()->_node;
1765
TR::Node *dupNullRepNode = baseNode->duplicateTree();
1766
dupNullRepNode = TR::Node::create(TR::PassThrough, 1, dupNullRepNode);
1767
dupNullRepNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, dupNullRepNode, comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol()));
1768
TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);
1769
last = trans->removeAllNodes(trTreeTop, block->getExit());
1770
last->join(block->getExit());
1771
block = trans->insertBeforeNodes(block);
1772
last = block->getLastRealTreeTop();
1773
last->join(trTreeTop);
1774
trTreeTop->setNode(dupNullRepNode);
1775
trTreeTop->join(nextTreeTop1);
1776
nextTreeTop1->setNode(top);
1777
nextTreeTop1->join(nextTreeTop2);
1778
nextTreeTop2->setNode(storeToIndVar);
1779
nextTreeTop2->join(block->getExit());
1780
}
1781
1782
// insert compensation code generated by non-idiom-specific transformation
1783
block = trans->insertAfterNodes(block);
1784
1785
if (isNeedGenIcmpge)
1786
{
1787
block->append(TR::TreeTop::create(comp, icmpgeNode));
1788
if (isCompensateCode)
1789
{
1790
cfg->setStructure(NULL);
1791
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
1792
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
1793
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]
1794
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]
1795
cfg->insertBefore(compensateBlock0, orgNextBlock);
1796
cfg->insertBefore(compensateBlock1, compensateBlock0);
1797
cfg->join(block, compensateBlock1);
1798
}
1799
}
1800
else if (isCompensateCode)
1801
{
1802
block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]
1803
}
1804
1805
// set successor edge(s) to the original block
1806
if (!isNeedGenIcmpge)
1807
{
1808
trans->setSuccessorEdge(block, target);
1809
}
1810
else
1811
{
1812
trans->setSuccessorEdges(block,
1813
failDest->getEnclosingBlock(),
1814
okDest->getEnclosingBlock());
1815
}
1816
1817
return true;
1818
}
1819
1820
1821
/*************************************************************************************
1822
Corresponding Java-like pseudocode
1823
int i, end;
1824
byte byteArray[ ];
1825
while(true){
1826
if (booltable(byteArray[i])) break;
1827
i++;
1828
if (i >= end) break; // optional
1829
}
1830
1831
Note 1: The wildcard node "booltable" matches if-statements or switch-case statements
1832
whose operands consist of the argument of booltable and any constants.
1833
Note 2: "optional" can be excluded in an input program.
1834
*************************************************************************************/
1835
// Builds the idiom pattern graph named "TRT" for the byte-array scan loop shown in the
// pseudocode comment preceding this function.
//
//   c    - compilation whose trMemory() is handed to every graph/node constructor
//   ctrl - control value forwarded unchanged to the createIdiom* helpers
//
// Returns the newly created graph, with CISCTransform2FindBytes installed as its transformer.
TR_PCISCGraph *
makeTRTGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT", 0, 16);

   // Node constructor columns after the memory handle:
   //   opc, data type, id, dagId, #cfg, #child, other/pred/children
   // The statement order below fixes the id each node receives from incNumNodes().

   // ---- operands (one dagId each, 8 down to 3) ----
   TR_PCISCNode *srcArray = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(srcArray);     // array base

   TR_PCISCNode *idx = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 7, 0, 0, 0);
   graph->addNode(idx);          // array index

   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 6, 0, 0);
   graph->addNode(limit);        // length (optional)

   TR_PCISCNode *hdrConst = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 5, 0, 0, 0);
   graph->addNode(hdrConst);     // array header

   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, -1);
   graph->addNode(minusOne);     // constant -1 handed to createIdiomDecVarInLoop below

   TR_PCISCNode *stride = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 3, 0, 0);
   graph->addNode(stride);       // Multiply Factor

   // ---- loop entry ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);

   // ---- loop body (all dagId 1), each node chained to its predecessor ----
   TR_PCISCNode *nullCheck = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::NULLCHK, TR::NoType, graph->incNumNodes(), 1, 1, 1, entryNode, srcArray);
   graph->addNode(nullCheck);    // optional

   TR_PCISCNode *lengthOf = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::arraylength, TR::NoType, graph->incNumNodes(), 1, 1, 1, nullCheck, srcArray);
   graph->addNode(lengthOf);

   TR_PCISCNode *boundCheck = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::BNDCHK, TR::NoType, graph->incNumNodes(), 1, 1, 2, lengthOf, lengthOf, idx);
   graph->addNode(boundCheck);

   TR_PCISCNode *elemLoad = createIdiomArrayLoadInLoop(
      graph, ctrl, 1, boundCheck, TR_ibcload, TR::NoType, srcArray, idx, hdrConst, stride);

   TR_PCISCNode *widen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, elemLoad, elemLoad);
   graph->addNode(widen);

   TR_PCISCNode *tableTest = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_booltable, TR::NoType, graph->incNumNodes(), 1, 2, 1, widen, widen);
   graph->addNode(tableTest);

   TR_PCISCNode *idxUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, tableTest, idx, minusOne);

   TR_PCISCNode *limitTest = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, idxUpdate, idx, limit);
   graph->addNode(limitTest);    // optional

   // ---- loop exit ----
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // Second successor of the table test leaves the loop; the limit test either
   // goes back to the entry's first successor or leaves the loop.
   tableTest->setSucc(1, exitNode);
   limitTest->setSuccs(entryNode->getSucc(0), exitNode);

   // Nodes an input loop is allowed to omit.
   limit->setIsOptionalNode();
   limitTest->setIsOptionalNode();
   nullCheck->setIsOptionalNode();

   widen->setIsChildDirectlyConnected();
   limitTest->setIsChildDirectlyConnected();

   graph->setSpecialCareNode(0, tableTest); // TR_booltable
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(tableTest, limitTest, nullCheck, elemLoad);
   graph->setNumDagIds(9);                  // dagIds 0..8 are used above
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2FindBytes);
   graph->setInhibitAfterVersioning();
   graph->setAspects(isub|bndchk, existAccess, 0);
   graph->setNoAspects(call|bitop1, 0, existAccess);
   graph->setMinCounts(1, 1, 0);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, true);
   return graph;
   }
1885
1886
1887
// Builds the idiom pattern graph named "TRT2".
//
// Compared with makeTRTGraph in this file, the array length operand of the BNDCHK is an
// optional TR_quasiConst2 leaf rather than a TR::arraylength node, the BNDCHK itself is
// optional, the graph is inhibited before (not after) versioning, and the required
// aspects do not include bndchk.
//
//   c    - compilation whose trMemory() is handed to every graph/node constructor
//   ctrl - control value forwarded unchanged to the createIdiom* helpers
//
// Returns the newly created graph, with CISCTransform2FindBytes installed as its transformer.
TR_PCISCGraph *
makeTRTGraph2(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT2", 0, 16);

   // Node constructor columns after the memory handle:
   //   opc, data type, id, dagId, #cfg, #child, other/pred/children

   // Operands, dagIds 9 down to 3.
   TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0);
   tgt->addNode(byteArray);   // array base
   TR_PCISCNode *index = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0);
   tgt->addNode(index);       // array index
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 7, 0, 0);
   tgt->addNode(endValue);    // length (optional)
   TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0);
   tgt->addNode(arrayLen);    // arraylength (optional)
   TR_PCISCNode *arrayHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0);
   tgt->addNode(arrayHeader); // array header
   TR_PCISCNode *constMinus1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1);
   tgt->addNode(constMinus1);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 3, 0, 0);
   tgt->addNode(mulFactor);   // Multiply Factor

   // Entry node, dagId 2.
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entryNode);

   // Loop body, dagId 1; every node names the previous one as its predecessor.
   TR_PCISCNode *nullCheck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, entryNode, byteArray);
   tgt->addNode(nullCheck);   // optional
   TR_PCISCNode *boundCheck = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nullCheck, arrayLen, index);
   tgt->addNode(boundCheck);  // optional
   TR_PCISCNode *byteLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, boundCheck, TR_ibcload, TR::NoType, byteArray, index, arrayHeader, mulFactor);
   TR_PCISCNode *convNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, byteLoad, byteLoad);
   tgt->addNode(convNode);
   TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, convNode, convNode);
   tgt->addNode(boolTable);
   TR_PCISCNode *indexStore = createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, index, constMinus1);
   TR_PCISCNode *endTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, indexStore, index, endValue);
   tgt->addNode(endTest);     // optional

   // Exit node, dagId 0.
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // Wire the two conditional nodes: both can leave to the exit node, and the
   // end test can also return to the entry's first successor.
   boolTable->setSucc(1, exitNode);
   endTest->setSuccs(entryNode->getSucc(0), exitNode);

   // Nodes that may be absent from a matching input loop.
   endValue->setIsOptionalNode();
   endTest->setIsOptionalNode();
   arrayLen->setIsOptionalNode();
   nullCheck->setIsOptionalNode();
   boundCheck->setIsOptionalNode();

   convNode->setIsChildDirectlyConnected();
   endTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, boolTable); // TR_booltable
   tgt->setEntryNode(entryNode);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(boolTable, endTest, nullCheck, byteLoad);
   tgt->setNumDagIds(10);                 // dagIds 0..9 are used above
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2FindBytes);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub, existAccess, 0);
   tgt->setNoAspects(call|bitop1, 0, existAccess);
   tgt->setMinCounts(1, 1, 0);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, true);
   return tgt;
   }
1937
1938
1939
/****************************************************************************************
1940
Corresponding Java-like pseudocode
1941
int i, end;
1942
char charArray[ ]; // char array
1943
while(true){
1944
if (booltable(charArray[i])) break;
1945
i++;
1946
if (i >= end) break; // optional
1947
}
1948
1949
Note 1: There is one limitation. Only when the booltable matches if-statements comparing
1950
to the constants 1 through 255, the transformation will succeed.
1951
Note 2: Currently, the generated code checks whether the character found by TRT (or SRST)
1952
is a delimiter.
1953
Note 3: New instructions that directly support a 2-byte array will improve current
1954
drawbacks described in Notes 1 and 2.
1955
****************************************************************************************/
1956
// Builds the idiom pattern graph named "TRT2Byte" for the char-array scan loop shown in
// the pseudocode comment preceding this function. Elements are loaded with TR::sloadi
// (TR::Int16) and widened with TR::su2i before reaching the booltable node.
//
//   c    - compilation whose trMemory() is handed to every graph/node constructor
//   ctrl - control value forwarded unchanged to the createIdiom* helpers
//
// Returns the newly created graph, with CISCTransform2FindBytes installed as its transformer.
TR_PCISCGraph *
makeTRT2ByteGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *pattern = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT2Byte", 0, 16);

   // Node constructor columns after the memory handle:
   //   opc, data type, id, dagId, #cfg, #child, other/pred/children

   //
   // Operands (dagIds 9 down to 3)
   //
   TR_PCISCNode *chars = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_arraybase, TR::NoType, pattern->incNumNodes(), 9, 0, 0, 0);
   pattern->addNode(chars); // array base

   // NOTE(review): the trailing argument here is 1, whereas the index variable in
   // makeTRTGraph and makeTRTGraph2 passes 0 -- confirm this difference is intentional.
   TR_PCISCNode *pos = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_variable, TR::NoType, pattern->incNumNodes(), 8, 0, 0, 1);
   pattern->addNode(pos); // array index

   TR_PCISCNode *stop = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_quasiConst2, TR::NoType, pattern->incNumNodes(), 7, 0, 0);
   pattern->addNode(stop); // length (optional)

   TR_PCISCNode *lenOperand = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_quasiConst2, TR::NoType, pattern->incNumNodes(), 6, 0, 0);
   pattern->addNode(lenOperand); // arraylength (optional)

   TR_PCISCNode *header = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_ahconst, TR::NoType, pattern->incNumNodes(), 5, 0, 0, 0);
   pattern->addNode(header); // array header

   TR_PCISCNode *step = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::iconst, TR::Int32, pattern->incNumNodes(), 4, 0, 0, -1);
   pattern->addNode(step);

   // No addNode() call follows this helper in this function.
   TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(pattern, ctrl, pattern->incNumNodes(), 3, 2); // element size

   //
   // Entry (dagId 2)
   //
   TR_PCISCNode *loopEntry = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_entrynode, TR::NoType, pattern->incNumNodes(), 2, 1, 0);
   pattern->addNode(loopEntry);

   //
   // Loop body (dagId 1), chained predecessor to successor
   //
   TR_PCISCNode *nullGuard = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::NULLCHK, TR::NoType, pattern->incNumNodes(), 1, 1, 1, loopEntry, chars);
   pattern->addNode(nullGuard); // optional

   TR_PCISCNode *rangeGuard = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::BNDCHK, TR::NoType, pattern->incNumNodes(), 1, 1, 2, nullGuard, lenOperand, pos);
   pattern->addNode(rangeGuard); // optional

   TR_PCISCNode *charLoad = createIdiomArrayLoadInLoop(
         pattern, ctrl, 1, rangeGuard, TR::sloadi, TR::Int16, chars, pos, header, elemSize);

   TR_PCISCNode *toInt = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::su2i, TR::Int32, pattern->incNumNodes(), 1, 1, 1, charLoad, charLoad);
   pattern->addNode(toInt);

   TR_PCISCNode *delimTest = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_booltable, TR::NoType, pattern->incNumNodes(), 1, 2, 1, toInt, toInt);
   pattern->addNode(delimTest);

   TR_PCISCNode *posUpdate = createIdiomDecVarInLoop(pattern, ctrl, 1, delimTest, pos, step);

   TR_PCISCNode *stopTest = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_ifcmpall, TR::NoType, pattern->incNumNodes(), 1, 2, 2, posUpdate, pos, stop);
   pattern->addNode(stopTest); // optional

   //
   // Exit (dagId 0)
   //
   TR_PCISCNode *loopExit = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR_exitnode, TR::NoType, pattern->incNumNodes(), 0, 0, 0);
   pattern->addNode(loopExit);

   // Control-flow edges of the two conditional nodes.
   delimTest->setSucc(1, loopExit);
   stopTest->setSuccs(loopEntry->getSucc(0), loopExit);

   // Nodes that a matching input loop may leave out.
   stop->setIsOptionalNode();
   stopTest->setIsOptionalNode();
   lenOperand->setIsOptionalNode();
   nullGuard->setIsOptionalNode();
   rangeGuard->setIsOptionalNode();

   toInt->setIsChildDirectlyConnected();
   stopTest->setIsChildDirectlyConnected();

   pattern->setSpecialCareNode(0, delimTest); // TR_booltable
   pattern->setEntryNode(loopEntry);
   pattern->setExitNode(loopExit);
   pattern->setImportantNodes(delimTest, stopTest, nullGuard, charLoad);
   pattern->setNumDagIds(10);                 // dagIds 0..9 are used above
   pattern->createInternalData(1);

   pattern->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   pattern->setTransformer(CISCTransform2FindBytes);
   pattern->setInhibitBeforeVersioning();
   pattern->setAspects(isub|mul, ILTypeProp::Size_2, 0);
   pattern->setNoAspects(call|bitop1, 0, existAccess);
   pattern->setMinCounts(1, 1, 0);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   pattern->setHotness(warm, true);
   return pattern;
   }
2009
2010
2011
//////////////////////////////////////////////////////////////////////////
2012
//////////////////////////////////////////////////////////////////////////
2013
//////////////////////////////////////////////////////////////////////////
2014
2015
//*****************************************************************************************
2016
// IL code generation for exploiting the TRT (or SRST) instruction
2017
// This is the case where the function table is prepared by the user program.
2018
// Input: ImportantNodes(0) - booltable
2019
// ImportantNodes(1) - ificmpge
2020
// ImportantNodes(2) - NULLCHK
2021
//*****************************************************************************************
2022
bool
2023
CISCTransform2NestedArrayFindBytes(TR_CISCTransformer *trans)
2024
{
2025
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
2026
// arraytranslateAndTest is overloaded with a flag
2027
//
2028
const bool disptrace = DISPTRACE(trans);
2029
TR::Node *trNode;
2030
TR::TreeTop *trTreeTop;
2031
TR::Block *block;
2032
TR_CISCGraph *P = trans->getP();
2033
List<TR_CISCNode> *P2T = trans->getP2T();
2034
TR::Compilation *comp = trans->comp();
2035
int lenForDynamic = trans->isInitializeNegative128By1() ? 128 : 256;
2036
2037
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
2038
2039
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
2040
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
2041
2042
trans->findFirstNode(&trTreeTop, &trNode, &block);
2043
if (!block) return false; // cannot find
2044
2045
if (isLoopPreheaderLastBlockInMethod(comp, block))
2046
{
2047
traceMsg(comp, "Bailing CISCTransform2NestedArrayFindBytes due to null TT - might be a preheader in last block of method\n");
2048
return false;
2049
}
2050
2051
TR::Block *target = trans->analyzeSuccessorBlock();
2052
// Currently, it allows only a single successor.
2053
if (!target) return false;
2054
2055
uint8_t tmpTable[256];
2056
int count;
2057
if ((count = trans->analyzeByteBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1))) <= 0)
2058
return false;
2059
if (disptrace) dump256Bytes(tmpTable, comp);
2060
2061
bool isMapDirectlyUsed = isFitTRTFunctionTable(tmpTable);
2062
bool isGenerateTROO = !isMapDirectlyUsed;
2063
2064
// Currently, we support only if the map table can be directly used as the function table.
2065
// Thus, the following code is tentative.
2066
//
2067
if (!isMapDirectlyUsed) return false;
2068
//
2069
2070
if (avoidTransformingStringLoops(comp))
2071
{
2072
traceMsg(comp, "Abandoning reduction because of functional problems when String compression is enabled in Java 8 SR5\n");
2073
return false;
2074
}
2075
2076
TR::Node *baseRepNode, *indexRepNode, *outerBaseRepNode;
2077
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &outerBaseRepNode);
2078
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
2079
TR::SymbolReference * outerBaseVarSymRef = outerBaseRepNode->getSymbolReference();
2080
2081
uint8_t *tableOuterResult = NULL;
2082
if (!isMapDirectlyUsed)
2083
{
2084
// TODO: To make this work on non-Java environments, the table should be in the code cache, not persistent memory
2085
tableOuterResult= (uint8_t *)comp->jitPersistentAlloc(256);
2086
if (trans->isInitializeNegative128By1())
2087
memset(tableOuterResult+128, 1, 128);
2088
}
2089
2090
// Currently, TROO is never generated here. In this case, it returned with the failure above.
2091
TR::Node * tableNode;
2092
TR::Node * topOfTranslateNode = NULL;
2093
if (isGenerateTROO)
2094
{
2095
tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace);
2096
//
2097
// Prepare TR::arraytranslate
2098
//
2099
TR::Node * inputNode = createArrayTopAddressTree(comp, trans->isGenerateI2L(), outerBaseRepNode);
2100
TR::Node * outputNode = TR::Node::aconst(baseRepNode, (uintptr_t)tableOuterResult);
2101
TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xff);
2102
TR::Node * lengthNode = TR::Node::create( baseRepNode, TR::iconst, 0, lenForDynamic);
2103
TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);
2104
2105
TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
2106
translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
2107
translateNode->setAndIncChild(0, inputNode);
2108
translateNode->setAndIncChild(1, outputNode);
2109
translateNode->setAndIncChild(2, tableNode);
2110
translateNode->setAndIncChild(3, termCharNode);
2111
translateNode->setAndIncChild(4, lengthNode);
2112
translateNode->setAndIncChild(5, stoppingNode);
2113
2114
translateNode->setSourceIsByteArrayTranslate(true);
2115
translateNode->setTargetIsByteArrayTranslate(true);
2116
translateNode->setTermCharNodeIsHint(false);
2117
translateNode->setSourceCellIsTermChar(false);
2118
translateNode->setTableBackedByRawStorage(true);
2119
topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);
2120
}
2121
2122
//
2123
// Prepare TR::arraytranslateAndTest
2124
//
2125
TR::Node *findBytesNode = TR::Node::create(trNode, TR::arraytranslateAndTest, 5);
2126
findBytesNode->setArrayTRT(true);
2127
TR::Node *baseNode = createLoad(baseRepNode);
2128
TR::Node *indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
2129
TR::Node *alenNode = TR::Node::create( baseRepNode, TR::arraylength, 1);
2130
alenNode->setAndIncChild(0, baseNode);
2131
// Currently, it always uses "isMapDirectlyUsed" version.
2132
if (isMapDirectlyUsed)
2133
{
2134
tableNode = createArrayTopAddressTree(comp, trans->isGenerateI2L(), outerBaseRepNode);
2135
}
2136
else
2137
{
2138
tableNode = TR::Node::create( baseRepNode, TR::aconst, (uintptr_t)tableOuterResult);
2139
}
2140
////findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateFindBytesSymbol());
2141
findBytesNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateAndTestSymbol());
2142
findBytesNode->setAndIncChild(0, baseNode);
2143
findBytesNode->setAndIncChild(1, createI2LIfNecessary(comp, trans->isGenerateI2L(), indexNode));
2144
findBytesNode->setAndIncChild(2, tableNode);
2145
findBytesNode->setAndIncChild(3, createI2LIfNecessary(comp, trans->isGenerateI2L(), alenNode));
2146
findBytesNode->setCharArrayTRT(false);
2147
2148
List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID();
2149
if (listT->isEmpty())
2150
{
2151
findBytesNode->setNumChildren(4);
2152
}
2153
else
2154
{
2155
if (disptrace) traceMsg(comp,"TR::ificmpge for comaring the index is found!\n");
2156
TR_CISCNode *lenNode;
2157
TR::Node *lenRepNode;
2158
if (listT->isSingleton())
2159
{
2160
lenNode = listT->getListHead()->getData()->getChild(1);
2161
}
2162
else
2163
{
2164
ListIterator<TR_CISCNode> li(listT);
2165
TR_CISCNode *n;
2166
lenNode = NULL;
2167
for (n = li.getFirst(); n; n = li.getNext())
2168
{
2169
if (trans->getCandidateRegion()->isIncluded(n))
2170
{
2171
if (!lenNode)
2172
{
2173
lenNode = n->getChild(1);
2174
}
2175
}
2176
}
2177
TR_ASSERT(lenNode != NULL, "error!");
2178
}
2179
lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);
2180
findBytesNode->setAndIncChild(4, createI2LIfNecessary(comp, trans->isGenerateI2L(), lenRepNode));
2181
}
2182
TR::Node * top = TR::Node::create(TR::treetop, 1, findBytesNode);
2183
TR::Node * storeToIndVar = TR::Node::createStore(indexVarSymRef, findBytesNode);
2184
2185
// Check existence of nullchk
2186
// Insert (nullchk), findbytes, and result store instructions
2187
listT = P2T + P->getImportantNode(2)->getID();
2188
TR::TreeTop *last;
2189
2190
if (listT->isEmpty()) // no NULLCHK
2191
{
2192
TR::TreeTop *nextTreeTop1 = TR::TreeTop::create(comp);
2193
last = trans->removeAllNodes(trTreeTop, block->getExit());
2194
last->join(block->getExit());
2195
block = trans->insertBeforeNodes(block);
2196
last = block->getLastRealTreeTop();
2197
last->join(trTreeTop);
2198
if (topOfTranslateNode)
2199
{
2200
TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);
2201
trTreeTop->setNode(topOfTranslateNode);
2202
trTreeTop->join(nextTreeTop1);
2203
nextTreeTop1->setNode(top);
2204
nextTreeTop1->join(nextTreeTop2);
2205
nextTreeTop2->setNode(storeToIndVar);
2206
nextTreeTop2->join(block->getExit());
2207
}
2208
else
2209
{
2210
trTreeTop->setNode(top);
2211
trTreeTop->join(nextTreeTop1);
2212
nextTreeTop1->setNode(storeToIndVar);
2213
nextTreeTop1->join(block->getExit());
2214
}
2215
}
2216
else
2217
{
2218
if (disptrace) traceMsg(comp,"NULLCHK is found!\n");
2219
TR::TreeTop *nextTreeTop1 = TR::TreeTop::create(comp);
2220
TR::TreeTop *nextTreeTop2 = TR::TreeTop::create(comp);
2221
// a NULLCHK was found, so just create a NULLCHK on
2222
// the arraybase
2223
// NULLCHK
2224
// PassThrough
2225
// baseNode
2226
//
2227
///TR_CISCNode *nullNode = listT->getListHead()->getData();
2228
///TR::Node *nullRepNode = nullNode->getHeadOfTrNodeInfo()->_node;
2229
TR::Node *dupNullRepNode = baseNode->duplicateTree();
2230
dupNullRepNode = TR::Node::create(TR::PassThrough, 1, dupNullRepNode);
2231
dupNullRepNode = TR::Node::createWithSymRef(TR::NULLCHK, 1, 1, dupNullRepNode, comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol()));
2232
2233
last = trans->removeAllNodes(trTreeTop, block->getExit());
2234
last->join(block->getExit());
2235
block = trans->insertBeforeNodes(block);
2236
last = block->getLastRealTreeTop();
2237
last->join(trTreeTop);
2238
trTreeTop->setNode(dupNullRepNode);
2239
trTreeTop->join(nextTreeTop1);
2240
if (topOfTranslateNode)
2241
{
2242
TR::TreeTop *nextTreeTop3 = TR::TreeTop::create(comp);
2243
nextTreeTop1->setNode(topOfTranslateNode);
2244
nextTreeTop1->join(nextTreeTop2);
2245
nextTreeTop2->setNode(top);
2246
nextTreeTop2->join(nextTreeTop3);
2247
nextTreeTop3->setNode(storeToIndVar);
2248
nextTreeTop3->join(block->getExit());
2249
}
2250
else
2251
{
2252
nextTreeTop1->setNode(top);
2253
nextTreeTop1->join(nextTreeTop2);
2254
nextTreeTop2->setNode(storeToIndVar);
2255
nextTreeTop2->join(block->getExit());
2256
}
2257
}
2258
block = trans->insertAfterNodes(block); // insert compensation code generated by non-idiom-specific transformation
2259
block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]
2260
2261
trans->setSuccessorEdge(block, target);
2262
return true;
2263
}
2264
2265
2266
2267
/****************************************************************************************
2268
Corresponding Java-like pseudocode
2269
int i, end;
2270
byte byteArray[ ], map[ ];
2271
while(true){
2272
if (map[byteArray[i] & 0xff] != 0) break;
2273
i++;
2274
if (i >= end) break; // optional
2275
}
2276
****************************************************************************************/
2277
// Builds the idiom pattern graph named "TRT4NestedArray" for the loop shown in the
// pseudocode comment preceding this function: a byte loaded from one array is used as
// the index of a second load from a map array, and the result feeds the booltable node.
//
//   c    - compilation whose trMemory() is handed to every graph/node constructor
//   ctrl - control value forwarded unchanged to the createIdiom* helpers
//
// Returns the newly created graph, with CISCTransform2NestedArrayFindBytes installed as
// its transformer.
TR_PCISCGraph *
makeTRT4NestedArrayGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *g = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT4NestedArray", 0, 16);

   // Node constructor columns after the memory handle:
   //   opc, data type, id, dagId, #cfg, #child, other/pred/children

   // -- operands: dagIds 9 down to 3 --
   TR_PCISCNode *inner = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_arraybase, TR::NoType, g->incNumNodes(), 9, 0, 0, 0);
   g->addNode(inner);        // array base

   TR_PCISCNode *cursor = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, g->incNumNodes(), 8, 0, 0, 0);
   g->addNode(cursor);       // array index

   TR_PCISCNode *outer = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, g->incNumNodes(), 7, 0, 0, 1);
   g->addNode(outer);        // outer array base

   TR_PCISCNode *bound = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_quasiConst2, TR::NoType, g->incNumNodes(), 6, 0, 0);
   g->addNode(bound);        // length (optional)

   TR_PCISCNode *hdr = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_ahconst, TR::NoType, g->incNumNodes(), 5, 0, 0, 0);
   g->addNode(hdr);          // array header

   TR_PCISCNode *negOne = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(),TR::iconst, TR::Int32, g->incNumNodes(), 4, 0, 0, -1);
   g->addNode(negOne);

   TR_PCISCNode *scale = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(),TR_allconst, TR::NoType, g->incNumNodes(),3, 0, 0);
   g->addNode(scale);        // Multiply Factor

   // -- entry: dagId 2 --
   TR_PCISCNode *head = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_entrynode, TR::NoType, g->incNumNodes(), 2, 1, 0);
   g->addNode(head);

   // -- loop body: dagId 1, each node chained to the one before it --
   TR_PCISCNode *nchk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::NULLCHK, TR::NoType, g->incNumNodes(), 1, 1, 1, head, inner);
   g->addNode(nchk);         // optional

   TR_PCISCNode *innerLen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::arraylength, TR::NoType, g->incNumNodes(),1, 1, 1, nchk, inner);
   g->addNode(innerLen);

   TR_PCISCNode *bchk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::BNDCHK, TR::NoType, g->incNumNodes(), 1, 1, 2, innerLen, innerLen, cursor);
   g->addNode(bchk);

   // First load: inner[cursor]
   TR_PCISCNode *innerLoad = createIdiomArrayLoadInLoop(
      g, ctrl, 1, bchk, TR_ibcload, TR::NoType, inner, cursor, hdr, scale);

   TR_PCISCNode *innerConv = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_conversion, TR::NoType, g->incNumNodes(), 1, 1, 1, innerLoad, innerLoad);
   g->addNode(innerConv);

   // Second load: outer[<converted first load>]
   TR_PCISCNode *outerLoad = createIdiomArrayLoadInLoop(
      g, ctrl, 1, innerConv, TR_ibcload, TR::NoType, outer, innerConv, hdr, scale);

   TR_PCISCNode *outerConv = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_conversion, TR::NoType, g->incNumNodes(), 1, 1, 1, outerLoad, outerLoad);
   g->addNode(outerConv);

   TR_PCISCNode *tableCheck = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_booltable, TR::NoType, g->incNumNodes(), 1, 2, 1, outerConv, outerConv);
   g->addNode(tableCheck);

   TR_PCISCNode *cursorStore = createIdiomDecVarInLoop(g, ctrl, 1, tableCheck, cursor, negOne);

   TR_PCISCNode *boundTest = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::ificmpge, TR::NoType, g->incNumNodes(), 1, 2, 2, cursorStore, cursor, bound);
   g->addNode(boundTest);    // optional

   // -- exit: dagId 0 --
   TR_PCISCNode *tail = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_exitnode, TR::NoType, g->incNumNodes(), 0, 0, 0);
   g->addNode(tail);

   // Control-flow edges of the two conditional nodes.
   tableCheck->setSucc(1, tail);
   boundTest->setSuccs(head->getSucc(0), tail);

   // Nodes that a matching input loop may leave out.
   bound->setIsOptionalNode();
   boundTest->setIsOptionalNode();
   nchk->setIsOptionalNode();
   outerConv->setIsOptionalNode();

   innerConv->setIsChildDirectlyConnected();
   boundTest->setIsChildDirectlyConnected();

   g->setSpecialCareNode(0, tableCheck); // TR_booltable
   g->setEntryNode(head);
   g->setExitNode(tail);
   g->setImportantNodes(tableCheck, boundTest, nchk);
   g->setNumDagIds(10);                  // dagIds 0..9 are used above
   g->createInternalData(1);

   g->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   g->setTransformer(CISCTransform2NestedArrayFindBytes);
   g->setInhibitAfterVersioning();
   g->setAspects(isub|bndchk, ILTypeProp::Size_1, 0);
   g->setNoAspects(call|bitop1, 0, existAccess);
   g->setMinCounts(1, 2, 0);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   // This graph requests veryHot, unlike makeTRTGraph, makeTRTGraph2 and
   // makeTRT2ByteGraph in this file, which request warm.
   g->setHotness(veryHot, true);
   return g;
   }
2331
2332
2333
//////////////////////////////////////////////////////////////////////////
2334
//////////////////////////////////////////////////////////////////////////
2335
//////////////////////////////////////////////////////////////////////////
2336
2337
bool
2338
CISCTransform2NestedArrayIfFindBytes(TR_CISCTransformer *trans)
2339
{
2340
trans->setIsInitializeNegative128By1();
2341
return CISCTransform2NestedArrayFindBytes(trans);
2342
}
2343
2344
2345
2346
/****************************************************************************************
2347
Corresponding Java-like Pseudo Program
2348
int v1, end;
2349
byte v0[ ], map[ ];
2350
while(true){
2351
T = v0[v1];
2352
if (T < 0 || map[T] != 0) break;
2353
v1++;
2354
if (v1 >= end) break; // optional
2355
}
2356
****************************************************************************************/
2357
/**
 * Build the "TRT4NestedArrayIf" pattern graph.
 *
 * The graph describes a loop that loads a byte from an array, leaves the loop
 * if the byte is negative, otherwise uses the byte as an index into a second
 * array, feeds that second byte to a booltable test, advances the index and
 * (optionally) tests it against an end value.
 *
 * Nodes are created in a fixed order because each one takes its id from
 * tgt->incNumNodes(); do not reorder the creation statements.
 *
 * \param[in] c    The compilation whose memory is used for the graph.
 * \param[in] ctrl Control flags forwarded to the createIdiom*InLoop helpers.
 * \return The newly created pattern graph.
 */
TR_PCISCGraph *
makeTRT4NestedArrayIfGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRT4NestedArrayIf", 0, 16);

   /* Constructor argument order:   opc, type, id, dagId, #cfg, #child, other/pred/children */

   // array base
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 0);
   tgt->addNode(srcBase);

   // array index
   TR_PCISCNode *srcIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0);
   tgt->addNode(srcIndex);

   // outer array base
   TR_PCISCNode *outerBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 1);
   tgt->addNode(outerBase);

   // length (optional)
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 7, 0, 0);
   tgt->addNode(endValue);

   // array header
   TR_PCISCNode *arrayHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0);
   tgt->addNode(arrayHeader);

   TR_PCISCNode *constMinusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1);
   tgt->addNode(constMinusOne);

   TR_PCISCNode *constZero = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 0);
   tgt->addNode(constZero);

   // Multiply Factor
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 3, 0, 0);
   tgt->addNode(mulFactor);

   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entryNode);

   // optional
   TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::NULLCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 1, entryNode, srcBase);
   tgt->addNode(nullChk);

   TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::arraylength, TR::NoType, tgt->incNumNodes(), 1, 1, 1, nullChk, srcBase);
   tgt->addNode(arrayLen);

   TR_PCISCNode *boundChk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, arrayLen, arrayLen, srcIndex);
   tgt->addNode(boundChk);

   // T = v0[v1]
   TR_PCISCNode *srcLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, boundChk, TR_ibcload, TR::NoType, srcBase, srcIndex, arrayHeader, mulFactor);

   TR_PCISCNode *srcAsInt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, srcLoad, srcLoad);
   tgt->addNode(srcAsInt);

   // if (T < 0) break
   TR_PCISCNode *negativeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmplt, TR::NoType, tgt->incNumNodes(), 1, 2, 2, srcAsInt, srcAsInt, constZero);
   tgt->addNode(negativeTest);

   // map[T]
   TR_PCISCNode *outerLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, negativeTest, TR_ibcload, TR::NoType, outerBase, srcAsInt, arrayHeader, mulFactor);

   TR_PCISCNode *outerAsInt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, outerLoad, outerLoad);
   tgt->addNode(outerAsInt);

   TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, outerAsInt, outerAsInt);
   tgt->addNode(boolTable);

   // v1++
   TR_PCISCNode *indexUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, srcIndex, constMinusOne);

   // optional
   TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, indexUpdate, srcIndex, endValue);
   tgt->addNode(loopTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // Taken branches of both tests leave the loop; the loop test otherwise
   // goes back to the first node after the entry node.
   negativeTest->setSucc(1, exitNode);
   boolTable->setSucc(1, exitNode);
   loopTest->setSuccs(entryNode->getSucc(0), exitNode);

   endValue->setIsOptionalNode();
   loopTest->setIsOptionalNode();
   nullChk->setIsOptionalNode();

   srcAsInt->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, boolTable); // TR_booltable
   tgt->setEntryNode(entryNode);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(boolTable, loopTest, nullChk);
   tgt->setNumDagIds(11);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2NestedArrayIfFindBytes);
   tgt->setInhibitAfterVersioning();
   tgt->setAspects(isub|bndchk, ILTypeProp::Size_1, 0);
   tgt->setNoAspects(call|bitop1, 0, existAccess);
   tgt->setMinCounts(2, 2, 0);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(veryHot, true);
   return tgt;
   }
2411
2412
2413
//////////////////////////////////////////////////////////////////////////
2414
//////////////////////////////////////////////////////////////////////////
2415
//////////////////////////////////////////////////////////////////////////
2416
2417
//*****************************************************************************************
2418
// IL code generation for exploiting the TROT or TROO instruction
2419
// This is the case where the compiler will create the function table by analyzing booltable.
2420
// Input: ImportantNode(0) - booltable
2421
// ImportantNode(1) - ificmpge
2422
// ImportantNode(2) - load of the source array
2423
// ImportantNode(3) - store of the destination array
2424
// ImportantNode(4) - optional node for optimizing java/lang/String.<init>([BIII)V
2425
// We will version the loop by "if (high == 0)".
2426
//*****************************************************************************************
2427
static TR_YesNoMaybe isSignExtendingCopyingTROx(TR_CISCTransformer *trans);
2428
2429
#define TERMCHAR (0xF0FF) // not the sign- or zero-extension of any byte
2430
bool
2431
CISCTransform2CopyingTROx(TR_CISCTransformer *trans)
2432
{
2433
const bool disptrace = DISPTRACE(trans);
2434
TR::Node *trNode;
2435
TR::TreeTop *trTreeTop;
2436
TR::Block *block;
2437
TR_CISCGraph *P = trans->getP();
2438
List<TR_CISCNode> *P2T = trans->getP2T();
2439
TR::Compilation *comp = trans->comp();
2440
bool isOutputChar = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();
2441
const char *title = P->getTitle();
2442
int32_t pattern = P->getPatternType();
2443
2444
bool genTRxx = comp->cg()->getSupportsArrayTranslateTRxx();
2445
bool genSIMD = comp->cg()->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDArrayTranslate);
2446
2447
if (!isOutputChar && genSIMD && !genTRxx){
2448
traceMsg(comp, "Bailing CISCTransform2CopyingTROx : b2b - no proper evaluator available\n");
2449
return false;
2450
}
2451
2452
bool isSignExtending = false;
2453
if (isOutputChar)
2454
{
2455
TR_YesNoMaybe sx = isSignExtendingCopyingTROx(trans);
2456
if (sx == TR_maybe)
2457
{
2458
traceMsg(comp,
2459
"Bailing CISCTransform2CopyingTROx : unknown integer conversion\n");
2460
return false;
2461
}
2462
isSignExtending = sx == TR_yes;
2463
}
2464
2465
TR_CISCNode *additionHigh = NULL;
2466
if (P->getImportantNode(4))
2467
additionHigh = trans->getP2TRepInLoop(P->getImportantNode(4));
2468
2469
if (additionHigh)
2470
{
2471
TR_CISCNode *loadResult = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0));
2472
// Below we need to be able to tell which of the children of iadd is the
2473
// loaded value, and which is the loop-invariant offset. We do that by
2474
// requiring that one is obviously the loaded value. If not, give up now.
2475
if (additionHigh->getChild(0) != loadResult && additionHigh->getChild(1) != loadResult)
2476
{
2477
traceMsg(comp,
2478
"Bailing CISCTransform2CopyingTROx : inscrutable iadd\n");
2479
return false;
2480
}
2481
}
2482
2483
/*
2484
while (*title != '\0')
2485
{
2486
if (*title == '(')
2487
{
2488
pattern = *(++title) - '0';
2489
break;
2490
}
2491
++title;
2492
}
2493
*/
2494
if (disptrace)
2495
traceMsg(comp, "Found graph pattern as %d\n", pattern);
2496
2497
trans->findFirstNode(&trTreeTop, &trNode, &block);
2498
if (!block) return false; // cannot find
2499
2500
if (isLoopPreheaderLastBlockInMethod(comp, block))
2501
{
2502
traceMsg(comp, "Bailing CISCTransform2CopyingTROx due to null TT - might be a preheader in last block of method\n");
2503
return false;
2504
}
2505
2506
TR_CISCNode * inputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));
2507
TR_CISCNode * outputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0));
2508
TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
2509
TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
2510
2511
TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *indexDiffRepNode;
2512
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &indexDiffRepNode);
2513
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
2514
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
2515
if (trans->countGoodArrayIndex(indexVarSymRef) == 0)
2516
{
2517
if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p\n",indexRepNode);
2518
return false;
2519
}
2520
if (indexVarSymRef == dstIndexVarSymRef)
2521
{
2522
dstIndexRepNode = NULL;
2523
dstIndexVarSymRef = NULL;
2524
}
2525
if (dstIndexVarSymRef)
2526
{
2527
if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0)
2528
{
2529
if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p\n",dstIndexRepNode);
2530
return false;
2531
}
2532
}
2533
TR_ScratchList<TR::Node> variableList(comp->trMemory());
2534
variableList.add(indexRepNode);
2535
if (dstIndexRepNode) variableList.add(dstIndexRepNode);
2536
if (!isIndexVariableInList(inputNode, &variableList) ||
2537
!isIndexVariableInList(outputNode, &variableList))
2538
{
2539
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);
2540
return false;
2541
}
2542
TR::SymbolReference * indexDiffVarSymRef = (indexDiffRepNode->getOpCode().isLoadVarOrStore() &&
2543
!indexDiffRepNode->getOpCode().isIndirect()) ?
2544
indexDiffRepNode->getSymbolReference() : NULL;
2545
TR::Node *ignoreTree = dstIndexVarSymRef && indexDiffVarSymRef && indexVarSymRef ?
2546
createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, indexVarSymRef, indexDiffVarSymRef, trNode) : NULL;
2547
TR::Block *target = trans->analyzeSuccessorBlock(ignoreTree);
2548
if (!target) // multiple successors
2549
{
2550
// current restrictions. allow only the case where the number of successors is 2.
2551
if (trans->getNumOfBBlistSucc() != 2)
2552
{
2553
if (disptrace) traceMsg(comp, "current restrictions. The number of successors is %d\n", trans->getNumOfBBlistSucc());
2554
return false;
2555
}
2556
}
2557
2558
// Check if there is idiom specific node insertion.
2559
// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()
2560
bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);
2561
2562
// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)
2563
bool isNeedGenIcmpge = (!target || isCompensateCode);
2564
2565
// Prepare the function table
2566
TR::Node *tableNode;
2567
uint8_t tmpTable[256];
2568
2569
TR::TreeTop *retSameExit = NULL;
2570
2571
// Number of Bool Table Test characters:
2572
// -1 -> analyzeByteBoolTable error.
2573
// 0 -> no bool table tests.
2574
// >0 -> # of constant test characters.
2575
int32_t numBoolTableTestChars = trans->analyzeByteBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1), &retSameExit);
2576
if (numBoolTableTestChars < 0)
2577
{
2578
if (disptrace) traceMsg(comp, "analyzeByteBoolTable failed.\n");
2579
return false;
2580
}
2581
2582
if (numBoolTableTestChars != 0 && !retSameExit) // Destinations of booltable checks are not same
2583
{
2584
traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");
2585
return false;
2586
}
2587
2588
// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.
2589
if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)
2590
{
2591
traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. Abandoning reduction.\n",
2592
retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),
2593
target->getNumber(), target);
2594
return false;
2595
}
2596
2597
// check if the induction variable needs to be updated by 1
2598
// this depends on whether the induction variable is incremented
2599
// before the boolTable exit or after (ie. before the loop driving test)
2600
//
2601
TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;
2602
bool ivNeedsUpdate = false;
2603
bool dstIvNeedsUpdate = false;
2604
if (0 && boolTableExit)
2605
{
2606
TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;
2607
traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());
2608
ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);
2609
if (dstIndexVarSymRef)
2610
dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);
2611
}
2612
2613
int termchar;
2614
int stopchar = -1;
2615
if (comp->cg()->getSupportsArrayTranslateTROTNoBreak()||comp->cg()->getSupportsArrayTranslateTROT())
2616
{
2617
//for b2s on X (ISO, ASCII) and P(ISO)
2618
bool foundLoopToReduce = false;
2619
termchar = 0; //value of 0, needed by arraytranslateEvaluator to decide between TROT and TROTNoBreak versions.
2620
if (!isOutputChar)
2621
{
2622
traceMsg(comp, "failed because of reason 1 %\n");
2623
return false;
2624
}
2625
if (comp->cg()->getSupportsArrayTranslateTROTNoBreak())
2626
{
2627
foundLoopToReduce = true;
2628
for (int i = 0; i < 256; i++)
2629
{
2630
if (tmpTable[i] != 0)
2631
foundLoopToReduce = false;
2632
}
2633
2634
if (foundLoopToReduce)
2635
termchar = TERMCHAR; //It needs to be greater than zero, dummy termination char otherwise, i.e., it's not gonna be used,
2636
}
2637
if (!foundLoopToReduce && comp->cg()->getSupportsArrayTranslateTROT()) //try ascii
2638
{
2639
foundLoopToReduce = true;
2640
for (int i = 0; i < 256; i++)
2641
{
2642
bool excluded = tmpTable[i] != 0;
2643
bool nonASCII = i >= 128;
2644
if (excluded != nonASCII)
2645
foundLoopToReduce = false;
2646
}
2647
2648
if (foundLoopToReduce)
2649
termchar = 0; //to distinguish between ISO and ASCII when evaluating the node.
2650
}
2651
//
2652
if (!foundLoopToReduce)
2653
{
2654
traceMsg(comp, "failed because of reason 2\n");
2655
return false;
2656
}
2657
tableNode = TR::Node::create(baseRepNode, TR::iconst, 0, 0); //dummy table node, it's not gonna be used
2658
2659
if (termchar != 0)
2660
{
2661
// This is ISO 8859-1. The decode helpers accept all input bytes, and
2662
// they zero-extend each byte into a char. While that's the right way
2663
// to decode ISO 8859-1, it may not be what the loop asks us to do.
2664
if (isSignExtending)
2665
{
2666
traceMsg(comp,
2667
"Bailing CISCTransform2CopyingTROx due to sign-extension\n");
2668
return false;
2669
}
2670
}
2671
}
2672
else
2673
{
2674
//SIMD or TRxx
2675
if (isOutputChar)
2676
{
2677
//b2c
2678
termchar = TERMCHAR;
2679
uint16_t table[256];
2680
2681
bool isSIMDPossible = genSIMD && !isSignExtending;
2682
if (isSIMDPossible) {
2683
//SIMD possible only if we have consecutive chars, and no ranges
2684
for (int i = 0; i < 256; i++) {
2685
if (tmpTable[i] == 0) {
2686
if (stopchar != (i-1)) {
2687
isSIMDPossible = false;
2688
break;
2689
}
2690
stopchar++;
2691
}
2692
}
2693
2694
//case all are non-valid chars
2695
if (stopchar == -1 )
2696
isSIMDPossible = false;
2697
}
2698
2699
if (isSIMDPossible) {
2700
tableNode = TR::Node::create(baseRepNode, TR::aconst, 0, 0); //dummy table node, it's not gonna be used
2701
} else if (!genTRxx){
2702
traceMsg(comp, "Bailing CISCTransform2CopyingTROx: b2c - no proper evaluator available\n");
2703
return false;
2704
} else {
2705
for (int i = 0; i < 256; i++)
2706
{
2707
uint8_t excluded = tmpTable[i];
2708
uint16_t *entry = &table[i];
2709
if (excluded)
2710
*entry = TERMCHAR;
2711
else if (isSignExtending)
2712
*entry = (int8_t)i; // sign-extends up from 8-bit
2713
else
2714
*entry = i;
2715
}
2716
tableNode = createTableLoad(comp, baseRepNode, 8, 16, table, disptrace);
2717
}
2718
}
2719
else
2720
{
2721
//b2b
2722
termchar = -1;
2723
for (int i = 0; i < 256; i++)
2724
{
2725
uint8_t u8 = tmpTable[i];
2726
if (u8)
2727
{
2728
if (termchar < 0) termchar = i;
2729
tmpTable[i] = termchar;
2730
}
2731
else
2732
{
2733
tmpTable[i] = i;
2734
}
2735
}
2736
if (termchar < 0)
2737
{
2738
traceMsg(comp, "No terminating character found. Abandoning reduction.\n");
2739
return false;
2740
}
2741
tableNode = createTableLoad(comp, baseRepNode, 8, 8, tmpTable, disptrace);
2742
}
2743
}
2744
2745
// find the target node of icmpge
2746
TR_CISCNode *icmpgeCISCnode = NULL;
2747
TrNodeInfo *icmpgeRepInfo = NULL;
2748
TR::Node *lenRepNode = NULL;
2749
List<TR_CISCNode> *listT = P2T + P->getImportantNode(1)->getID(); // ificmpge
2750
TR_CISCNode *lenNode;
2751
if (listT->isSingleton())
2752
{
2753
icmpgeCISCnode = listT->getListHead()->getData();
2754
lenNode = icmpgeCISCnode->getChild(1);
2755
}
2756
else
2757
{
2758
ListIterator<TR_CISCNode> li(listT);
2759
TR_CISCNode *n;
2760
lenNode = NULL;
2761
// find icmpge in the candidate region
2762
for (n = li.getFirst(); n; n = li.getNext())
2763
{
2764
if (trans->getCandidateRegion()->isIncluded(n))
2765
{
2766
if (icmpgeCISCnode != NULL)
2767
{
2768
if (disptrace)
2769
traceMsg(comp, "Bailing CISCTransform2CopyingTROx: multiple loop tests: %d and %d\n", icmpgeCISCnode->getID(), n->getID());
2770
return false;
2771
}
2772
icmpgeCISCnode = n;
2773
lenNode = n->getChild(1);
2774
}
2775
}
2776
TR_ASSERT(lenNode != NULL, "error!");
2777
}
2778
bool isDecrement;
2779
int32_t modLength;
2780
if (!testExitIF(icmpgeCISCnode->getOpcode(), &isDecrement, &modLength)) return false;
2781
if (isDecrement) return false;
2782
TR_ASSERT(modLength == 0 || modLength == 1, "error");
2783
icmpgeRepInfo = icmpgeCISCnode->getHeadOfTrNodeInfo();
2784
lenRepNode = createLoad(lenNode->getHeadOfTrNodeInfo()->_node);
2785
2786
// Modify array header constant if necessary
2787
TR::Node *constLoad;
2788
if (trans->getOffsetOperand1())
2789
{
2790
constLoad = modifyArrayHeaderConst(comp, inputNode, trans->getOffsetOperand1());
2791
TR_ASSERT(constLoad, "Not implemented yet");
2792
if (disptrace) traceMsg(comp,"The array header const of inputNode %p is modified. (offset=%d)\n", inputNode, trans->getOffsetOperand1());
2793
}
2794
if (trans->getOffsetOperand2())
2795
{
2796
int32_t offset = trans->getOffsetOperand2() * (isOutputChar ? 2 : 1);
2797
constLoad = modifyArrayHeaderConst(comp, outputNode, offset);
2798
TR_ASSERT(constLoad, "Not implemented yet");
2799
if (disptrace) traceMsg(comp,"The array header const of outputNode %p is modified. (offset=%d)\n", outputNode, offset);
2800
}
2801
2802
// Prepare the arraytranslate node
2803
TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
2804
TR::Node * lenTmpNode = createOP2(comp, TR::isub, lenRepNode, indexNode);
2805
if (modLength) lenTmpNode = createOP2(comp, TR::isub, lenTmpNode, TR::Node::create(indexRepNode, TR::iconst, 0, -modLength));
2806
TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lenTmpNode);
2807
TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, termchar);
2808
TR::Node * stopCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, stopchar);
2809
2810
TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
2811
translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
2812
translateNode->setAndIncChild(0, inputNode);
2813
translateNode->setAndIncChild(1, outputNode);
2814
translateNode->setAndIncChild(2, tableNode);
2815
translateNode->setAndIncChild(3, termCharNode);
2816
translateNode->setAndIncChild(4, lengthNode);
2817
translateNode->setAndIncChild(5, stopCharNode);
2818
2819
translateNode->setSourceIsByteArrayTranslate(true);
2820
translateNode->setTargetIsByteArrayTranslate(!isOutputChar);
2821
translateNode->setTermCharNodeIsHint(false);
2822
translateNode->setSourceCellIsTermChar(false);
2823
translateNode->setTableBackedByRawStorage(true);
2824
TR::SymbolReference * translateTemp = comp->getSymRefTab()->
2825
createTemporary(comp->getMethodSymbol(), TR::Int32);
2826
TR::Node * topOfTranslateNode = TR::Node::createStore(translateTemp, translateNode);
2827
2828
// prepare nodes that add the number of elements (which was translated) into the induction variables
2829
2830
TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), translateNode);
2831
if (ivNeedsUpdate)
2832
addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexNode, 1));
2833
2834
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);
2835
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
2836
2837
TR::TreeTop * dstIndVarUpdateTreeTop = NULL;
2838
// update the derived induction variable accordingly as well
2839
//
2840
if (dstIndexRepNode)
2841
{
2842
// find the store corresponding to the derived induction variable
2843
//
2844
TR_CISCNode *loopTest = P->getImportantNode(1);
2845
ListIterator<TR_CISCNode> ni(P->getNodes());
2846
TR_CISCNode *jstore = NULL;
2847
TR::Node *dstIVStore = NULL;
2848
for (TR_CISCNode *n = ni.getFirst(); n; n = ni.getNext())
2849
{
2850
if (n->getNumSuccs() >= 1 &&
2851
n->getSucc(0) &&
2852
(n->getSucc(0)->getID() == loopTest->getID()))
2853
{
2854
jstore = n;
2855
break;
2856
}
2857
2858
}
2859
if (jstore)
2860
{
2861
///traceMsg(comp, "found jstore %p to be %d\n", jstore, jstore->getID());
2862
TR_CISCNode *matchJ = trans->getP2TRepInLoop(jstore);
2863
if (matchJ)
2864
{
2865
///traceMsg(comp, "found matching jstore %p to be %d\n", matchJ, matchJ->getID());
2866
///traceMsg(comp, "actual store node is %p\n", matchJ->getHeadOfTrNodeInfo()->_node);
2867
dstIVStore = matchJ->getHeadOfTrNodeInfo()->_node;
2868
}
2869
}
2870
2871
if (dstIVStore &&
2872
dstIVStore->getOpCode().hasSymbolReference() &&
2873
dstIVStore->getSymbolReference() == dstIndexVarSymRef)
2874
{
2875
// j = j + 1 (pattern=1)
2876
// final value j_final = j_start + arraytranslate + needsUpdate ? 1 : 0
2877
// or
2878
// j = i + offset (pattern=0)
2879
// final value j_final = i_final + offset (i_final has already been emitted in the previous TT)
2880
//
2881
dstIVStore = dstIVStore->duplicateTree();
2882
TR::Node * dstIndVarUpdateNode = NULL;
2883
if (pattern == 1)
2884
{
2885
TR::Node *dstAddCountNode = createOP2(comp, TR::iadd,
2886
TR::Node::createLoad(dstIndexRepNode, dstIndexVarSymRef),
2887
translateNode);
2888
if (dstIvNeedsUpdate)
2889
dstAddCountNode = TR::Node::create(TR::iadd, 2,
2890
dstAddCountNode,
2891
TR::Node::iconst(dstAddCountNode, 1));
2892
2893
2894
dstIndVarUpdateNode = TR::Node::createStore(dstIndexVarSymRef, dstAddCountNode);
2895
}
2896
else if (pattern == 0)
2897
{
2898
TR::Node *firstChild = dstIVStore->getFirstChild();
2899
if (firstChild->getOpCode().isAdd() || firstChild->getOpCode().isSub())
2900
{
2901
TR::Node *ivLoad = firstChild->getFirstChild();
2902
if (!ivLoad->getOpCode().hasSymbolReference() ||
2903
(ivLoad->getSymbolReference() != indexVarSymRef))
2904
{
2905
ivLoad->recursivelyDecReferenceCount();
2906
firstChild->setAndIncChild(0, TR::Node::createLoad(indexRepNode, indexVarSymRef));
2907
}
2908
}
2909
dstIndVarUpdateNode = dstIVStore;
2910
}
2911
if (dstIndVarUpdateNode)
2912
dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstIndVarUpdateNode);
2913
}
2914
}
2915
2916
// create Nodes if there are multiple exit points.
2917
TR::Node *icmpgeNode = NULL;
2918
TR::TreeTop *failDest = NULL;
2919
TR::TreeTop *okDest = NULL;
2920
TR::Block *compensateBlock0 = NULL;
2921
TR::Block *compensateBlock1 = NULL;
2922
if (isNeedGenIcmpge)
2923
{
2924
if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");
2925
2926
TR_ASSERT(icmpgeRepInfo, "Not implemented yet"); // current restriction
2927
okDest = retSameExit;
2928
failDest = icmpgeCISCnode->getDestination();
2929
// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()
2930
if (isCompensateCode)
2931
{
2932
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
2933
compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
2934
compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
2935
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));
2936
okDest = compensateBlock0->getEntry();
2937
failDest = compensateBlock1->getEntry();
2938
}
2939
TR_ASSERT(okDest != NULL && failDest != NULL && okDest != failDest, "error!");
2940
2941
// It actually generates "ificmplt" (NOT ificmpge!) in order to suppress a redundant goto block.
2942
icmpgeNode = TR::Node::createif(TR::ificmplt,
2943
TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),
2944
lenRepNode,
2945
okDest);
2946
}
2947
2948
// Insert nodes and maintain the CFG
2949
if (additionHigh)
2950
{
2951
TR_CISCNode *highCISCNode;
2952
TR_CISCNode *loadResult = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0));
2953
// Guaranteed above
2954
TR_ASSERT(additionHigh->getChild(0) == loadResult || additionHigh->getChild(1) == loadResult, "error!");
2955
highCISCNode = (additionHigh->getChild(0) == loadResult) ? additionHigh->getChild(1) :
2956
additionHigh->getChild(0);
2957
List<TR::Node> guardList(comp->trMemory());
2958
guardList.add(TR::Node::createif(TR::ificmpne, convertStoreToLoad(comp, highCISCNode->getHeadOfTrNodeInfo()->_node),
2959
TR::Node::create(lengthNode, TR::iconst, 0, 0)));
2960
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree(), &guardList);
2961
}
2962
else
2963
{
2964
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());
2965
}
2966
2967
// Create the fast path code
2968
block = trans->insertBeforeNodes(block);
2969
block->append(TR::TreeTop::create(comp, topOfTranslateNode));
2970
2971
block->append(indVarUpdateTreeTop);
2972
//block->append(indVarIncTreeTop);
2973
if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);
2974
block = trans->insertAfterNodes(block);
2975
2976
if (isNeedGenIcmpge)
2977
{
2978
block->append(TR::TreeTop::create(comp, icmpgeNode));
2979
if (isCompensateCode)
2980
{
2981
TR::CFG *cfg = comp->getFlowGraph();
2982
cfg->setStructure(NULL);
2983
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
2984
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
2985
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]
2986
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]
2987
cfg->insertBefore(compensateBlock0, orgNextBlock);
2988
cfg->insertBefore(compensateBlock1, compensateBlock0);
2989
cfg->join(block, compensateBlock1);
2990
}
2991
}
2992
else if (isCompensateCode)
2993
{
2994
block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]
2995
}
2996
2997
// set successor edge(s) to the original block
2998
if (!isNeedGenIcmpge)
2999
{
3000
trans->setSuccessorEdge(block, target);
3001
}
3002
else
3003
{
3004
trans->setSuccessorEdges(block,
3005
failDest->getEnclosingBlock(),
3006
okDest->getEnclosingBlock());
3007
}
3008
3009
return true;
3010
}
3011
3012
/**
3013
* Determine whether the 16-bit output values are sign- or zero-extended.
3014
*
3015
* Loops transformed by CISCTransform2CopyingTROx(TR_CISCTransformer*) are
3016
* loops that copy values from an input <tt>byte[]</tt> to an output
3017
* <tt>byte[]</tt> or <tt>char[]</tt>, When the output goes into a
3018
* <tt>char[]</tt>, the output values are never identical to the input values,
3019
* because they are wider. So each is the result of some integer conversion,
3020
* effectively like this:
3021
*
3022
\verbatim
3023
dest[j] = convert(src[i])
3024
\endverbatim
3025
*
3026
* In order to correctly transform the loop, it's important to know the
3027
* conversion operation. This function analyzes the loop to determine whether
3028
* the conversion is known to be a sign-extension (\c TR_yes), known to be a
3029
* zero-extension (\c TR_no), or neither (\c TR_maybe).
3030
*
3031
* Note that a result of \c TR_maybe necessarily prevents the transformation
3032
* from succeeding. A result of \c TR_no allows the transformation to proceed,
3033
* since zero-extension was previously the tacit assumption. For contrast, an
3034
* effort is made to transform sign-extending loops (\c TR_yes), but doing so
3035
* is not always possible, even in cases where the corresponding zero-extending
3036
* loop can be transformed.
3037
*
3038
* \param[in] trans The optimization pass object.
3039
* \return \c TR_yes for sign-extension, \c TR_no for zero-extension, or \c
3040
* TR_maybe for unknown/neither.
3041
*/
3042
static TR_YesNoMaybe
3043
isSignExtendingCopyingTROx(TR_CISCTransformer *trans)
3044
{
3045
TR_CISCGraph *P = trans->getP();
3046
TR::Compilation *comp = trans->comp();
3047
3048
TR_CISCNode *patArrStore = P->getImportantNode(3);
3049
TR_CISCNode *patStoreConv = patArrStore->getChild(1);
3050
TR_ASSERT(
3051
patStoreConv->getOpcode() == TR_conversion
3052
|| patStoreConv->getIlOpCode().isConversion(),
3053
"isSignExtendingCopyingTROx: pattern store conversion not found\n");
3054
3055
TR_CISCNode *patLoadConv = patStoreConv->getChild(0);
3056
// In CopyingTROx(*), the child is an optional iadd, but not in
3057
// CopyingTROTInduction1 or CopyingTROOSpecial.
3058
if (patLoadConv->getOpcode() == TR::iadd)
3059
patLoadConv = patLoadConv->getChild(0);
3060
3061
TR_ASSERT(
3062
patLoadConv->getOpcode() == TR_conversion
3063
|| patLoadConv->getIlOpCode().isConversion(),
3064
"isSignExtendingCopyingTROx: pattern load conversion not found\n");
3065
3066
TR_CISCNode *tgtStoreConv = trans->getP2TRepInLoop(patStoreConv);
3067
TR_CISCNode *tgtLoadConv = trans->getP2TRepInLoop(patLoadConv);
3068
TR_ASSERT(
3069
tgtStoreConv != NULL || tgtLoadConv != NULL,
3070
"isSignExtendingCopyingTROx: converted from byte to char without "
3071
"any conversions\n");
3072
3073
TR::Node *storeConv = NULL;
3074
if (tgtStoreConv != NULL)
3075
storeConv = tgtStoreConv->getHeadOfTrNodeInfo()->_node;
3076
3077
TR::Node *loadConv = NULL;
3078
if (tgtLoadConv != NULL)
3079
loadConv = tgtLoadConv->getHeadOfTrNodeInfo()->_node;
3080
3081
if (storeConv == NULL || loadConv == NULL) // only one conversion
3082
{
3083
TR::Node *loneConv = loadConv != NULL ? loadConv : storeConv;
3084
TR::ILOpCode op = loneConv->getOpCode();
3085
TR_ASSERT(
3086
op.isZeroExtension() || op.isSignExtension(),
3087
"isSignExtendingCopyingTROx: lone conversion not an extension\n");
3088
return op.isSignExtension() ? TR_yes : TR_no;
3089
}
3090
3091
// Two conversions.
3092
TR::ILOpCode firstOp = loadConv->getOpCode();
3093
if (!firstOp.isInteger() && !firstOp.isUnsigned())
3094
{
3095
traceMsg(comp,
3096
"isSignExtendingCopyingTROx: conversion through non-integer type\n");
3097
return TR_maybe;
3098
}
3099
3100
// The first conversion has to be a (zero- or sign-) extension, because Int8
3101
// is the smallest available integer type.
3102
TR_ASSERT(
3103
firstOp.isZeroExtension() || firstOp.isSignExtension(),
3104
"isSignExtendingCopyingTROx: first conversion not an extension\n");
3105
3106
// If it produces a 16-bit integer directly, the second would have to be a
3107
// "conversion" from short to short.
3108
TR_ASSERT(
3109
!firstOp.isShort(),
3110
"isSignExtendingCopyingTROx: first conversion directly to short\n");
3111
3112
// So the intermediate type is an integer type longer than 16-bit, and the
3113
// second conversion has to be a truncation to 16 bits. The net effect is
3114
// either a zero- or sign-extension depending only on the first conversion.
3115
return firstOp.isSignExtension() ? TR_yes : TR_no;
3116
}
3117
3118
bool
3119
CISCTransform2CopyingTROxAddDest1(TR_CISCTransformer *trans)
3120
{
3121
trans->setOffsetOperand2(1); // add offset of destination with 1
3122
return CISCTransform2CopyingTROx(trans);
3123
}
3124
3125
/****************************************************************************************
3126
Corresponding Java-like Pseudo Program
3127
int v1, v3, end;
3128
byte v0[ ];
3129
byte v2[ ];
3130
while(true){
3131
if (booltable(v0[v1])) break;
3132
v2[v3] = v0[v1];
3133
v1++;
3134
v3++;
3135
if (v1 >= end) break;
3136
}
3137
3138
Note 1: It allows that variables v1 and v3 are identical.
3139
****************************************************************************************/
3140
/**
 * Builds the persistent pattern graph named "CopyingTROOSpecial" (see the
 * pseudo program in the comment preceding this function).
 *
 * Argument layout of every TR_PCISCNode constructed below:
 *   trMemory, opcode, data type, node id, dagId, #cfg successors, #children,
 *   then other-info / predecessor / children.
 *
 * @param c    compilation whose trMemory() and target() are consulted
 * @param ctrl control flags forwarded to the createIdiom* helpers
 * @return the newly created pattern graph
 */
TR_PCISCGraph *
makeCopyingTROOSpecialGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTROOSpecial", 0, 16);

   // Operand nodes (dagIds 13 down to 3) followed by the entry node (dagId 2).
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 13, 0, 0, 0);
   graph->addNode(srcBase);      // src array base
   TR_PCISCNode *srcVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 12, 0, 0, 0);
   graph->addNode(srcVar);       // src array index
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 11, 0, 0, 1);
   graph->addNode(dstBase);      // dst array base
   TR_PCISCNode *dstVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 10, 0, 0, 1);
   graph->addNode(dstVar);       // dst array index
   TR_PCISCNode *srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 9, 0, 0, 0);
   graph->addNode(srcIdx);
   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 8, 0, 0, 1);
   graph->addNode(dstIdx);
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 7, 0, 0);
   graph->addNode(endValue);     // length
   TR_PCISCNode *arrayHdr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 6, 0, 0, 0);
   graph->addNode(arrayHdr);     // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, -1);
   graph->addNode(minusOne);
   TR_PCISCNode *srcElemSize = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 4, 1); // element size for input
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 3, 0, 0);
   graph->addNode(mulFactor);    // Multiply Factor
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entry);

   // Chained nodes (dagId 1): each one is handed the previously created node.
   TR_PCISCNode *testLoad = createIdiomArrayLoadInLoop(graph, ctrl, 1, entry, TR::bloadi, TR::Int8, srcBase, srcIdx, arrayHdr, srcElemSize);
   TR_PCISCNode *conv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, testLoad, testLoad);
   graph->addNode(conv);
   TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, graph->incNumNodes(), 1, 2, 1, conv, conv);
   graph->addNode(boolTable);    // optional
   TR_PCISCNode *copyLoad = createIdiomArrayLoadInLoop(graph, ctrl, 1, boolTable, TR::bloadi, TR::Int8, srcBase, srcIdx, arrayHdr, mulFactor);
   TR_PCISCNode *copyStore = createIdiomArrayStoreInLoop(graph, ctrl|CISCUtilCtl_NoConversion, 1, copyLoad, TR::bstorei, TR::Int8, dstBase, dstIdx, arrayHdr, mulFactor, copyLoad);
   TR_PCISCNode *srcVarUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, copyStore, srcVar, minusOne);
   // dst index update: istore(dstVar, [s2i([i2s(] dstVar - (-1) [))]) with the i2s/s2i pair optional
   TR_PCISCNode *dstVarNext = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, srcVarUpdate, dstVar, minusOne);
   graph->addNode(dstVarNext);
   TR_PCISCNode *narrow = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, graph->incNumNodes(), 1, 1, 1, dstVarNext, dstVarNext);
   graph->addNode(narrow);       // optional
   TR_PCISCNode *widen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, narrow, narrow);
   graph->addNode(widen);        // optional
   TR_PCISCNode *dstVarStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, graph->incNumNodes(), 1, 1, 2, widen, widen, dstVar);
   graph->addNode(dstVarStore);
   TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, dstVarStore, srcVar, endValue);
   graph->addNode(loopTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // Second successor of the booltable leaves the loop; the loop test either
   // goes back to the first node after the entry or leaves the loop.
   boolTable->setSucc(1, exitNode);
   loopTest->setSuccs(entry->getSucc(0), exitNode);

   boolTable->setIsOptionalNode();
   narrow->setIsOptionalNode();
   widen->setIsOptionalNode();

   conv->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   graph->setSpecialCareNode(0, boolTable); // TR_booltable
   graph->setEntryNode(entry);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(boolTable, loopTest, testLoad, copyStore, NULL);
   graph->setNumDagIds(14);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2CopyingTROx);
   graph->setInhibitBeforeVersioning();
   graph->setAspects(isub|sameTypeLoadStore, ILTypeProp::Size_1, ILTypeProp::Size_1);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 2, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);

   // Function-local statics: the environment override and the resulting
   // version length are computed on the first call only.
   static char *envVersionLength = feGetEnv("TR_CopyingTROOSpecialGraph_versionLength");
   static int cachedVersionLength = envVersionLength ? atoi(envVersionLength)
                                                     : (c->target().cpu.isPower() ? 0 : 19);
   graph->setVersionLength(cachedVersionLength); // depending on each architecture
   graph->setPatternType(1);                     // dest. induction variable is updated by incrementing
   return graph;
   }
3200
3201
3202
/****************************************************************************************
3203
Corresponding Java-like pseudocode
3204
3205
int i, j, end;
3206
byte byteArray[ ];
3207
char charArray[ ];
3208
while(true){
3209
char T = (char)byteArray[i];
3210
if (booltable(T)) break;
3211
(T = T + high;) // optional
3212
charArray[j] = T;
3213
i++;
3214
j++;
3215
if (i >= end) break;
3216
}
3217
3218
Note 1: Idiom allows variables i and j to be identical.
3219
Note 2: The optional addition "T = T + high" is to optimize java/lang/String.<init>([BIII)V.
3220
We will version the loop by "if (high == 0)".
3221
****************************************************************************************/
3222
/**
 * Builds the persistent pattern graph "CopyingTROx(<pattern>)" (see the
 * pseudocode in the comment preceding this function).
 *
 * Argument layout of every TR_PCISCNode constructed below:
 *   trMemory, opcode, data type, node id, dagId, #cfg successors, #children,
 *   then other-info / predecessor / children.
 *
 * @param c       compilation whose trMemory() and target() are consulted
 * @param ctrl    control flags forwarded to the createIdiom* helpers
 * @param pattern 0: destination index is stored as "j = i + offset";
 *                1: destination index is updated like the source index.
 *                Any other value asserts and returns NULL.
 * @return the newly created pattern graph, or NULL for an unsupported pattern
 */
TR_PCISCGraph *
makeCopyingTROxGraph(TR::Compilation *c, int32_t ctrl, int pattern)
   {
   TR_ASSERT(pattern == 0 || pattern == 1, "not implemented");

   // The graph name lives in a fixed-size persistent buffer. Bound the write by
   // the buffer size so that an unexpected pattern value can never run past the
   // allocation (the range of pattern is only guarded by the assert above, and
   // the unsupported-pattern bail-out below happens after the name is written).
   static const size_t nameBufSize = 16;
   char *name = (char *)TR_MemoryBase::jitPersistentAlloc(nameBufSize);
   snprintf(name, nameBufSize, "CopyingTROx(%d)", pattern);
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);

   // Operand nodes (dagIds 16 down to 3) followed by the entry node (dagId 2).
   TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0);
   tgt->addNode(byteArray);   // src array base
   TR_PCISCNode *i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(i);           // src array index
   TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(charArray);   // dst array base
   TR_PCISCNode *j = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1);
   tgt->addNode(j);           // dst array index
   TR_PCISCNode *idx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(idx0);
   TR_PCISCNode *idx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1);
   tgt->addNode(idx1);
   TR_PCISCNode *end = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 10, 0, 0);
   tgt->addNode(end);         // length
   TR_PCISCNode *high = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0);
   tgt->addNode(high);        // optional
   TR_PCISCNode *aHeader0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0);
   tgt->addNode(aHeader0);    // array header
   TR_PCISCNode *aHeader1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1);
   tgt->addNode(aHeader1);    // array header
   TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1);
   tgt->addNode(increment);
   TR_PCISCNode *lc1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 1); // element size for input
   TR_PCISCNode *elemSize = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0);
   tgt->addNode(elemSize);    // Multiply Factor
   TR_PCISCNode *offset = NULL;
   if (pattern == 0)
      {
      // Only pattern 0 ("j = i + offset") has an offset operand.
      offset = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 3, 0, 0);
      tgt->addNode(offset);   // optional
      }
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);

   // Chained nodes (dagId 1): each one is handed the previously created node.
   TR_PCISCNode *byteAddr = createIdiomArrayLoadInLoop(tgt, ctrl, 1, entry, TR::bloadi, TR::Int8, byteArray, idx0, aHeader0, lc1);
   TR_PCISCNode *b2iNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, byteAddr, byteAddr);
   tgt->addNode(b2iNode);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, b2iNode, b2iNode);
   tgt->addNode(exitTest);    // optional
   TR_PCISCNode *add = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, exitTest, b2iNode, high);
   tgt->addNode(add);         // optional
   TR_PCISCNode *charAddr = createIdiomArrayStoreInLoop(tgt, ctrl, 1, add, TR_ibcstore, TR::NoType, charArray, idx1, aHeader1, elemSize, add);
   TR_PCISCNode *iStore = createIdiomDecVarInLoop(tgt, ctrl, 1, charAddr, i, increment);
   TR_PCISCNode *jStore = NULL;
   switch(pattern)
      {
      case 0:
         jStore = createIdiomIncVarInLoop(tgt, ctrl, 1, iStore, j, i, offset); // j = i + offset; (optional)
         break;
      case 1:
         jStore = createIdiomDecVarInLoop(tgt, ctrl, 1, iStore, j, increment); // j = j + 1; (optional)
         break;
      default:
         TR_ASSERT(0, "not implemented!");
         return NULL;
      }
   TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end);
   tgt->addNode(loopTest);
   TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exit);

   // Second successor of the booltable leaves the loop; the loop test either
   // goes back to the first node after the entry or leaves the loop.
   exitTest->setSucc(1, exit);
   loopTest->setSuccs(entry->getSucc(0), exit);

   // The whole destination-index update is optional ...
   jStore->getChild(0)->setIsOptionalNode();
   jStore->setIsOptionalNode();
   j->setIsOptionalNode();

   // ... and so are the exit test and the "+ high" addition with its operands.
   exitTest->setIsOptionalNode();
   add->setIsOptionalNode();
   high->setIsOptionalNode();
   if (offset) offset->setIsOptionalNode();

   b2iNode->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, exitTest); // TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exit);
   tgt->setImportantNodes(exitTest, loopTest, byteAddr, charAddr, add);
   tgt->setNumDagIds(17);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTROx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_1, existAccess);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Function-local statics: the environment override and the resulting
   // version length are computed on the first call only.
   static char *versionLengthStr = feGetEnv("TR_CopyingTROxGraph_versionLength");
   static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);
   tgt->setVersionLength(versionLength);   // depending on each architecture

   tgt->setPatternType(pattern);

   return tgt;
   }
3311
3312
3313
/****************************************************************************************
3314
Corresponding Java-like Pseudo Program
3315
int v1, end;
3316
int v3; // optional
3317
int v4; // v4 usually has the value of "v3 - v1".
3318
byte v0[ ];
3319
char v2[ ];
3320
while(true){
3321
char T = (char)v0[v1];
3322
if (booltable(T)) break;
3323
v2[v1+v4] = T;
3324
v1++;
3325
v3 = v1+v4; // optional
3326
if (v1 >= end) break;
3327
}
3328
****************************************************************************************/
3329
/**
 * Builds the persistent pattern graph "CopyingTROTInduction1(<pattern>)" (see
 * the pseudo program in the comment preceding this function).
 *
 * Argument layout of every TR_PCISCNode constructed below:
 *   trMemory, opcode, data type, node id, dagId, #cfg successors, #children,
 *   then other-info / predecessor / children.
 *
 * @param c       compilation whose trMemory() and target() are consulted
 * @param ctrl    control flags forwarded to the createIdiom* helpers
 * @param pattern 0: the source index update is placed after the char store and
 *                   CISCTransform2CopyingTROx is installed as transformer;
 *                1: the source index update is placed before the destination
 *                   index computation and CISCTransform2CopyingTROxAddDest1 is
 *                   installed as transformer.
 * @return the newly created pattern graph
 */
TR_PCISCGraph *
makeCopyingTROTInduction1Graph(TR::Compilation *c, int32_t ctrl, int32_t pattern)
   {
   TR_ASSERT(pattern == 0 || pattern == 1, "not implemented");

   // The graph name lives in a fixed-size persistent buffer. Bound the write by
   // the buffer size so that an unexpected pattern value can never run past the
   // allocation (the range of pattern is only guarded by the assert above).
   static const size_t nameBufSize = 26;
   char *name = (char *)TR_MemoryBase::jitPersistentAlloc(nameBufSize);
   snprintf(name, nameBufSize, "CopyingTROTInduction1(%d)", pattern);
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);

   // Operand nodes (dagIds 13 down to 3) followed by the entry node (dagId 2).
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);
   tgt->addNode(srcBase);     // src array base
   TR_PCISCNode *srcVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(srcVar);      // src array index
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1);
   tgt->addNode(dstBase);     // dst array base
   TR_PCISCNode *dstVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstVar);      // actual dst array index (optional)
   TR_PCISCNode *diffVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 2);
   tgt->addNode(diffVar);     // difference of dst array index from src array index
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0);
   tgt->addNode(endValue);    // length
   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);
   tgt->addNode(srcHeader);   // array header
   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1);
   tgt->addNode(dstHeader);   // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1);
   tgt->addNode(minusOne);
   TR_PCISCNode *srcElemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 1);   // element size
   TR_PCISCNode *dstElemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2);   // element size
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);

   // Chained nodes (dagId 1): each one is handed the previously created node.
   TR_PCISCNode *byteLoad = createIdiomArrayLoadInLoop(tgt, ctrl, 1, entry, TR::bloadi, TR::Int8, srcBase, srcVar, srcHeader, srcElemSize);
   TR_PCISCNode *conv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, byteLoad, byteLoad);
   tgt->addNode(conv);
   TR_PCISCNode *boolTable = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, conv, conv);
   tgt->addNode(boolTable);   // optional
   // pattern 1: the source index is updated before the destination index is formed
   TR_PCISCNode *beforeDstIdx = (pattern == 1) ? createIdiomDecVarInLoop(tgt, ctrl, 1, boolTable, srcVar, minusOne) : boolTable;
   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, beforeDstIdx, srcVar, diffVar);
   tgt->addNode(dstIdx);
   TR_PCISCNode *charStore = createIdiomCharArrayStoreInLoop(tgt, ctrl, 1, dstIdx, dstBase, dstIdx, dstHeader, dstElemSize, conv);
   // pattern 0: the source index is updated after the store
   TR_PCISCNode *afterStore = (pattern == 0) ? createIdiomDecVarInLoop(tgt, ctrl, 1, charStore, srcVar, minusOne) : charStore;
   TR_PCISCNode *dstVarNext = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, afterStore, dstIdx, minusOne);
   tgt->addNode(dstVarNext);  // (optional)
   TR_PCISCNode *dstVarStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, dstVarNext, dstVarNext, dstVar);
   tgt->addNode(dstVarStore); // (optional)
   TR_PCISCNode *loopTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, dstVarStore, srcVar, endValue);
   tgt->addNode(loopTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // Second successor of the booltable leaves the loop; the loop test either
   // goes back to the first node after the entry or leaves the loop.
   boolTable->setSucc(1, exitNode);
   loopTest->setSuccs(entry->getSucc(0), exitNode);

   boolTable->setIsOptionalNode();
   dstVar->setIsOptionalNode();
   dstVarNext->setIsOptionalNode();
   dstVarStore->setIsOptionalNode();

   dstVarStore->setIsChildDirectlyConnected();
   conv->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, boolTable); // TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(boolTable, loopTest, byteLoad, charStore, NULL);
   tgt->setNumDagIds(14);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(pattern == 0 ? CISCTransform2CopyingTROx : CISCTransform2CopyingTROxAddDest1);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_1, ILTypeProp::Size_2);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Function-local statics: the environment override and the resulting
   // version length are computed on the first call only.
   static char *versionLengthStr = feGetEnv("TR_CopyingTROTInduction1Graph_versionLength");
   static int versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);
   tgt->setVersionLength(versionLength);   // depending on each architecture
   return tgt;
   }
3392
3393
3394
//////////////////////////////////////////////////////////////////////////
3395
//////////////////////////////////////////////////////////////////////////
3396
//////////////////////////////////////////////////////////////////////////
3397
3398
//*****************************************************************************************
3399
// IL code generation for exploiting the TROT instruction
3400
// This is the case where the function table is prepared by the user program.
3401
// Input: ImportantNodes(0) - booltable
3402
// ImportantNodes(1) - ificmpge
3403
// ImportantNodes(2) - address of the source array
3404
// ImportantNodes(3) - address of the destination array
3405
//*****************************************************************************************
3406
#define TERMBYTE (0x0B) // Vertical Tab is rarely used, I guess...
3407
bool
3408
CISCTransform2TROTArray(TR_CISCTransformer *trans)
3409
{
3410
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
3411
const bool disptrace = DISPTRACE(trans);
3412
TR::Node *trNode;
3413
TR::TreeTop *trTreeTop;
3414
TR::Block *block;
3415
TR_CISCGraph *P = trans->getP();
3416
List<TR_CISCNode> *P2T = trans->getP2T();
3417
TR::Compilation *comp = trans->comp();
3418
3419
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
3420
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
3421
3422
trans->findFirstNode(&trTreeTop, &trNode, &block);
3423
if (!block) return false; // cannot find
3424
3425
if (isLoopPreheaderLastBlockInMethod(comp, block))
3426
{
3427
traceMsg(comp, "Bailing CISCTransform2TROTArray due to null TT - might be a preheader in last block of method\n");
3428
return false;
3429
}
3430
3431
TR_CISCNode * inputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
3432
TR_CISCNode * outputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
3433
if (!inputCISCNode || !outputCISCNode) return false;
3434
TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
3435
TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
3436
3437
TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *mapBaseRepNode;
3438
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &mapBaseRepNode);
3439
TR::Node *cmpRepNode = trans->getP2TRep(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;
3440
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
3441
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
3442
if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;
3443
if (dstIndexVarSymRef == indexVarSymRef)
3444
{
3445
dstIndexRepNode = NULL;
3446
dstIndexVarSymRef = NULL;
3447
}
3448
if (dstIndexVarSymRef)
3449
{
3450
if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0) return false;
3451
}
3452
TR_ScratchList<TR::Node> variableList(comp->trMemory());
3453
variableList.add(indexRepNode);
3454
if (dstIndexRepNode) variableList.add(dstIndexRepNode);
3455
if (!isIndexVariableInList(inputNode, &variableList) ||
3456
!isIndexVariableInList(outputNode, &variableList))
3457
{
3458
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);
3459
return false;
3460
}
3461
TR::Block *target = trans->analyzeSuccessorBlock();
3462
3463
// Prepare arraytranslate node
3464
TR::Node * tableNode = createLoad(mapBaseRepNode);
3465
TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
3466
TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(),
3467
createOP2(comp, TR::isub, cmpRepNode->getChild(1)->duplicateTree(),
3468
indexNode));
3469
TR_CISCNode *ifeqCiscNode = trans->getP2TRep(P->getImportantNode(0));
3470
TR::Node * termCharNode;
3471
if (ifeqCiscNode)
3472
termCharNode = createLoad(ifeqCiscNode->getHeadOfTrNode()->getChild(1));
3473
else
3474
termCharNode = TR::Node::create(inputNode, TR::iconst, 0, TERMBYTE);
3475
TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);
3476
3477
3478
TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
3479
translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
3480
translateNode->setAndIncChild(0, inputNode);
3481
translateNode->setAndIncChild(1, outputNode);
3482
translateNode->setAndIncChild(2, tableNode);
3483
translateNode->setAndIncChild(3, termCharNode);
3484
translateNode->setAndIncChild(4, lengthNode);
3485
translateNode->setAndIncChild(5, stoppingNode);
3486
3487
translateNode->setSourceIsByteArrayTranslate(true);
3488
translateNode->setTargetIsByteArrayTranslate(false);
3489
translateNode->setTermCharNodeIsHint(ifeqCiscNode ? false : true);
3490
translateNode->setSourceCellIsTermChar(false);
3491
translateNode->setTableBackedByRawStorage(false);
3492
TR::Node * topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);
3493
TR::Node * lengthTRxx = translateNode;
3494
3495
if (target)
3496
{
3497
// prepare nodes that add the number of elements (which was translated) into the induction variables
3498
3499
/*lengthTRxx = createOP2(comp, TR::isub,
3500
translateNode,
3501
TR::Node::create(translateNode, TR::iconst, 0, -1)); */
3502
}
3503
else
3504
{
3505
// For Multiple Successor Blocks, we have a test character condition in the
3506
// loop, which may lead to a different successor block than the fallthrough.
3507
// We need to be able to distinguish the following two scenarios, which both
3508
// would load the last character in the source array:
3509
// 1. no test character found (translateNode == lengthNode).
3510
// 2. test character found in the last element(translateNode < lengthNode).
3511
// The final IV value is always (IV + translateNode).
3512
// However, under case 1, the element loaded is at index (IV + translateNode - 1).
3513
// Under case 2, the element loaded is at index (IV + translateNode).
3514
// As such, we will subtract 1 in the existing final IV calculation for case 1,
3515
// so that any array accesses will be correctly indexed. The final IV value will
3516
// be increased by 1 again before we hit the exit test.
3517
lengthTRxx = TR::Node::create(TR::isub, 2, translateNode,
3518
TR::Node::create(TR::icmpeq, 2, translateNode,
3519
lengthNode->getOpCodeValue() == TR::i2l ? lengthNode->getChild(0)
3520
: lengthNode));
3521
}
3522
3523
TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), lengthTRxx);
3524
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);
3525
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
3526
3527
TR::TreeTop * dstIndVarUpdateTreeTop = NULL;
3528
if (dstIndexRepNode)
3529
{
3530
dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, createStoreOP2(comp, dstIndexVarSymRef, TR::iadd,
3531
dstIndexVarSymRef, lengthTRxx, dstIndexRepNode));
3532
}
3533
3534
// Insert nodes and maintain the CFG
3535
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());
3536
3537
// Create the fast path code
3538
block = trans->insertBeforeNodes(block);
3539
block->append(TR::TreeTop::create(comp, topOfTranslateNode));
3540
block->append(indVarUpdateTreeTop);
3541
if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);
3542
block = trans->insertAfterNodes(block);
3543
3544
if (target)
3545
{
3546
// A single successor
3547
trans->setSuccessorEdge(block, target);
3548
}
3549
else
3550
{
3551
// Multiple successors
3552
TR::SymbolReference * translateTemp = comp->getSymRefTab()->
3553
createTemporary(comp->getMethodSymbol(), TR::Int32);
3554
TR_ASSERT(ifeqCiscNode, "Expecting equal CISC node.");
3555
TR::Node *ifeqNode = ifeqCiscNode->getHeadOfTrNode()->duplicateTree();
3556
if (ifeqCiscNode->getOpcode() != ifeqNode->getOpCodeValue())
3557
{
3558
TR::Node::recreate(ifeqNode, (TR::ILOpCodes)ifeqCiscNode->getOpcode());
3559
ifeqNode->setBranchDestination(ifeqCiscNode->getDestination());
3560
}
3561
TR::Node *tempStore = TR::Node::createStore(translateTemp, ifeqNode->getAndDecChild(0));
3562
ifeqNode->setAndIncChild(0, TR::Node::createLoad(ifeqNode, translateTemp));
3563
TR::TreeTop *tempStoreTTop = TR::TreeTop::create(comp, tempStore);
3564
TR::TreeTop *ifeqTTop = TR::TreeTop::create(comp, ifeqNode);
3565
// Fix up the IV value by adding 1 if translateNode == lengthNode (where no test char was found). See comment above.
3566
TR::Node *incIndex = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthTRxx->getChild(1), indexRepNode);
3567
TR::TreeTop *incIndexTTop = TR::TreeTop::create(comp, incIndex);
3568
3569
TR::TreeTop *last = block->getLastRealTreeTop();
3570
last->join(tempStoreTTop);
3571
tempStoreTTop->join(incIndexTTop);
3572
if (dstIndVarUpdateTreeTop)
3573
{
3574
TR::Node * incDstIndex = createStoreOP2(comp, dstIndexVarSymRef, TR::isub, dstIndexVarSymRef, -1, dstIndexRepNode);
3575
TR::TreeTop *incDstIndexTTop = TR::TreeTop::create(comp, incDstIndex);
3576
incIndexTTop->join(incDstIndexTTop);
3577
last = incDstIndexTTop;
3578
}
3579
else
3580
{
3581
last = incIndexTTop;
3582
}
3583
last->join(ifeqTTop);
3584
ifeqTTop->join(block->getExit());
3585
trans->setSuccessorEdges(block,
3586
NULL, // rely on automatic detection
3587
ifeqNode->getBranchDestination()->getEnclosingBlock());
3588
}
3589
3590
return true;
3591
}
3592
3593
3594
/****************************************************************************************
3595
Corresponding Java-like pseudocode
3596
int i, j, end, exitValue;
3597
byte byteArray[ ];
3598
char charArray[ ], map[ ];
3599
while(true){
3600
char c = map[byteArray[i]];
3601
if (c == exitValue) break;
3602
charArray[j] = c;
3603
i++;
3604
j++;
3605
if (i >= end) break;
3606
}
3607
3608
3609
Note 1: Idiom allows that variables i and j are identical.
3610
****************************************************************************************/
3611
// Builds the persistent pattern graph for the "TROTArray" idiom: a loop that loads a byte
// from byteArray, uses it to index the map array, optionally exits when the translated
// value equals exitValue, stores the value into charArray and updates the index variables.
//
// c    - compilation whose TR_Memory is used to create the graph and its nodes
// ctrl - CISCUtilCtl_* flags; CISCUtilCtl_64Bit selects the bu2l conversion shape
//
// Returns the new graph with CISCTransform2TROTArray registered as its transformer.
TR_PCISCGraph *
makeTROTArrayGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TROTArray", 0, 16);
   /**************************************************************************    opc          id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *byteArray  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType,  tgt->incNumNodes(), 16,   0,   0,    0);
   tgt->addNode(byteArray); // src array base
   TR_PCISCNode *i          = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType,   tgt->incNumNodes(), 15,   0,   0,    0);
   tgt->addNode(i); // src array index
   TR_PCISCNode *charArray  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType,  tgt->incNumNodes(), 14,   0,   0,    1);
   tgt->addNode(charArray); // dst array base
   TR_PCISCNode *j          = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType,   tgt->incNumNodes(), 13,   0,   0,    1);  tgt->addNode(j); // dst array index
   // The map is used as the base of an array load below, so it is modelled as the third
   // array base (index 2) like byteArray (0) and charArray (1), not as a plain variable.
   TR_PCISCNode *map        = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType,  tgt->incNumNodes(), 12,   0,   0,    2);  tgt->addNode(map); // map array base
   TR_PCISCNode *idx0       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 11,   0,   0,    0);  tgt->addNode(idx0);
   TR_PCISCNode *idx1       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 10,   0,   0,    1);  tgt->addNode(idx1);
   TR_PCISCNode *end        = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9,   0,   0);        tgt->addNode(end); // length
   TR_PCISCNode *exitValue  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8,   0,   0);        tgt->addNode(exitValue);// exitvalue (optional)
   TR_PCISCNode *aHeader    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType,    tgt->incNumNodes(),  7,   0,   0,    0);
   tgt->addNode(aHeader); // array header constant
   TR_PCISCNode *increment  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32,     tgt->incNumNodes(),  6,   0,   0,   -1);  tgt->addNode(increment);
   TR_PCISCNode *c1         = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 1); // element size
   TR_PCISCNode *elemSize   = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 2); // element size
   TR_PCISCNode *offset     = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 3,   0,   0);        tgt->addNode(offset); // optional
   TR_PCISCNode *entry      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType,  tgt->incNumNodes(),  2,   1,   0);        tgt->addNode(entry);
   TR_PCISCNode *byteAddr   = createIdiomArrayLoadInLoop(tgt, ctrl, 1, entry, TR::bloadi, TR::Int8, byteArray, idx0, aHeader, c1);
   TR_PCISCNode *convNode, *mapAddr;
   if (ctrl & CISCUtilCtl_64Bit)
      {
      convNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2l, TR::Int64, tgt->incNumNodes(), 1, 1, 1, byteAddr, byteAddr);  tgt->addNode(convNode);
      mapAddr  = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, convNode, map, convNode, aHeader, elemSize);
      }
   else
      {
      convNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, byteAddr, byteAddr);  tgt->addNode(convNode);
      mapAddr  = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, convNode, map, convNode, aHeader, elemSize);
      }
   TR_PCISCNode *c2iNode    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, mapAddr, mapAddr);  tgt->addNode(c2iNode); // optional
   TR_PCISCNode *exitTest   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, tgt->incNumNodes(), 1, 2, 2, c2iNode, c2iNode, exitValue); // optional
   tgt->addNode(exitTest);
   TR_PCISCNode *charAddr   = createIdiomCharArrayStoreInLoop(tgt, ctrl, 1, exitTest, charArray, idx1, aHeader, elemSize, c2iNode);
   // "increment" is the constant -1, so the "Dec" helper is handed i and -1.
   TR_PCISCNode *iStore     = createIdiomDecVarInLoop(tgt, ctrl, 1, charAddr, i, increment);
   TR_PCISCNode *jStore     = createIdiomIncVarInLoop(tgt, ctrl, 1, iStore, j, i, offset); // optional
   TR_PCISCNode *loopTest   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, jStore, i, end);  tgt->addNode(loopTest);
   TR_PCISCNode *exit       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);  tgt->addNode(exit);

   // Control flow: the exit test branches to the exit node; the loop test either
   // goes back to the first node after entry or leaves the loop.
   exitTest->setSucc(1, exit);
   loopTest->setSuccs(entry->getSucc(0), exit);

   // The separate destination index (j) and its update are optional.
   jStore->getChild(0)->setIsOptionalNode();
   jStore->setIsOptionalNode();
   j->setIsOptionalNode();
   offset->setIsOptionalNode();

   convNode->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();
   charAddr->setIsChildDirectlyConnected(false);

   // The exit-value test and the su2i conversion feeding it are optional as well.
   exitTest->setIsOptionalNode();
   exitValue->setIsOptionalNode();
   c2iNode->setIsOptionalNode();
   c2iNode->getHeadOfParents()->setIsOptionalNode();

   tgt->setSpecialCareNode(0, convNode); // TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exit);
   tgt->setImportantNodes(exitTest, loopTest, byteAddr->getChild(0), charAddr->getChild(0));
   tgt->setNumDagIds(17);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2TROTArray);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_2);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   // The version length can be overridden through the environment; otherwise it is
   // 0 on Power and 8 elsewhere. Both statics are evaluated once, on the first call.
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTOInduction1Graph_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);
   tgt->setVersionLength(versionLength);   // depending on each architecture
   return tgt;
   }
//////////////////////////////////////////////////////////////////////////
3695
//////////////////////////////////////////////////////////////////////////
3696
//////////////////////////////////////////////////////////////////////////
3697
//*****************************************************************************************
3698
// IL code generation for exploiting the TRTx instruction
3699
// This is the case where the compiler will create the function table by analyzing booltable.
3700
// Input: ImportantNode(0) - booltable
3701
// ImportantNode(1) - ificmpge
3702
// ImportantNode(2) - load of the source array
3703
// ImportantNode(3) - store of the destination array
3704
// ImportantNode(4) - another ificmpxx if exists (optional)
3705
//*****************************************************************************************
3706
bool
3707
CISCTransform2CopyingTRTx(TR_CISCTransformer *trans)
3708
{
3709
const bool disptrace = DISPTRACE(trans);
3710
TR::Node *trNode;
3711
TR::TreeTop *trTreeTop;
3712
TR::Block *block;
3713
TR_CISCGraph *P = trans->getP();
3714
List<TR_CISCNode> *P2T = trans->getP2T();
3715
TR::Compilation *comp = trans->comp();
3716
bool isOutputChar = trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isShort() && trans->getP2TRepInLoop(P->getImportantNode(3))->getIlOpCode().isUnsigned();
3717
bool genTRxx = comp->cg()->getSupportsArrayTranslateTRxx();
3718
bool genSIMD = comp->cg()->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDArrayTranslate);
3719
3720
if (isOutputChar && genSIMD && !genTRxx){
3721
traceMsg(comp, "Bailing CISCTransform2CopyingTRTx : c2c - no proper evaluator available\n");
3722
return false;
3723
}
3724
3725
3726
trans->findFirstNode(&trTreeTop, &trNode, &block);
3727
if (!block)
3728
return false; // cannot find
3729
3730
if (isLoopPreheaderLastBlockInMethod(comp, block))
3731
{
3732
traceMsg(comp, "Bailing CISCTransform2CopyingTRTx due to null TT - might be a preheader in last block of method\n");
3733
return false;
3734
}
3735
3736
TR_CISCNode * inputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));
3737
TR_CISCNode * outputCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3)->getChild(0));
3738
TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
3739
TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
3740
3741
TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode;
3742
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode);
3743
if (indexRepNode == 0) indexRepNode = dstIndexRepNode;
3744
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
3745
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
3746
if (indexVarSymRef == dstIndexVarSymRef)
3747
{
3748
dstIndexRepNode = NULL;
3749
dstIndexVarSymRef = NULL;
3750
}
3751
if (trans->countGoodArrayIndex(indexVarSymRef) == 0 &&
3752
(!dstIndexVarSymRef || trans->countGoodArrayIndex(dstIndexVarSymRef) == 0))
3753
{
3754
if (disptrace) traceMsg(comp, "countGoodArrayIndex failed for %p, %p\n",indexRepNode,dstIndexRepNode);
3755
return false;
3756
}
3757
TR_ScratchList<TR::Node> variableList(comp->trMemory());
3758
variableList.add(indexRepNode);
3759
if (dstIndexRepNode) variableList.add(dstIndexRepNode);
3760
if (!isIndexVariableInList(inputNode, &variableList) ||
3761
!isIndexVariableInList(outputNode, &variableList))
3762
{
3763
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction variable updates\n", inputNode, outputNode);
3764
return false;
3765
}
3766
3767
TR::Block *target = trans->analyzeSuccessorBlock();
3768
if (!target) // multiple successors
3769
{
3770
// current restrictions. allow only the case where the number of successors is greater than 3.
3771
if (trans->getNumOfBBlistSucc() > 3)
3772
{
3773
if (disptrace) traceMsg(comp, "trans->getNumOfBBlistSucc() is %d.",trans->getNumOfBBlistSucc());
3774
return false;
3775
}
3776
}
3777
3778
// Check if there is idiom specific node insertion.
3779
// Currently, it is inserted by moveStoreOutOfLoopForward() or reorderTargetNodesInBB()
3780
bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);
3781
3782
// There is an ificmpge node and (multiple successors or need to generate idiom specific node insertion)
3783
bool isNeedGenIcmpge = (!target || isCompensateCode);
3784
3785
TR::Node *tableNode;
3786
uint8_t *tmpTable = (uint8_t*)comp->trMemory()->allocateMemory(65536, stackAlloc);
3787
bool isAllowSourceCellTermChar = false;
3788
3789
int count;
3790
TR::TreeTop *retSameExit = NULL;
3791
if ((count = trans->analyzeCharBoolTable(P->getImportantNode(0), tmpTable, P->getImportantNode(1), &retSameExit)) <= 0)
3792
{
3793
if (disptrace) traceMsg(comp, "trans->analyzeCharBoolTable failed\n");
3794
return false;
3795
}
3796
3797
if (!retSameExit) // all destinations of booltable are not same
3798
{
3799
traceMsg(comp, "Multiple targets for different delimiter checks detected. Abandoning reduction.\n");
3800
return false;
3801
}
3802
3803
// Check to ensure that the delimiter checks 'break' to the target successor blocks if single successor.
3804
if (retSameExit != NULL && !isNeedGenIcmpge && retSameExit->getEnclosingBlock() != target)
3805
{
3806
traceMsg(comp, "Target for delimiter check (Treetop: %p / Block %d: %p) is different than loop exit block_%d: %p. Abandoning reduction.\n",
3807
retSameExit, retSameExit->getEnclosingBlock()->getNumber(), retSameExit->getEnclosingBlock(),
3808
target->getNumber(), target);
3809
return false;
3810
}
3811
3812
// check if the induction variable needs to be updated by 1
3813
// this depends on whether the induction variable is incremented
3814
// before the boolTable exit or after (ie. before the loop driving test)
3815
//
3816
TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;
3817
bool ivNeedsUpdate = false;
3818
bool dstIvNeedsUpdate = false;
3819
if (0 && boolTableExit)
3820
{
3821
TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;
3822
traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());
3823
ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);
3824
if (dstIndexVarSymRef)
3825
dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);
3826
}
3827
3828
// Try to find a terminal byte (but we might not find it in many cases...)
3829
int termchar = -1;
3830
int stopchar = -1;
3831
if (comp->cg()->getSupportsArrayTranslateTRTO255() || comp->cg()->getSupportsArrayTranslateTRTO() )
3832
{
3833
if (isOutputChar)
3834
return false;
3835
3836
for (int i = 256; i < 65536; i++)
3837
if (tmpTable[i] != 1)
3838
return false;
3839
if (comp->cg()->getSupportsArrayTranslateTRTO255())
3840
{
3841
for (int i = 0; i < 256; i++)
3842
if (tmpTable[i] != 0)
3843
return false;
3844
termchar = 0x0ff00ff00;
3845
}
3846
else
3847
{
3848
bool allOnes = true;
3849
bool allZeros = true;
3850
3851
for (int i = 0; i < 128; i++)
3852
if (tmpTable[i] != 0)
3853
return false;
3854
3855
for (int i = 128; i < 256; i++)
3856
{
3857
uint8_t u8 = tmpTable[i];
3858
if (u8 == 0)
3859
allOnes = false;
3860
else if (u8 == 1)
3861
allZeros = false;
3862
else
3863
{
3864
allOnes = false;
3865
allZeros = false;
3866
}
3867
}
3868
3869
if (allZeros && !allOnes) //this is 255 (ISO_8859_1)
3870
termchar = 0x0ff00ff00;
3871
else if (allOnes && !allZeros) //this is 127 (ASCII)
3872
termchar = 0x0ff80ff80;
3873
else
3874
return false;
3875
}
3876
//termchar = TERMBYTE; //It needs to be greater than zero, dummy termination char otherwise, i.e., it's not gonna be used,
3877
tableNode = TR::Node::create(baseRepNode, TR::iconst, 0, 0); //dummy table node, it's not gonna be used
3878
}
3879
else //Z
3880
{
3881
if (!isOutputChar)
3882
{
3883
uint8_t termByteTable[256];
3884
memset(termByteTable, 0, 256);
3885
int i;
3886
for (i = 0; i < 65536; i++)
3887
{
3888
if (tmpTable[i] == 0) {
3889
if ( i >= 256)
3890
return false;
3891
termByteTable[i] = 1;
3892
}
3893
}
3894
3895
3896
bool isSIMDPossible = genSIMD;
3897
if (isSIMDPossible) {
3898
//SIMD possible only if we have consecutive chars, and no ranges
3899
for (int i = 0; i < 256; i++) {
3900
if (tmpTable[i] == 0) {
3901
if (stopchar != (i-1)) {
3902
isSIMDPossible = false;
3903
break;
3904
}
3905
stopchar++;
3906
}
3907
}
3908
3909
//case all non valid chars
3910
if (stopchar == -1 )
3911
isSIMDPossible = false;
3912
}
3913
3914
if (isSIMDPossible) {
3915
tableNode = TR::Node::create(baseRepNode, TR::aconst, 0, 0); //dummy table node, it's not gonna be used
3916
} else if(!genTRxx){
3917
traceMsg(comp, "Bailing CISCTransform2CopyingTRTx : c2b - no proper evaluator available\n");
3918
return false;
3919
} else {
3920
//TRxx
3921
for (i = 256; --i >= 0; )
3922
{
3923
if (termByteTable[i] == 0)
3924
{
3925
termchar = i; // find termchar;
3926
break;
3927
}
3928
}
3929
3930
// Create the function table for TRTO
3931
if (termchar < 0) // no room of termchar
3932
{
3933
isAllowSourceCellTermChar = true; // Generated code will check whether the character is a delimiter.
3934
termchar = TERMBYTE;
3935
if (disptrace)
3936
traceMsg(comp, "setAllowSourceCellIsTermChar: ");
3937
}
3938
if (disptrace)
3939
traceMsg(comp, "termchar is 0x%02x\n", termchar);
3940
3941
3942
uint8_t *table = (uint8_t*)comp->trMemory()->allocateMemory(65536, stackAlloc);
3943
//Only check up to 256 because we already
3944
for (i = 0; i < 65536; i++)
3945
{
3946
uint8_t u8 = tmpTable[i];
3947
//Not sure I understand the reasning behind discarding those: chars larger than 256 which map to byte ... possible
3948
//we have the table to hold all chars. Value needs to represent i & ff
3949
//for now I moved the check up - so bail out earlier.
3950
//Reach here only if chars that need mapping are <256.
3951
//if (!u8 && i >= 256)
3952
// return false;
3953
table[i] = (uint8_t)(u8 ? termchar : i);
3954
}
3955
tableNode = createTableLoad(comp, baseRepNode, 16, 8, table, disptrace);
3956
}
3957
3958
}
3959
else
3960
{
3961
//c2c case - currently no SIMD support
3962
uint16_t *table = (uint16_t*)comp->trMemory()->allocateMemory(65536*2, stackAlloc);
3963
int i;
3964
for (i = 0; i < 65536; i++)
3965
{
3966
uint8_t u8 = tmpTable[i];
3967
if (u8)
3968
{
3969
if (termchar < 0)
3970
termchar = i;
3971
table[i] = termchar;
3972
}
3973
else
3974
{
3975
table[i] = i;
3976
}
3977
}
3978
tableNode = createTableLoad(comp, baseRepNode, 16, 16, table, disptrace);
3979
}
3980
}
3981
3982
3983
3984
// find the target node of icmpge
3985
TR_ScratchList<TR_CISCNode> necessaryCmp(comp->trMemory());
3986
3987
// find icmpge in the candidate region
3988
sortList(P2T + P->getImportantNode(1)->getID(),
3989
&necessaryCmp, trans->getCandidateRegion());
3990
3991
bool isDecrement;
3992
int32_t modLength;
3993
TR::Node * cmpIndexNode;
3994
TR::Node * lenTmpNode;
3995
TR::Node * lengthNode;
3996
3997
TR_CISCNode *icmpgeCISCnode1 = NULL;
3998
TR::Node *lenRepNode1 = NULL;
3999
TR_CISCNode *icmpgeCISCnode2 = NULL;
4000
TR::Node *lenRepNode2 = NULL;
4001
TR::SymbolReference * icmpgeSymRef2 = NULL;
4002
4003
// We cannot handle too many loop exit tests.
4004
if (necessaryCmp.getSize() >= 3)
4005
{
4006
if (disptrace) traceMsg(comp, "Too many (%d) loop exit tests to transform correctly. Transformation only supports up to 2. Abandoning reduction.\n", necessaryCmp.getSize());
4007
return false;
4008
}
4009
4010
icmpgeCISCnode1 = necessaryCmp.getListHead()->getData();
4011
4012
if (!testExitIF(icmpgeCISCnode1->getOpcode(), &isDecrement, &modLength))
4013
{
4014
if (disptrace) traceMsg(comp, "testExitIF for icmpgeCISCnode1 failed\n");
4015
return false;
4016
}
4017
if (isDecrement)
4018
{
4019
if (disptrace) traceMsg(comp, "Not support a decrement loop. (icmpgeCISCnode1)\n");
4020
return false;
4021
}
4022
TR_ASSERT(modLength == 0 || modLength == 1, "error");
4023
4024
// The length calculation requires the initial value of the induction variable
4025
// used in the loop iteration comparison.
4026
TR::Node *cmpChild = icmpgeCISCnode1->getHeadOfTrNode()->getChild(0);
4027
4028
TR::SymbolReference * cmpVarSymRef = NULL;
4029
while (cmpChild && (cmpChild->getOpCode().isAdd() || cmpChild->getOpCode().isSub()))
4030
{
4031
cmpChild = cmpChild->getChild(0);
4032
}
4033
if (cmpChild && cmpChild->getOpCode().isLoadVar())
4034
cmpVarSymRef = cmpChild->getSymbolReference();
4035
if (cmpVarSymRef == NULL)
4036
{
4037
if (disptrace) traceMsg(comp, "Unable to determine the sym ref of induction variable in loop termination node.\n");
4038
return false;
4039
}
4040
4041
lenRepNode1 = createLoad(icmpgeCISCnode1->getChild(1)->getHeadOfTrNode());
4042
if (modLength) lenRepNode1 = createOP2(comp, TR::isub, lenRepNode1, TR::Node::create(baseRepNode, TR::iconst, 0, -modLength));
4043
cmpIndexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, cmpVarSymRef);
4044
lenTmpNode = createOP2(comp, TR::isub, lenRepNode1, cmpIndexNode);
4045
if (necessaryCmp.isDoubleton())
4046
{
4047
icmpgeCISCnode2 = necessaryCmp.getListHead()->getNextElement()->getData();
4048
}
4049
4050
// analyze ImportantNode(4) - another ificmpxx
4051
if (P->getImportantNode(4))
4052
{
4053
if (icmpgeCISCnode2)
4054
{
4055
if (disptrace) traceMsg(comp, "Not support yet more than three if-statements. (1)\n");
4056
return false;
4057
}
4058
icmpgeCISCnode2 = trans->getP2TInLoopIfSingle(P->getImportantNode(4));
4059
if (!icmpgeCISCnode2)
4060
{
4061
if (disptrace) traceMsg(comp, "Not support yet more than three if-statements. (2)\n");
4062
return false;
4063
}
4064
}
4065
4066
if (icmpgeCISCnode2)
4067
{
4068
if (!testExitIF(icmpgeCISCnode2->getOpcode(), &isDecrement, &modLength))
4069
{
4070
if (disptrace) traceMsg(comp, "testExitIF for icmpgeCISCnode2 failed\n");
4071
return false;
4072
}
4073
if (isDecrement)
4074
{
4075
if (disptrace) traceMsg(comp, "Not support a decrement loop. (icmpgeCISCnode2)\n");
4076
return false;
4077
}
4078
TR_ASSERT(modLength == 0 || modLength == 1, "error");
4079
lenRepNode2 = createLoad(icmpgeCISCnode2->getChild(1)->getHeadOfTrNode());
4080
if (modLength) lenRepNode2 = createOP2(comp, TR::isub, lenRepNode2, TR::Node::create(baseRepNode, TR::iconst, 0, -modLength));
4081
4082
TR::Node *icmpgeNode2 = icmpgeCISCnode2->getHeadOfTrNode();
4083
TR_ASSERT(icmpgeNode2->getChild(0)->getOpCode().isLoadVarDirect(), "Please remove this assertion");
4084
if (!icmpgeNode2->getChild(0)->getOpCode().isLoadVarDirect()) return false;
4085
icmpgeSymRef2 = icmpgeNode2->getChild(0)->getSymbolReference();
4086
4087
TR::Node *lenTmpNode2 = createOP2(comp, TR::isub, lenRepNode2, TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, icmpgeSymRef2));
4088
4089
lenTmpNode = createMin(comp, lenTmpNode, lenTmpNode2);
4090
}
4091
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lenTmpNode);
4092
4093
// Modify array header constant if necessary
4094
TR::Node *constLoad;
4095
if (trans->getOffsetOperand1())
4096
{
4097
int32_t offset = trans->getOffsetOperand1() * 2;
4098
constLoad = modifyArrayHeaderConst(comp, inputNode, offset);
4099
TR_ASSERT(constLoad, "Not implemented yet");
4100
if (disptrace) traceMsg(comp,"The array header const of inputNode %p is modified. (offset=%d)\n", inputNode, offset);
4101
}
4102
if (trans->getOffsetOperand2())
4103
{
4104
int32_t offset = trans->getOffsetOperand2() * (isOutputChar ? 2 : 1);
4105
constLoad = modifyArrayHeaderConst(comp, outputNode, offset);
4106
TR_ASSERT(constLoad, "Not implemented yet");
4107
if (disptrace) traceMsg(comp,"The array header const of outputNode %p is modified. (offset=%d)\n", outputNode, offset);
4108
}
4109
4110
// Prepare arraytranslate
4111
TR::Node * termCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, termchar);
4112
TR::Node * stopCharNode = TR::Node::create( baseRepNode, TR::iconst, 0, stopchar);
4113
4114
4115
4116
TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
4117
translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
4118
translateNode->setAndIncChild(0, inputNode);
4119
translateNode->setAndIncChild(1, outputNode);
4120
translateNode->setAndIncChild(2, tableNode);
4121
translateNode->setAndIncChild(3, termCharNode);
4122
translateNode->setAndIncChild(4, lengthNode);
4123
translateNode->setAndIncChild(5, stopCharNode);
4124
4125
translateNode->setSourceIsByteArrayTranslate(false);
4126
translateNode->setTargetIsByteArrayTranslate(!isOutputChar);
4127
translateNode->setTableBackedByRawStorage(true);
4128
if (isAllowSourceCellTermChar)
4129
{
4130
translateNode->setTermCharNodeIsHint(true);
4131
//translateNode->setAllowSourceCellIsTermChar(true); // Generated code will check whether the character is a delimiter.
4132
// determine the use of this flag on the node
4133
translateNode->setSourceCellIsTermChar(true); // Generated code will check whether the character is a delimiter.
4134
}
4135
else
4136
{
4137
translateNode->setTermCharNodeIsHint(false);
4138
translateNode->setSourceCellIsTermChar(false);
4139
}
4140
TR::SymbolReference * translateTemp = comp->getSymRefTab()->
4141
createTemporary(comp->getMethodSymbol(), TR::Int32);
4142
TR::Node * topOfTranslateNode = TR::Node::createStore(translateTemp, translateNode);
4143
4144
// prepare nodes that add the number of elements (which was translated) into the induction variables
4145
TR::Node *addCountNode = createOP2(comp, TR::iadd,
4146
TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef),
4147
translateNode);
4148
if (ivNeedsUpdate)
4149
addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexRepNode, 1));
4150
4151
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);
4152
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
4153
4154
TR::TreeTop * dstIndVarUpdateTreeTop = NULL;
4155
TR::Node *dstIndVarInitializer = NULL;
4156
if (dstIndexRepNode)
4157
{
4158
dstIndVarInitializer = areDefsOnlyInsideLoop(comp, trans, outputCISCNode->getHeadOfTrNodeInfo()->_node);
4159
4160
TR::Node *dstAddCountNode = NULL;
4161
if (dstIndexVarSymRef->getSymbol()->getDataType() == TR::Int32)
4162
{
4163
dstAddCountNode = createOP2(comp, TR::iadd,
4164
TR::Node::createWithSymRef(dstIndexRepNode, TR::iload, 0, dstIndexVarSymRef),
4165
translateNode);
4166
if (dstIvNeedsUpdate)
4167
dstAddCountNode = TR::Node::create(TR::iadd, 2, dstAddCountNode, TR::Node::iconst(dstAddCountNode, 1));
4168
}
4169
else
4170
{
4171
dstAddCountNode = createOP2(comp, TR::ladd,
4172
TR::Node::createWithSymRef(dstIndexRepNode, TR::lload, 0, dstIndexVarSymRef),
4173
TR::Node::create(TR::i2l, 1, translateNode));
4174
if (dstIvNeedsUpdate)
4175
dstAddCountNode = TR::Node::create(TR::ladd, 2, dstAddCountNode, TR::Node::lconst(dstAddCountNode, 1));
4176
}
4177
TR::Node * dstIndVarUpdateNode = TR::Node::createStore(dstIndexVarSymRef, dstAddCountNode);
4178
dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstIndVarUpdateNode);
4179
}
4180
4181
// create Nodes if there are multiple exit points.
4182
TR::Node *icmpgeNode = NULL;
4183
TR::TreeTop *failDest = NULL;
4184
TR::TreeTop *okDest = NULL;
4185
TR::Block *compensateBlock0 = NULL;
4186
TR::Block *compensateBlock1 = NULL;
4187
if (icmpgeCISCnode2)
4188
{
4189
TR_ASSERT(isNeedGenIcmpge, "assumption error?");
4190
TR::Node *icmpgeNode2 = NULL;
4191
TR::TreeTop *failDest2 = NULL;
4192
TR::Block *compensateBlock2 = NULL;
4193
TR::Block *newBlockForIf2 = NULL;
4194
4195
if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical.\n");
4196
4197
icmpgeNode = icmpgeCISCnode1->getHeadOfTrNode();
4198
okDest = retSameExit;
4199
failDest = icmpgeCISCnode1->getDestination();
4200
4201
icmpgeNode2 = icmpgeCISCnode2->getHeadOfTrNode();
4202
failDest2 = icmpgeCISCnode2->getDestination();
4203
newBlockForIf2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4204
// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()
4205
if (isCompensateCode)
4206
{
4207
compensateBlock2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4208
compensateBlock2->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest2)));
4209
failDest2 = compensateBlock2->getEntry();
4210
4211
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4212
compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4213
compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
4214
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));
4215
okDest = compensateBlock0->getEntry();
4216
failDest = compensateBlock1->getEntry();
4217
}
4218
if (disptrace)
4219
{
4220
if (okDest == NULL) traceMsg(comp,"error, okDest == NULL!\n");
4221
if (failDest == NULL) traceMsg(comp,"error, failDest == NULL!\n");
4222
if (failDest2 == NULL) traceMsg(comp,"error, failDest2 == NULL!\n");
4223
}
4224
TR_ASSERT(okDest != NULL && failDest != NULL && failDest2 != NULL, "error!");
4225
4226
// It generates "ificmpge".
4227
icmpgeNode = TR::Node::createif(TR::ificmpge,
4228
cmpIndexNode->duplicateTree(),
4229
lenRepNode1,
4230
failDest);
4231
icmpgeNode2 = TR::Node::createif(TR::ificmpge,
4232
TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, icmpgeSymRef2),
4233
lenRepNode2->duplicateTree(),
4234
failDest2);
4235
4236
// Insert nodes and maintain the CFG
4237
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());
4238
4239
if (P->needsInductionVariableInit())
4240
{
4241
TR::TreeTop *storeTree = TR::TreeTop::create(comp, dstIndexRepNode->duplicateTree());
4242
block->prepend(storeTree);
4243
}
4244
4245
// Create the fast path code
4246
block = trans->insertBeforeNodes(block);
4247
TR::TreeTop *translateTT = TR::TreeTop::create(comp, topOfTranslateNode);
4248
block->append(translateTT);
4249
if (dstIndVarInitializer)
4250
{
4251
translateTT->insertBefore(TR::TreeTop::create(comp, dstIndVarInitializer));
4252
}
4253
block->append(indVarUpdateTreeTop);
4254
if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);
4255
block = trans->insertAfterNodes(block);
4256
4257
block->append(TR::TreeTop::create(comp, icmpgeNode));
4258
newBlockForIf2->append(TR::TreeTop::create(comp, icmpgeNode2));
4259
TR::CFG *cfg = comp->getFlowGraph();
4260
cfg->setStructure(NULL);
4261
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
4262
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
4263
if (isCompensateCode)
4264
{
4265
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]
4266
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]
4267
// Duplicate all insertion nodes in getAfterInsertionIdiomList(1)
4268
ListElement<TR::Node> *le;
4269
for (le = trans->getAfterInsertionIdiomList(1)->getListHead(); le; le = le->getNextElement())
4270
{
4271
le->setData(le->getData()->duplicateTree());
4272
}
4273
compensateBlock2 = trans->insertAfterNodesIdiom(compensateBlock2, 1, true); // ch = base[index-1]
4274
cfg->insertBefore(compensateBlock0, orgNextBlock);
4275
cfg->insertBefore(compensateBlock1, compensateBlock0);
4276
cfg->insertBefore(compensateBlock2, compensateBlock1);
4277
cfg->insertBefore(newBlockForIf2, compensateBlock2);
4278
cfg->join(block, newBlockForIf2);
4279
}
4280
else
4281
{
4282
cfg->insertBefore(newBlockForIf2, orgNextBlock);
4283
cfg->join(block, newBlockForIf2);
4284
}
4285
trans->setSuccessorEdges(block,
4286
newBlockForIf2,
4287
failDest->getEnclosingBlock());
4288
trans->setSuccessorEdges(newBlockForIf2,
4289
okDest->getEnclosingBlock(),
4290
failDest2->getEnclosingBlock());
4291
}
4292
else
4293
{
4294
if (isNeedGenIcmpge)
4295
{
4296
if (disptrace) traceMsg(comp, "Now assuming that all exits of booltable are identical and the exit of icmpge points different.\n");
4297
4298
icmpgeNode = icmpgeCISCnode1->getHeadOfTrNode();
4299
okDest = retSameExit;
4300
failDest = icmpgeCISCnode1->getDestination();
4301
// create two empty blocks for inserting compensation code (base[index] and base[index-1]) prepared by moveStoreOutOfLoopForward()
4302
if (isCompensateCode)
4303
{
4304
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4305
compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
4306
compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
4307
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));
4308
okDest = compensateBlock0->getEntry();
4309
failDest = compensateBlock1->getEntry();
4310
}
4311
TR_ASSERT(okDest != NULL && failDest != NULL && okDest != failDest, "error!");
4312
4313
// It actually generates "ificmplt" (NOT ificmpge!) in order to suppress a redundant goto block.
4314
icmpgeNode = TR::Node::createif(TR::ificmplt,
4315
cmpIndexNode->duplicateTree(), // TR::Node::create(indexRepNode, TR::iload, 0, indexVarSymRef),
4316
lenRepNode1,
4317
okDest);
4318
}
4319
4320
// Insert nodes and maintain the CFG
4321
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lenTmpNode->duplicateTree());
4322
4323
if (P->needsInductionVariableInit())
4324
{
4325
TR::TreeTop *storeTree = TR::TreeTop::create(comp, dstIndexRepNode->duplicateTree());
4326
block->prepend(storeTree);
4327
}
4328
4329
// Create the fast path code
4330
block = trans->insertBeforeNodes(block);
4331
block->append(TR::TreeTop::create(comp, topOfTranslateNode));
4332
block->append(indVarUpdateTreeTop);
4333
if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);
4334
block = trans->insertAfterNodes(block);
4335
4336
if (isNeedGenIcmpge)
4337
{
4338
block->append(TR::TreeTop::create(comp, icmpgeNode));
4339
if (isCompensateCode)
4340
{
4341
TR::CFG *cfg = comp->getFlowGraph();
4342
cfg->setStructure(NULL);
4343
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
4344
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
4345
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true); // ch = base[index]
4346
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true); // ch = base[index-1]
4347
cfg->insertBefore(compensateBlock0, orgNextBlock);
4348
cfg->insertBefore(compensateBlock1, compensateBlock0);
4349
cfg->join(block, compensateBlock1);
4350
}
4351
}
4352
else if (isCompensateCode)
4353
{
4354
block = trans->insertAfterNodesIdiom(block, 0); // ch = base[index]
4355
}
4356
4357
// set successor edge(s) to the original block
4358
if (!isNeedGenIcmpge)
4359
{
4360
trans->setSuccessorEdge(block, target);
4361
}
4362
else
4363
{
4364
trans->setSuccessorEdges(block,
4365
failDest->getEnclosingBlock(),
4366
okDest->getEnclosingBlock());
4367
}
4368
}
4369
4370
return true;
4371
}
4372
4373
bool
4374
CISCTransform2CopyingTRTxAddDest1(TR_CISCTransformer *trans)
4375
{
4376
trans->setOffsetOperand2(1); // add offset of destination with 1
4377
return CISCTransform2CopyingTRTx(trans);
4378
}
4379
4380
/****************************************************************************************
4381
Corresponding Java-like Pseudo Program
4382
int i, j, end;
4383
char charArray[ ];
4384
byte byteArray[ ];
4385
while(true){
4386
char c = charArray[i];
4387
if (booltable(c)) break;
4388
byteArray[j] = (byte)c;
4389
i++;
4390
j++;
4391
if (j >= end) break;
4392
}
4393
4394
Note 1: It allows that variables i and j are identical.
4395
****************************************************************************************/
4396
/**
 * Builds the idiom pattern graph named "CopyingTRTx(<pattern>)".
 *
 * The graph matches a loop that loads an element from a source array, tests it
 * with a TR_booltable node (first loop exit), stores the value through a
 * TR_ibcstore into a destination array, updates the index variable(s) and ends
 * with a TR_ifcmpall test of j against `end` (second loop exit).
 *
 * @param c        compilation whose trMemory() is handed to every graph/node constructor
 * @param ctrl     control flags forwarded unchanged to the createIdiom* helpers
 * @param pattern  0, 1 or 2; selects the shape of the index updates:
 *                   0: i = i + 1 (optional);  j = idx1 + 1
 *                   1: j = idx1 + 1;          i = j + offset (optional)
 *                   2: i = i + 1 (optional);  j = j + 1
 * @return the newly allocated graph, with CISCTransform2CopyingTRTx installed as its transformer
 */
TR_PCISCGraph *
makeCopyingTRTxGraph(TR::Compilation *c, int32_t ctrl, int pattern)
   {
   TR_ASSERT(pattern == 0 || pattern == 1 || pattern == 2, "not implemented");

   // "CopyingTRTx(" + one digit + ")" + NUL needs 15 bytes. snprintf keeps the write
   // inside the allocation even if an unexpected pattern value reaches this point in
   // a build where TR_ASSERT is compiled out.
   const size_t nameSize = 16;
   char *name = (char *)TR_MemoryBase::jitPersistentAlloc(nameSize);
   snprintf(name, nameSize, "CopyingTRTx(%d)", pattern);
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);

   /***************************************************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *charArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(charArray); // src array base
   TR_PCISCNode *i         = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0);
   tgt->addNode(i); // src array index
   TR_PCISCNode *byteArray = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1);
   tgt->addNode(byteArray); // dst array base
   TR_PCISCNode *j         = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1);
   tgt->addNode(j); // dst array index
   TR_PCISCNode *idx0      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(idx0);
   TR_PCISCNode *idx1      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(idx1);
   TR_PCISCNode *end       = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0);
   tgt->addNode(end); // length
   TR_PCISCNode *aHeader0  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0);
   tgt->addNode(aHeader0); // array header
   TR_PCISCNode *aHeader1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1);
   tgt->addNode(aHeader1); // array header
   TR_PCISCNode *increment = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -1);
   tgt->addNode(increment);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0);
   tgt->addNode(mulFactor); // Multiply Factor
   TR_PCISCNode *elemSize  = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 2); // element size
   TR_PCISCNode *offset    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 3, 0, 0, 2);
   tgt->addNode(offset); // optional
   TR_PCISCNode *entry     = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);
   TR_PCISCNode *charAddr  = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entry, charArray, idx0, aHeader0, elemSize);
   TR_PCISCNode *c2iNode   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charAddr, charAddr);
   tgt->addNode(c2iNode);
   TR_PCISCNode *exitTest  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, c2iNode, c2iNode);
   tgt->addNode(exitTest);
   TR_PCISCNode *byteAddr  = createIdiomArrayStoreInLoop(tgt, ctrl, 1, exitTest, TR_ibcstore, TR::NoType, byteArray, idx1, aHeader1, mulFactor, c2iNode);

   // Initialised so that an unsupported pattern can never feed an indeterminate
   // pointer into the loopTest node below.
   TR_PCISCNode *store1 = NULL;
   TR_PCISCNode *store2 = NULL;
   switch (pattern)
      {
      case 0:
         store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment);        // optional (i = i + 1)
         store2 = createIdiomDecVarInLoop(tgt, ctrl, 1, store1, j, idx1, increment);    // j = idx1 + 1
         store1->getChild(0)->setIsOptionalNode();
         store1->setIsOptionalNode();
         break;
      case 1:
         store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, j, idx1, increment);  // j = idx1 + 1
         store2 = createIdiomIncVarInLoop(tgt, ctrl, 1, store1, i, j, offset);          // optional (i = j + offset)
         store2->getChild(0)->setIsOptionalNode();
         store2->setIsOptionalNode();
         break;
      case 2:
         store1 = createIdiomDecVarInLoop(tgt, ctrl, 1, byteAddr, i, increment);        // optional (i = i + 1)
         store2 = createIdiomDecVarInLoop(tgt, ctrl, 1, store1, j, j, increment);       // j = j + 1
         store1->getChild(0)->setIsOptionalNode();
         store1->setIsOptionalNode();
         break;
      default:
         TR_ASSERT(false, "not implemented");
         break;
      }

   TR_PCISCNode *loopTest  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, store2, j, end);
   tgt->addNode(loopTest);
   TR_PCISCNode *exit      = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exit);

   // Both tests leave the loop through the exit node; the loop test's other
   // successor is the first node after entry (the back edge).
   exitTest->setSucc(1, exit);
   loopTest->setSuccs(entry->getSucc(0), exit);

   i->setIsOptionalNode();
   offset->setIsOptionalNode();

   c2iNode->setIsChildDirectlyConnected();
   loopTest->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, exitTest); // TR_booltable
   tgt->setEntryNode(entry);
   tgt->setExitNode(exit);
   tgt->setImportantNodes(exitTest, loopTest, charAddr, byteAddr, NULL);
   tgt->setNumDagIds(16);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, existAccess);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Function-local statics: the environment override and the per-architecture
   // default (0 on Power, 15 otherwise) are evaluated once, on the first call.
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTxGraph_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 15);
   tgt->setVersionLength(versionLength);   // depending on each architecture

   tgt->setPatternType(pattern);

   return tgt;
   }
4482
4483
4484
/****************************************************************************************
4485
Corresponding Java-like Pseudo Program
4486
int i, j, end;
4487
char charArray[ ];
4488
byte byteArray[ ];
4489
while(true){
4490
char c = charArray[i];
4491
if (booltable(c)) break;
4492
if (j > end) break;
4493
byteArray[j] = (byte)c;
4494
i++;
4495
j++;
4496
if (i >= end) break;
4497
}
4498
4499
Note 1: It allows that variables i and j are identical.
4500
****************************************************************************************/
4501
/**
 * Builds the idiom pattern graph "CopyingTRTxThreeIfs".
 *
 * Compared with the two-exit CopyingTRTx graphs, this pattern has three conditional
 * nodes inside the loop: the TR_booltable test, a TR_ifcmpall test of the destination
 * index performed before the store, and a final TR_ifcmpall test of the source index.
 *
 * @param c     compilation whose trMemory() is handed to every graph/node constructor
 * @param ctrl  control flags forwarded unchanged to the createIdiom* helpers
 * @return the newly allocated graph, with CISCTransform2CopyingTRTx installed as its transformer
 */
TR_PCISCGraph *
makeCopyingTRTxThreeIfsGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTxThreeIfs", 0, 16);

   // Node constructor arguments: opc, type, id, dagId, #cfg, #child, other/pred/children.
   // Ids are handed out by incNumNodes(), so the creation order below is significant.

   // ---- leaf nodes (operands) ----
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);
   tgt->addNode(srcBase);        // src array base

   TR_PCISCNode *srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(srcIdx);         // src array index

   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1);
   tgt->addNode(dstBase);        // dst array base

   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstIdx);         // dst array index

   TR_PCISCNode *dstLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0);
   tgt->addNode(dstLimit);       // length compared against the dst index

   TR_PCISCNode *srcLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0);
   tgt->addNode(srcLimit);       // length compared against the src index

   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);
   tgt->addNode(srcHeader);      // array header

   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1);
   tgt->addNode(dstHeader);      // array header

   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1);
   tgt->addNode(minusOne);

   TR_PCISCNode *scale = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0);
   tgt->addNode(scale);          // multiply factor

   TR_PCISCNode *srcElemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2);   // element size

   // ---- loop body ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entryNode);

   TR_PCISCNode *srcLoad = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entryNode, srcBase, srcIdx, srcHeader, srcElemSize);

   TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, srcLoad, srcLoad);
   tgt->addNode(widened);

   TR_PCISCNode *tableTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, widened, widened);
   tgt->addNode(tableTest);

   TR_PCISCNode *dstBoundTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, tableTest, dstIdx, dstLimit);
   tgt->addNode(dstBoundTest);

   TR_PCISCNode *dstStore = createIdiomArrayStoreInLoop(tgt, ctrl, 1, dstBoundTest, TR_ibcstore, TR::NoType, dstBase, dstIdx, dstHeader, scale, widened);
   TR_PCISCNode *srcIdxUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, dstStore, srcIdx, minusOne);               // i = i + 1
   TR_PCISCNode *dstIdxUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, srcIdxUpdate, dstIdx, dstIdx, minusOne);   // j = j + 1

   TR_PCISCNode *srcBoundTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, dstIdxUpdate, srcIdx, srcLimit);
   tgt->addNode(srcBoundTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // ---- control-flow edges: all three tests can leave through the exit node ----
   tableTest->setSucc(1, exitNode);
   dstBoundTest->setSucc(1, exitNode);
   srcBoundTest->setSuccs(entryNode->getSucc(0), exitNode);

   widened->setIsChildDirectlyConnected();
   dstBoundTest->setIsChildDirectlyConnected();
   srcBoundTest->setIsChildDirectlyConnected();

   // ---- graph-level registration ----
   tgt->setSpecialCareNode(0, tableTest);   // the TR_booltable node
   tgt->setEntryNode(entryNode);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(tableTest, srcBoundTest, srcLoad, dstStore, dstBoundTest);
   tgt->setNumDagIds(14);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, existAccess);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(3, 1, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Evaluated once (function-local statics): environment override, else 0 on Power and 15 elsewhere.
   static char *overrideText = feGetEnv("TR_CopyingTRTxThreeIfsGraph_versionLength");
   static int chosenVersionLength = overrideText ? atoi(overrideText) : (c->target().cpu.isPower() ? 0 : 15);
   tgt->setVersionLength(chosenVersionLength);   // depending on each architecture

   return tgt;
   }
4559
4560
4561
/****************************************************************************************
4562
****************************************************************************************/
4563
/**
 * Builds the idiom pattern graph "CopyingTRTOSpecial".
 *
 * The loop body described here loads from the source array, stores the single
 * parent of the source index node into j through a TR::lstore, runs the
 * TR_booltable test, stores through TR_ibcstore into the destination array,
 * updates i and finally compares i against `end`.
 *
 * The graph asserts that ctrl contains CISCUtilCtl_64Bit and is flagged with
 * setNeedsInductionVariableInit(true).
 *
 * @param c     compilation whose trMemory() is handed to every graph/node constructor
 * @param ctrl  control flags forwarded to the createIdiom* helpers (the array store
 *              helper additionally receives CISCUtilCtl_NoI2L)
 * @return the newly allocated graph, with CISCTransform2CopyingTRTx installed as its transformer
 */
TR_PCISCGraph *
makeCopyingTRTOGraphSpecial(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTOSpecial", 0, 16);

   // Node constructor arguments: opc, type, id, dagId, #cfg, #child, other/pred/children.
   // Ids are handed out by incNumNodes(), so the creation order below is significant.

   // ---- leaf nodes (operands) ----
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(srcBase);      // src array base

   TR_PCISCNode *srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(srcIdx);       // src array index

   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstBase);      // dst array base

   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1);
   tgt->addNode(dstIdx);       // dst array index

   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0);
   tgt->addNode(limit);        // length

   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);
   tgt->addNode(srcHeader);    // array header

   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1);
   tgt->addNode(dstHeader);    // array header

   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1);
   tgt->addNode(minusOne);

   TR_PCISCNode *scale = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0);
   tgt->addNode(scale);        // multiply factor

   TR_PCISCNode *srcElemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2);   // element size

   // ---- loop body ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entryNode);

   TR_PCISCNode *srcLoad = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entryNode, srcBase, srcIdx, srcHeader, srcElemSize);

   // After building the load, the source index must have exactly one parent; that
   // parent is reused as the value stored into j below.
   // NOTE(review): presumably this parent is the i2l conversion created by the load
   // helper in 64-bit mode -- confirm against createIdiomCharArrayLoadInLoop().
   TR_ASSERT((ctrl & CISCUtilCtl_64Bit) && srcIdx->getParents()->isSingleton(), "assumption error");
   TR_PCISCNode *srcIdxParent = (TR_PCISCNode *)srcIdx->getHeadOfParents();

   TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, srcLoad, srcLoad);
   tgt->addNode(widened);

   TR_PCISCNode *longStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lstore, TR::Int64, tgt->incNumNodes(), 1, 1, 2, widened, srcIdxParent, dstIdx);
   tgt->addNode(longStore);

   TR_PCISCNode *tableTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, longStore, widened);
   tgt->addNode(tableTest);

   TR_PCISCNode *dstStore = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, tableTest, TR_ibcstore, TR::NoType, dstBase, dstIdx, dstHeader, scale, widened);
   TR_PCISCNode *srcIdxUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, dstStore, srcIdx, minusOne);

   TR_PCISCNode *boundTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, srcIdxUpdate, srcIdx, limit);
   tgt->addNode(boundTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // ---- control-flow edges ----
   tableTest->setSucc(1, exitNode);
   boundTest->setSuccs(entryNode->getSucc(0), exitNode);

   widened->setIsChildDirectlyConnected();
   boundTest->setIsChildDirectlyConnected();

   // ---- graph-level registration ----
   // Note that the su2i node (not the TR_booltable node) is registered as special-care node 0 here.
   tgt->setSpecialCareNode(0, widened);
   tgt->setEntryNode(entryNode);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(tableTest, boundTest, srcLoad, dstStore, NULL);
   tgt->setNumDagIds(13);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 1, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Evaluated once (function-local statics): environment override, else 0 on Power and 15 elsewhere.
   static char *overrideText = feGetEnv("TR_CopyingTRTOGraphSpecial_versionLength");
   static int chosenVersionLength = overrideText ? atoi(overrideText) : (c->target().cpu.isPower() ? 0 : 15);
   tgt->setVersionLength(chosenVersionLength);   // depending on each architecture

   // needs induction variable init
   tgt->setNeedsInductionVariableInit(true);

   return tgt;
   }
4623
4624
/****************************************************************************************
4625
Corresponding Java-like Pseudo Program
4626
int v1, end;
4627
int v3; // optional
4628
int v4; // v4 usually has the value of "v3 - v1".
4629
char v0[ ];
byte v2[ ];
4631
while(true){
4632
char T = (char)v0[v1];
4633
if (booltable(T)) break;
4634
v2[v1+v4] = T;
4635
v3 = (v1+v4)+1; // optional
4636
v1++;
4637
if (v1 >= end) break;
4638
}
4639
****************************************************************************************/
4640
/**
 * Builds the idiom pattern graph named "CopyingTRTOInduction1(<pattern>)".
 *
 * The loop loads a 16-bit element from v0 (TR::sloadi / TR::Int16), converts it,
 * optionally tests it with a TR_booltable node, stores the converted value as an
 * 8-bit element into v2 (TR::bstorei / TR::Int8) and ends with a TR_ifcmpall test
 * of v1 against the length node.
 *
 * @param c        compilation whose trMemory() is handed to every graph/node constructor
 * @param ctrl     control flags forwarded unchanged to the createIdiom* helpers
 * @param pattern  0, 1 or 2; selects the statement order inside the loop:
 *                   0: v2[v1+v4] = T;  v3 = (v1+v4)+1 (optional);  v1++
 *                   1: v2[v3] = T;     v1++;                       v3 = v1+v4
 *                   2: v1++;           v2[v1+v4] = T;              v3 = v1+v4
 *                 Pattern 2 installs CISCTransform2CopyingTRTxAddDest1 as the
 *                 transformer; the others install CISCTransform2CopyingTRTx.
 * @return the newly allocated graph
 */
TR_PCISCGraph *
makeCopyingTRTOInduction1Graph(TR::Compilation *c, int32_t ctrl, int32_t pattern)
   {
   TR_ASSERT(pattern == 0 || pattern == 1 || pattern == 2, "not implemented");

   // "CopyingTRTOInduction1(" + one digit + ")" + NUL needs 25 bytes. snprintf keeps
   // the write inside the allocation even if an unexpected pattern value reaches this
   // point in a build where TR_ASSERT is compiled out.
   const size_t nameSize = 26;
   char *name = (char *)TR_MemoryBase::jitPersistentAlloc(nameSize);
   snprintf(name, nameSize, "CopyingTRTOInduction1(%d)", pattern);
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), name, 0, 16);

   /*********************************************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *v0    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);
   tgt->addNode(v0); // src array base
   TR_PCISCNode *v1    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(v1); // src array index
   TR_PCISCNode *v2    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1);
   tgt->addNode(v2); // dst array base
   TR_PCISCNode *v3    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(v3); // actual dst array index (optional)
   TR_PCISCNode *v4    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 2);
   tgt->addNode(v4); // difference of dst array index from src array index
   TR_PCISCNode *vorc  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0);
   tgt->addNode(vorc); // length
   TR_PCISCNode *cmah0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0);
   tgt->addNode(cmah0); // array header
   TR_PCISCNode *cmah1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1);
   tgt->addNode(cmah1); // array header
   TR_PCISCNode *cm1   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, -1);
   tgt->addNode(cm1);
   TR_PCISCNode *c1    = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 4, 1); // element size
   TR_PCISCNode *c2    = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2); // element size
   TR_PCISCNode *ent   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(ent);
   TR_PCISCNode *n2    = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR::sloadi, TR::Int16, v0, v1, cmah0, c2);
   TR_PCISCNode *n3    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2);
   tgt->addNode(n3);
   TR_PCISCNode *n4    = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, n3, n3);
   tgt->addNode(n4); // optional

   // Initialised so that an unsupported pattern can never feed an indeterminate
   // pointer into the nodes built after the switch.
   TR_PCISCNode *n5 = NULL, *nn0 = NULL, *op1 = NULL, *n6 = NULL;
   switch (pattern)
      {
      case 0:
         {
         // v2[v1+v4] = T;
         // v3 = (v1+v4)+1 (optional)
         // v1++;
         TR_PCISCNode *op0;
         n5  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n4, v1, v4);
         tgt->addNode(n5);
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n5, TR::bstorei, TR::Int8, v2, n5, cmah1, c1, n3);
         op0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nn0, n5, cm1);
         tgt->addNode(op0); // (optional)
         op1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, op0, op0, v3);
         tgt->addNode(op1); // (optional)
         n6  = createIdiomDecVarInLoop(tgt, ctrl, 1, op1, v1, cm1);
         op0->setIsOptionalNode();
         op1->setIsOptionalNode();
         op1->setIsChildDirectlyConnected();
         break;
         }

      case 1:
         {
         // v2[v3] = T;
         // v1++;
         // v3 = v1+v4;
         op1 = NULL;
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n4, TR::bstorei, TR::Int8, v2, v3, cmah1, c1, n3);
         n5  = createIdiomDecVarInLoop(tgt, ctrl, 1, nn0, v1, cm1);
         n6  = createIdiomIncVarInLoop(tgt, ctrl, 1, n5, v3, v1, v4);
         break;
         }

      case 2:
         {
         // v1++;
         // v2[v1+v4] = T;   In this case, we need to add 1 to the destination index, because v1 was incremented.
         // v3 = v1+v4;
         TR_PCISCNode *n45;
         n45 = createIdiomDecVarInLoop(tgt, ctrl, 1, n4, v1, cm1);
         n5  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n45, v1, v4);
         tgt->addNode(n5);
         nn0 = createIdiomArrayStoreInLoop(tgt, ctrl, 1, n5, TR::bstorei, TR::Int8, v2, n5, cmah1, c1, n3);
         op1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nn0, n5, v3);
         tgt->addNode(op1);
         n6  = op1;
         op1->setIsChildDirectlyConnected();
         break;
         }

      default:
         TR_ASSERT(false, "not implemented");
         break;
      }

   TR_PCISCNode *n7 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc);
   tgt->addNode(n7);
   TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(n8);

   // Both tests leave the loop through the exit node; the loop test's other
   // successor is the first node after entry (the back edge).
   n4->setSucc(1, n8);
   n7->setSuccs(ent->getSucc(0), n8);

   n4->setIsOptionalNode();
   v3->setIsOptionalNode();

   n3->setIsChildDirectlyConnected();
   n7->setIsChildDirectlyConnected();

   tgt->setSpecialCareNode(0, n4); // TR_booltable
   tgt->setEntryNode(ent);
   tgt->setExitNode(n8);
   tgt->setImportantNodes(n4, n7, n2, nn0, NULL);
   tgt->setNumDagIds(14);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   tgt->setTransformer(pattern != 2 ? CISCTransform2CopyingTRTx : CISCTransform2CopyingTRTxAddDest1);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Function-local statics: the environment override and the per-architecture
   // default (0 on Power, 8 otherwise) are evaluated once, on the first call.
   static char *versionLengthStr = feGetEnv("TR_CopyingTRTOInduction1Graph_versionLength");
   static int   versionLength = versionLengthStr ? atoi(versionLengthStr) : (c->target().cpu.isPower() ? 0 : 8);
   tgt->setVersionLength(versionLength);   // depending on each architecture
   return tgt;
   }
4735
4736
4737
/****************************************************************************************
4738
Corresponding Java-like Pseudo Program
4739
int v1, v3, end;
4740
char v0[ ];
4741
char v2[ ];
4742
while(true){
4743
if (booltable(v0[v1])) break;
4744
v2[v3] = v0[v1];
4745
v1++;
4746
v3++;
4747
if (v1 >= end) break;
4748
}
4749
4750
Note 1: It allows that variables v1 and v3 are identical.
4751
****************************************************************************************/
4752
/**
 * Builds the idiom pattern graph "CopyingTRTTSpecial".
 *
 * The loop body described here loads from the source array twice through the same
 * index node: once to feed the TR_booltable test and once to feed the store into
 * the destination array (the store helper receives CISCUtilCtl_NoConversion).
 * Both v1 and v3 are then updated, and a TR_ifcmpall node compares v3 against the
 * length node.
 *
 * @param c     compilation whose trMemory() is handed to every graph/node constructor
 * @param ctrl  control flags forwarded to the createIdiom* helpers
 * @return the newly allocated graph, with CISCTransform2CopyingTRTx installed as its transformer
 */
TR_PCISCGraph *
makeCopyingTRTTSpecialGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CopyingTRTTSpecial", 0, 16);

   // Node constructor arguments: opc, type, id, dagId, #cfg, #child, other/pred/children.
   // Ids are handed out by incNumNodes(), so the creation order below is significant.

   // ---- leaf nodes (operands) ----
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(srcBase);     // src array base

   TR_PCISCNode *srcVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(srcVar);      // src array index

   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstBase);     // dst array base

   TR_PCISCNode *dstVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1);
   tgt->addNode(dstVar);      // dst array index

   TR_PCISCNode *srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0);
   tgt->addNode(srcIdx);

   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1);
   tgt->addNode(dstIdx);

   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 6, 0, 0);
   tgt->addNode(limit);       // length

   TR_PCISCNode *header = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0);
   tgt->addNode(header);      // array header

   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1);
   tgt->addNode(minusOne);

   TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 2);   // element size

   // ---- loop body ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entryNode);

   // First load: feeds the table test.
   TR_PCISCNode *testLoad = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, entryNode, srcBase, srcIdx, header, elemSize);

   TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, testLoad, testLoad);
   tgt->addNode(widened);

   TR_PCISCNode *tableTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_booltable, TR::NoType, tgt->incNumNodes(), 1, 2, 1, widened, widened);
   tgt->addNode(tableTest);

   // Second load of the same element: its result is what gets stored.
   TR_PCISCNode *copyLoad = createIdiomCharArrayLoadInLoop(tgt, ctrl, 1, tableTest, srcBase, srcIdx, header, elemSize);
   TR_PCISCNode *copyStore = createIdiomCharArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_NoConversion, 1, copyLoad, dstBase, dstIdx, header, elemSize, copyLoad);

   TR_PCISCNode *srcVarUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, copyStore, srcVar, minusOne);
   TR_PCISCNode *dstVarUpdate = createIdiomDecVarInLoop(tgt, ctrl, 1, srcVarUpdate, dstVar, minusOne);

   TR_PCISCNode *boundTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, dstVarUpdate, dstVar, limit);
   tgt->addNode(boundTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNode);

   // ---- control-flow edges ----
   tableTest->setSucc(1, exitNode);
   boundTest->setSuccs(entryNode->getSucc(0), exitNode);

   widened->setIsChildDirectlyConnected();
   boundTest->setIsChildDirectlyConnected();

   // ---- graph-level registration ----
   tgt->setSpecialCareNode(0, tableTest);   // the TR_booltable node
   tgt->setEntryNode(entryNode);
   tgt->setExitNode(exitNode);
   tgt->setImportantNodes(tableTest, boundTest, testLoad, copyStore, NULL);
   tgt->setNumDagIds(13);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2CopyingTRTx);
   tgt->setInhibitBeforeVersioning();
   tgt->setAspects(isub|mul|sameTypeLoadStore, ILTypeProp::Size_2, ILTypeProp::Size_2);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(2, 2, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);

   // Evaluated once (function-local statics): environment override, else 0 on Power and 20 elsewhere.
   static char *overrideText = feGetEnv("TR_CopyingTRTTSpecialGraph_versionLength");
   static int chosenVersionLength = overrideText ? atoi(overrideText) : (c->target().cpu.isPower() ? 0 : 20);
   tgt->setVersionLength(chosenVersionLength);   // depending on each architecture
   return tgt;
   }
4803
4804
//////////////////////////////////////////////////////////////////////////
4805
//////////////////////////////////////////////////////////////////////////
4806
//////////////////////////////////////////////////////////////////////////
4807
4808
//*****************************************************************************************
4809
// IL code generation for exploiting the TRTO instruction
4810
// This is the case where the function table is prepared by the user program.
4811
// Input: ImportantNode(0) - ificmpeq (booltable)
4812
// ImportantNode(1) - ificmpge
4813
// ImportantNode(2) - address of the source array
4814
// ImportantNode(3) - address of the destination array
4815
// ImportantNode(4) - optional ificmpge for limit checking
4816
//*****************************************************************************************
4817
// Transformer for the TRTO idioms: replaces the matched translate loop with a fast path built
// around a six-child arraytranslate node, guarded by a versioning check.
// Returns true once the fast path has been inserted. Every `return false` below is taken
// before modifyBlockByVersioningCheck() is called.
bool
4818
CISCTransform2TRTOArray(TR_CISCTransformer *trans)
4819
{
4820
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
4821
// NOTE(review): disptrace and P2T are not referenced again in this function.
const bool disptrace = DISPTRACE(trans);
4822
TR::Node *trNode;
4823
TR::TreeTop *trTreeTop;
4824
TR::Block *block;
4825
TR_CISCGraph *P = trans->getP();
4826
List<TR_CISCNode> *P2T = trans->getP2T();
4827
TR::Compilation *comp = trans->comp();
4828
4829
// Non-empty after-insertion idiom lists are not handled: assert, and also bail out explicitly
// (presumably because TR_ASSERT is compiled out of production builds - TODO confirm).
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
4830
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
4831
4832
trans->findFirstNode(&trTreeTop, &trNode, &block);
4833
if (!block) return false; // cannot find
4834
4835
if (isLoopPreheaderLastBlockInMethod(comp, block))
4836
{
4837
traceMsg(comp, "Bailing CISCTransform2TRTOArray due to null TT - might be a preheader in last block of method\n");
4838
return false;
4839
}
4840
4841
// ImportantNode(2)/(3) are the source/destination array addresses; each must map to a single
// node inside the loop. Duplicates of their trees become children 0 and 1 of arraytranslate.
TR_CISCNode * inputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
4842
TR_CISCNode * outputCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
4843
if (!inputCISCNode || !outputCISCNode) return false;
4844
TR::Node * inputNode = inputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
4845
TR::Node * outputNode = outputCISCNode->getHeadOfTrNodeInfo()->_node->duplicateTree();
4846
4847
TR::Node *baseRepNode, *indexRepNode, *dstBaseRepNode, *dstIndexRepNode, *mapBaseRepNode;
4848
getP2TTrRepNodes(trans, &baseRepNode, &indexRepNode, &dstBaseRepNode, &dstIndexRepNode, &mapBaseRepNode);
4849
// cmpRepNode is the loop-driving compare (ImportantNode(1)); its second child is used below as the loop limit.
TR::Node *cmpRepNode = trans->getP2TRep(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;
4850
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
4851
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
4852
if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;
4853
// When source and destination use the same index variable, drop the separate destination
// index so that only one induction-variable update is generated.
if (dstIndexVarSymRef == indexVarSymRef)
4854
{
4855
dstIndexRepNode = NULL;
4856
dstIndexVarSymRef = NULL;
4857
}
4858
if (dstIndexVarSymRef)
4859
{
4860
if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0) return false;
4861
}
4862
// Both duplicated address trees must be indexed by one of the recognised induction variables.
TR_ScratchList<TR::Node> variableList(comp->trMemory());
4863
variableList.add(indexRepNode);
4864
if (dstIndexRepNode) variableList.add(dstIndexRepNode);
4865
if (!isIndexVariableInList(inputNode, &variableList) ||
4866
!isIndexVariableInList(outputNode, &variableList))
4867
{
4868
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inputNode, outputNode);
4869
return false;
4870
}
4871
4872
// check if the induction variable needs to be updated by 1
4873
// this depends on whether the induction variable is incremented
4874
// before the boolTable exit or after (ie. before the loop driving test)
4875
//
4876
TR_CISCNode *boolTableExit = P->getImportantNode(0) ? trans->getP2TRepInLoop(P->getImportantNode(0)) : NULL;
4877
bool ivNeedsUpdate = false;
4878
bool dstIvNeedsUpdate = false;
4879
// The `0 &&` makes this branch unreachable, so ivNeedsUpdate and dstIvNeedsUpdate stay false
// for the rest of the function.
if (0 && boolTableExit)
4880
{
4881
TR::Node *boolTableNode = boolTableExit->getHeadOfTrNodeInfo()->_node;
4882
///traceMsg(comp, "boolTableNode : %p of loop %d\n", boolTableNode, block->getNumber());
4883
ivNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, indexVarSymRef);
4884
if (dstIndexVarSymRef)
4885
dstIvNeedsUpdate = ivIncrementedBeforeBoolTableExit(comp, boolTableNode, block, dstIndexVarSymRef);
4886
}
4887
4888
4889
// target is non-NULL for the single-successor case and NULL for the multiple-successor case
// (see the two `if (target)` tests below).
TR::Block *target = trans->analyzeSuccessorBlock();
4890
4891
// Prepare arraytranslate node
4892
TR::Node * tableNode = createLoad(mapBaseRepNode);
4893
if (tableNode->getOpCode().isLong() && comp->target().is32Bit())
4894
tableNode = TR::Node::create(TR::l2i, 1, tableNode);
4895
TR::Node * indexNode = TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, indexVarSymRef);
4896
// length = (loop limit) - (current index), widened by createI2LIfNecessary when isGenerateI2L() says so
TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(),
4897
createOP2(comp, TR::isub, cmpRepNode->getChild(1)->duplicateTree(), indexNode));
4898
TR::Node * termCharNode = createLoad(trans->getP2TRep(P->getImportantNode(0)->getChild(1))->getHeadOfTrNodeInfo()->_node);
4899
// NOTE(review): 0xffffffff presumably means "no stopping character" - confirm against the arraytranslate contract.
TR::Node * stoppingNode = TR::Node::create( baseRepNode, TR::iconst, 0, 0xffffffff);
4900
4901
// Children: 0 = input address, 1 = output address, 2 = table, 3 = terminating character,
// 4 = length, 5 = stopping value.
TR::Node * translateNode = TR::Node::create(trNode, TR::arraytranslate, 6);
4902
translateNode->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayTranslateSymbol());
4903
translateNode->setAndIncChild(0, inputNode);
4904
translateNode->setAndIncChild(1, outputNode);
4905
translateNode->setAndIncChild(2, tableNode);
4906
translateNode->setAndIncChild(3, termCharNode);
4907
translateNode->setAndIncChild(4, lengthNode);
4908
translateNode->setAndIncChild(5, stoppingNode);
4909
4910
translateNode->setSourceIsByteArrayTranslate(false);
4911
translateNode->setTargetIsByteArrayTranslate(true);
4912
translateNode->setTermCharNodeIsHint(false);
4913
translateNode->setSourceCellIsTermChar(false);
4914
translateNode->setTableBackedByRawStorage(trans->isTableBackedByRawStorage());
4915
TR::Node * topOfTranslateNode = TR::Node::create(TR::treetop, 1, translateNode);
4916
// lengthTRxx is the value added to the induction variable(s); it stays equal to the
// arraytranslate result unless the multiple-successor branch below replaces it.
TR::Node * lengthTRxx = translateNode;
4917
4918
TR_CISCNode *ifeqCiscNode = NULL;
4919
TR::Node *ifeqNode = NULL;
4920
if (target) // single successor block
4921
{
4922
// prepare nodes that add the number of elements (which was translated) into the induction variables
4923
4924
/*lengthTRxx = createOP2(comp, TR::isub,
4925
translateNode,
4926
TR::Node::create(translateNode, TR::iconst, 0, -1)); */
4927
}
4928
else
4929
{ // multiple successor blocks
4930
// A loop may have multiple successor blocks (i.e. break from a test character match)
4931
// First, we need to identify the node that we will try to match. We have one of two
4932
// scenarios:
4933
// 1. b2i node (commoned with the b2i load of the translation table character)
4934
// 2. iload of an auto - the same auto should have a preceding store with the
4935
// translation table character.
4936
// In case 2, we'll try to replace the load with the RHS expression of the corresponding
4937
// store (expect b2i node).
4938
//
4939
// Once we have the b2i node in hand, we attempt to break the commoning of that node between
4940
// the store and test comparison node.
4941
TR_CISCNode *b2iCiscNode = NULL;
4942
TR::Node *b2iNode = NULL;
4943
4944
ifeqCiscNode = trans->getP2TRep(P->getImportantNode(0));
4945
b2iCiscNode = ifeqCiscNode->getChild(0);
4946
TR_CISCNode *store;
4947
ifeqNode = ifeqCiscNode->getHeadOfTrNodeInfo()->_node;
4948
// try to find a tree including the array load
4949
// b2iNode is left NULL in the TR::b2i case, so the first child of ifeqNode is not replaced below.
switch(b2iCiscNode->getOpcode())
4950
{
4951
case TR::b2i:
4952
break;
4953
case TR::iload:
4954
TR_ASSERT(b2iCiscNode->getChains()->isSingleton(), "Not implemented yet");
4955
store = b2iCiscNode->getChains()->getListHead()->getData();
4956
b2iCiscNode = store->getChild(0);
4957
TR_ASSERT(b2iCiscNode->getOpcode() == TR::b2i, "Not implemented yet");
4958
b2iNode = b2iCiscNode->getHeadOfTrNodeInfo()->_node;
4959
break;
4960
case TR_variable:
4961
if (ifeqCiscNode->isEmptyHint()) return false;
4962
b2iCiscNode = ifeqCiscNode->getHintChildren()->getListHead()->getData();
4963
TR_ASSERT(b2iCiscNode->getOpcode() == TR::b2i, "Not implemented yet");
4964
store = b2iCiscNode->getHeadOfParents();
4965
TR_ASSERT(store->getOpcode() == TR::istore, "Not implemented yet");
4966
TR_ASSERT(store->getChild(1) == ifeqCiscNode->getChild(0), "Not implemented yet");
4967
b2iNode = b2iCiscNode->getHeadOfTrNodeInfo()->_node;
4968
break;
4969
default:
4970
TR_ASSERT(0, "Not implemented yet");
4971
break;
4972
}
4973
// Expect b2iCiscNode has the tree.
4974
TR_CISCNode *ixload, *aload, *iload;
4975
if (getThreeNodesForArray(b2iCiscNode, &ixload, &aload, &iload))
4976
{
4977
// Try to replace "iload" with a RHS expression of the single store.
4978
if (iload->getOpcode() == TR::iload &&
4979
iload->getChains()->isSingleton() &&
4980
iload->getParents()->isSingleton())
4981
{ // simple copy propagation
4982
TR_ASSERT(iload->getChains()->isSingleton(), "Not implemented yet");
4983
store = iload->getChains()->getListHead()->getData();
4984
TR::Node *storeTR = store->getHeadOfTrNode();
4985
TR::Node *iloadTR = iload->getHeadOfTrNode();
4986
4987
TR_ASSERT(iload->getParents()->isSingleton(), "Not implemented yet");
4988
TR_CISCNode *iloadParent = iload->getHeadOfParents();
4989
TR::Node *iloadParentTR = iloadParent->getHeadOfTrNodeInfo()->_node;
4990
4991
// Substitute a duplicate of the store's value for whichever child of the parent is the iload.
if (iloadParentTR->getChild(0) == iloadTR)
4992
{
4993
iloadParentTR->setAndIncChild(0, storeTR->getChild(0)->duplicateTree());
4994
}
4995
else if (iloadParentTR->getChild(1) == iloadTR)
4996
{
4997
iloadParentTR->setAndIncChild(1, storeTR->getChild(0)->duplicateTree());
4998
}
4999
else
5000
{
5001
TR_ASSERT(false, "Not implemented yet");
5002
}
5003
}
5004
}
5005
// Give the exit test its own copy of the b2i tree instead of the commoned node.
if (b2iNode)
5006
{
5007
ifeqNode->getAndDecChild(0);
5008
ifeqNode->setAndIncChild(0, b2iNode->duplicateTree());
5009
}
5010
5011
// For Multiple Successor Blocks, we have a test character condition in the
5012
// loop, which may lead to a different successor block than the fallthrough.
5013
// We need to be able to distinguish the following two scenarios, which both
5014
// would load the last character in the source array:
5015
// 1. no test character found (translateNode == lengthNode).
5016
// 2. test character found in the last element(translateNode < lengthNode).
5017
// The final IV value is always (IV + translateNode).
5018
// However, under case 1, the element loaded is at index (IV + translateNode - 1).
5019
// Under case 2, the element loaded is at index (IV + translateNode).
5020
// As such, we will subtract 1 in the existing final IV calculation for case 1,
5021
// so that any array accesses will be correctly indexed. The final IV value will
5022
// be increased by 1 again before we hit the exit test.
5023
// lengthTRxx = translateNode - (translateNode == length); the icmpeq child is reused later
// (as lengthTRxx->getChild(1)) to add the 1 back.
lengthTRxx = TR::Node::create(TR::isub, 2, translateNode,
5024
TR::Node::create(TR::icmpeq, 2, translateNode,
5025
lengthNode->getOpCodeValue() == TR::i2l ? lengthNode->getChild(0)
5026
: lengthNode));
5027
}
5028
5029
// prepare nodes that add the number of elements (which was translated) into the induction variables
5030
TR::Node * addCountNode = createOP2(comp, TR::iadd, indexNode->duplicateTree(), lengthTRxx);
5031
if (ivNeedsUpdate)
5032
addCountNode = TR::Node::create(TR::iadd, 2, addCountNode, TR::Node::iconst(indexNode, 1));
5033
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, addCountNode);
5034
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
5035
5036
TR::TreeTop * dstIndVarUpdateTreeTop = NULL;
5037
if (dstIndexRepNode)
5038
{
5039
TR::Node *dstAddCountNode = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd,
5040
dstIndexVarSymRef, lengthTRxx, dstIndexRepNode);
5041
// NOTE(review): dstIvNeedsUpdate is always false here (see the disabled block above), so this
// statement never runs; it would wrap the node returned by createStoreOP2 in an iadd.
if (dstIvNeedsUpdate)
5042
dstAddCountNode = TR::Node::create(TR::iadd, 2, dstAddCountNode, TR::Node::iconst(dstAddCountNode, 1));
5043
5044
dstIndVarUpdateTreeTop = TR::TreeTop::create(comp, dstAddCountNode);
5045
}
5046
5047
// Insert nodes and maintain the CFG
5048
TR_CISCNode *optionalIficmpge = NULL;
5049
if (P->getImportantNode(4)) optionalIficmpge = trans->getP2TRepInLoop(P->getImportantNode(4));
5050
TR_ScratchList<TR::Node> guardList(comp->trMemory());
5051
// When the optional limit check was matched, add an `ificmple limit, 65535` test to the guards.
if (optionalIficmpge)
5052
{
5053
TR_CISCNode *limitCISCNode = optionalIficmpge->getChild(1);
5054
guardList.add(TR::Node::createif(TR::ificmple, convertStoreToLoad(comp, limitCISCNode->getHeadOfTrNode()),
5055
TR::Node::create(lengthNode, TR::iconst, 0, 65535)));
5056
}
5057
// createTableAlignmentCheck may return NULL, in which case no alignment guard is added.
TR::Node* alignmentCheck = createTableAlignmentCheck(comp, tableNode, false, true, trans->isTableBackedByRawStorage());
5058
if (alignmentCheck)
5059
guardList.add(alignmentCheck);
5060
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);
5061
5062
// Create the fast path code
5063
block = trans->insertBeforeNodes(block);
5064
block->append(TR::TreeTop::create(comp, topOfTranslateNode));
5065
block->append(indVarUpdateTreeTop);
5066
if (dstIndVarUpdateTreeTop) block->append(dstIndVarUpdateTreeTop);
5067
5068
// Insert java/nio/Bits.keepAlive() calls into fastpath, if any.
5069
trans->insertBitsKeepAliveCalls(block);
5070
5071
block = trans->insertAfterNodes(block);
5072
5073
if (target)
5074
{
5075
// A single successor block
5076
trans->setSuccessorEdge(block, target);
5077
}
5078
else
5079
{
5080
// Multiple successor blocks
5081
// Generate the if-statement to jump to the correct destinations.
5082
TR::SymbolReference * translateTemp = comp->getSymRefTab()->
5083
createTemporary(comp->getMethodSymbol(), TR::Int32);
5084
TR::Node *tempStore;
5085
// Re-create the exit test on a duplicate: its first operand is stored to a new temporary,
// and the duplicated test then reads that temporary.
ifeqNode = ifeqNode->duplicateTree();
5086
tempStore = TR::Node::createStore(translateTemp, ifeqNode->getAndDecChild(0));
5087
ifeqNode->setAndIncChild(0, TR::Node::createLoad(ifeqNode, translateTemp));
5088
TR::TreeTop *tempStoreTTop = TR::TreeTop::create(comp, tempStore);
5089
TR::TreeTop *ifeqTTop = TR::TreeTop::create(comp, ifeqNode);
5090
// Fix up the IV value by adding 1 if translateNode == lengthNode (where no test char was found). See comment above.
5091
TR::Node *incIndex = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthTRxx->getChild(1), indexRepNode);
5092
///TR::Node *icmpeqNode = TR::Node::create(TR::icmpeq, 2, TR::Node::createLoad(indexNode, statusCheckTemp), TR::Node::iconst(indexNode, 0));
5093
///TR::Node *incNode = TR::Node::create(TR::iadd, 2, TR::Node::createLoad(indexNode, indexVarSymRef), icmpeqNode);
5094
///TR::Node *incIndex = TR::Node::createStore(indexVarSymRef, incNode);
5095
TR::TreeTop *incIndexTTop = TR::TreeTop::create(comp, incIndex);
5096
5097
// Tree order appended to the block: temp store, index fix-up, optional destination-index
// adjustment, then the re-created exit test as the last tree before the block exit.
TR::TreeTop *last = block->getLastRealTreeTop();
5098
last->join(tempStoreTTop);
5099
tempStoreTTop->join(incIndexTTop);
5100
if (dstIndVarUpdateTreeTop)
5101
{
5102
TR::Node * incDstIndex = createStoreOP2(comp, dstIndexVarSymRef, TR::isub, dstIndexVarSymRef, -1, dstIndexRepNode);
5103
TR::TreeTop *incDstIndexTTop = TR::TreeTop::create(comp, incDstIndex);
5104
incIndexTTop->join(incDstIndexTTop);
5105
last = incDstIndexTTop;
5106
}
5107
else
5108
{
5109
last = incIndexTTop;
5110
}
5111
last->join(ifeqTTop);
5112
ifeqTTop->join(block->getExit());
5113
// If the CISC node's opcode differs from the TR node's, adopt the CISC node's opcode and destination.
if (ifeqCiscNode->getOpcode() != ifeqNode->getOpCodeValue())
5114
{
5115
ifeqNode->setBranchDestination(ifeqCiscNode->getDestination());
5116
TR::Node::recreate(ifeqNode, (TR::ILOpCodes)ifeqCiscNode->getOpcode());
5117
}
5118
TR::Block *okDest = ifeqNode->getBranchDestination()->getEnclosingBlock();
5119
TR::Block *failDest = NULL;
5120
TR::Block *optionalDest = NULL;
5121
if (optionalIficmpge) optionalDest = optionalIficmpge->getDestination()->getEnclosingBlock();
5122
failDest = trans->searchOtherBlockInSuccBlocks(okDest, optionalDest);
5123
// NOTE(review): failDest is only asserted, not checked, before being passed to setSuccessorEdges.
TR_ASSERT(failDest, "error");
5124
trans->setSuccessorEdges(block, failDest, okDest);
5125
}
5126
5127
return true;
5128
}
5129
5130
bool
5131
CISCTransform2TRTOArrayTableRaw(TR_CISCTransformer *trans)
5132
{
5133
trans->setTableBackedByRawStorage();
5134
return CISCTransform2TRTOArray(trans);
5135
}
5136
5137
/****************************************************************************************
5138
Corresponding Java-like pseudocode
5139
5140
int i, j, end, exitValue;
5141
char charArray[ ];
5142
byte byteArray[ ], map[ ];
5143
while(true){
5144
byte b = map[charArray[i]];
5145
if (b == exitValue) break;
5146
byteArray[j] = b;
5147
i++;
5148
j++;
5149
if (i >= end) break;
5150
}
5151
5152
Note 1: Idiom allows variables i and j to be identical.
5153
****************************************************************************************/
5154
// Builds the persistent pattern graph "TRTOArray" for the char-to-byte table-translation
// loop shown in the pseudocode above, and registers CISCTransform2TRTOArray as its transformer.
// Nodes are created in a fixed order because each one takes its id from incNumNodes().
TR_PCISCGraph *
makeTRTOArrayGraph(TR::Compilation *c, int32_t ctrl)
   {
   const bool is64Bit = (ctrl & CISCUtilCtl_64Bit) != 0;
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRTOArray", 0, 16);

   // TR_PCISCNode arguments:                         opc  type  id  dagId  #cfg  #child  other/pred/children

   // ---- operands (variables, array bases and constants) ----
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 16, 0, 0, 0);
   graph->addNode(srcBase);        // src array base
   TR_PCISCNode *srcIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 15, 0, 0, 0);
   graph->addNode(srcIndex);       // src array index
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 14, 0, 0, 1);
   graph->addNode(dstBase);        // dst array base
   TR_PCISCNode *dstIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 1);
   graph->addNode(dstIndex);       // dst array index
   TR_PCISCNode *tableBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 12, 0, 0, 2);
   graph->addNode(tableBase);      // map array base
   TR_PCISCNode *srcArrayIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 11, 0, 0, 0);
   graph->addNode(srcArrayIdx);
   TR_PCISCNode *dstArrayIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 10, 0, 0, 1);
   graph->addNode(dstArrayIdx);
   TR_PCISCNode *loopLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 9, 0, 0);
   graph->addNode(loopLimit);      // length
   TR_PCISCNode *termValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 8, 0, 0);
   graph->addNode(termValue);      // exitvalue
   TR_PCISCNode *arrayHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 7, 0, 0, 0);
   graph->addNode(arrayHeader);    // array header const
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 6, 0, 0, -1);
   graph->addNode(minusOne);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 5, 0, 0);
   graph->addNode(mulFactor);      // Multiply Factor
   TR_PCISCNode *charSize = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 4, 2);   // element size
   TR_PCISCNode *dstOffset = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 3, 0, 0);
   graph->addNode(dstOffset);      // optional

   // ---- loop body, in control-flow order ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);
   TR_PCISCNode *srcLoad = createIdiomCharArrayLoadInLoop(graph, ctrl, 1, entryNode, srcBase, srcArrayIdx, arrayHeader, charSize);
   // The loaded char is widened with su2l on 64-bit control settings and su2i otherwise.
   TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), is64Bit ? TR::su2l : TR::su2i,
                                                            is64Bit ? TR::Int64 : TR::Int32,
                                                            graph->incNumNodes(), 1, 1, 1, srcLoad, srcLoad);
   graph->addNode(widened);
   TR_PCISCNode *tableLoad = createIdiomArrayLoadInLoop(graph, ctrl|CISCUtilCtl_NoI2L, 1, widened, TR::bloadi, TR::Int8, tableBase, widened, arrayHeader, mulFactor);
   TR_PCISCNode *translated = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, tableLoad, tableLoad);
   graph->addNode(translated);
   TR_PCISCNode *termTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, graph->incNumNodes(), 1, 2, 2, translated, translated, termValue);
   graph->addNode(termTest);
   TR_PCISCNode *dstStore = createIdiomArrayStoreInLoop(graph, ctrl, 1, termTest, TR::bstorei, TR::Int8, dstBase, dstArrayIdx, arrayHeader, mulFactor, translated);
   TR_PCISCNode *srcIndexStore = createIdiomDecVarInLoop(graph, ctrl, 1, dstStore, srcIndex, minusOne);
   TR_PCISCNode *dstIndexStore = createIdiomIncVarInLoop(graph, ctrl, 1, srcIndexStore, dstIndex, srcIndex, dstOffset);   // optional
   TR_PCISCNode *backEdgeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, graph->incNumNodes(), 1, 2, 2, dstIndexStore, srcIndex, loopLimit);
   graph->addNode(backEdgeTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // ---- control-flow edges: both tests can leave the loop; the loop test also branches back ----
   termTest->setSucc(1, exitNode);
   backEdgeTest->setSuccs(entryNode->getSucc(0), exitNode);

   // ---- the separate destination index (and everything that updates it) is optional ----
   dstIndexStore->getChild(0)->setIsOptionalNode();
   dstIndexStore->setIsOptionalNode();
   dstIndex->setIsOptionalNode();
   dstOffset->setIsOptionalNode();

   widened->setIsChildDirectlyConnected();
   backEdgeTest->setIsChildDirectlyConnected();

   // ---- graph-level registration ----
   graph->setSpecialCareNode(0, termTest);   // TR_booltable
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(termTest, backEdgeTest, srcLoad->getChild(0), dstStore->getChild(0), NULL);
   graph->setNumDagIds(16);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2TRTOArray);
   graph->setInhibitBeforeVersioning();
   graph->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_1);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(2, 2, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setVersionLength(c->target().cpu.isPower() ? 0 : 11);   // depending on each architecture
   return graph;
   }
5218
5219
5220
/****************************************************************************************
5221
Corresponding Java-like pseudocode
5222
int i, j, end, exitValue, limit;
5223
char charArray[ ];
5224
byte byteArray[ ], *map;
5225
while(true){
5226
int T = charArray[i];
5227
if (T >= limit) break; // optional
5228
byte b = *(map + T); - (1)
5229
if (b == exitValue) break;
5230
byteArray[j] = b;
5231
i++;
5232
j++;
5233
if (i >= end) break;
5234
}
5235
5236
Note 1: Idiom allows variables i and j to be identical.
5237
Note 2: This pattern is found in "sun/io/CharToByteSingleByte.JITintrinsicConvert".
5238
I don't know how we can write (1) in a Java program. From a log file, it seems
5239
that the map table is in java.nio.DirectByteBuffer and is treated as a pointer
5240
of C; the address (1) can be computed without adding the array header size.
5241
****************************************************************************************/
5242
// Builds the persistent pattern graph "TRTOArraySpecial": the char-to-byte translation loop
// whose table is addressed directly (see the pseudocode above), with an optional limit check
// on the loaded character. Registers CISCTransform2TRTOArrayTableRaw as its transformer.
// Nodes are created in a fixed order because each one takes its id from incNumNodes().
TR_PCISCGraph *
makeTRTOArrayGraphSpecial(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "TRTOArraySpecial", 0, 16);

   // TR_PCISCNode arguments:                         opc  type  id  dagId  #cfg  #child  other/pred/children

   // ---- operands (variables, array bases and constants) ----
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 16, 0, 0, 0);
   graph->addNode(srcBase);        // src array base
   TR_PCISCNode *srcIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 15, 0, 0, 0);
   graph->addNode(srcIndex);       // src array index
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 14, 0, 0, 1);
   graph->addNode(dstBase);        // dst array base
   TR_PCISCNode *dstIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 1);
   graph->addNode(dstIndex);       // dst array index
   TR_PCISCNode *tableBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 12, 0, 0, 2);
   graph->addNode(tableBase);      // map array base
   TR_PCISCNode *srcArrayIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 11, 0, 0, 0);
   graph->addNode(srcArrayIdx);
   TR_PCISCNode *dstArrayIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 10, 0, 0, 1);
   graph->addNode(dstArrayIdx);
   TR_PCISCNode *loopLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 9, 0, 0);
   graph->addNode(loopLimit);      // length
   TR_PCISCNode *termValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 8, 0, 0);
   graph->addNode(termValue);      // exitvalue
   TR_PCISCNode *charLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 7, 0, 0);
   graph->addNode(charLimit);      // optional
   TR_PCISCNode *arrayHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 6, 0, 0, 0);
   graph->addNode(arrayHeader);    // array header const
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, -1);
   graph->addNode(minusOne);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 4, 0, 0);
   graph->addNode(mulFactor);      // Multiply Factor
   TR_PCISCNode *charSize = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 3, 2);   // element size

   // ---- loop body, in control-flow order ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);
   TR_PCISCNode *srcLoad = createIdiomCharArrayLoadInLoop(graph, ctrl, 1, entryNode, srcBase, srcArrayIdx, arrayHeader, charSize);
   TR_PCISCNode *charAsInt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, srcLoad, srcLoad);
   graph->addNode(charAsInt);
   TR_PCISCNode *limitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, graph->incNumNodes(), 1, 2, 2, charAsInt, charAsInt, charLimit);
   graph->addNode(limitTest);      // optional
   TR_PCISCNode *tableLoad = createIdiomByteDirectArrayLoadInLoop(graph, ctrl, 1, limitTest, tableBase, charAsInt);
   TR_PCISCNode *translated = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::b2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, tableLoad, tableLoad);
   graph->addNode(translated);
   TR_PCISCNode *termTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpeq, TR::NoType, graph->incNumNodes(), 1, 2, 2, translated, translated, termValue);
   graph->addNode(termTest);
   TR_PCISCNode *dstStore = createIdiomArrayStoreInLoop(graph, ctrl, 1, termTest, TR::bstorei, TR::Int8, dstBase, dstArrayIdx, arrayHeader, mulFactor, translated);
   TR_PCISCNode *srcIndexStore = createIdiomDecVarInLoop(graph, ctrl, 1, dstStore, srcIndex, minusOne);
   TR_PCISCNode *dstIndexStore = createIdiomDecVarInLoop(graph, ctrl, 1, srcIndexStore, dstIndex, minusOne);
   TR_PCISCNode *backEdgeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, graph->incNumNodes(), 1, 2, 2, dstIndexStore, srcIndex, loopLimit);
   graph->addNode(backEdgeTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // ---- control-flow edges: all three tests can leave the loop; the loop test also branches back ----
   termTest->setSucc(1, exitNode);
   limitTest->setSucc(1, exitNode);
   backEdgeTest->setSuccs(entryNode->getSucc(0), exitNode);

   charAsInt->setIsChildDirectlyConnected();
   backEdgeTest->setIsChildDirectlyConnected();

   // ---- the limit check and its operand are optional ----
   charLimit->setIsOptionalNode();
   limitTest->setIsOptionalNode();

   // ---- graph-level registration ----
   graph->setSpecialCareNode(0, termTest);   // TR_booltable
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(termTest, backEdgeTest, srcLoad->getChild(0), dstStore->getChild(0), limitTest);
   graph->setNumDagIds(16);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(TRTSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2TRTOArrayTableRaw);
   graph->setInhibitBeforeVersioning();
   graph->setAspects(isub|mul, ILTypeProp::Size_1|ILTypeProp::Size_2, ILTypeProp::Size_1);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(2, 1, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setVersionLength(c->target().cpu.isPower() ? 0 : 11);   // depending on each architecture
   return graph;
   }
5306
5307
5308
// Result of checkArrayStore(): what must be done about possible aliasing between the
// source and destination arrays before a loop is reduced. Values run consecutively from 0.
enum StatusArrayStore
   {
   NO_NEED_TO_CHECK,                 // 0: no guard is required
   ABANDONING_REDUCTION,             // 1: give up on the reduction
   GENERATE_ARRAY_ALIAS_TEST,        // 2: guard with 'if (src != dst)'
   GENERATE_SUBRANGE_OVERLAP_TEST,   // 3: guard with 'if (src/dest subranges nonoverlapping)'
   };
5315
5316
// Decides which aliasing guard, if any, is needed before reducing a copy loop whose source
// and destination addresses are inputNode and outputNode.
//
//   comp        - current compilation (target CPU, opt level, tracing)
//   inputNode   - source address tree; its first child is taken as the source array base
//   outputNode  - destination address tree; its first child is taken as the destination array base
//   elementSize - not used by this function
//   isForward   - not used by this function
//
// Returns:
//   NO_NEED_TO_CHECK               on Z and ARM64 targets
//   GENERATE_SUBRANGE_OVERLAP_TEST when both bases are the same symbol, the overlap test is not
//                                  disabled, the opt level is at least hot, and the
//                                  transformation is permitted
//   ABANDONING_REDUCTION           when both bases are the same symbol otherwise
//   GENERATE_ARRAY_ALIAS_TEST      in every other case
static StatusArrayStore checkArrayStore(TR::Compilation *comp, TR::Node *inputNode, TR::Node *outputNode, int elementSize, bool isForward)
   {
   // if the src and dest objects are the same, return
   // a) x86 arraycopy helpers don't handle byte copies well
   //    (they use SSE so at least 4 bytes are copied at a time for perf)
   // b) this idiom cannot distinguish between loops of the type
   //       for (i=0; i<N; i++)
   //          a[l+i] = a[l-1]
   //    and a typical arraycopy loop. loops like above cannot be reduced to an arraycopy
   //    (look at the java semantics for arraycopy)
   //
   if (comp->target().cpu.isZ() || comp->target().cpu.isARM64())
      return NO_NEED_TO_CHECK; // On 390, MVC (which performs byte copies) is generated. ARM64 arraycopy can handle byte copies.

   TR::Node *srcBase = inputNode->getFirstChild();
   TR::Node *dstBase = outputNode->getFirstChild();

   // The symbols of the two bases can only be compared when both nodes actually carry a symbol
   // reference, so this test has to come before any getSymbol() call. Without a symbol on both
   // sides nothing is known statically and the arrays must be compared at run time.
   if (!srcBase->getOpCode().hasSymbolReference() ||
       !dstBase->getOpCode().hasSymbolReference())
      {
      dumpOptDetails(comp, "src and dest may be the same, generating guard code 'if (src != dst)'\n");
      return GENERATE_ARRAY_ALIAS_TEST;
      }

   if (srcBase->getSymbol()->getRegisterMappedSymbol() == dstBase->getSymbol()->getRegisterMappedSymbol())
      {
      static bool disableArraycopyOverlapTest = feGetEnv("TR_disableArraycopyOverlapTest") != NULL;
      if (!disableArraycopyOverlapTest &&
          comp->getOptLevel() >= hot &&
          performTransformation(comp, "%sNot abandoning reduction due to src == dest, generating element range overlap test\n", OPT_DETAILS))
         {
         dumpOptDetails(comp, "src and dest are the same, generating guard code 'if (src/dest subranges nonoverlapping)'\n");
         return GENERATE_SUBRANGE_OVERLAP_TEST;
         }
      dumpOptDetails(comp, "src and dest are the same, abandoning reduction\n");
      return ABANDONING_REDUCTION;
      }

   //the only safe thing to do, in general, is to dynamically check the arrays
   return GENERATE_ARRAY_ALIAS_TEST;
   }
5353
5354
namespace
5355
{
5356
// Subrange overlap test: Arraycopy is incorrect for aliased arrays iff
5357
// |d - s| < n,
5358
// where d is the destination offset, s is the source offset,
5359
// and n is the length (all in bytes).
5360
// (Note that it would be safe to relax this condition, and run the loop
5361
// instead of arraycopy more often.)
5362
class SubrangeOverlapTestGenerator
5363
{
5364
public:
5365
SubrangeOverlapTestGenerator(TR::Compilation *comp, TR::Node *arraycopy, TR::Node *byteLength, bool is64Bit, int elementSize);
5366
TR::Node *generate();
5367
5368
private:
5369
TR::ILOpCodes ifxcmplt() { return _is64Bit ? TR::iflcmplt : TR::ificmplt; }
5370
TR::ILOpCodes xabs() { return _is64Bit ? TR::labs : TR::iabs; }
5371
TR::ILOpCodes xconst() { return _is64Bit ? TR::lconst : TR::iconst; }
5372
TR::ILOpCodes xmul() { return _is64Bit ? TR::lmul : TR::imul; }
5373
TR::ILOpCodes xsub() { return _is64Bit ? TR::lsub : TR::isub; }
5374
5375
// probably better to teach simplifier how to do these
5376
void simplifyConstSub();
5377
void simplifyConstMul();
5378
void simplifyI2L();
5379
5380
TR::Node *byteOffset(TR::Node *addr);
5381
void checkTypes();
5382
void checkType(const char *desc, TR::Node *node);
5383
5384
TR::Compilation *_comp;
5385
bool _is64Bit;
5386
TR::Node *_dst;
5387
TR::Node *_src;
5388
TR::Node *_len;
5389
int _elementSize;
5390
};
5391
5392
// Captures the operands of the overlap test from an arraycopy node: child 1 supplies the
// destination, child 0 the source (each passed through byteOffset()), and byteLength the
// length. The operands are then run through the three simplifications, with checkTypes()
// called before the first one and again after each.
SubrangeOverlapTestGenerator::SubrangeOverlapTestGenerator(TR::Compilation *comp, TR::Node *arraycopy, TR::Node *byteLength, bool is64Bit, int elementSize) :
   _comp(comp),
   _is64Bit(is64Bit),
   _dst(byteOffset(arraycopy->getChild(1))),
   _src(byteOffset(arraycopy->getChild(0))),
   _len(byteLength),
   _elementSize(elementSize)
   {
   checkTypes();

   simplifyConstSub();
   checkTypes();

   simplifyConstMul();
   checkTypes();

   simplifyI2L();
   checkTypes();
   }
5413
5414
// Generate the test: if |d - s| < n
5415
TR::Node *SubrangeOverlapTestGenerator::generate()
   {
   // Replace the stored operands with duplicates of their trees; the duplicates are what
   // the new test is built from.
   _dst = _dst->duplicateTree();
   _src = _src->duplicateTree();
   _len = _len->duplicateTree();

   // |d - s|
   TR::Node *absDistance = TR::Node::create(xabs(), 1, TR::Node::create(xsub(), 2, _dst, _src));

   // if (|d - s| < n)
   return TR::Node::createif(ifxcmplt(), absDistance, _len);
   }
5426
5427
// For all k, TFAE:
5428
// 1. |(d - k) - (s - k)| < n
5429
// 2. |d - s| < n
5430
void SubrangeOverlapTestGenerator::simplifyConstSub()
5431
{
5432
static bool disableArraycopyOverlapTestSubSimplification =
5433
feGetEnv("TR_disableArraycopyOverlapTestSubSimplification") != NULL;
5434
if (disableArraycopyOverlapTestSubSimplification)
5435
return;
5436
5437
// Check that both are sub.
5438
if (_dst->getOpCodeValue() != xsub())
5439
return;
5440
if (_src->getOpCodeValue() != xsub())
5441
return;
5442
5443
// Check that both subtrahends are constant.
5444
TR::Node *dstSubtrahend = _dst->getChild(1);
5445
TR::Node *srcSubtrahend = _src->getChild(1);
5446
if (dstSubtrahend->getOpCodeValue() != xconst())
5447
return;
5448
if (srcSubtrahend->getOpCodeValue() != xconst())
5449
return;
5450
5451
// Check that the constants are equal.
5452
if (dstSubtrahend->getConstValue() != srcSubtrahend->getConstValue())
5453
return;
5454
5455
// Ask permission to transform.
5456
if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through sub const\n", OPT_DETAILS))
5457
return;
5458
5459
// Transform.
5460
_dst = _dst->getChild(0);
5461
_src = _src->getChild(0);
5462
}
5463
5464
// For all k > 0,
5465
// if dk, sk, nk don't overflow,
5466
// and if dk, sk >= 0, then TFAE:
5467
// 1. |dk - sk| < nk
5468
// 2. |d - s| < n.
5469
void SubrangeOverlapTestGenerator::simplifyConstMul()
5470
{
5471
static bool disableArraycopyOverlapTestMulSimplification =
5472
feGetEnv("TR_disableArraycopyOverlapTestMulSimplification") != NULL;
5473
if (disableArraycopyOverlapTestMulSimplification)
5474
return;
5475
5476
// Check that all three are mul.
5477
if (_dst->getOpCodeValue() != xmul())
5478
return;
5479
if (_src->getOpCodeValue() != xmul())
5480
return;
5481
if (_len->getOpCodeValue() != xmul())
5482
return;
5483
5484
// Check that all three multiplicands are constant.
5485
TR::Node *dstMultiplicand = _dst->getChild(1);
5486
TR::Node *srcMultiplicand = _src->getChild(1);
5487
TR::Node *lenMultiplicand = _len->getChild(1);
5488
if (dstMultiplicand->getOpCodeValue() != xconst())
5489
return;
5490
if (srcMultiplicand->getOpCodeValue() != xconst())
5491
return;
5492
if (lenMultiplicand->getOpCodeValue() != xconst())
5493
return;
5494
5495
// Check that all constants are equal, and positive.
5496
int64_t k = dstMultiplicand->getConstValue();
5497
if (k <= 0)
5498
return;
5499
if (srcMultiplicand->getConstValue() != k)
5500
return;
5501
if (lenMultiplicand->getConstValue() != k)
5502
return;
5503
5504
// Check that the multiplications don't overflow.
5505
if (!_dst->cannotOverflow() || !_src->cannotOverflow())
5506
return;
5507
// NB. when _elementSize > 1, _len is expected to start as a
5508
// newly-created multiply by _elementSize. It won't yet be marked
5509
// as cannotOverflow, but overflow would mean serious trouble.
5510
if (!_len->cannotOverflow() && !(k == _elementSize && k > 1))
5511
return;
5512
5513
// Check that src, dst >= 0.
5514
if (!_dst->isNonNegative() || !_src->isNonNegative())
5515
return;
5516
5517
// Ask permission to transform.
5518
if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through mul const\n", OPT_DETAILS))
5519
return;
5520
5521
// Transform.
5522
_dst = _dst->getChild(0);
5523
_src = _src->getChild(0);
5524
_len = _len->getChild(0);
5525
_elementSize = 1;
5526
}
5527
5528
// When d, s >= 0, TFAE:
5529
// 1. |i2l(d) - i2l(s)| < i2l(n)
5530
// 2. |d - s| < n
5531
void SubrangeOverlapTestGenerator::simplifyI2L()
5532
{
5533
static bool disableArraycopyOverlapTestI2LSimplification =
5534
feGetEnv("TR_disableArraycopyOverlapTestI2LSimplification") != NULL;
5535
if (disableArraycopyOverlapTestI2LSimplification)
5536
return;
5537
5538
// Check that we are operating on 64-bit numbers,
5539
// and that all three are i2l.
5540
if (!_is64Bit)
5541
return;
5542
if (_dst->getOpCodeValue() != TR::i2l)
5543
return;
5544
if (_src->getOpCodeValue() != TR::i2l)
5545
return;
5546
if (_len->getOpCodeValue() != TR::i2l)
5547
return;
5548
5549
// Check that dst, src >= 0.
5550
// Note that x and i2l(x) have identical signs,
5551
// so it's good enough if the child is >= 0.
5552
if (!_dst->isNonNegative() && !_dst->getFirstChild()->isNonNegative())
5553
return;
5554
if (!_src->isNonNegative() && !_src->getFirstChild()->isNonNegative())
5555
return;
5556
5557
// Ask permission to transform.
5558
if (!performTransformation(_comp, "%sSimplifying arraycopy element range overlap test by looking through i2l\n", OPT_DETAILS))
5559
return;
5560
5561
// Transform
5562
_dst = _dst->getFirstChild();
5563
_src = _src->getFirstChild();
5564
_len = _len->getFirstChild();
5565
_is64Bit = false;
5566
}
5567
5568
// Get the byte offset from an array element address calculation.
5569
TR::Node *SubrangeOverlapTestGenerator::byteOffset(TR::Node *addr)
5570
{
5571
TR::ILOpCodes op = addr->getOpCodeValue();
5572
TR_ASSERT(op == TR::aladd || op == TR::aiadd,
5573
"unexpected arraycopy child opcode %s",
5574
addr->getOpCode().getName());
5575
return addr->getChild(1);
5576
}
5577
5578
// Assert that all nodes involved in the test have the expected data type.
5579
void SubrangeOverlapTestGenerator::checkTypes()
5580
{
5581
checkType("destination index", _dst);
5582
checkType("source index", _src);
5583
checkType("length", _len);
5584
}
5585
5586
// Assert that a single node has the expected data type.
5587
void SubrangeOverlapTestGenerator::checkType(const char *desc, TR::Node *node)
5588
{
5589
TR::DataType expectedType = _is64Bit ? TR::Int64 : TR::Int32;
5590
TR::DataType actualType = node->getDataType();
5591
TR_ASSERT(
5592
actualType == expectedType,
5593
"expected %s node to have type %s, but found %s (%d)",
5594
desc,
5595
TR::DataType::getName(expectedType),
5596
TR::DataType::getName(actualType),
5597
(int)actualType);
5598
}
5599
}
5600
5601
//////////////////////////////////////////////////////////////////////////
5602
//////////////////////////////////////////////////////////////////////////
5603
//////////////////////////////////////////////////////////////////////////
5604
//*****************************************************************************************
5605
// IL code generation for copying memory
5606
// Input: ImportantNode(0) - array load
5607
// ImportantNode(1) - array store
5608
// ImportantNode(2) - the size of elements (NULL for the byte array)
5609
// ImportantNode(3) - exit if node
5610
// ImportantNode(4) - optional iistore
5611
//*****************************************************************************************
5612
static bool
5613
CISCTransform2ArrayCopySub(TR_CISCTransformer *trans, TR::Node *indexRepNode, TR::Node *dstIndexRepNode,
5614
TR::Node *exitVarRepNode, TR::Node *variableORconstRepNode)
5615
{
5616
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
5617
TR::Node *trNode;
5618
TR::TreeTop *trTreeTop;
5619
TR::Block *block;
5620
TR_CISCGraph *P = trans->getP();
5621
List<TR_CISCNode> *P2T = trans->getP2T();
5622
TR::Compilation *comp = trans->comp();
5623
bool isDecrement = trans->isMEMCPYDec();
5624
const bool disptrace = DISPTRACE(trans);
5625
5626
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
5627
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
5628
5629
trans->findFirstNode(&trTreeTop, &trNode, &block);
5630
if (!block) return false; // cannot find
5631
5632
if (isLoopPreheaderLastBlockInMethod(comp, block))
5633
{
5634
traceMsg(comp, "Bailing CISCTransform2ARrayCopySub due to null TT - might be a preheader in last block of method\n");
5635
return false;
5636
}
5637
5638
TR::Block *target = trans->analyzeSuccessorBlock();
5639
// Currently, it allows only a single successor.
5640
if (!target) return false;
5641
5642
TR_CISCNode * inLoadCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));
5643
TR_CISCNode * inStoreCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));
5644
if (!inLoadCISCNode || !inStoreCISCNode)
5645
{
5646
if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy failed. inLoadCISCNode %x inStoreCISCNode %x\n",inLoadCISCNode,inStoreCISCNode);
5647
return false;
5648
}
5649
5650
// The transformation can support one exit if-stmt
5651
TR_CISCGraph *T = trans->getT();
5652
if (T && T->getAspects()->getIfCount() > 1)
5653
{
5654
traceMsg(comp,"CISCTransform2ArrayCopySub detected %d if-stmts in loop (> 1). Not transforming.\n", T->getAspects()->getIfCount());
5655
return false;
5656
}
5657
5658
TR::Node * inLoadNode = inLoadCISCNode->getHeadOfTrNodeInfo()->_node;
5659
TR::Node * inStoreNode = inStoreCISCNode->getHeadOfTrNodeInfo()->_node;
5660
TR::Node * mulFactorNode;
5661
int elementSize;
5662
5663
TR::Node * inputNode = inLoadNode->getChild(0)->duplicateTree();
5664
TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();
5665
5666
// Get the size of elements
5667
if (!getMultiplier(trans, P->getImportantNode(2), &mulFactorNode, &elementSize, inLoadNode->getType()))
5668
{
5669
if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy getMultiplier failed.\n");
5670
return false;
5671
}
5672
if (elementSize != inLoadNode->getSize() || elementSize != inStoreNode->getSize())
5673
{
5674
traceMsg(comp, "CISCTransform2ArrayCopy failed - Size Mismatch. Element Size: %d InLoadSize: %d inStoreSize: %d\n", elementSize, inLoadNode->getSize(), inStoreNode->getSize());
5675
return false; // Size is mismatch!
5676
}
5677
5678
// if the src and dest objects are the same, return
5679
//
5680
StatusArrayStore statusArrayStore;
5681
if ((statusArrayStore = checkArrayStore(comp, inputNode, outputNode, elementSize, !isDecrement)) == ABANDONING_REDUCTION)
5682
return false;
5683
5684
if (indexContainsArrayAccess(comp, inLoadNode->getFirstChild()) ||
5685
indexContainsArrayAccess(comp, inStoreNode->getFirstChild()))
5686
{
5687
traceMsg(comp, "inputNode %p or outputnode %p contains another arrayaccess, no reduction\n", inLoadNode, inStoreNode);
5688
return false;
5689
}
5690
5691
TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));
5692
int modStartIdx = 0;
5693
int modLength = 0;
5694
bool isDecrementRet;
5695
if (!testExitIF(cmpIfAllCISCNode->getOpcode(), &isDecrementRet, &modLength, &modStartIdx))
5696
{
5697
if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy testExitIF failed.\n");
5698
return false;
5699
}
5700
if (isDecrement != isDecrementRet) return false;
5701
5702
5703
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
5704
if (!trans->analyzeArrayIndex(indexVarSymRef))
5705
{
5706
if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",indexRepNode);
5707
return false;
5708
}
5709
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode ? dstIndexRepNode->getSymbolReference() : NULL;
5710
if (indexVarSymRef == dstIndexVarSymRef) dstIndexVarSymRef = NULL;
5711
indexRepNode = convertStoreToLoad(comp, indexRepNode);
5712
if (!trans->searchNodeInTrees(inputNode, indexRepNode))
5713
{
5714
if (DISPTRACE(trans)) traceMsg(comp, "searchNodeInTrees for inputNode failed.\n");
5715
return false;
5716
}
5717
if (!trans->searchNodeInTrees(outputNode, dstIndexVarSymRef ? convertStoreToLoad(comp, dstIndexRepNode) : indexRepNode))
5718
{
5719
if (DISPTRACE(trans)) traceMsg(comp, "searchNodeInTrees for outputNode failed.\n");
5720
return false;
5721
}
5722
TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();
5723
if (indexVarSymRef != exitVarSymRef && dstIndexVarSymRef != exitVarSymRef)
5724
{
5725
if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy cannot find exitVarSymRef correctly %x.\n", exitVarRepNode);
5726
return false;
5727
}
5728
5729
TR::Node *optionalIistore = NULL;
5730
if (P->getImportantNode(4))
5731
{
5732
TR_CISCNode *optionalCISCIistore = trans->getP2TInLoopIfSingle(P->getImportantNode(4));
5733
if (!optionalCISCIistore)
5734
return false;
5735
optionalIistore = optionalCISCIistore->getHeadOfTrNode()->duplicateTree();
5736
}
5737
5738
TR::Node * exitVarNode = createLoad(exitVarRepNode);
5739
variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);
5740
5741
5742
int32_t postIncrement = checkForPostIncrement(comp, block, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, exitVarSymRef->getSymbol());
5743
5744
if (disptrace)
5745
traceMsg(comp, "detected postIncrement %d modLength %d modStartIdx %d\n", postIncrement, modLength, modStartIdx);
5746
5747
TR::Node * lengthNode;
5748
if (isDecrement)
5749
{
5750
TR_ASSERT(dstIndexVarSymRef == NULL, "not implemented yet");
5751
TR_CISCNode *ixloadORstore, *aload, *iload;
5752
if (postIncrement &&
5753
(modStartIdx > 0))
5754
modStartIdx = 0;
5755
TR::Node *startIdx = modStartIdx ? createOP2(comp, TR::isub, variableORconstRepNode,
5756
TR::Node::create(trNode, TR::iconst, 0, -modStartIdx)) :
5757
variableORconstRepNode;
5758
if (!getThreeNodesForArray(inLoadCISCNode, &ixloadORstore, &aload, &iload)) return false;
5759
if ((inputNode = replaceIndexInAddressTree(comp, ixloadORstore->getChild(0)->getHeadOfTrNodeInfo()->_node->duplicateTree(),
5760
indexVarSymRef, startIdx)) == NULL) return false;
5761
if (!getThreeNodesForArray(inStoreCISCNode, &ixloadORstore, &aload, &iload)) return false;
5762
if ((outputNode = replaceIndexInAddressTree(comp, ixloadORstore->getChild(0)->getHeadOfTrNodeInfo()->_node->duplicateTree(),
5763
dstIndexVarSymRef ? dstIndexVarSymRef : indexVarSymRef, startIdx)) == NULL) return false;
5764
lengthNode = createOP2(comp, TR::isub, exitVarNode, variableORconstRepNode);
5765
}
5766
else
5767
{
5768
TR_ASSERT(modStartIdx == 0, "error");
5769
inputNode = inputNode->duplicateTree();
5770
outputNode = outputNode->duplicateTree();
5771
lengthNode = createOP2(comp, TR::isub, variableORconstRepNode, exitVarNode);
5772
}
5773
5774
if (postIncrement != 0)
5775
lengthNode = createOP2(comp, TR::iadd, lengthNode, TR::Node::create(lengthNode, TR::iconst, 0, postIncrement));
5776
5777
if (modLength) lengthNode = createOP2(comp, TR::isub, lengthNode, TR::Node::create(trNode, TR::iconst, 0, -modLength));
5778
TR::Node * diff = lengthNode;
5779
5780
lengthNode = createBytesFromElement(comp, trans->isGenerateI2L(), lengthNode, elementSize);
5781
5782
// Prepare the arraycopy node.
5783
bool needWriteBarrier = false;
5784
if (inStoreNode->getOpCodeValue() == TR::awrtbari)
5785
{
5786
switch (TR::Compiler->om.writeBarrierType())
5787
{
5788
case gc_modron_wrtbar_oldcheck:
5789
case gc_modron_wrtbar_cardmark:
5790
case gc_modron_wrtbar_cardmark_and_oldcheck:
5791
case gc_modron_wrtbar_cardmark_incremental:
5792
needWriteBarrier = true;
5793
break;
5794
default:
5795
break;
5796
}
5797
}
5798
5799
if (!comp->cg()->getSupportsReferenceArrayCopy() && needWriteBarrier)
5800
{
5801
if (DISPTRACE(trans)) traceMsg(comp, "CISCTransform2ArrayCopy: needWriteBarrier but not getSupportsReferenceArrayCopy().\n");
5802
return false;
5803
}
5804
5805
TR::Node * arraycopy;
5806
if (!needWriteBarrier)
5807
{
5808
arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);
5809
}
5810
else
5811
{
5812
arraycopy = TR::Node::createArraycopy(inputNode->getFirstChild(), outputNode->getFirstChild(), inputNode, outputNode, lengthNode);
5813
}
5814
arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
5815
if (isDecrement)
5816
{
5817
arraycopy->setBackwardArrayCopy(true); /* bit available only to primitive arraycopy */
5818
}
5819
else
5820
{
5821
arraycopy->setForwardArrayCopy(true);
5822
}
5823
arraycopy->setArrayCopyElementType(inStoreNode->getDataType());
5824
5825
switch(elementSize)
5826
{
5827
case 2:
5828
arraycopy->setHalfWordElementArrayCopy(true);
5829
break;
5830
5831
case 4:
5832
case 8:
5833
arraycopy->setWordElementArrayCopy(true);
5834
break;
5835
}
5836
5837
TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);
5838
5839
// Insert nodes and maintain the CFG
5840
if (statusArrayStore == GENERATE_ARRAY_ALIAS_TEST)
5841
{
5842
// devinmp: Should this also check the index ranges?
5843
List<TR::Node> guardList(comp->trMemory());
5844
guardList.add(TR::Node::createif(TR::ifacmpeq, inputNode->getFirstChild()->duplicateTree(),
5845
outputNode->getFirstChild()->duplicateTree()));
5846
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);
5847
}
5848
else if (statusArrayStore == GENERATE_SUBRANGE_OVERLAP_TEST)
5849
{
5850
// We know that the arrays alias, so only test the index ranges.
5851
bool is64Bit = trans->isGenerateI2L();
5852
SubrangeOverlapTestGenerator overlapTestGen(comp, arraycopy, lengthNode, is64Bit, elementSize);
5853
List<TR::Node> guardList(comp->trMemory());
5854
guardList.add(overlapTestGen.generate());
5855
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);
5856
}
5857
else
5858
{
5859
TR_ASSERT(statusArrayStore == NO_NEED_TO_CHECK, "unexpected statusArrayStore value %d", (int)statusArrayStore);
5860
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());
5861
}
5862
block = trans->insertBeforeNodes(block);
5863
block->append(TR::TreeTop::create(comp, topArraycopy));
5864
TR::Node * finalValue = variableORconstRepNode;
5865
if (modLength || (postIncrement != 0))
5866
{
5867
int32_t incr = 0;
5868
if (modLength)
5869
incr = modLength;
5870
else if (postIncrement != 0)
5871
incr = postIncrement;
5872
5873
finalValue = createOP2(comp, TR::isub, finalValue,
5874
TR::Node::create(trNode, TR::iconst, 0, isDecrement ? incr : -incr));
5875
}
5876
TR::TreeTop * exitVarUpdateTreeTop = TR::TreeTop::create(comp,
5877
TR::Node::createStore(exitVarSymRef, finalValue));
5878
5879
block->append(exitVarUpdateTreeTop);
5880
TR_ASSERT(indexVarSymRef == exitVarSymRef || dstIndexVarSymRef == exitVarSymRef, "error!");
5881
TR::Node * theOtherVarUpdateNode = NULL;
5882
if (dstIndexVarSymRef != NULL)
5883
{
5884
// If there are two induction variables, we need to maintain the other one.
5885
TR::SymbolReference * theOtherSymRef = (indexVarSymRef == exitVarSymRef ? dstIndexVarSymRef : indexVarSymRef);
5886
TR::Node * result = createOP2(comp, isDecrement ? TR::isub : TR::iadd,
5887
TR::Node::createLoad(trNode, theOtherSymRef),
5888
diff);
5889
theOtherVarUpdateNode = TR::Node::createStore(theOtherSymRef, result);
5890
TR::TreeTop * theOtherVarUpdateTreeTop = TR::TreeTop::create(comp, theOtherVarUpdateNode);
5891
block->append(theOtherVarUpdateTreeTop);
5892
}
5893
5894
if (optionalIistore)
5895
{
5896
TR_ASSERT(theOtherVarUpdateNode != NULL, "error!");
5897
optionalIistore->setAndIncChild(1, theOtherVarUpdateNode->getChild(0));
5898
block->append(TR::TreeTop::create(comp, optionalIistore));
5899
}
5900
5901
trans->insertAfterNodes(block);
5902
5903
trans->setSuccessorEdge(block, target);
5904
return true;
5905
}
5906
5907
bool
5908
CISCTransform2ArrayCopy(TR_CISCTransformer *trans)
5909
{
5910
TR::Node *indexRepNode, *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode;
5911
getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode);
5912
return CISCTransform2ArrayCopySub(trans, indexRepNode, dstIndexRepNode, exitVarRepNode, variableORconstRepNode);
5913
}
5914
5915
bool
5916
CISCTransform2ArrayCopySpecial(TR_CISCTransformer *trans)
5917
{
5918
TR::Node *indexRepNode, *dstIndexRepNode, *variableORconstRepNode;
5919
getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &variableORconstRepNode);
5920
return CISCTransform2ArrayCopySub(trans, indexRepNode, dstIndexRepNode, indexRepNode, variableORconstRepNode);
5921
}
5922
5923
bool
5924
CISCTransform2ArrayCopyDec(TR_CISCTransformer *trans)
5925
{
5926
trans->setMEMCPYDec();
5927
return CISCTransform2ArrayCopy(trans);
5928
}
5929
5930
5931
/****************************************************************************************
5932
Corresponding Java-like Pseudo Program
5933
int v1, v3, end;
5934
byte v0[ ], v2[ ];
5935
while(true){
5936
v2[v3] = v0[v1];
5937
v1++;
5938
v3++;
5939
if (v1 >= end) break;
5940
}
5941
****************************************************************************************/
5942
// Build the "MemCpySpecial" pattern graph.
//
// Nodes are numbered through graph->incNumNodes() in creation order, so the
// order of the statements below must not be changed.
// Constructor arguments after the node id are: dagId, #cfg, #child, then
// other / pred / children.
TR_PCISCGraph *
makeMemCpySpecialGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpySpecial", 0, 16);

   // --- leaf nodes: variables, array parts and constants ---
   TR_PCISCNode *srcIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 14, 0, 0, 0);
   graph->addNode(srcIndex);     // src array index
   TR_PCISCNode *dstIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 1);
   graph->addNode(dstIndex);     // dst array index
   TR_PCISCNode *bound = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 12, 0, 0);
   graph->addNode(bound);        // length

   TR_PCISCNode *iistoreBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 11, 0, 0, 2);
   graph->addNode(iistoreBase);  // first child of the istorei created below
   TR_PCISCNode *srcArrayIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 10, 0, 0, 0);
   graph->addNode(srcArrayIndex);
   TR_PCISCNode *dstArrayIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 9, 0, 0, 1);
   graph->addNode(dstArrayIndex);
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(srcBase);      // src array base
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 7, 0, 0, 1);
   graph->addNode(dstBase);      // dst array base
   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 6, 0, 0, 0);
   graph->addNode(srcHeader);    // array header
   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 5, 0, 0, 1);
   graph->addNode(dstHeader);    // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, -1);
   graph->addNode(minusOne);
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 3, 0, 0);
   graph->addNode(mulFactor);    // Multiply Factor

   // --- loop body ---
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entry);
   TR_PCISCNode *dstIndexUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, entry, dstIndex, minusOne);
   dstIndexUpdate->getChild(0)->setIsSuccDirectlyConnected(false);
   TR_PCISCNode *iistore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, graph->incNumNodes(), 1, 1, 2,
                                                            dstIndexUpdate, iistoreBase, dstIndexUpdate->getChild(0));
   graph->addNode(iistore);
   TR_PCISCNode *dstOffset = createIdiomArrayAddressIndexTreeInLoop(graph, ctrl, 1, iistore, dstArrayIndex, dstHeader, mulFactor);
   TR_PCISCNode *dstAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, dstOffset, dstBase, dstOffset);
   TR_PCISCNode *srcOffset = createIdiomArrayAddressIndexTreeInLoop(graph, ctrl, 1, dstAddr, srcArrayIndex, srcHeader, mulFactor);
   TR_PCISCNode *srcAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, srcOffset, srcBase, srcOffset);
   TR_PCISCNode *load = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, graph->incNumNodes(), 1, 1, 1, srcAddr, srcAddr);
   graph->addNode(load);
   TR_PCISCNode *store = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, graph->incNumNodes(), 1, 1, 2, load, dstAddr, load);
   graph->addNode(store);
   TR_PCISCNode *srcIndexUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, store, srcIndex, minusOne);
   TR_PCISCNode *exitIf = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, srcIndexUpdate, srcIndex, bound);
   graph->addNode(exitIf);
   TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exit);

   // The exit test either goes back to the node following the entry or leaves the loop.
   exitIf->setSuccs(entry->getSucc(0), exit);

   load->setIsChildDirectlyConnected();
   store->setIsChildDirectlyConnected();
   exitIf->setIsChildDirectlyConnected();

   // --- graph-level settings ---
   graph->setEntryNode(entry);
   graph->setExitNode(exit);
   // Important nodes: array load, array store, element size (none), exit if, optional iistore.
   graph->setImportantNodes(load, store, NULL, exitIf, iistore);
   graph->setNumDagIds(15);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2ArrayCopySpecial);
   graph->setAspects(isub|sameTypeLoadStore, existAccess, existAccess);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
5995
5996
/****************************************************************************************
5997
Corresponding Java-like Pseudo Program
5998
int v1, v3, end;
5999
v0[ ], v2[ ]; // char, int, float, long, and so on
6000
while(true){
6001
v2[v3] = v0[v1];
6002
v1++;
6003
v3++;
6004
if (v1 >= end) break;
6005
}
6006
****************************************************************************************/
6007
// Build the "MemCpy" pattern graph.
//
// Nodes are numbered through graph->incNumNodes() in creation order, so the
// order of the statements below must not be changed.
// Constructor arguments after the node id are: dagId, #cfg, #child, then
// other / pred / children.
TR_PCISCGraph *
makeMemCpyGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpy", 0, 16);

   // --- leaf nodes: variables, array parts and constants ---
   TR_PCISCNode *srcIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 14, 0, 0, 0);
   graph->addNode(srcIndex);     // src array index
   TR_PCISCNode *dstIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 1);
   graph->addNode(dstIndex);     // dst array index
   TR_PCISCNode *exitVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 12, 0, 0, 2);
   graph->addNode(exitVar);      // exit checking
   TR_PCISCNode *bound = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 11, 0, 0);
   graph->addNode(bound);        // length

   TR_PCISCNode *srcArrayIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 10, 0, 0, 0);
   graph->addNode(srcArrayIndex);
   TR_PCISCNode *dstArrayIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 9, 0, 0, 1);
   graph->addNode(dstArrayIndex);
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(srcBase);      // src array base
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 7, 0, 0, 1);
   graph->addNode(dstBase);      // dst array base
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 6, 0, 0);
   graph->addNode(mulFactor);    // Multiply Factor
   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 5, 0, 0, 0);
   graph->addNode(srcHeader);    // array header
   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 4, 0, 0, 1);
   graph->addNode(dstHeader);    // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
   graph->addNode(minusOne);

   // --- loop body ---
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entry);
   TR_PCISCNode *dstAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, entry, dstBase, dstArrayIndex, dstHeader, mulFactor);
   TR_PCISCNode *srcAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, dstAddr, srcBase, srcArrayIndex, srcHeader, mulFactor);
   TR_PCISCNode *load = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, graph->incNumNodes(), 1, 1, 1, srcAddr, srcAddr);
   graph->addNode(load);
   TR_PCISCNode *store = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, graph->incNumNodes(), 1, 1, 2, load, dstAddr, load);
   graph->addNode(store);
   TR_PCISCNode *dstIndexUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, store, dstIndex, minusOne);
   TR_PCISCNode *srcIndexUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, dstIndexUpdate, srcIndex, minusOne);
   TR_PCISCNode *exitIf = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, srcIndexUpdate, exitVar, bound);
   graph->addNode(exitIf);
   TR_PCISCNode *exit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exit);

   // The exit test either goes back to the node following the entry or leaves the loop.
   exitIf->setSuccs(entry->getSucc(0), exit);

   load->setIsChildDirectlyConnected();
   store->setIsChildDirectlyConnected();
   exitIf->setIsChildDirectlyConnected();

   // --- graph-level settings ---
   graph->setEntryNode(entry);
   graph->setExitNode(exit);
   // Important nodes: array load, array store, element size, exit if, optional iistore (none).
   graph->setImportantNodes(load, store, mulFactor, exitIf, NULL);
   graph->setNumDagIds(15);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2ArrayCopy);
   graph->setAspects(isub|mul | sameTypeLoadStore, existAccess, existAccess);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, true);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
6056
6057
/****************************************************************************************
6058
Corresponding Java-like Pseudo Program
6059
int v1, v3, end;
6060
v0[ ], v2[ ]; // char, int, float, long, and so on
6061
while(true){
6062
v2[v1] = v0[v1];
6063
v1--;
6064
if (v1 <= end) break;
6065
}
6066
****************************************************************************************/
6067
// Builds the persistent "MemCpyDec" idiom pattern graph: an element-wise array
// copy whose index variables are stepped by the constant -1 on every iteration.
//
// c    - compilation object; supplies the TR_Memory used for every allocation.
// ctrl - control flags, passed through unchanged to the createIdiom* helpers.
//
// Returns the newly created graph with CISCTransform2ArrayCopyDec installed
// as its transformer.
TR_PCISCGraph *
makeMemCpyDecGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyDec", 0, 16);

   // TR_PCISCNode arguments after the memory handle and data type follow the layout
   //    opc, id, dagId, #cfg, #child, other/pred/children
   // Node ids are handed out by successive incNumNodes() calls, so the order of
   // the statements below is significant.

   // ---- variables and quasi constants -------------------------------------------------
   TR_PCISCNode *srcIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0);
   tgt->addNode(srcIdxVar);   // src array index
   TR_PCISCNode *dstIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1);
   tgt->addNode(dstIdxVar);   // dst array index
   TR_PCISCNode *exitVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 2);
   tgt->addNode(exitVar);     // exit checking
   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 11, 0, 0);
   tgt->addNode(limit);       // length

   // ---- array addressing operands -----------------------------------------------------
   TR_PCISCNode *srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 0);
   tgt->addNode(srcIdx);
   TR_PCISCNode *dstIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1);
   tgt->addNode(dstIdx);
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0);
   tgt->addNode(srcBase);     // src array base
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1);
   tgt->addNode(dstBase);     // dst array base
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0);
   tgt->addNode(mulFactor);   // multiply factor
   TR_PCISCNode *srcHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0);
   tgt->addNode(srcHeader);   // array header
   TR_PCISCNode *dstHeader = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0, 1);
   tgt->addNode(dstHeader);   // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1);
   tgt->addNode(minusOne);

   // ---- loop body ---------------------------------------------------------------------
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);
   // The destination address is built first, then the source address.
   TR_PCISCNode *dstAddr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, entry, dstBase, dstIdx, dstHeader, mulFactor);
   TR_PCISCNode *srcAddr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, dstAddr, srcBase, srcIdx, srcHeader, mulFactor);
   TR_PCISCNode *loadNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::NoType, tgt->incNumNodes(), 1, 1, 1, srcAddr, srcAddr);
   tgt->addNode(loadNode);
   TR_PCISCNode *storeNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, tgt->incNumNodes(), 1, 1, 2, loadNode, dstAddr, loadNode);
   tgt->addNode(storeNode);
   // Both index variables are updated with the constant -1 via the "inc" helper.
   TR_PCISCNode *dstStep = createIdiomIncVarInLoop(tgt, ctrl, 1, storeNode, dstIdxVar, minusOne);
   TR_PCISCNode *srcStep = createIdiomIncVarInLoop(tgt, ctrl, 1, dstStep, srcIdxVar, minusOne);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, srcStep, exitVar, limit);
   tgt->addNode(exitTest);
   TR_PCISCNode *exitNd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNd);

   // The compare either loops back to the first node after the entry or leaves the loop.
   exitTest->setSuccs(entry->getSucc(0), exitNd);

   loadNode->setIsChildDirectlyConnected();
   storeNode->setIsChildDirectlyConnected();
   exitTest->setIsChildDirectlyConnected();

   // ---- graph level settings ----------------------------------------------------------
   tgt->setEntryNode(entry);
   tgt->setExitNode(exitNd);
   tgt->setImportantNodes(loadNode, storeNode, mulFactor, exitTest, NULL);
   tgt->setNumDagIds(15);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2ArrayCopyDec);
   tgt->setAspects(iadd|mul | sameTypeLoadStore, existAccess, existAccess);
   tgt->setNoAspects(call|bndchk|bitop1, 0, 0);
   tgt->setMinCounts(1, 1, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
6116
6117
6118
//////////////////////////////////////////////////////////////////////////
6119
//////////////////////////////////////////////////////////////////////////
6120
//////////////////////////////////////////////////////////////////////////
6121
//*****************************************************************************************
6122
// IL code generation for copying memory (ByteToChar or CharToByte version)
6123
// Input: ImportantNodes(0) - array load
6124
// ImportantNodes(1) - array store
6125
//*****************************************************************************************
6126
bool
6127
CISCTransform2ArrayCopyB2CorC2B(TR_CISCTransformer *trans)
6128
{
6129
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
6130
TR::Node *trNode;
6131
TR::TreeTop *trTreeTop;
6132
TR::Block *block;
6133
TR_CISCGraph *P = trans->getP();
6134
List<TR_CISCNode> *P2T = trans->getP2T();
6135
TR::Compilation *comp = trans->comp();
6136
6137
bool bigEndian = comp->target().cpu.isBigEndian();
6138
6139
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
6140
6141
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
6142
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
6143
6144
trans->findFirstNode(&trTreeTop, &trNode, &block);
6145
if (!block) return false; // cannot find
6146
6147
if (isLoopPreheaderLastBlockInMethod(comp, block))
6148
{
6149
traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2CorC2B due to null TT - might be a preheader in last block of method\n");
6150
return false;
6151
}
6152
6153
TR::Block *target = trans->analyzeSuccessorBlock();
6154
// Currently, it allows only a single successor.
6155
if (!target) return false;
6156
6157
TR::Node *indexRepNode, *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode;
6158
getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode);
6159
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
6160
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();
6161
TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();
6162
TR_ASSERT(indexVarSymRef == exitVarSymRef || dstIndexVarSymRef == exitVarSymRef, "error!");
6163
TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error!");
6164
6165
TR::Node * inputMemNode = trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
6166
TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node->duplicateTree();
6167
TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node;
6168
TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
6169
6170
// check whether the transformation is valid
6171
//
6172
if (outputMemNode->getOpCode().isShort())
6173
{
6174
TR::Node * iorNode = trans->getP2TRepInLoop(P->getImportantNode(2))->getHeadOfTrNodeInfo()->_node;
6175
if (!checkByteToChar(comp, iorNode, inputNode, bigEndian))
6176
{
6177
dumpOptDetails(comp, "byte loads in [%p] are not compatible with endian-ness %d\n", iorNode, bigEndian);
6178
return false;
6179
}
6180
}
6181
inputNode = inputNode->duplicateTree();
6182
6183
TR::Node * exitVarNode = createLoad(exitVarRepNode);
6184
variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);
6185
TR::Node * lengthNode = createOP2(comp, TR::isub,
6186
variableORconstRepNode,
6187
exitVarNode);
6188
TR::Node * updateTree1, *updateTree2;
6189
TR::Node * c2 = TR::Node::create(exitVarRepNode, TR::iconst, 0, 2);
6190
bool isExitVarChar;
6191
isExitVarChar = (dstIndexVarSymRef == exitVarSymRef) ? outputMemNode->getSize() == 2 :
6192
inputMemNode->getSize() == 2;
6193
//there are 2 scenarios
6194
// dstIndexVarSymRef is a char (ie. outputMemNode size == 2, consequently inputMemNode == 1 and indexVarSymRef is a byte)
6195
// or
6196
// indexVarSymRef is a char (ie. inputMemNode size == 2, consequently outputMemNode == 1 and dstIndexVarSymRef is a byte)
6197
// in each of these cases, its possible that each induction variable could be the loop controlling variable (ie. exitVarSymRef) ; thereby creating 4 possible conditions
6198
//
6199
if (outputMemNode->getSize() == 2) // type is a byteToChar copy
6200
{
6201
// for a byteToChar copy, the length needs to be expressed in number of bytes
6202
if (dstIndexVarSymRef == exitVarSymRef)
6203
{
6204
// dstIndex is the char array's index and length should be multiplied by 2 because the
6205
// arraycopy length should be expressed in bytes
6206
//
6207
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);
6208
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);
6209
updateTree2 = TR::Node::createStore(dstIndexVarSymRef, variableORconstRepNode);
6210
}
6211
else
6212
{
6213
// byte array's index is the loop controlling variable. this means length is already in bytes
6214
// nothing to do for length
6215
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);
6216
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef,
6217
TR::Node::create(TR::idiv, 2, lengthNode, c2), trNode);
6218
}
6219
}
6220
else // type is a charToByte copy
6221
{
6222
// For a charToByte copy, the length needs to be expressed in number of bytes
6223
if (dstIndexVarSymRef == exitVarSymRef)
6224
{
6225
// dstIndex is the byte array's index and length is already in bytes.
6226
// index is the char array's index and needs to be adjusted by # of chars (byte / 2).
6227
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, TR::Node::create(TR::idiv, 2, lengthNode, c2), trNode);
6228
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);
6229
}
6230
else
6231
{
6232
// char array's index is the loop controlling variable, so length needs to be adjusted by 2.
6233
// index is the char array's index and should be added to original length value.
6234
// dstIndex is the byte array's index and needs to be added to 2 * length (or updated lengthNode).
6235
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);
6236
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);
6237
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);
6238
}
6239
}
6240
6241
#if 0
6242
// Prepare nodes for byte length and induction variable updates
6243
if (isExitVarChar) // The variable that checks the exit condition is for a 2-byte array.
6244
{
6245
TR::Node * diff = lengthNode;
6246
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);
6247
// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)
6248
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);
6249
updateTree2 = TR::Node::createStore(dstIndexVarSymRef, variableORconstRepNode);
6250
}
6251
else
6252
{
6253
///TR::Node * div2 = TR::Node::create(TR::idiv, 2, lengthNode, c2);
6254
lengthNode = TR::Node::create(TR::idiv, 2, lengthNode, c2);
6255
///lengthNode = TR::Node::create(TR::imul, 2, div2, c2); // to make the length even
6256
// lengthNode has the byte size, and div2 has the char-based size (that is, lengthNode = div2 * 2)
6257
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthNode, trNode);
6258
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef,
6259
TR::Node::create(TR::imul, 2, lengthNode, c2), trNode);
6260
}
6261
#endif
6262
6263
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);
6264
6265
// Prepare the arraycopy node
6266
TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);
6267
arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
6268
arraycopy->setForwardArrayCopy(true);
6269
arraycopy->setArrayCopyElementType(TR::Int8);
6270
6271
TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);
6272
TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);
6273
TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);
6274
6275
// Insert nodes and maintain the CFG
6276
TR::TreeTop *last;
6277
last = trans->removeAllNodes(trTreeTop, block->getExit());
6278
last->join(block->getExit());
6279
block = trans->insertBeforeNodes(block);
6280
last = block->getLastRealTreeTop();
6281
last->join(trTreeTop);
6282
trTreeTop->setNode(topArraycopy);
6283
trTreeTop->join(updateTreeTop1);
6284
updateTreeTop1->join(updateTreeTop2);
6285
updateTreeTop2->join(block->getExit());
6286
6287
trans->insertAfterNodes(block);
6288
6289
trans->setSuccessorEdge(block, target);
6290
return true;
6291
}
6292
6293
/****************************************************************************************
6294
Corresponding Java-like Pseudo Program (for big endian)
6295
int v1, v3, end;
6296
byte v0[ ];
6297
char v2[ ];
6298
while(true){
6299
v2[v3] = ((v0[v1] & 0xFF) << 8) | (v0[v1+1] & 0xFF);
6300
v1+=2;
6301
v3++;
6302
if (v3 >= end) break;
6303
}
6304
6305
Note 1: This idiom also supports little endian.
6306
****************************************************************************************/
6307
// Builds the persistent "MemCpyByteToChar" idiom pattern graph: two byte loads
// from the source array are combined (first byte * 256, OR-ed with the second
// byte), narrowed to a short and stored into the destination array.
//
// c    - compilation object; supplies the TR_Memory used for every allocation.
// ctrl - control flags; CISCUtilCtl_BigEndian and CISCUtilCtl_64Bit are tested
//        here, and the value is also forwarded to the createIdiom* helpers.
//
// Returns the newly created graph with CISCTransform2ArrayCopyB2CorC2B
// installed as its transformer.
TR_PCISCGraph *
makeMemCpyByteToCharGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyByteToChar", 0, 16);
   const bool isBigEndian = (ctrl & CISCUtilCtl_BigEndian) != 0;

   // TR_PCISCNode arguments after the memory handle and data type follow the layout
   //    opc, id, dagId, #cfg, #child, other/pred/children
   // Node ids come from successive incNumNodes() calls; keep the statement order.

   // ---- variables and quasi constants -------------------------------------------------
   TR_PCISCNode *srcIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(srcIdxVar);   // src array index
   TR_PCISCNode *dstIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(dstIdxVar);   // dst array index
   TR_PCISCNode *exitVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 2);
   tgt->addNode(exitVar);     // exit checking
   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 12, 0, 0);
   tgt->addNode(limit);       // length
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(srcBase);     // src array base
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstBase);     // dst array base
   TR_PCISCNode *hdr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0);
   tgt->addNode(hdr);         // array header
   TR_PCISCNode *hdrPlus1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1)); // array header+1

   // The byte that is multiplied by 256 is addressed with the plain header on
   // big endian and with header+1 otherwise; the other byte uses the remaining one.
   TR_PCISCNode *firstHdr;
   TR_PCISCNode *secondHdr;
   if (isBigEndian)
      {
      firstHdr  = hdr;
      secondHdr = hdrPlus1;
      }
   else
      {
      firstHdr  = hdrPlus1;
      secondHdr = hdr;
      }

   TR_PCISCNode *minus1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1);
   tgt->addNode(minus1);
   TR_PCISCNode *minus2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2);
   tgt->addNode(minus2);
   TR_PCISCNode *elemSize2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size
   TR_PCISCNode *const256 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 256);
   tgt->addNode(const256);
   TR_PCISCNode *elemSize1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size

   // ---- loop body ---------------------------------------------------------------------
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);

   // destination (char) element address
   TR_PCISCNode *dstIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, entry, dstIdxVar, hdr, elemSize2);
   TR_PCISCNode *dstAddr    = createIdiomArrayAddressInLoop(tgt, ctrl, 1, dstIdxTree, dstBase, dstIdxTree);

   // With the 64-bit flag set the source index is widened once by an explicit
   // i2l node; both byte addresses are then built with CISCUtilCtl_NoI2L.
   TR_PCISCNode *srcIdx;
   TR_PCISCNode *firstIdxTree;
   if (ctrl & CISCUtilCtl_64Bit)
      {
      srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2l, TR::Int64, tgt->incNumNodes(), 1, 1, 1, dstAddr, srcIdxVar);
      tgt->addNode(srcIdx);
      firstIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, srcIdx, srcIdx, firstHdr, elemSize1);
      }
   else
      {
      srcIdx = srcIdxVar;
      firstIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, dstAddr, srcIdx, firstHdr, elemSize1);
      }

   // first byte: load, zero-extend to int, multiply by 256
   TR_PCISCNode *firstAddr = createIdiomArrayAddressInLoop(tgt, ctrl, 1, firstIdxTree, srcBase, firstIdxTree);
   TR_PCISCNode *firstByte = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, firstAddr, firstAddr);
   tgt->addNode(firstByte);
   TR_PCISCNode *firstInt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, firstByte, firstByte);
   tgt->addNode(firstInt);
   TR_PCISCNode *firstScaled = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, firstInt, firstInt, const256);
   tgt->addNode(firstScaled);

   // second byte: load, zero-extend to int
   TR_PCISCNode *secondIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl | CISCUtilCtl_NoI2L, 1, firstScaled, srcIdx, secondHdr, elemSize1);
   TR_PCISCNode *secondAddr    = createIdiomArrayAddressInLoop(tgt, ctrl, 1, secondIdxTree, srcBase, secondIdxTree);
   TR_PCISCNode *secondByte = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, tgt->incNumNodes(), 1, 1, 1, secondAddr, secondAddr);
   tgt->addNode(secondByte);
   TR_PCISCNode *secondInt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, secondByte, secondByte);
   tgt->addNode(secondInt);

   // combine, narrow to short, store
   TR_PCISCNode *merged = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, secondInt, firstScaled, secondInt);
   tgt->addNode(merged);
   TR_PCISCNode *narrowed = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, tgt->incNumNodes(), 1, 1, 1, merged, merged);
   tgt->addNode(narrowed);
   TR_PCISCNode *charStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sstorei, TR::Int16, tgt->incNumNodes(), 1, 1, 2, narrowed, dstAddr, narrowed);
   tgt->addNode(charStore);

   // index updates (constants -2 and -1 through the "dec" helper) and loop exit test
   TR_PCISCNode *srcStep = createIdiomDecVarInLoop(tgt, ctrl, 1, charStore, srcIdxVar, minus2);
   TR_PCISCNode *dstStep = createIdiomDecVarInLoop(tgt, ctrl, 1, srcStep, dstIdxVar, minus1);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, dstStep, exitVar, limit);
   tgt->addNode(exitTest);
   TR_PCISCNode *exitNd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNd);

   exitTest->setSuccs(entry->getSucc(0), exitNd);
   exitTest->setIsChildDirectlyConnected();

   // ---- graph level settings ----------------------------------------------------------
   tgt->setEntryNode(entry);
   tgt->setExitNode(exitNd);
   tgt->setImportantNodes(firstByte, charStore, merged);
   tgt->setNumDagIds(16);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2ArrayCopyB2CorC2B);
   tgt->setAspects(isub|mul|bitop1, ILTypeProp::Size_1, ILTypeProp::Size_2);
   tgt->setNoAspects(call|bndchk, 0, 0);
   tgt->setMinCounts(1, 2, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
6377
6378
6379
//////////////////////////////////////////////////////////////////////////
6380
//////////////////////////////////////////////////////////////////////////
6381
//////////////////////////////////////////////////////////////////////////
6382
6383
6384
/****************************************************************************************
6385
Corresponding Java-like Pseudo Program (for big endian)
6386
int v1, v3, end;
6387
char v0[ ];
6388
byte v2[ ];
6389
while(true){
6390
v2[v3] = (byte)(v0[v1] >> 8);
6391
v2[v3+1] = (byte)(v0[v1] & 0xff);
6392
v1++;
6393
v3+=2;
6394
if (v1 >= end) break;
6395
}
6396
6397
Note 1: This idiom also supports little endian.
6398
****************************************************************************************/
6399
// Builds the persistent "MemCpyCharToByte" idiom pattern graph: one short load
// from the source array is split into two bytes, which are written by two byte
// stores into the destination array.
//
// c    - compilation object; supplies the TR_Memory used for every allocation.
// ctrl - control flags; CISCUtilCtl_BigEndian selects which of the two stores
//        receives the shifted value, and the flags are also forwarded to the
//        createIdiom* helpers.
//
// Returns the newly created graph with CISCTransform2ArrayCopyB2CorC2B
// installed as its transformer.
TR_PCISCGraph *
makeMemCpyCharToByteGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyCharToByte", 0, 16);
   const bool bigEndian = (ctrl & CISCUtilCtl_BigEndian) != 0;

   // TR_PCISCNode arguments after the memory handle and data type follow the layout
   //    opc, id, dagId, #cfg, #child, other/pred/children
   // Node ids come from successive incNumNodes() calls; keep the statement order.

   // ---- variables and quasi constants -------------------------------------------------
   TR_PCISCNode *srcIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(srcIdxVar);   // src array index
   TR_PCISCNode *dstIdxVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(dstIdxVar);   // dst array index
   TR_PCISCNode *exitVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 2);
   tgt->addNode(exitVar);     // exit checking
   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 12, 0, 0);
   tgt->addNode(limit);       // length
   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(srcBase);     // src array base
   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(dstBase);     // dst array base
   TR_PCISCNode *hdr = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0);
   tgt->addNode(hdr);         // array header
   TR_PCISCNode *hdrPlus1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1)); // array header+1
   TR_PCISCNode *minus1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1);
   tgt->addNode(minus1);
   TR_PCISCNode *minus2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2);
   tgt->addNode(minus2);
   TR_PCISCNode *elemSize2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2); // element size
   TR_PCISCNode *const8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8);
   tgt->addNode(const8);
   TR_PCISCNode *elemSize1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1); // element size

   // ---- loop body ---------------------------------------------------------------------
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(entry);

   // address of the first destination byte, then the source char load
   TR_PCISCNode *dstIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, entry, dstIdxVar, hdr, elemSize1);
   TR_PCISCNode *dstAddr    = createIdiomArrayAddressInLoop(tgt, ctrl, 1, dstIdxTree, dstBase, dstIdxTree);
   TR_PCISCNode *srcIdxTree = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, dstAddr, srcIdxVar, hdr, elemSize2);
   TR_PCISCNode *srcAddr    = createIdiomArrayAddressInLoop(tgt, ctrl, 1, srcIdxTree, srcBase, srcIdxTree);
   TR_PCISCNode *charLoad = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sloadi, TR::Int16, tgt->incNumNodes(), 1, 1, 1, srcAddr, srcAddr);
   tgt->addNode(charLoad);

   // Value for the first byte store: su2i / shift-by-8 / i2b when the big endian
   // flag is set, a plain s2b otherwise.
   TR_PCISCNode *firstCvt;
   if (bigEndian)
      {
      TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, charLoad, charLoad);
      tgt->addNode(widened);
      TR_PCISCNode *shifted = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, widened, widened, const8);
      tgt->addNode(shifted);
      firstCvt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, shifted, shifted);
      tgt->addNode(firstCvt);
      }
   else
      {
      firstCvt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, charLoad, charLoad);
      tgt->addNode(firstCvt);
      }
   TR_PCISCNode *firstStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, firstCvt, dstAddr, firstCvt);
   tgt->addNode(firstStore);

   // Address of the second destination byte: reuses the grandchild of the first
   // index tree as its index operand and uses the header+1 constant.
   TR_PCISCNode *dstIdxTree2 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, firstStore, dstIdxTree->getChild(0)->getChild(0), hdrPlus1, elemSize1);
   TR_PCISCNode *dstAddr2    = createIdiomArrayAddressInLoop(tgt, ctrl, 1, dstIdxTree2, dstBase, dstIdxTree2);

   // Value for the second byte store: the mirror image of the first conversion.
   TR_PCISCNode *secondCvt;
   if (bigEndian)
      {
      secondCvt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, dstAddr2, charLoad);
      tgt->addNode(secondCvt);
      }
   else
      {
      TR_PCISCNode *widened = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, dstAddr2, charLoad);
      tgt->addNode(widened);
      TR_PCISCNode *shifted = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, widened, widened, const8);
      tgt->addNode(shifted);
      secondCvt = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, shifted, shifted);
      tgt->addNode(secondCvt);
      }
   TR_PCISCNode *secondStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, secondCvt, dstAddr2, secondCvt);
   tgt->addNode(secondStore);

   // index updates (constants -2 and -1 through the "dec" helper) and loop exit test
   TR_PCISCNode *dstStep = createIdiomDecVarInLoop(tgt, ctrl, 1, secondStore, dstIdxVar, minus2);
   TR_PCISCNode *srcStep = createIdiomDecVarInLoop(tgt, ctrl, 1, dstStep, srcIdxVar, minus1);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, srcStep, exitVar, limit);
   tgt->addNode(exitTest);
   TR_PCISCNode *exitNd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(exitNd);

   exitTest->setSuccs(entry->getSucc(0), exitNd);

   exitTest->setIsChildDirectlyConnected();

   // ---- graph level settings ----------------------------------------------------------
   tgt->setEntryNode(entry);
   tgt->setExitNode(exitNd);
   tgt->setImportantNodes(charLoad, firstStore);
   tgt->setNumDagIds(16);
   tgt->createInternalData(1);

   tgt->setSpecialCareNode(0, firstCvt);   // conversion (possibly i2b)
   tgt->setSpecialCareNode(1, secondCvt);  // conversion (possibly i2b)
   tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);

   tgt->setTransformer(CISCTransform2ArrayCopyB2CorC2B);
   tgt->setAspects(isub|mul|shr, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk, 0, 0);
   tgt->setMinCounts(1, 1, 2);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
6475
6476
6477
//////////////////////////////////////////////////////////////////////////
6478
//////////////////////////////////////////////////////////////////////////
6479
//////////////////////////////////////////////////////////////////////////
6480
//*****************************************************************************************
6481
// IL code generation for copying memory (ByteToChar or CharToByte version)
6482
// Input: ImportantNode(0) - array load
6483
// ImportantNode(1) - array store
6484
// ImportantNode(2) - indirect load of the array index for the array load
6485
//*****************************************************************************************
6486
bool
6487
CISCTransform2ArrayCopyB2CBndchk(TR_CISCTransformer *trans)
6488
{
6489
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
6490
TR::Node *trNode;
6491
TR::TreeTop *trTreeTop;
6492
TR::Block *block;
6493
TR_CISCGraph *P = trans->getP();
6494
List<TR_CISCNode> *P2T = trans->getP2T();
6495
TR::Compilation *comp = trans->comp();
6496
6497
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
6498
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
6499
6500
trans->findFirstNode(&trTreeTop, &trNode, &block);
6501
if (!block) return false; // cannot find
6502
6503
if (isLoopPreheaderLastBlockInMethod(comp, block))
6504
{
6505
traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2CBndchk due to null TT - might be a preheader in last block of method\n");
6506
return false;
6507
}
6508
6509
TR::Block *target = trans->analyzeSuccessorBlock();
6510
// Currently, it allows only a single successor.
6511
if (!target) return false;
6512
6513
TR::Node *dstIndexRepNode, *exitVarRepNode, *variableORconstRepNode, *arrayLenRepNode;
6514
getP2TTrRepNodes(trans, &dstIndexRepNode, &exitVarRepNode, &variableORconstRepNode, &arrayLenRepNode);
6515
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();
6516
TR::SymbolReference * exitVarSymRef = exitVarRepNode->getSymbolReference();
6517
if (!trans->analyzeArrayIndex(dstIndexVarSymRef))
6518
{
6519
if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",dstIndexRepNode);
6520
return false;
6521
}
6522
6523
TR::Node * inputMemNode = trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node;
6524
TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node;
6525
TR::Node * indexLoadNode = trans->getP2TRepInLoop(P->getImportantNode(2))->getHeadOfTrNodeInfo()->_node;
6526
TR_ASSERT(inputMemNode && outputMemNode && indexLoadNode, "error");
6527
TR::Node * inputNode = inputMemNode->getChild(0)->duplicateTree();
6528
TR::Node * outputNode = outputMemNode->getChild(0)->duplicateTree();
6529
6530
TR::Node * exitVarNode = createLoad(exitVarRepNode);
6531
variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);
6532
TR::Node * lengthNode = createOP2(comp, TR::isub,
6533
variableORconstRepNode,
6534
exitVarNode);
6535
TR::Node * updateTree1, *updateTree2, *updateTree3;
6536
TR::Node * c2 = TR::Node::create(exitVarRepNode, TR::iconst, 0, 2);
6537
bool isExitVarChar = (outputMemNode->getSize() == 2);
6538
// Prepare nodes for byte length and induction variable updates
6539
indexLoadNode = indexLoadNode->duplicateTree();
6540
TR::Node * endIndex;
6541
if (isExitVarChar) // The variable that checks the exit condition is for a 2-byte array.
6542
{
6543
TR::Node * diff = lengthNode;
6544
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);
6545
// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)
6546
endIndex = createOP2(comp, TR::iadd, indexLoadNode, lengthNode);
6547
updateTree1 = TR::Node::createWithSymRef(TR::istorei, 2, 2, indexLoadNode->getChild(0), endIndex, indexLoadNode->getSymbolReference());
6548
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, diff, trNode);
6549
}
6550
else
6551
{
6552
TR::Node * div2 = TR::Node::create(TR::idiv, 2, lengthNode, c2);
6553
lengthNode = TR::Node::create(TR::imul, 2, div2, c2); // to make the length even
6554
// lengthNode has the byte size, and div2 has the char-based size (that is, lengthNode = div2 * 2)
6555
endIndex = createOP2(comp, TR::iadd, indexLoadNode, lengthNode);
6556
updateTree1 = TR::Node::createWithSymRef(TR::istorei, 2, 2, indexLoadNode->getChild(0), endIndex, indexLoadNode->getSymbolReference());
6557
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, div2, trNode);
6558
}
6559
updateTree3 = TR::Node::createStore(exitVarSymRef, variableORconstRepNode);
6560
6561
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);
6562
// Prepare the arraycopy node
6563
TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);
6564
arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
6565
arraycopy->setForwardArrayCopy(true);
6566
arraycopy->setArrayCopyElementType(TR::Int8);
6567
6568
TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);
6569
TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);
6570
TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);
6571
TR::TreeTop * updateTreeTop3 = TR::TreeTop::create(comp, updateTree3);
6572
6573
// Insert nodes and maintain the CFG
6574
List<TR::Node> guardList(comp->trMemory());
6575
guardList.add(TR::Node::createif(TR::ifiucmpgt, endIndex->duplicateTree(), createLoad(arrayLenRepNode)));
6576
guardList.add(TR::Node::createif(TR::ifiucmpge, indexLoadNode->duplicateTree(), createLoad(arrayLenRepNode)));
6577
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);
6578
6579
block = trans->insertBeforeNodes(block);
6580
6581
block->append(TR::TreeTop::create(comp, topArraycopy));
6582
block->append(updateTreeTop1);
6583
block->append(updateTreeTop2);
6584
block->append(updateTreeTop3);
6585
6586
block = trans->insertAfterNodes(block);
6587
6588
trans->setSuccessorEdge(block, target);
6589
return true;
6590
}
6591
6592
/****************************************************************************************
6593
Corresponding Java-like Pseudo Program
6594
int indIndex2, end;
6595
byte v0[ ];
6596
char v2[ ];
6597
while(true){
6598
v2[v1++] = ((v0[this.indIndex1++] & 0xFF) << 8) + (v0[this.indIndex1++] & 0xFF);
6599
v3++;
6600
if (v3 >= end) break;
6601
}
6602
6603
Note 1: One of target methods is com/ibm/rmi/iiop/CDRInputStream.read_wstring().
6604
****************************************************************************************/
6605
// Builds the persistent pattern graph named "MemCpyByteToCharBndchk".
//
// The loop body described by the graph loads an int field of an object twice
// (bumping it by one each time, with a BNDCHK after each store back), reads
// two bytes from the source array, combines them as (first * 256 + second),
// narrows the sum with i2s and stores it into the destination array, then
// updates the destination index and the exit variable and tests the exit
// variable against the limit with ificmpge.
//
// c    - compilation whose TR_Memory is handed to every node/graph constructor
// ctrl - CISCUtilCtl_* flags; CISCUtilCtl_BigEndian selects which header
//        constant belongs to the first byte, CISCUtilCtl_64Bit adds an i2l
//        on the source index
//
// Returns the fully initialised graph, wired to
// CISCTransform2ArrayCopyB2CBndchk as its transformer.
TR_PCISCGraph *
makeMemCpyByteToCharBndchkGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCpyByteToCharBndchk", 0, 16);
   const bool bigEndian = (ctrl & CISCUtilCtl_BigEndian);

   // Node constructor arguments after the memory handle:
   //    opcode, type, id, dagId, #cfg, #child, other/pred/children

   //
   // Variables, array bases and constants (one dag id each, 17 down to 3)
   //
   TR_PCISCNode *dstIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 17, 0, 0, 0);
   graph->addNode(dstIndex);   // dst array index

   TR_PCISCNode *exitVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 16, 0, 0, 1);
   graph->addNode(exitVar);    // exit checking

   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 15, 0, 0);
   graph->addNode(limit);      // length

   TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 14, 0, 0);
   graph->addNode(arrayLen);   // arraylength

   TR_PCISCNode *srcBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 13, 0, 0, 0);
   graph->addNode(srcBase);    // src array base

   TR_PCISCNode *dstBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 12, 0, 0, 1);
   graph->addNode(dstBase);    // dst array base

   TR_PCISCNode *thisObj = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 11, 0, 0, 2);
   graph->addNode(thisObj);    // this object

   TR_PCISCNode *arrayIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 10, 0, 0, 0);
   graph->addNode(arrayIdx);

   // array header
   TR_PCISCNode *hdrConst = createIdiomArrayHeaderConst(graph, ctrl, graph->incNumNodes(), 9, c);
   // array header+1
   TR_PCISCNode *hdrPlus1Const = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 8,
                                                              -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));

   // The byte that is multiplied by 256 uses firstByteHdr, the other one
   // uses secondByteHdr; which header constant each gets depends on endianness.
   TR_PCISCNode *firstByteHdr;
   TR_PCISCNode *secondByteHdr;
   if (bigEndian)
      {
      firstByteHdr  = hdrConst;
      secondByteHdr = hdrPlus1Const;
      }
   else
      {
      firstByteHdr  = hdrPlus1Const;
      secondByteHdr = hdrConst;
      }

   TR_PCISCNode *minus1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 7, 0, 0, -1);
   graph->addNode(minus1);

   TR_PCISCNode *minus2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 6, 0, 0, -2);
   graph->addNode(minus2);

   // element size (2)
   TR_PCISCNode *elemSize2 = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 5, 2);

   TR_PCISCNode *const256 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, 256);
   graph->addNode(const256);

   // element size (1)
   TR_PCISCNode *elemSize1 = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 3, 1);

   //
   // Entry node (dag id 2) followed by the loop body (dag id 1)
   //
   TR_PCISCNode *entry = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entry);

   // Field load, then two "field - (-k)" updates each followed by a bound check
   TR_PCISCNode *fieldLoad = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iloadi, TR::Int32, graph->incNumNodes(), 1, 1, 1, entry, thisObj);
   graph->addNode(fieldLoad);

   TR_PCISCNode *fieldPlus1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, fieldLoad, fieldLoad, minus1);
   graph->addNode(fieldPlus1);

   TR_PCISCNode *fieldStore1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, graph->incNumNodes(), 1, 1, 2, fieldPlus1, thisObj, fieldPlus1);
   graph->addNode(fieldStore1);

   TR_PCISCNode *bndchk1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, graph->incNumNodes(), 1, 1, 2, fieldStore1, arrayLen, fieldLoad);
   graph->addNode(bndchk1);

   TR_PCISCNode *fieldPlus2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, bndchk1, fieldLoad, minus2);
   graph->addNode(fieldPlus2);

   TR_PCISCNode *fieldStore2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, graph->incNumNodes(), 1, 1, 2, fieldPlus2, thisObj, fieldPlus2);
   graph->addNode(fieldStore2);

   TR_PCISCNode *bndchk2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, graph->incNumNodes(), 1, 1, 2, fieldStore2, arrayLen, fieldPlus1);
   graph->addNode(bndchk2);

   // Destination element address
   TR_PCISCNode *dstIdxTree = createIdiomArrayAddressIndexTreeInLoop(graph, ctrl, 1, bndchk2, arrayIdx, hdrConst, elemSize2);
   TR_PCISCNode *dstAddr    = createIdiomArrayAddressInLoop(graph, ctrl, 1, dstIdxTree, dstBase, dstIdxTree);

   // Source index: the loaded field itself, widened through an explicit i2l
   // when CISCUtilCtl_64Bit is set. The first byte's index tree hangs off
   // the i2l when it exists, otherwise off the destination address.
   TR_PCISCNode *srcIdx       = fieldLoad;
   TR_PCISCNode *firstIdxPred = dstAddr;
   if (ctrl & CISCUtilCtl_64Bit)
      {
      srcIdx = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2l, TR::Int64, graph->incNumNodes(), 1, 1, 1, dstAddr, fieldLoad);
      graph->addNode(srcIdx);
      firstIdxPred = srcIdx;
      }
   TR_PCISCNode *firstIdxTree = createIdiomArrayAddressIndexTreeInLoop(graph, ctrl | CISCUtilCtl_NoI2L, 1, firstIdxPred, srcIdx, firstByteHdr, elemSize1);

   // First byte: load, zero-extend, multiply by 256
   TR_PCISCNode *firstAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, firstIdxTree, srcBase, firstIdxTree);

   TR_PCISCNode *firstLoad = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, graph->incNumNodes(), 1, 1, 1, firstAddr, firstAddr);
   graph->addNode(firstLoad);

   TR_PCISCNode *firstWiden = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, firstLoad, firstLoad);
   graph->addNode(firstWiden);

   TR_PCISCNode *firstScaled = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, graph->incNumNodes(), 1, 1, 2, firstWiden, firstWiden, const256);
   graph->addNode(firstScaled);

   // Second byte: load and zero-extend
   TR_PCISCNode *secondIdxTree = createIdiomArrayAddressIndexTreeInLoop(graph, ctrl | CISCUtilCtl_NoI2L, 1, firstScaled, srcIdx, secondByteHdr, elemSize1);
   TR_PCISCNode *secondAddr    = createIdiomArrayAddressInLoop(graph, ctrl, 1, secondIdxTree, srcBase, secondIdxTree);

   TR_PCISCNode *secondLoad = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bloadi, TR::Int8, graph->incNumNodes(), 1, 1, 1, secondAddr, secondAddr);
   graph->addNode(secondLoad);

   TR_PCISCNode *secondWiden = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, secondLoad, secondLoad);
   graph->addNode(secondWiden);

   // Combine both bytes, narrow, and store to the destination array
   TR_PCISCNode *combined = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iadd, TR::Int32, graph->incNumNodes(), 1, 1, 2, secondWiden, firstScaled, secondWiden);
   graph->addNode(combined);

   TR_PCISCNode *narrowed = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2s, TR::Int16, graph->incNumNodes(), 1, 1, 1, combined, combined);
   graph->addNode(narrowed);

   TR_PCISCNode *charStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sstorei, TR::Int16, graph->incNumNodes(), 1, 1, 2, narrowed, dstAddr, narrowed);
   graph->addNode(charStore);

   // Induction variable updates and the loop exit test
   TR_PCISCNode *dstIndexUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, charStore, dstIndex, minus1);
   TR_PCISCNode *exitVarUpdate  = createIdiomDecVarInLoop(graph, ctrl, 1, dstIndexUpdate, exitVar, minus1);

   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, graph->incNumNodes(), 1, 2, 2, exitVarUpdate, exitVar, limit);
   graph->addNode(exitTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // Successor 0 of the exit test is the back edge to the first body node;
   // successor 1 is the exit node.
   exitTest->setSuccs(entry->getSucc(0), exitNode);
   exitTest->setIsChildDirectlyConnected();
   bndchk1->setIsChildDirectlyConnected();
   bndchk2->setIsChildDirectlyConnected();

   graph->setEntryNode(entry);
   graph->setExitNode(exitNode);

   // Important nodes: one of the two byte loads (chosen by endianness),
   // the char store, and the field load.
   TR_PCISCNode *importantLoad = secondLoad;
   if (bigEndian)
      importantLoad = firstLoad;
   graph->setImportantNodes(importantLoad, charStore, fieldLoad);

   graph->setNumDagIds(18);   // dag ids 0..17 are assigned above
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2ArrayCopyB2CBndchk);
   graph->setAspects(isub|iadd|mul|bndchk|sameTypeLoadStore, ILTypeProp::Size_1|ILTypeProp::Size_4, ILTypeProp::Size_2|ILTypeProp::Size_4);
   graph->setNoAspects(call, 0, 0);
   graph->setMinCounts(1, 3, 3);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
6687
6688
6689
6690
//////////////////////////////////////////////////////////////////////////
6691
//////////////////////////////////////////////////////////////////////////
6692
//////////////////////////////////////////////////////////////////////////
6693
//*****************************************************************************************
6694
// IL code generation for copying memory (ByteToChar or CharToByte version)
6695
// Input: ImportantNode(0) - array load in the little endian path
6696
// ImportantNode(1) - array store in the little endian path
6697
// ImportantNode(2) - array load in the big endian path
6698
// ImportantNode(3) - array store in the big endian path
6699
// ImportantNode(4) - if statement of the flag checking
6700
// ImportantNode(5) - if statement of back edge
6701
//*****************************************************************************************
6702
bool
6703
CISCTransform2ArrayCopyC2BMixed(TR_CISCTransformer *trans)
6704
{
6705
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
6706
TR::Node *trNode;
6707
TR::TreeTop *trTreeTop;
6708
TR::Block *block;
6709
TR_CISCGraph *P = trans->getP();
6710
List<TR_CISCNode> *P2T = trans->getP2T();
6711
TR::Compilation *comp = trans->comp();
6712
6713
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
6714
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
6715
6716
trans->findFirstNode(&trTreeTop, &trNode, &block);
6717
if (!block) return false; // cannot find
6718
6719
if (isLoopPreheaderLastBlockInMethod(comp, block))
6720
{
6721
traceMsg(comp, "Bailing CISCTransform2ArrayCopyC2BMixed due to null TT - might be a preheader in last block of method\n");
6722
return false;
6723
}
6724
6725
TR::Block *target = trans->analyzeSuccessorBlock();
6726
// Currently, it allows only a single successor.
6727
if (!target) return false;
6728
6729
TR::Node *indexRepNode, *dstIndexRepNode, *arrayLenRepNode;
6730
getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &arrayLenRepNode);
6731
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
6732
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();
6733
if (trans->countGoodArrayIndex(indexVarSymRef) == 0)
6734
{
6735
if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",indexRepNode);
6736
return false;
6737
}
6738
TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error");
6739
if (trans->countGoodArrayIndex(dstIndexVarSymRef) == 0)
6740
{
6741
if (DISPTRACE(trans)) traceMsg(comp, "analyzeArrayIndex failed. %x\n",dstIndexRepNode);
6742
return false;
6743
}
6744
6745
TR_CISCNode * BEloadMem = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
6746
TR_CISCNode * BEstoreMem = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
6747
TR_CISCNode * LEloadMem = trans->getP2TRepInLoop(P->getImportantNode(0), BEloadMem);
6748
TR_CISCNode * LEstoreMem = trans->getP2TInLoopIfSingle(P->getImportantNode(1));
6749
TR_CISCNode * flagIf = trans->getP2TInLoopIfSingle(P->getImportantNode(4));
6750
TR_CISCNode * backIf = trans->getP2TInLoopIfSingle(P->getImportantNode(5));
6751
6752
if (DISPTRACE(trans)) traceMsg(comp, "All parameters: %x %x %x %x %x %x\n",
6753
LEloadMem, LEstoreMem, BEloadMem, BEstoreMem, flagIf, backIf);
6754
if (!LEloadMem || !LEstoreMem || !BEloadMem || !BEstoreMem || !flagIf || !backIf) return false;
6755
if (flagIf->getOpcode() != TR::ificmpeq && flagIf->getOpcode() != TR::ificmpne) return false;
6756
6757
TR_ASSERT(searchNodeInBlock(flagIf->getSucc(1), LEloadMem) ||
6758
searchNodeInBlock(flagIf->getSucc(1), BEloadMem), "error");
6759
TR_ASSERT(!searchNodeInBlock(flagIf->getSucc(1), LEloadMem) ||
6760
!searchNodeInBlock(flagIf->getSucc(1), BEloadMem), "error");
6761
bool LEalongJumpPath = searchNodeInBlock(flagIf->getSucc(1), LEloadMem);
6762
bool isBig = comp->target().cpu.isBigEndian();
6763
if (!isBig) LEalongJumpPath = !LEalongJumpPath;
6764
if (DISPTRACE(trans)) traceMsg(comp, "LEalongJumpPath = %d\n",LEalongJumpPath);
6765
6766
TR::Block *blockBE = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency()/2, block);
6767
TR::Block *blockLE = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency()/2, block);
6768
TR::Block *blockAfter = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
6769
6770
TR::Node * LEloadMemNode = LEloadMem->getHeadOfTrNode();
6771
TR::Node * LEstoreMemNode = LEstoreMem->getHeadOfTrNode();
6772
TR::Node * BEloadMemNode = BEloadMem->getHeadOfTrNode();
6773
TR::Node * BEstoreMemNode = BEstoreMem->getHeadOfTrNode();
6774
TR::Node * flagIfNode = flagIf->getHeadOfTrNode()->duplicateTree();
6775
TR::Node * backIfNode = backIf->getHeadOfTrNode();
6776
6777
TR::Node * variableORconstRepNode = backIfNode->getChild(1)->duplicateTree();
6778
indexRepNode = createLoad(indexRepNode);
6779
TR::Node * lengthNode = createOP2(comp, TR::isub, variableORconstRepNode, indexRepNode);
6780
TR::Node * c2 = TR::Node::create(indexRepNode, TR::iconst, 0, 2);
6781
TR::Node * diff = lengthNode;
6782
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c2);
6783
// lengthNode has the byte size, and diff has the char-based size (that is, lengthNode = diff * 2)
6784
TR::Node * indexLoadNode = backIfNode->getChild(0)->duplicateTree();
6785
6786
//
6787
// Big Endian Path
6788
//
6789
TR::Node * BELoadAddrTree = BEloadMemNode->getChild(0)->duplicateTree();
6790
TR::Node * BEStoreAddrTree = BEstoreMemNode->getChild(0)->duplicateTree();
6791
TR::Node * BEMemCpy = TR::Node::createArraycopy(BELoadAddrTree, BEStoreAddrTree, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));
6792
BEMemCpy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
6793
BEMemCpy->setForwardArrayCopy(true);
6794
BEMemCpy->setArrayCopyElementType(TR::Int8);
6795
blockBE->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, BEMemCpy)));
6796
TR::Node * updateTree1 = TR::Node::createStore(indexVarSymRef, variableORconstRepNode->duplicateTree());
6797
TR::Node * updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthNode, trNode);
6798
blockBE->append(TR::TreeTop::create(comp, updateTree2));
6799
blockBE->append(TR::TreeTop::create(comp, updateTree1));
6800
blockBE->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, blockAfter->getEntry())));
6801
6802
//
6803
// Little Endian Path
6804
//
6805
TR::Node * LELoadTree = LEloadMemNode->duplicateTree();
6806
TR::Node * LEStoreAddrTree = LEstoreMemNode->getChild(0)->duplicateTree();
6807
if (comp->cg()->supportsByteswap())
6808
{
6809
TR::Node * LEReverseStore = TR::Node::createWithSymRef(TR::sstorei, 2, 2,
6810
LEStoreAddrTree,
6811
TR::Node::create(TR::sbyteswap, 1, LELoadTree),
6812
comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));
6813
blockLE->append(TR::TreeTop::create(comp, LEReverseStore));
6814
}
6815
else
6816
{
6817
TR::Node *replaceParent = NULL;
6818
int childNum = -1;
6819
bool ret;
6820
TR::Node * LEStoreAddrTree2 = LEStoreAddrTree->duplicateTree();
6821
TR::Node *arrayHeaderConst = createArrayHeaderConst(comp, comp->target().is64Bit(), trNode);
6822
ret = trans->searchNodeInTrees(isBig ? LEStoreAddrTree2 : LEStoreAddrTree,
6823
arrayHeaderConst, &replaceParent, &childNum);
6824
TR_ASSERT(ret, "error");
6825
if (comp->target().is64Bit())
6826
{
6827
arrayHeaderConst->setLongInt(arrayHeaderConst->getLongInt()-1);
6828
}
6829
else
6830
{
6831
arrayHeaderConst->setInt(arrayHeaderConst->getInt()-1);
6832
}
6833
replaceParent->setAndIncChild(childNum, arrayHeaderConst);
6834
6835
TR::Node * LEc2b0 = TR::Node::create(TR::s2b, 1, LELoadTree);
6836
TR::Node * LEstore0 = TR::Node::createWithSymRef(TR::bstorei, 2, 2, LEStoreAddrTree, LEc2b0,
6837
comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));
6838
blockLE->append(TR::TreeTop::create(comp, LEstore0));
6839
6840
TR::Node * LEand1 = createOP2(comp, TR::iushr, LELoadTree, TR::Node::create(indexRepNode, TR::iconst, 0, 0x8));
6841
TR::Node * LEi2b1 = TR::Node::create(TR::i2b, 1, LEand1);
6842
TR::Node * LEstore1 = TR::Node::createWithSymRef(TR::bstorei, 2, 2, LEStoreAddrTree2, LEi2b1,
6843
comp->getSymRefTab()->findOrCreateGenericIntShadowSymbolReference(0));
6844
blockLE->append(TR::TreeTop::create(comp, LEstore1));
6845
}
6846
TR::Node * c1 = TR::Node::create(indexRepNode, TR::iconst, 0, 1);
6847
TR::Node * indexUpdateLE = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, c1, trNode);
6848
blockLE->append(TR::TreeTop::create(comp, indexUpdateLE));
6849
blockLE->append(TR::TreeTop::create(comp, createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, c2->duplicateTree(), trNode)));
6850
TR::Node *backIfLE = TR::Node::createif(TR::ificmplt, indexUpdateLE->getChild(0), variableORconstRepNode->duplicateTree(),
6851
blockLE->getEntry());
6852
blockLE->append(TR::TreeTop::create(comp, backIfLE));
6853
6854
// after these two paths
6855
//
6856
// Currently, blockAfter has no nodes.
6857
//
6858
6859
//
6860
// Insert nodes and maintain the CFG
6861
List<TR::Node> guardList(comp->trMemory());
6862
guardList.add(TR::Node::createif(TR::ifiucmpgt, updateTree2->getChild(0)->duplicateTree(), createLoad(arrayLenRepNode)));
6863
guardList.add(TR::Node::createif(TR::ifiucmpge, createLoad(dstIndexRepNode), createLoad(arrayLenRepNode)));
6864
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree(), &guardList);
6865
block = trans->insertBeforeNodes(block);
6866
flagIfNode->setBranchDestination(blockLE->getEntry());
6867
if (!LEalongJumpPath) TR::Node::recreate(flagIfNode, flagIfNode->getOpCode().getOpCodeForReverseBranch());
6868
block->append(TR::TreeTop::create(comp, flagIfNode));
6869
6870
TR::CFG *cfg = comp->getFlowGraph();
6871
cfg->setStructure(NULL);
6872
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
6873
if (orgNextTreeTop)
6874
{
6875
cfg->insertBefore(blockAfter, orgNextTreeTop->getNode()->getBlock());
6876
}
6877
else
6878
{
6879
cfg->addNode(blockAfter);
6880
}
6881
cfg->insertBefore(blockLE, blockAfter);
6882
cfg->insertBefore(blockBE, blockLE);
6883
cfg->join(block, blockBE);
6884
6885
blockAfter = trans->insertAfterNodes(blockAfter);
6886
6887
trans->setSuccessorEdges(block, blockBE, blockLE);
6888
trans->setSuccessorEdge(blockAfter, target);
6889
6890
return true;
6891
}
6892
6893
/****************************************************************************************
6894
Corresponding Java-like Pseudo Program (for big endian)
6895
char v0[ ];
6896
byte v2[ ];
6897
while (true)
6898
{
6899
if(flag)
6900
{
6901
v2[i++] = (byte)(v0[j] & 0xff);
6902
v2[i++] = (byte)(v0[j] >>> 8 & 0xff);
6903
}
6904
else
6905
{
6906
v2[i++] = (byte)(v0[j] >>> 8 & 0xff);
6907
v2[i++] = (byte)(v0[j] & 0xff);
6908
}
6909
j++;
6910
if (j >= len) break;
6911
}
6912
6913
Note 1: One of target methods is com/ibm/rmi/iiop/CDROutputStream.read_wstring().
6914
****************************************************************************************/
6915
// Builds the persistent pattern graph named "MEMCPYChar2ByteMixed".
//
// The graph describes a loop whose body tests a flag and then, on either
// path, loads one char from the source array and writes its two bytes to
// consecutive destination slots (each store preceded by a BNDCHK); the two
// paths differ only in which byte goes first. Both paths advance the
// destination index by two, then merge to advance the source index by one
// and test it against the limit with ificmpge.
//
// c    - compilation whose TR_Memory is handed to every node/graph constructor
// ctrl - CISCUtilCtl_* flags forwarded to the createIdiom* helpers
//
// Returns the fully initialised graph, wired to
// CISCTransform2ArrayCopyC2BMixed as its transformer. Its important nodes
// are, in order: little endian load, little endian store, big endian load,
// big endian store, flag test, back-edge test.
TR_PCISCGraph *
makeMEMCPYChar2ByteMixedGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYChar2ByteMixed", 0, 16);
   /********************************************************************    opc               id        dagId #cfg #child other/pred/children */
   TR_PCISCNode *v1   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 18, 0, 0, 0);
   tgt->addNode(v1);    // src array index
   TR_PCISCNode *v3   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 17, 0, 0, 1);
   tgt->addNode(v3);    // dst array index
   TR_PCISCNode *alen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),16, 0, 0);
   tgt->addNode(alen);  // arraylength
   TR_PCISCNode *vorc = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(),TR_quasiConst2, TR::NoType, tgt->incNumNodes(),15, 0, 0);
   tgt->addNode(vorc);  // length
   TR_PCISCNode *flag = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 2);
   tgt->addNode(flag);  // flag
   TR_PCISCNode *v0   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);
   tgt->addNode(v0);    // src array base
   TR_PCISCNode *v2   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1);
   tgt->addNode(v2);    // dst array base
   TR_PCISCNode *aidx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(),11, 0, 0, 0);
   tgt->addNode(aidx0);
   TR_PCISCNode *cmah = createIdiomArrayHeaderConst (tgt, ctrl, tgt->incNumNodes(), 10, c);   // array header
   TR_PCISCNode *cmah1= createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 9, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1)); // array header+1
   TR_PCISCNode *cm1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 8, 0, 0, -1);
   tgt->addNode(cm1);
   TR_PCISCNode *cm2  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -2);
   tgt->addNode(cm2);
   TR_PCISCNode *c0   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 0);
   tgt->addNode(c0);
   TR_PCISCNode *c2   = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2);    // element size
   TR_PCISCNode *c8   = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8);
   tgt->addNode(c8);
   TR_PCISCNode *c1   = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1);    // element size
   TR_PCISCNode *ent  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(ent);
   // Flag test against zero; any integer compare-and-branch is accepted (TR_ifcmpall).
   TR_PCISCNode *fchk = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ent, flag, c0);
   tgt->addNode(fchk);

   // big endian path: (char >>> 8) goes to the first slot, the s2b of the char to the second
   TR_PCISCNode *bbck0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, fchk, alen, v3);
   tgt->addNode(bbck0);
   TR_PCISCNode *bld0 = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, bbck0, v0, aidx0, cmah, c2);
   TR_PCISCNode *bnc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, bld0, bld0);
   tgt->addNode(bnc2i);
   TR_PCISCNode *bns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iushr, TR::Int32, tgt->incNumNodes(), 1, 1, 2, bnc2i, bnc2i, c8);
   tgt->addNode(bns22);
   //TR_PCISCNode *bcvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, bns22, bns22); tgt->addNode(bcvt0);
   TR_PCISCNode *bns0 = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_ChildDirectConnected, 1, bns22, TR::bstorei, TR::Int8, v2, v3, cmah, c1, bns22);
   TR_PCISCNode *ba1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, bns0, v3, cm1);
   tgt->addNode(ba1);
   TR_PCISCNode *bbck1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ba1, alen, ba1);
   tgt->addNode(bbck1);
   TR_PCISCNode *bns10= createIdiomArrayAddressInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, bbck1, v2, v3, cmah1, c1);
   TR_PCISCNode *bcvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, bns10, bld0);
   tgt->addNode(bcvt1);
   TR_PCISCNode *bns11= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, bcvt1, bns10, bcvt1);
   tgt->addNode(bns11);
   TR_PCISCNode *bn6  = createIdiomDecVarInLoop(tgt, ctrl, 1, bns11, v3, cm2);

   // little endian path: the s2b of the char goes to the first slot, (char >>> 8) to the second
   TR_PCISCNode *lbck0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, fchk, alen, v3);
   tgt->addNode(lbck0);
   TR_PCISCNode *lld0 = createIdiomCharArrayLoadInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, lbck0, v0, aidx0, cmah, c2);
   TR_PCISCNode *lns10= createIdiomArrayAddressInLoop(tgt, ctrl | CISCUtilCtl_ChildDirectConnected, 1, lld0, v2, v3, cmah, c1);
   TR_PCISCNode *lcvt1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, lns10, lld0);
   tgt->addNode(lcvt1);
   TR_PCISCNode *lns11= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, lcvt1, lns10, lcvt1);
   tgt->addNode(lns11);
   TR_PCISCNode *la1  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, lns11, v3, cm1);
   tgt->addNode(la1);
   TR_PCISCNode *lbck1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::BNDCHK, TR::NoType, tgt->incNumNodes(), 1, 1, 2, la1, alen, la1);
   tgt->addNode(lbck1);
   TR_PCISCNode *lnc2i= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, lbck1, lld0);
   tgt->addNode(lnc2i);
   TR_PCISCNode *lns22= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iushr, TR::Int32, tgt->incNumNodes(), 1, 1, 2, lnc2i, lnc2i, c8);
   tgt->addNode(lns22);
   //TR_PCISCNode *lcvt0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, lns22, lns22); tgt->addNode(lcvt0);
   TR_PCISCNode *lns0 = createIdiomArrayStoreInLoop(tgt, ctrl|CISCUtilCtl_ChildDirectConnected, 1, lns22, TR::bstorei, TR::Int8, v2, v3, cmah1, c1, lns22);
   TR_PCISCNode *ln6  = createIdiomDecVarInLoop(tgt, ctrl, 1, lns0, v3, cm2);

   // merge two paths
   TR_PCISCNode *addv1= createIdiomDecVarInLoop(tgt, ctrl, 1, ln6, v1, cm1);
   TR_PCISCNode *topAddV1 = addv1->getChild(0);
   TR_PCISCNode *back = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, addv1, v1, vorc);
   tgt->addNode(back);
   TR_PCISCNode *ext  = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(ext);

   // The flag test's successor 0 is the little endian path and successor 1
   // the big endian path. The big endian path is then routed into the shared
   // source-index update, and the back edge returns to the first body node.
   fchk->setSuccs(lbck0, bbck0);
   bn6->setSucc(0, topAddV1);
   back->setSuccs(ent->getSucc(0), ext);

   bbck0->setIsChildDirectlyConnected();
   bbck1->setIsChildDirectlyConnected();
   bnc2i->setIsChildDirectlyConnected();
   bns22->setIsChildDirectlyConnected();
   //bcvt0->setIsChildDirectlyConnected();
   bcvt1->setIsChildDirectlyConnected();
   bns10->setIsChildDirectlyConnected();
   bns11->setIsChildDirectlyConnected();

   lbck0->setIsChildDirectlyConnected();
   lbck1->setIsChildDirectlyConnected();
   lnc2i->setIsChildDirectlyConnected();
   lns22->setIsChildDirectlyConnected();
   //lcvt0->setIsChildDirectlyConnected();
   lcvt1->setIsChildDirectlyConnected();
   lns10->setIsChildDirectlyConnected();
   lns11->setIsChildDirectlyConnected();

   fchk->setIsChildDirectlyConnected();
   back->setIsChildDirectlyConnected();

   bld0->setIsSuccDirectlyConnected();

   tgt->setEntryNode(ent);
   tgt->setExitNode(ext);
   tgt->setImportantNodes(lld0, lns11, bld0, bns0, fchk, back);
   // Dag ids 0 (exit) through 18 (v1) are assigned above, i.e. 19 distinct
   // ids; the count must cover the highest one.
   tgt->setNumDagIds(19);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);

   tgt->setTransformer(CISCTransform2ArrayCopyC2BMixed);
   tgt->setAspects(isub|mul|shr|bndchk, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call, 0, 0);
   tgt->setMinCounts(2, 2, 4);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
7016
7017
7018
7019
7020
//////////////////////////////////////////////////////////////////////////
7021
//////////////////////////////////////////////////////////////////////////
7022
//////////////////////////////////////////////////////////////////////////
7023
//*****************************************************************************************
7024
// IL code generation for copying memory for CharToByte with two if-statements version
7025
// Input: ImportantNodes(0) - array load
7026
// ImportantNodes(1) - array store
7027
// ImportantNodes(2) - the first if
7028
// ImportantNodes(3) - the second if
7029
//*****************************************************************************************
7030
bool
7031
CISCTransform2ArrayCopyC2BIf2(TR_CISCTransformer *trans)
7032
{
7033
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
7034
TR::Node *trNode;
7035
TR::TreeTop *trTreeTop;
7036
TR::Block *block;
7037
TR_CISCGraph *P = trans->getP();
7038
List<TR_CISCNode> *P2T = trans->getP2T();
7039
TR::Compilation *comp = trans->comp();
7040
7041
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
7042
7043
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
7044
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
7045
7046
trans->findFirstNode(&trTreeTop, &trNode, &block);
7047
if (!block) return false; // cannot find
7048
7049
if (isLoopPreheaderLastBlockInMethod(comp, block))
7050
{
7051
traceMsg(comp, "Bailing CISCTransform2ArrayCopyC2BIf2 due to null TT - might be a preheader in last block of method\n");
7052
return false;
7053
}
7054
7055
TR::Block *target = trans->analyzeSuccessorBlock();
7056
7057
TR::Node *indexRepNode, *dstIndexRepNode, *variableORconstRepNode, *variableORconstRepNode2;
7058
getP2TTrRepNodes(trans, &indexRepNode, &dstIndexRepNode, &variableORconstRepNode, &variableORconstRepNode2);
7059
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
7060
TR::SymbolReference * dstIndexVarSymRef = dstIndexRepNode->getSymbolReference();
7061
TR_ASSERT(indexVarSymRef != dstIndexVarSymRef, "error!");
7062
7063
TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7064
TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7065
7066
//**********************************************************************
7067
// For this idiom, because there are two if-statements, we need to check
7068
// which if-statement will trigger the loop exit.
7069
// Based on this, it will compute the length of copy, which will be
7070
// stored into the variable "lengthByteTemp".
7071
//**********************************************************************
7072
//
7073
TR::CFG *cfg = comp->getFlowGraph();
7074
TR::Node * c2 = TR::Node::create(indexRepNode, TR::iconst, 0, 2);
7075
indexRepNode = convertStoreToLoad(comp, indexRepNode)->duplicateTree();
7076
dstIndexRepNode = convertStoreToLoad(comp, dstIndexRepNode)->duplicateTree();
7077
variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode)->duplicateTree();
7078
variableORconstRepNode2 = convertStoreToLoad(comp, variableORconstRepNode2)->duplicateTree();
7079
7080
// Compute length
7081
TR::Block *chkLen1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
7082
TR::Block *chkLen2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
7083
TR::Block *bodyBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
7084
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
7085
7086
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
7087
// chkLen1
7088
TR::SymbolReference * lengthCharTemp = comp->getSymRefTab()->
7089
createTemporary(comp->getMethodSymbol(), TR::Int32);
7090
TR::SymbolReference * lengthByteTemp = comp->getSymRefTab()->
7091
createTemporary(comp->getMethodSymbol(), TR::Int32);
7092
TR::SymbolReference * selectLen2 = comp->getSymRefTab()->
7093
createTemporary(comp->getMethodSymbol(), TR::Int32);
7094
7095
// use the formula to compute the number of iterations
7096
// the number of times the loop is executed
7097
// n1 => C1 = ceiling[(N1 - i)/incr(i)] = (N1 - i) // entry valueof i ; increment is 1 & lt condition
7098
// C2 = floor[(N2 - j)/incr(j)] = floor[(N2 - j)/2] // entry valueof j ; increment is 2 & le condition
7099
// n2 => C2 + 1 // which necessitates adding an extra iteration
7100
//
7101
// so the lesser(C1, C2) will decide which test exits the loop.
7102
//
7103
//
7104
TR::Node * lengthSrcNode = createOP2(comp, TR::isub,
7105
variableORconstRepNode,
7106
indexRepNode);
7107
TR::Node * storeSrcCharLen = TR::Node::createStore(lengthCharTemp, lengthSrcNode);
7108
TR::Node * storeSrcByteLen = TR::Node::createStore(lengthByteTemp,
7109
TR::Node::create(TR::imul, 2, lengthSrcNode, c2));
7110
TR::Node *zeroConst = TR::Node::create(indexRepNode, TR::iconst, 0, 0);
7111
TR::Node * storeSelectLen = TR::Node::createStore(selectLen2,
7112
zeroConst);
7113
TR::Node * lengthDstNode = createOP2(comp, TR::isub,
7114
variableORconstRepNode2,
7115
dstIndexRepNode);
7116
7117
TR::Node * c1 = TR::Node::create(indexRepNode, TR::iconst, 0, 1);
7118
7119
TR::Node *incr = c1->duplicateTree();
7120
lengthDstNode = TR::Node::create(TR::ishr, 2, lengthDstNode, incr);
7121
TR::Node * lengthDstDiv2Node = TR::Node::create(TR::isub, 2, lengthDstNode, TR::Node::create(indexRepNode, TR::iconst, 0, -1));
7122
7123
TR::Node *cmpMin = TR::Node::createif(TR::ificmpge, lengthDstDiv2Node, lengthSrcNode, bodyBlock->getEntry());
7124
chkLen1->append(TR::TreeTop::create(comp, storeSrcCharLen));
7125
chkLen1->append(TR::TreeTop::create(comp, storeSrcByteLen));
7126
chkLen1->append(TR::TreeTop::create(comp, storeSelectLen));
7127
chkLen1->append(TR::TreeTop::create(comp, cmpMin));
7128
7129
// chkLen2
7130
c1 = c1->duplicateTree();
7131
lengthDstDiv2Node = lengthDstDiv2Node->duplicateTree();
7132
TR::Node * storeSrcCharLen2 = TR::Node::createStore(lengthCharTemp, lengthDstDiv2Node);
7133
TR::Node * storeSrcByteLen2 = TR::Node::createStore(lengthByteTemp,
7134
TR::Node::create(TR::ishl, 2, lengthDstDiv2Node, c1->duplicateTree()));
7135
TR::Node * storeSelectLen2 = TR::Node::createStore(selectLen2, c1);
7136
chkLen2->append(TR::TreeTop::create(comp, storeSrcCharLen2));
7137
chkLen2->append(TR::TreeTop::create(comp, storeSrcByteLen2));
7138
chkLen2->append(TR::TreeTop::create(comp, storeSelectLen2));
7139
7140
// body
7141
c2 = c2->duplicateTree();
7142
TR::Node * updateTree1, *updateTree2;
7143
updateTree1 = createStoreOP2(comp, indexVarSymRef, TR::iadd, indexVarSymRef, lengthCharTemp, trNode);
7144
updateTree2 = createStoreOP2(comp, dstIndexVarSymRef, TR::iadd, dstIndexVarSymRef, lengthByteTemp, trNode);
7145
7146
// Prepare the node arraycopy
7147
TR::Node *lenNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, lengthByteTemp));
7148
TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lenNode);
7149
arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
7150
arraycopy->setForwardArrayCopy(true);
7151
arraycopy->setArrayCopyElementType(TR::Int8);
7152
7153
TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);
7154
TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);
7155
TR::TreeTop * updateTreeTop2 = TR::TreeTop::create(comp, updateTree2);
7156
TR::Node * cmpExit = NULL;
7157
TR::TreeTop *failDest = NULL;
7158
TR::TreeTop *okDest = NULL;
7159
if (!target) // multiple successor blocks
7160
{
7161
TR_CISCNode *cmpgeCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));
7162
TR_CISCNode *cmpgtCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));
7163
failDest = cmpgtCISCNode->getDestination();
7164
okDest = cmpgeCISCNode->getDestination();
7165
7166
cmpExit = TR::Node::createif(TR::ificmpeq,
7167
TR::Node::createWithSymRef(indexRepNode, TR::iload, 0, selectLen2),
7168
TR::Node::create(indexRepNode, TR::iconst, 0, 0),
7169
okDest);
7170
}
7171
7172
//
7173
// Insert nodes and maintain the CFG
7174
//
7175
TR::TreeTop *last;
7176
last = trans->removeAllNodes(trTreeTop, block->getExit());
7177
last->join(block->getExit());
7178
block = trans->insertBeforeNodes(block);
7179
7180
cfg->setStructure(NULL);
7181
7182
trTreeTop->setNode(topArraycopy);
7183
bodyBlock->append(trTreeTop);
7184
bodyBlock->append(updateTreeTop1);
7185
bodyBlock->append(updateTreeTop2);
7186
trans->insertAfterNodes(bodyBlock);
7187
cfg->insertBefore(bodyBlock, orgNextBlock);
7188
cfg->insertBefore(chkLen2, bodyBlock);
7189
cfg->insertBefore(chkLen1, chkLen2);
7190
cfg->join(block, chkLen1);
7191
if (target) // single successor block
7192
{
7193
trans->setSuccessorEdge(bodyBlock, target);
7194
}
7195
else
7196
{ // multiple successor blocks
7197
bodyBlock->append(TR::TreeTop::create(comp, cmpExit));
7198
trans->setSuccessorEdges(bodyBlock,
7199
failDest->getEnclosingBlock(),
7200
okDest->getEnclosingBlock());
7201
}
7202
trans->setSuccessorEdge(block, chkLen1);
7203
return true;
7204
}
7205
7206
7207
/****************************************************************************************
7208
Corresponding Java-like Pseudo Program (for big endian)
7209
int v1, v3, end, end2;
7210
char v0[ ];
7211
byte v2[ ];
7212
while(true){
7213
if (v1 >= end) break;
7214
if (v3 > end2) break;
7215
char T = v0[v1++];
7216
v2[v3++] = (byte)(T >> 8);
7217
v2[v3++] = (byte)(T & 0xff);
7218
}
7219
7220
Note 1: This idiom also supports little endian.
7221
****************************************************************************************/
7222
/**
 * Builds the pattern graph "MEMCPYChar2Byte2": a loop that loads one 16-bit element,
 * stores it as two bytes and leaves through one of two exit tests
 * (ificmpge on the source index, ificmpgt on the destination index).
 *
 * Nodes receive their ids from successive tgt->incNumNodes() calls, so the order
 * of the statements below is significant.
 *
 * @param c    compilation; its trMemory() is handed to every node constructor
 * @param ctrl CISCUtilCtl_* flags; CISCUtilCtl_BigEndian is tested here to choose
 *             which of the two byte stores receives the shifted value, and the
 *             flags are forwarded to the createIdiom* helpers
 * @return the newly built pattern graph
 */
TR_PCISCGraph *
makeMEMCPYChar2ByteGraph2(TR::Compilation *c, int32_t ctrl)
   {
   const bool bigEndian = (ctrl & CISCUtilCtl_BigEndian) != 0;

   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYChar2Byte2", 0, 16);
   /* constructor arguments after trMemory():  opc  type  id  dagId  #cfg  #child  other/pred/children */

   // Variables, array bases and constants (dagIds 15 down to 3)
   TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(v1);      // src array index
   TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(v3);      // dst array index
   TR_PCISCNode *vorc = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 13, 0, 0);
   tgt->addNode(vorc);    // length
   TR_PCISCNode *vorc2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 12, 0, 0);
   tgt->addNode(vorc2);   // length2
   TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0);
   tgt->addNode(v0);      // src array base
   TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1);
   tgt->addNode(v2);      // dst array base
   TR_PCISCNode *cmah = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 0);
   tgt->addNode(cmah);    // array header
   TR_PCISCNode *cmah1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));   // array header+1
   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, -1);
   tgt->addNode(cm1);
   TR_PCISCNode *cm2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, -2);
   tgt->addNode(cm2);
   TR_PCISCNode *c2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 5, 2);   // element size
   TR_PCISCNode *c8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 8);
   tgt->addNode(c8);
   TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1);   // element size

   // Loop body (dagId 1), chained from the entry node
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(ent);
   TR_PCISCNode *lv1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iload, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ent, v1);
   tgt->addNode(lv1);
   TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, lv1, lv1, cm1);
   TR_PCISCNode *ns10 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, n7, v3, cmah, c1);
   TR_PCISCNode *ns11 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns10, v2, ns10);
   TR_PCISCNode *nl0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ns11, lv1, cmah, c2);
   TR_PCISCNode *nl1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, nl0, v0, nl0);
   TR_PCISCNode *nl2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::sloadi, TR::Int16, tgt->incNumNodes(), 1, 1, 1, nl1, nl1);
   tgt->addNode(nl2);

   // First byte store: shifted by c8 when big endian, plain s2b otherwise
   TR_PCISCNode *cvt0, *cvt1;
   if (bigEndian)
      {
      TR_PCISCNode *nc2i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl2, nl2);
      tgt->addNode(nc2i);
      TR_PCISCNode *ns22 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8);
      tgt->addNode(ns22);
      cvt0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22);
      tgt->addNode(cvt0);
      }
   else
      {
      cvt0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, nl2, nl2);
      tgt->addNode(cvt0);
      }
   TR_PCISCNode *ns14 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt0, ns11, cvt0);
   tgt->addNode(ns14);
   TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns14, v3, cm2);
   TR_PCISCNode *ns20 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl|CISCUtilCtl_NoI2L, 1, n6, ns10->getChild(0)->getChild(0), cmah1, c1);
   TR_PCISCNode *ns21 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns20, v2, ns20);

   // Second byte store: the mirror image of the first one
   if (bigEndian)
      {
      cvt1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::s2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns21, nl2);
      tgt->addNode(cvt1);
      }
   else
      {
      TR_PCISCNode *nc2i = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::su2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, ns21, nl2);
      tgt->addNode(nc2i);
      TR_PCISCNode *ns22 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nc2i, nc2i, c8);
      tgt->addNode(ns22);
      cvt1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns22, ns22);
      tgt->addNode(cvt1);
      }
   TR_PCISCNode *ns24 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, cvt1, ns21, cvt1);
   tgt->addNode(ns24);

   // The two exit tests and the exit node
   TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, ns24, v1, vorc);
   tgt->addNode(n8);
   TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpgt, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n8, v3, vorc2);
   tgt->addNode(n9);
   TR_PCISCNode *n10 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(n10);

   n8->setSucc(1, n10);
   n9->setSuccs(ent->getSucc(0), n10);

   n8->setIsChildDirectlyConnected();
   n9->setIsChildDirectlyConnected();

   tgt->setEntryNode(ent);
   tgt->setExitNode(n10);
   // Important nodes: array load, first array store, first if, second if
   tgt->setImportantNodes(nl2, ns14, n8, n9);
   tgt->setNumDagIds(16);
   tgt->createInternalData(1);

   tgt->setSpecialCareNode(0, cvt0);   // conversion (possibly i2b)
   tgt->setSpecialCareNode(1, cvt1);   // conversion (possibly i2b)
   tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);

   tgt->setTransformer(CISCTransform2ArrayCopyC2BIf2);
   tgt->setAspects(isub|mul|shr, ILTypeProp::Size_2, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk, 0, 0);
   tgt->setMinCounts(1, 1, 2);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(warm, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
7302
7303
7304
7305
7306
//////////////////////////////////////////////////////////////////////////
7307
//////////////////////////////////////////////////////////////////////////
7308
//////////////////////////////////////////////////////////////////////////
7309
//*****************************************************************************************
7310
// IL code generation for copying memory (ByteToInt or IntToByte version)
7311
// Input: ImportantNodes(0) - array load
7312
// ImportantNodes(1) - array store
7313
//*****************************************************************************************
7314
bool
7315
CISCTransform2ArrayCopyB2I(TR_CISCTransformer *trans)
7316
{
7317
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
7318
TR::Node *trNode;
7319
TR::TreeTop *trTreeTop;
7320
TR::Block *block;
7321
TR_CISCGraph *P = trans->getP();
7322
List<TR_CISCNode> *P2T = trans->getP2T();
7323
TR::Compilation *comp = trans->comp();
7324
7325
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
7326
7327
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
7328
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
7329
7330
trans->findFirstNode(&trTreeTop, &trNode, &block);
7331
if (!block) return false; // cannot find
7332
7333
if (isLoopPreheaderLastBlockInMethod(comp, block))
7334
{
7335
traceMsg(comp, "Bailing CISCTransform2ArrayCopyB2I due to null TT - might be a preheader in last block of method\n");
7336
return false;
7337
}
7338
7339
TR::Block *target = trans->analyzeSuccessorBlock();
7340
// Currently, it allows only a single successor.
7341
if (!target) return false;
7342
7343
TR::Node *indexRepNode, *variableORconstRepNode;
7344
getP2TTrRepNodes(trans, &indexRepNode, &variableORconstRepNode);
7345
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
7346
7347
TR::Node * inputMemNode = trans->getP2TRepInLoop(P->getImportantNode(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7348
TR::Node * outputMemNode = trans->getP2TRepInLoop(P->getImportantNode(1))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7349
TR::Node * inputNode = trans->getP2TRepInLoop(P->getImportantNode(0)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7350
TR::Node * outputNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0))->getHeadOfTrNodeInfo()->_node->duplicateTree();
7351
7352
TR::Node * exitVarNode = createLoad(indexRepNode);
7353
variableORconstRepNode = convertStoreToLoad(comp, variableORconstRepNode);
7354
TR::Node * lengthNode = createOP2(comp, TR::isub,
7355
variableORconstRepNode,
7356
exitVarNode);
7357
TR::Node * updateTree1;
7358
TR::Node * c4 = TR::Node::create(indexRepNode, TR::iconst, 0, 4);
7359
TR::Node * diff = lengthNode;
7360
lengthNode = TR::Node::create(TR::imul, 2, lengthNode, c4);
7361
// lengthNode has the byte size, and diff has the int-based size (that is, lengthNode = diff * 4)
7362
updateTree1 = TR::Node::createStore(indexVarSymRef, variableORconstRepNode);
7363
7364
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);
7365
7366
// Prepare the arraycopy node
7367
TR::Node * arraycopy = TR::Node::createArraycopy(inputNode, outputNode, lengthNode);
7368
arraycopy->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCopySymbol());
7369
arraycopy->setForwardArrayCopy(true);
7370
arraycopy->setArrayCopyElementType(TR::Int8);
7371
7372
TR::Node * topArraycopy = TR::Node::create(TR::treetop, 1, arraycopy);
7373
TR::TreeTop * updateTreeTop1 = TR::TreeTop::create(comp, updateTree1);
7374
7375
// Insert nodes and maintain the CFG
7376
TR::TreeTop *last;
7377
last = trans->removeAllNodes(trTreeTop, block->getExit());
7378
last->join(block->getExit());
7379
block = trans->insertBeforeNodes(block);
7380
last = block->getLastRealTreeTop();
7381
last->join(trTreeTop);
7382
trTreeTop->setNode(topArraycopy);
7383
trTreeTop->join(updateTreeTop1);
7384
updateTreeTop1->join(block->getExit());
7385
7386
trans->insertAfterNodes(block);
7387
7388
trans->setSuccessorEdge(block, target);
7389
return true;
7390
}
7391
7392
7393
/****************************************************************************************
7394
Corresponding Java-like Pseudo Program
7395
int v1, end;
7396
byte v0[ ];
7397
int v2[ ];
7398
while(true){
7399
v2[v1] = ((v0[v1*4] & 0xFF) << 24) | ((v0[v1*4+1] & 0xFF) << 16) |
7400
((v0[v1*4+2] & 0xFF) << 8) | (v0[v1*4+3] & 0xFF);
7401
v1++;
7402
if (v1 >= end) break;
7403
}
7404
****************************************************************************************/
7405
/**
 * Builds the pattern graph "MEMCPYByte2Int": a loop that loads four bytes,
 * widens each with bu2i, scales three of them by 0x1000000 / 0x10000 / 0x100,
 * ORs the results together and stores the combined value with istorei.
 *
 * Nodes receive their ids from successive tgt->incNumNodes() calls, so the order
 * of the statements below is significant.
 *
 * @param c    compilation; its trMemory() is handed to every node constructor
 * @param ctrl CISCUtilCtl_* flags; CISCUtilCtl_BigEndian selects which array-header
 *             offset each of the four loads uses, CISCUtilCtl_64Bit adds a separate
 *             iconst 4 node for the index multiply; the flags are also forwarded
 *             to the createIdiom* helpers
 * @return the newly built pattern graph
 */
TR_PCISCGraph *
makeMEMCPYByte2IntGraph(TR::Compilation *c, int32_t ctrl)
   {
   const bool bigEndian = (ctrl & CISCUtilCtl_BigEndian) != 0;

   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYByte2Int", 0, 16);
   /* constructor arguments after trMemory():  opc  type  id  dagId  #cfg  #child  other/pred/children */

   TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 16, 0, 0, 0);
   tgt->addNode(v1);     // array index of src and dst
   TR_PCISCNode *vorc = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 15, 0, 0);
   tgt->addNode(vorc);   // length
   TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 0);
   tgt->addNode(v0);     // src array base
   TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 1);
   tgt->addNode(v2);     // dst array base
   TR_PCISCNode *cmah = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0);
   tgt->addNode(cmah);   // array header
   // NOTE(review): cmah1 is given dagId 12, the same value as cmah just above; the sibling
   // makeMEMCPYInt2ByteGraph uses 13 and 12 for the same pair. Confirm whether this is intended.
   TR_PCISCNode *cmah1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 12, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));   // array header+1
   TR_PCISCNode *cmah2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 11, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+2));   // array header+2
   TR_PCISCNode *cmah3 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 10, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+3));   // array header+3

   // Header offsets for the 1st..4th byte load; the order is reversed when not big endian.
   TR_PCISCNode *ah1 = bigEndian ? cmah  : cmah3;
   TR_PCISCNode *ah2 = bigEndian ? cmah1 : cmah2;
   TR_PCISCNode *ah3 = bigEndian ? cmah2 : cmah1;
   TR_PCISCNode *ah4 = bigEndian ? cmah3 : cmah;

   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 9, 0, 0, -1);
   tgt->addNode(cm1);
   TR_PCISCNode *c4 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 8, 4);   // element size
   // Multiplier of the index multiply (nmul): c4 itself, or a dedicated iconst 4 when CISCUtilCtl_64Bit is set.
   TR_PCISCNode *ci4 = c4;
   if (ctrl & CISCUtilCtl_64Bit)
      {
      ci4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 7, 0, 0, 4);
      tgt->addNode(ci4);
      }
   // Multipliers applied to the widened bytes: 0x100, 0x10000, 0x1000000
   TR_PCISCNode *cs8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 0x100);
   tgt->addNode(cs8);
   TR_PCISCNode *cs16 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, 0x10000);
   tgt->addNode(cs16);
   TR_PCISCNode *cs24 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 0x1000000);
   tgt->addNode(cs24);
   TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1);   // element size

   // Loop body (dagId 1), chained from the entry node
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(ent);
   TR_PCISCNode *ns0 = createIdiomArrayAddressIndexTreeInLoop(tgt, ctrl, 1, ent, v1, cmah, c4);
   TR_PCISCNode *ns1 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns0, v2, ns0);
   TR_PCISCNode *nmul = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ns1, v1, ci4);
   tgt->addNode(nmul);

   // 1st byte: load, widen, multiply by cs24
   TR_PCISCNode *nl12 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, nmul, TR::bloadi, TR::Int8, v0, nmul, ah1, c1);
   TR_PCISCNode *nl13 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl12, nl12);
   tgt->addNode(nl13);
   TR_PCISCNode *nl14 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl13, nl13, cs24);
   tgt->addNode(nl14);

   // 2nd byte: load, widen, multiply by cs16, OR into the running value
   TR_PCISCNode *nl22 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl14, TR::bloadi, TR::Int8, v0, nmul, ah2, c1);
   TR_PCISCNode *nl23 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl22, nl22);
   tgt->addNode(nl23);
   TR_PCISCNode *nl24 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl23, nl23, cs16);
   tgt->addNode(nl24);
   TR_PCISCNode *nl25 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl24, nl14, nl24);
   tgt->addNode(nl25);

   // 3rd byte: load, widen, multiply by cs8, OR into the running value
   TR_PCISCNode *nl32 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl25, TR::bloadi, TR::Int8, v0, nmul, ah3, c1);
   TR_PCISCNode *nl33 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl32, nl32);
   tgt->addNode(nl33);
   TR_PCISCNode *nl34 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl33, nl33, cs8);
   tgt->addNode(nl34);
   TR_PCISCNode *nl35 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl34, nl25, nl34);
   tgt->addNode(nl35);

   // 4th byte: load, widen, OR into the running value (no multiply)
   TR_PCISCNode *nl42 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, nl35, TR::bloadi, TR::Int8, v0, nmul, ah4, c1);
   TR_PCISCNode *nl43 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bu2i, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl42, nl42);
   tgt->addNode(nl43);
   TR_PCISCNode *nl45 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ior, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl43, nl35, nl43);
   tgt->addNode(nl45);

   // Store of the combined value, index update, exit test and exit node
   TR_PCISCNode *ns4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istorei, TR::Int32, tgt->incNumNodes(), 1, 1, 2, nl45, ns1, nl45);
   tgt->addNode(ns4);
   TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns4, v1, cm1);
   TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc);
   tgt->addNode(n8);
   TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(n9);

   n8->setSuccs(ent->getSucc(0), n9);
   n8->setIsChildDirectlyConnected();

   tgt->setEntryNode(ent);
   tgt->setExitNode(n9);
   // Important nodes: the byte load that uses the plain array header (cmah), and the int store
   tgt->setImportantNodes(bigEndian ? nl12 : nl42, ns4);
   tgt->setNumDagIds(17);
   tgt->createInternalData(1);

   tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   tgt->setTransformer(CISCTransform2ArrayCopyB2I);
   tgt->setAspects(isub|mul|bitop1, ILTypeProp::Size_1, ILTypeProp::Size_4);
   tgt->setNoAspects(call|bndchk, 0, 0);
   tgt->setMinCounts(1, 4, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(hot, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
7474
7475
7476
/****************************************************************************************
7477
Corresponding Java-like Pseudo Program
7478
int v1, end;
7479
int v0[ ];
7480
byte v2[ ];
7481
while(true){
7482
v2[v1*4] = (byte)(v0[v1] >>> 24) & 0xFF;
7483
v2[v1*4+1] = (byte)(v0[v1] >>> 16) & 0xFF;
7484
v2[v1*4+2] = (byte)(v0[v1] >>> 8) & 0xFF;
7485
v2[v1*4+3] = (byte)(v0[v1] & 0xff);
7486
v1++;
7487
if (v1 >= end) break;
7488
}
7489
****************************************************************************************/
7490
/**
 * Builds the pattern graph "MEMCPYInt2Byte": a loop that loads one 32-bit element
 * and stores it as four bytes (shifted by 24, 16 and 8, and unshifted).
 *
 * Nodes receive their ids from successive tgt->incNumNodes() calls, so the order
 * of the statements below is significant.
 *
 * @param c    compilation; its trMemory() is handed to every node constructor
 * @param ctrl CISCUtilCtl_* flags; CISCUtilCtl_BigEndian selects which array-header
 *             offset each of the four stores uses, CISCUtilCtl_64Bit changes how the
 *             source element address is formed; the flags are also forwarded to
 *             the createIdiom* helpers
 * @return the newly built pattern graph
 */
TR_PCISCGraph *
makeMEMCPYInt2ByteGraph(TR::Compilation *c, int32_t ctrl)
   {
   const bool bigEndian = (ctrl & CISCUtilCtl_BigEndian) != 0;
   const bool is64Bit = (ctrl & CISCUtilCtl_64Bit) != 0;

   TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MEMCPYInt2Byte", 0, 16);
   /* constructor arguments after trMemory():  opc  type  id  dagId  #cfg  #child  other/pred/children */

   TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 17, 0, 0, 0);
   tgt->addNode(v1);     // array index of src and dst
   TR_PCISCNode *vorc = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 16, 0, 0);
   tgt->addNode(vorc);   // length
   TR_PCISCNode *v0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 15, 0, 0, 0);
   tgt->addNode(v0);     // src array base
   TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 14, 0, 0, 1);
   tgt->addNode(v2);     // dst array base
   TR_PCISCNode *cmah = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0);
   tgt->addNode(cmah);   // array header
   TR_PCISCNode *cmah1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 12, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+1));   // array header+1
   TR_PCISCNode *cmah2 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 11, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+2));   // array header+2
   TR_PCISCNode *cmah3 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 10, -(int32_t)(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+3));   // array header+3

   // Header offsets for the 1st..4th byte store; the order is reversed when not big endian.
   TR_PCISCNode *ah1 = bigEndian ? cmah  : cmah3;
   TR_PCISCNode *ah2 = bigEndian ? cmah1 : cmah2;
   TR_PCISCNode *ah3 = bigEndian ? cmah2 : cmah1;
   TR_PCISCNode *ah4 = bigEndian ? cmah3 : cmah;

   TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 9, 0, 0, -1);
   tgt->addNode(cm1);
   TR_PCISCNode *cs4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 8, 0, 0, 4);
   tgt->addNode(cs4);    // element size
   TR_PCISCNode *cl4 = cs4;
   if (is64Bit)
      {
      cl4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, tgt->incNumNodes(), 7, 0, 0, 4);
      tgt->addNode(cl4); // element size for 64-bit
      }
   // Shift amounts: 8, 16, 24
   TR_PCISCNode *cs8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 6, 0, 0, 8);
   tgt->addNode(cs8);
   TR_PCISCNode *cs16 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 5, 0, 0, 16);
   tgt->addNode(cs16);
   TR_PCISCNode *cs24 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, 24);
   tgt->addNode(cs24);
   TR_PCISCNode *c1 = createIdiomArrayRelatedConst(tgt, ctrl, tgt->incNumNodes(), 3, 1);   // element size

   // Loop body (dagId 1), chained from the entry node
   TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0);
   tgt->addNode(ent);
   TR_PCISCNode *nmul = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::imul, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ent, v1, cs4);
   tgt->addNode(nmul);
   TR_PCISCNode *ns00 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, nmul, v2, nmul, ah1, c1);

   // Source element address: index v1 with multiplier cl4 when 64-bit, otherwise nmul with multiplier c1
   TR_PCISCNode *nl00 = is64Bit
      ? createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns00, v0, v1, cmah, cl4)
      : createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns00, v0, nmul, cmah, c1);
   TR_PCISCNode *nl01 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iloadi, TR::Int32, tgt->incNumNodes(), 1, 1, 1, nl00, nl00);
   tgt->addNode(nl01);

   // 1st store: value shifted by cs24
   TR_PCISCNode *ns01 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, nl01, nl01, cs24);
   tgt->addNode(ns01);
   TR_PCISCNode *ns02 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns01, ns01);
   tgt->addNode(ns02);
   TR_PCISCNode *ns03 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns02, ns00, ns02);
   tgt->addNode(ns03);

   // 2nd store: value shifted by cs16
   TR_PCISCNode *ns10 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns03, v2, nmul, ah2, c1);
   TR_PCISCNode *ns11 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ns10, nl01, cs16);
   tgt->addNode(ns11);
   TR_PCISCNode *ns12 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns11, ns11);
   tgt->addNode(ns12);
   TR_PCISCNode *ns13 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns12, ns10, ns12);
   tgt->addNode(ns13);

   // 3rd store: value shifted by cs8
   TR_PCISCNode *ns20 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns13, v2, nmul, ah3, c1);
   TR_PCISCNode *ns21 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ishrall, TR::NoType, tgt->incNumNodes(), 1, 1, 2, ns20, nl01, cs8);
   tgt->addNode(ns21);
   TR_PCISCNode *ns22 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns21, ns21);
   tgt->addNode(ns22);
   TR_PCISCNode *ns23 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns22, ns20, ns22);
   tgt->addNode(ns23);

   // 4th store: unshifted value
   TR_PCISCNode *ns30 = createIdiomArrayAddressInLoop(tgt, ctrl, 1, ns23, v2, nmul, ah4, c1);
   TR_PCISCNode *ns32 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, tgt->incNumNodes(), 1, 1, 1, ns30, nl01);
   tgt->addNode(ns32);
   TR_PCISCNode *ns33 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, tgt->incNumNodes(), 1, 1, 2, ns32, ns30, ns32);
   tgt->addNode(ns33);

   // Index update, exit test and exit node
   TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ns33, v1, cm1);
   TR_PCISCNode *n8 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n6, v1, vorc);
   tgt->addNode(n8);
   TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0);
   tgt->addNode(n9);

   n8->setSuccs(ent->getSucc(0), n9);
   n8->setIsChildDirectlyConnected();

   tgt->setEntryNode(ent);
   tgt->setExitNode(n9);
   // Important nodes: the int load, and the byte store that uses the plain array header (cmah)
   tgt->setImportantNodes(nl01, bigEndian ? ns03 : ns33);
   tgt->setNumDagIds(18);
   tgt->createInternalData(1);

   tgt->setSpecialCareNode(0, ns02);   // i2b
   tgt->setSpecialCareNode(1, ns12);   // i2b
   tgt->setSpecialCareNode(2, ns22);   // i2b
   tgt->setSpecialCareNode(3, ns32);   // i2b
   tgt->setSpecialNodeTransformer(MEMCPYSpecialNodeTransformer);

   tgt->setTransformer(CISCTransform2ArrayCopyB2I);
   tgt->setAspects(isub|mul|shr, ILTypeProp::Size_4, ILTypeProp::Size_1);
   tgt->setNoAspects(call|bndchk, 0, 0);
   tgt->setMinCounts(1, 1, 4);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   tgt->setHotness(hot, false);
   tgt->setInhibitBeforeVersioning();
   return tgt;
   }
7572
7573
7574
//////////////////////////////////////////////////////////////////////////
7575
//////////////////////////////////////////////////////////////////////////
7576
//////////////////////////////////////////////////////////////////////////
7577
//*****************************************************************************************
7578
// IL code generation for filling memory
7579
// Input: ImportantNode(0) - astore of aiadd or aladd for address induction variable
7580
// ImportantNode(1) - array element store
7581
// ImportantNode(2) - exit if
7582
//*****************************************************************************************
7583
// Returns the magnitude of val: a negative input is negated, any other input
// is handed back unchanged.
static int32_t getAbs(int32_t val)
   {
   if (val < 0)
      return -val;
   return val;
   }
7587
bool
7588
CISCTransform2PtrArraySet(TR_CISCTransformer *trans)
7589
{
7590
bool trace = trans->trace();
7591
TR::Node *trNode = NULL;
7592
TR::TreeTop *trTreeTop = NULL;
7593
TR::Block *block = NULL;
7594
TR_CISCGraph *p = trans->getP();
7595
List<TR_CISCNode> *P2T = trans->getP2T();
7596
TR::Compilation *comp = trans->comp();
7597
trans->findFirstNode(&trTreeTop, &trNode, &block);
7598
if (!block)
7599
return false; // cannot find
7600
7601
// Currently, it allows only a single successor.
7602
TR::Block *target = trans->analyzeSuccessorBlock();
7603
if (!target)
7604
return false;
7605
7606
// Only handle very simple loops.
7607
if (trans->getNumOfBBlistBody() > 1)
7608
{
7609
if (trace) traceMsg(comp, "Need exactly 1 basic block\n");
7610
return false;
7611
}
7612
7613
// Should have 3 treetops in body. See makePtrArraySetGraph
7614
if (block->getNumberOfRealTreeTops() != 3)
7615
{
7616
if (trace) traceMsg(comp, "Need exactly 3 real treetops\n");
7617
return false;
7618
}
7619
7620
auto astore = trans->getP2TRepInLoop(p->getImportantNode(0));
7621
auto Store = trans->getP2TRepInLoop(p->getImportantNode(1));
7622
auto ifcmp = trans->getP2TRepInLoop(p->getImportantNode(2));
7623
7624
if (!astore)
7625
{
7626
if (trace) traceMsg(comp, "astore missing\n");
7627
return false;
7628
}
7629
if (!Store)
7630
{
7631
if (trace) traceMsg(comp, "array element store missing\n");
7632
return false;
7633
}
7634
if (!ifcmp)
7635
{
7636
if (trace) traceMsg(comp, "if compare missing\n");
7637
return false;
7638
}
7639
7640
auto astoreNode = astore->getHeadOfTrNode();
7641
auto StoreNode = Store->getHeadOfTrNode();
7642
auto ifcmpNode = ifcmp->getHeadOfTrNode();
7643
7644
if (!(astoreNode->getChild(0)->getChild(0) == StoreNode->getChild(0) &&
7645
astoreNode->getChild(0) == ifcmpNode->getChild(0)))
7646
{
7647
if (trace) traceMsg(comp, "node trees not in required form\n");
7648
return false;
7649
}
7650
7651
if (!ifcmpNode->getChild(0)->getOpCode().isLoadVar() &&
7652
!ifcmpNode->getChild(1)->getOpCode().isLoadVar())
7653
{
7654
if (trace) traceMsg(comp, "neither comparands are loadvar\n");
7655
return false;
7656
}
7657
7658
if (ifcmpNode->getChild(0)->getOpCode().isLoadVar() ^
7659
ifcmpNode->getChild(1)->getOpCode().isLoadVar())
7660
{
7661
auto nonLoadChild = (ifcmpNode->getChild(0)->getOpCode().isLoadVar()) ?
7662
ifcmpNode->getChild(1) : ifcmpNode->getChild(0);
7663
if (astoreNode->getChild(0) != nonLoadChild)
7664
{
7665
if (trace) traceMsg(comp, "iv is not a commoned child in if comparand\n");
7666
return false;
7667
}
7668
}
7669
7670
// Only ordered compare {lt,le,ge,gt} and ne allowed
7671
if (!ifcmpNode->getOpCode().isCompareForOrder() &&
7672
!(!ifcmpNode->getOpCode().isCompareTrueIfEqual() && ifcmpNode->getOpCode().isCompareForEquality()))
7673
{
7674
if (trace) traceMsg(comp, "invalid compare condition\n");
7675
return false;
7676
}
7677
7678
if (!StoreNode->getOpCode().isStoreIndirect() ||
7679
(StoreNode->getChild(0)->getOpCode().isLoadVar() &&
7680
StoreNode->getChild(0)->getSymbolReference() != astoreNode->getSymbolReference()))
7681
{
7682
if (trace) traceMsg(comp, "array element store node is neither indirect store "
7683
"nor matched with addr iv\n");
7684
return false;
7685
}
7686
7687
switch(StoreNode->getSize())
7688
{
7689
case 1:
7690
case 2:
7691
case 4:
7692
case 8: break;
7693
default:
7694
if (trace)
7695
traceMsg(comp, "element size is not power-of-2 <= 8\n");
7696
return false;
7697
}
7698
7699
if (StoreNode->getDataType() == TR::Aggregate)
7700
{
7701
if (trace)
7702
traceMsg(comp, "arrayset can't handle aggregate elem type\n");
7703
return false;
7704
}
7705
7706
auto increment = astoreNode->getChild(0)->getChild(1)->getConst<int32_t>();
7707
if (StoreNode->getSize() != getAbs(increment))
7708
{
7709
if (trace) traceMsg(comp, "increment size does not match element size\n");
7710
return false;
7711
}
7712
7713
TR::Node *endPtr = NULL;
7714
if (ifcmpNode->getChild(0)->getOpCode().isLoadVar() &&
7715
ifcmpNode->getChild(0)->getSymbolReference() != astoreNode->getSymbolReference())
7716
endPtr = ifcmpNode->getChild(0);
7717
else if (ifcmpNode->getChild(1)->getOpCode().isLoadVar() &&
7718
ifcmpNode->getChild(1)->getSymbolReference() != astoreNode->getSymbolReference())
7719
endPtr = ifcmpNode->getChild(1);
7720
7721
if (!endPtr)
7722
{
7723
if (trace) traceMsg(comp, "Could not get end pointer\n");
7724
return false;
7725
}
7726
7727
// all good.. now actual transformations
7728
auto startPtr = TR::Node::createWithSymRef(TR::aload, 0, astoreNode->getSymbolReference());
7729
TR::Node *length, *arrayset;
7730
bool use64bit = comp->target().is64Bit();
7731
bool equal = ifcmpNode->getOpCode().isCompareTrueIfEqual(); // fix off by one.
7732
if (increment < 0)
7733
{
7734
length = TR::Node::create(use64bit ? TR::a2l : TR::a2i, 1, TR::Node::create(TR::asub, 2, startPtr, endPtr));
7735
if (equal)
7736
{
7737
length = TR::Node::create(use64bit ? TR::ladd : TR::iadd, 2, length,
7738
use64bit ? TR::Node::lconst(1) : TR::Node::iconst(1));
7739
}
7740
arrayset = TR::Node::create(TR::arrayset, 3, endPtr, StoreNode->getChild(1), length);
7741
}
7742
else
7743
{
7744
length = TR::Node::create(use64bit ? TR::a2l : TR::a2i, 1, TR::Node::create(TR::asub, 2, endPtr, startPtr));
7745
if (equal)
7746
{
7747
length = TR::Node::create(use64bit ? TR::ladd : TR::iadd, 2, length,
7748
use64bit ? TR::Node::lconst(1) : TR::Node::iconst(1));
7749
}
7750
arrayset = TR::Node::create(TR::arrayset, 3, startPtr, StoreNode->getChild(1), length);
7751
}
7752
arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());
7753
7754
//reset block
7755
block->getEntry()->join(block->getExit());
7756
block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, arrayset)));
7757
7758
ifcmpNode->recursivelyDecReferenceCount();
7759
StoreNode->recursivelyDecReferenceCount();
7760
auto tmpastoreChild = astoreNode->getChild(0);
7761
7762
// set startPtr as if it got to the end of the loop
7763
if (equal)
7764
{
7765
int offsetAtEnd = (increment < 0) ? -1 : 1;
7766
auto newEnd = TR::Node::create(use64bit ? TR::aladd : TR::aiadd, 2, endPtr,
7767
use64bit ? TR::Node::lconst(offsetAtEnd) : TR::Node::iconst(offsetAtEnd));
7768
astoreNode->setAndIncChild(0, newEnd);
7769
}
7770
else
7771
{
7772
astoreNode->setAndIncChild(0, endPtr);
7773
}
7774
tmpastoreChild->recursivelyDecReferenceCount();
7775
7776
block->append(TR::TreeTop::create(comp, astoreNode));
7777
trans->setSuccessorEdge(block, target);
7778
return true;
7779
}
7780
7781
//////////////////////////////////////////////////////////////////////////
7782
//////////////////////////////////////////////////////////////////////////
7783
//////////////////////////////////////////////////////////////////////////
7784
//*****************************************************************************************
7785
// IL code generation for filling memory
7786
// Input: ImportantNode(0) - array store
7787
// ImportantNode(1) - Store of iadd or isub for induction variable
7788
// ImportantNode(2) - Store of iadd or isub for induction variable 1
7789
// ImportantNode(3) - exit if
7790
//*****************************************************************************************
7791
bool
7792
CISCTransform2ArraySet(TR_CISCTransformer *trans)
7793
{
7794
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
7795
const bool disptrace = DISPTRACE(trans);
7796
TR::Node *trNode = NULL;
7797
TR::TreeTop *trTreeTop = NULL;
7798
TR::Block *block = NULL;
7799
TR_CISCGraph *P = trans->getP();
7800
List<TR_CISCNode> *P2T = trans->getP2T();
7801
TR::Compilation *comp = trans->comp();
7802
bool ctrl = trans->isGenerateI2L();
7803
7804
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
7805
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
7806
7807
trans->findFirstNode(&trTreeTop, &trNode, &block);
7808
if (!block) return false; // cannot find
7809
7810
if (isLoopPreheaderLastBlockInMethod(comp, block))
7811
{
7812
traceMsg(comp, "Bailing CISCTransform2ArraySet due to null TT - might be a preheader in last block of method\n");
7813
return false;
7814
}
7815
7816
TR::Block *target = trans->analyzeSuccessorBlock();
7817
// Currently, it allows only a single successor.
7818
if (!target) return false;
7819
7820
TR_CISCNode *ivStoreCISCNode = trans->getP2TRepInLoop(P->getImportantNode(1));
7821
TR_CISCNode *ivStore1CISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));
7822
TR_CISCNode *addORsubCISCNode = trans->getP2TRepInLoop(P->getImportantNode(1)->getChild(0));
7823
TR_CISCNode *addORsub1CISCNode = trans->getP2TRepInLoop(P->getImportantNode(2)->getChild(0));
7824
TR_CISCNode *cmpIfAllCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
7825
7826
TR_ScratchList<TR::Node> storeList(comp->trMemory());
7827
TR_ASSERT(ivStoreCISCNode, "Expected induction variable store node in Transform2ArraySet");
7828
storeList.add(ivStoreCISCNode->getHeadOfTrNode());
7829
7830
if (ivStore1CISCNode && ivStore1CISCNode != ivStoreCISCNode)
7831
storeList.add(ivStore1CISCNode->getHeadOfTrNode());
7832
7833
if (!cmpIfAllCISCNode)
7834
{
7835
if (disptrace) traceMsg(comp, "Not implemented yet for multiple-if\n");
7836
return false;
7837
}
7838
TR_ASSERT(addORsubCISCNode->getOpcode() == TR::isub || addORsubCISCNode->getOpcode() == TR::iadd, "error");
7839
TR_ASSERT(addORsub1CISCNode->getOpcode() == TR::isub || addORsub1CISCNode->getOpcode() == TR::iadd, "error");
7840
7841
// Check which count-up or count-down loop
7842
bool isIncrement0 = (addORsubCISCNode->getOpcode() == TR::isub);
7843
bool isIncrement1 = (addORsub1CISCNode->getOpcode() == TR::isub);
7844
7845
bool isIncrement = isIncrement0;
7846
7847
// Depending on the loop exit comparison, we may need to adjust the length of the arrayset.
7848
int32_t lengthMod = 0;
7849
TR_CISCNode *retStore = trans->getT()->searchStore(cmpIfAllCISCNode->getChild(0), cmpIfAllCISCNode);
7850
switch(cmpIfAllCISCNode->getOpcode())
7851
{
7852
case TR::ificmpgt:
7853
lengthMod = 1;
7854
// fallthrough
7855
case TR::ificmpge:
7856
if (!isIncrement) return false;
7857
if (retStore == ivStoreCISCNode) lengthMod++;
7858
break;
7859
case TR::ificmplt:
7860
lengthMod = 1;
7861
// fallthrough
7862
case TR::ificmple:
7863
if (isIncrement) return false;
7864
if (retStore == ivStoreCISCNode) lengthMod++;
7865
break;
7866
default:
7867
traceMsg(comp, "Bailing CISCTransform2ArraySet due to unrecognized loop exit comparison.\n");
7868
return false;
7869
}
7870
7871
if (disptrace)
7872
traceMsg(comp,"Examining exit comparison CICS node %d, and determined required length modifier to be: %d\n", cmpIfAllCISCNode->getID(), lengthMod);
7873
7874
TR_ScratchList<TR::Node> listStores(comp->trMemory());
7875
ListAppender<TR::Node> appenderListStores(&listStores);
7876
ListIterator<TR_CISCNode> ni(trans->getP2T() + P->getImportantNode(0)->getID());
7877
TR_CISCNode *inStoreCISCNode;
7878
TR::Node *inStoreNode;
7879
for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())
7880
{
7881
if (!inStoreCISCNode->isOutsideOfLoop())
7882
{
7883
inStoreNode = inStoreCISCNode->getHeadOfTrNodeInfo()->_node;
7884
if (!isIndexVariableInList(inStoreNode, &storeList))
7885
{
7886
dumpOptDetails(comp, "an index used in an array store %p is not consistent with the induction varaible updates\n", inStoreNode);
7887
return false;
7888
}
7889
// this idiom operates in two modes - arrayset for all values or arrayset only for setting to zero
7890
// if the codegen does not support generic arrayset - make sure we are storing a constant 0
7891
// note the stored value is constrained to a constant by the node matcher
7892
if (!trans->comp()->cg()->getSupportsArraySet()
7893
&& !(inStoreNode->getType().isIntegral() && inStoreNode->getSecondChild()->get64bitIntegralValueAsUnsigned() == 0)
7894
&& !(inStoreNode->getType().isAddress() && inStoreNode->getSecondChild()->getAddress() == 0))
7895
{
7896
dumpOptDetails(comp, "the cg only supports arrayset to zero, but found a non-zero or non-constant value\n");
7897
return false;
7898
}
7899
appenderListStores.add(inStoreNode);
7900
}
7901
}
7902
if (listStores.isEmpty()) return false;
7903
7904
TR::Node *indexRepNode, *index1RepNode, *dstBaseRepNode, *variableORconstRepNode1;
7905
getP2TTrRepNodes(trans, &indexRepNode, &index1RepNode, &dstBaseRepNode, &variableORconstRepNode1);
7906
7907
if (disptrace)
7908
{
7909
traceMsg(comp,"Identified target nodes\n\tindexRepNode: %p\n\tindex1RepNode: %p\n\tdstBaseRepNode: %p\n\tvariableOrconstRepNode1: %p\n",
7910
indexRepNode, index1RepNode, dstBaseRepNode, variableORconstRepNode1);
7911
}
7912
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
7913
TR::SymbolReference * indexVar1SymRef = index1RepNode->getSymbolReference();
7914
if (trans->countGoodArrayIndex(indexVarSymRef) == 0 &&
7915
trans->countGoodArrayIndex(indexVar1SymRef) == 0) return false;
7916
if (indexVarSymRef != indexVar1SymRef)
7917
{
7918
// there are two induction variables
7919
if (!listStores.isSingleton())
7920
{
7921
dumpOptDetails(comp, "Multiple induction variables with multiple stores not supported for arrayset transformation.\n");
7922
return false;
7923
}
7924
if (!isIncrement1)
7925
{
7926
// We do not correctly handle the second induction variable being a decrement.
7927
// TODO: Things to fix include:
7928
// Proper Last Value calculation for count-down loop that uses ind var 1.
7929
// Proper length calculation for count-down loop that uses ind var 1.
7930
dumpOptDetails(comp, "A decrementing second induction variable is not supported. \n");
7931
return false;
7932
}
7933
}
7934
7935
//
7936
// analyze each store
7937
//
7938
ListIterator<TR::Node> iteratorStores(&listStores);
7939
TR::Node * indexNode = createLoad(indexRepNode);
7940
7941
// check if the induction variable
7942
// is being stored into the array
7943
for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())
7944
{
7945
TR::Node * valueNode = inStoreNode->getChild(1);
7946
if (valueNode->getOpCode().isLoadDirect() && valueNode->getOpCode().hasSymbolReference())
7947
{
7948
if (valueNode->getSymbolReference()->getReferenceNumber() == indexNode->getSymbolReference()->getReferenceNumber() ||
7949
valueNode->getSymbolReference()->getReferenceNumber() == index1RepNode->getSymbolReference()->getReferenceNumber())
7950
{
7951
traceMsg(comp, "arraystore tree has induction variable on rhs\n");
7952
return false;
7953
}
7954
}
7955
}
7956
7957
List<TR::Node> listArraySet(comp->trMemory());
7958
TR::Node * computeIndex = NULL;
7959
TR::Node * lengthNode = NULL;
7960
TR::Node * lengthByteNode = NULL;
7961
7962
for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())
7963
{
7964
TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();
7965
TR::Node * valueNode = convertStoreToLoad(comp, inStoreNode->getChild(1));
7966
7967
uint32_t elementSize = 0;
7968
if (inStoreNode->getType().isAddress())
7969
elementSize = TR::Compiler->om.sizeofReferenceField();
7970
else
7971
elementSize = inStoreNode->getSize();
7972
7973
// Depending on the induction variable used in the loop, determine if it's count up or count down.
7974
bool loopIsIncrement = false;
7975
if (findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), NULL, indexVarSymRef->getReferenceNumber()))
7976
{
7977
loopIsIncrement = isIncrement0;
7978
}
7979
else
7980
{
7981
TR_ASSERT(findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), NULL, indexVar1SymRef->getReferenceNumber()), "Unable to find matching array access induction variable.\n");
7982
loopIsIncrement = isIncrement1;
7983
}
7984
7985
if (!loopIsIncrement) // count-down loop
7986
{
7987
// This case covers a backwards counting loops of the following general forms:
7988
7989
// A) Induction variable update BEFORE the array store.
7990
// i = i_init;
7991
// do {
7992
// i--;
7993
// a [i + c] = d;
7994
// } while ( i >= i_last );
7995
//
7996
// B) Induction variable update AFTER the array store.
7997
// i = i_init;
7998
// do {
7999
// a [i + c] = d;
8000
// i--;
8001
// } while ( i >= i_last );
8002
//
8003
// The loops can be transformed into an equivalent forward counting loop:
8004
// i = i_last';
8005
// do {
8006
// a [i + c] = d;
8007
// i++;
8008
// } while (i <= i_init')
8009
//
8010
// Where:
8011
// A) Induction variable update BEFORE the array store.
8012
// i_init' = i_init - 1
8013
// i_last' = i_last - 1
8014
// B) Induction variable update AFTER the array store.
8015
// i_init' = i_init
8016
// i_last' = i_last
8017
//
8018
// This forward version can be reduced to an arrayset
8019
// arrayset
8020
// a[i_last' + c] // Address of first element to set (forward sense)
8021
// bconst d // Element to set.
8022
// i_init - i_last (+1) // Length
8023
// Calculate the last value of the induction variable in the original count-down loop.
8024
// This value becomes the index of the first element in the count-up version, and hence
8025
// the first element of the arrayset.
8026
8027
TR::Node * lastValueNode = convertStoreToLoad(comp, variableORconstRepNode1);
8028
8029
// Determine if the induction variable update is before the arrayset
8030
bool isIndexVarUpdateBeforeArrayset = (trans->findStoreToSymRefInInsertBeforeNodes(indexVarSymRef->getReferenceNumber()) != NULL);
8031
8032
// Adjust for the index based on exit condition (i.e. > vs >= ) and whether the induction
8033
// variable update is before/after the array stores.
8034
// i_last': > (lengthMod=0) >= (lengthMod=1)
8035
// --------------- ----------------
8036
// Before i_last i_last - 1
8037
// After i_last + 1 i_last
8038
int32_t lastLegalValueAdjustment = -lengthMod;
8039
if (!isIndexVarUpdateBeforeArrayset)
8040
lastLegalValueAdjustment++;
8041
8042
// If the induction variable update is before the arrayset, we need to validate whether the array access
8043
// commoned the node with the iadd/isub of the induction variable. i.e.
8044
//
8045
// istore #indvar
8046
// iadd (A)
8047
// iload #indvar (B)
8048
// iconst -1
8049
// istore
8050
// aiadd
8051
// aload arraybase
8052
// aiadd
8053
// index
8054
// iconst array_header_size
8055
//
8056
// where index could be:
8057
// (A) commoned to iadd, effectively using new value of #indvar
8058
// (B) commoned to iload, effectively using old value of #indvar
8059
// (C) a new iload using new value of #indvar
8060
//
8061
// Case (A) is problematic, as the induction variable store is still before the arrayset, but
8062
// the array access pattern is using the original value of #indvar.
8063
// Case (B) is okay, in that topological embedding will recognize that to be equivalent to
8064
// updating induction variable after the arraystore.
8065
// Case (C) is handled correctly.
8066
int32_t arrayStoreCommoningAdjustment = 0;
8067
if (isIndexVarUpdateBeforeArrayset)
8068
{
8069
TR::Node *origIndVarStore = ivStoreCISCNode->getHeadOfTrNodeInfo()->_node;
8070
TR::Node *origIndVarLoad = origIndVarStore->getChild(0)->getChild(0);
8071
8072
TR::Node *origArrayIndVarLoad = findLoadWithMatchingSymRefNumber(inStoreNode->getChild(0)->getSecondChild(), indexVarSymRef->getReferenceNumber());
8073
8074
// If they match, we have case (B), so we need to readjust by +1.
8075
if (origIndVarLoad == origArrayIndVarLoad)
8076
{
8077
traceMsg(comp, "Identified array index to have been referencing original induction variable value: %p\n",origIndVarLoad);
8078
arrayStoreCommoningAdjustment = 1;
8079
}
8080
}
8081
8082
TR::Node *lastLegalValue = createOP2(comp, TR::iadd, lastValueNode,
8083
TR::Node::create(indexNode, TR::iconst, 0, lastLegalValueAdjustment + arrayStoreCommoningAdjustment));
8084
8085
// Search for the induction variable in the array access sub-tree and replace that node
8086
// with the last value index we just calculated.
8087
bool isFound = findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), lastLegalValue, indexVarSymRef->getReferenceNumber());
8088
if (!isFound && (indexVarSymRef != indexVar1SymRef))
8089
isFound = findAndOrReplaceNodesWithMatchingSymRefNumber(outputNode->getSecondChild(), lastLegalValue, indexVar1SymRef->getReferenceNumber());
8090
8091
TR_ASSERT(isFound, "Count down arrayset was unable to find and replace array access induction variable.\n");
8092
8093
// Determine the length of the arrayset (# of elements to set) and adjusting it based on exit condition.
8094
// In the case of the induction variable update is before the array store, the indexNode value has already been
8095
// decremented by 1 once already (since i--; is inserted before the final arrayset. We need to readjust that.
8096
// length: > (lengthMod=0) >= (lengthMod=1)
8097
// --------------- ----------------
8098
// Before i_init - i_last + 1 i_init - i_last +2
8099
// After i_init - i_last i_init - i_last +1
8100
int32_t lengthAdjustment = lengthMod + ((isIndexVarUpdateBeforeArrayset)?1:0);
8101
8102
lengthNode = createOP2(comp, TR::isub, indexNode, lastValueNode);
8103
lengthNode = createOP2(comp, TR::iadd, lengthNode, TR::Node::create(indexNode, TR::iconst, 0, lengthAdjustment));
8104
8105
// Determine the final induction variable value on loop exit.
8106
// If the induction variable update is before the arrayset,
8107
// it will be the last value we access.
8108
// If the induction variable update is after the arrayset,
8109
// it will always be one less than the last index (count-down sense) that we access.
8110
computeIndex = createOP2(comp, TR::iadd, lastLegalValue, TR::Node::create(indexRepNode, TR::iconst, 0, ((isIndexVarUpdateBeforeArrayset)?0:-1) - arrayStoreCommoningAdjustment));
8111
8112
}
8113
else // count-up loop
8114
{
8115
TR::Node * lastValue = convertStoreToLoad(comp, variableORconstRepNode1);
8116
lastValue = createOP2(comp, isIncrement0 ? TR::iadd : TR::isub, lastValue,
8117
TR::Node::create(indexNode, TR::iconst, 0, lengthMod));
8118
8119
// Induction variable 0 is always part of the loop exit condition based on idiom graph.
8120
if (isIncrement0)
8121
{
8122
lengthNode = createOP2(comp, TR::isub, lastValue, indexNode);
8123
}
8124
else
8125
{
8126
lengthNode = createOP2(comp, TR::isub, indexNode, lastValue);
8127
}
8128
computeIndex = lastValue;
8129
}
8130
8131
lengthByteNode = lengthNode;
8132
const bool longOffsets = trans->isGenerateI2L();
8133
lengthByteNode = createI2LIfNecessary(comp, longOffsets, lengthByteNode);
8134
if (elementSize > 1)
8135
{
8136
TR::Node *elementSizeNode = NULL;
8137
if (longOffsets)
8138
elementSizeNode = TR::Node::lconst(inStoreNode, elementSize);
8139
else
8140
elementSizeNode = TR::Node::iconst(inStoreNode, elementSize);
8141
8142
lengthByteNode = TR::Node::create(
8143
longOffsets ? TR::lmul : TR::imul,
8144
2,
8145
lengthByteNode,
8146
elementSizeNode);
8147
}
8148
8149
TR::Node * arrayset = TR::Node::create(TR::arrayset, 3, outputNode, valueNode, lengthByteNode);
8150
arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());
8151
8152
listArraySet.add(TR::Node::create(TR::treetop, 1, arrayset));
8153
}
8154
8155
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, computeIndex);
8156
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
8157
TR::Node * indVar1UpdateNode = NULL;
8158
TR::TreeTop * indVar1UpdateTreeTop = NULL;
8159
if (indexVarSymRef != indexVar1SymRef)
8160
{
8161
indVar1UpdateNode = createStoreOP2(comp, indexVar1SymRef, TR::iadd, indexVar1SymRef, lengthNode, trNode);
8162
indVar1UpdateTreeTop = TR::TreeTop::create(comp, indVar1UpdateNode);
8163
}
8164
8165
// Insert nodes and maintain the CFG
8166
TR::TreeTop *last;
8167
ListIterator<TR::Node> iteratorArraySet(&listArraySet);
8168
TR::Node *arrayset = NULL;
8169
TR_ASSERT(lengthByteNode, "Expected at least one set of arrayset.");
8170
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthByteNode->duplicateTree());
8171
block = trans->insertBeforeNodes(block);
8172
last = block->getLastRealTreeTop();
8173
for (arrayset = iteratorArraySet.getFirst(); arrayset; arrayset = iteratorArraySet.getNext())
8174
{
8175
TR::TreeTop *newTop = TR::TreeTop::create(comp, arrayset);
8176
last->join(newTop);
8177
last = newTop;
8178
}
8179
last->join(indVarUpdateTreeTop);
8180
indVarUpdateTreeTop->join(block->getExit());
8181
if (indVar1UpdateTreeTop)
8182
{
8183
block->append(indVar1UpdateTreeTop);
8184
}
8185
8186
trans->insertAfterNodes(block);
8187
8188
trans->setSuccessorEdge(block, target);
8189
return true;
8190
}
8191
8192
// Transformer hook for the "Strlen16" idiom (see makeStrlen16Graph).
//
// Input: ImportantNode(0) - astore of the address add that advances the pointer
//        ImportantNode(1) - loop test
//
// NOTE: the transformation itself is not implemented here. After validating
// the matched loop and picking out the nodes of interest, the function always
// returns false, so the loop is never rewritten.
bool CISCTransform2Strlen16(TR_CISCTransformer *trans)
   {
   bool trace = trans->trace();
   TR::Node *trNode = NULL;
   TR::TreeTop *trTreeTop = NULL;
   TR::Block *block = NULL;
   TR_CISCGraph *p = trans->getP();
   TR::Compilation *comp = trans->comp();
   trans->findFirstNode(&trTreeTop, &trNode, &block);
   if (!block)
      return false;    // cannot find

   // Currently, it allows only a single successor.
   TR::Block *target = trans->analyzeSuccessorBlock();
   if (!target)
      return false;

   // Only handle very simple loops.
   if (trans->getNumOfBBlistBody() > 1)
      {
      if (trace) traceMsg(comp, "Need exactly 1 basic block\n");
      return false;
      }

   // Should have 2 treetops in body. See makeStrlen16Graph
   if (block->getNumberOfRealTreeTops() != 2)
      {
      if (trace) traceMsg(comp, "Need exactly 2 real treetops\n");
      return false;
      }

   auto astore = trans->getP2TRepInLoop(p->getImportantNode(0));
   auto loopTest = trans->getP2TRepInLoop(p->getImportantNode(1));

   // Validate the representatives BEFORE dereferencing them.
   if (!astore || !loopTest)
      return false;

   auto astoreNode = astore->getHeadOfTrNode();
   auto ificmpne = loopTest->getHeadOfTrNode();

   if (!astoreNode || !ificmpne)
      return false;

   // Nodes a real transformation would need. They are gathered but not used,
   // because the function bails out unconditionally below.
   auto ptr = astoreNode->getChild(0)->getChild(0);
   auto increment = astoreNode->getChild(0)->getChild(1)->getConst<int32_t>();

   // Split the loop test into its constant and its converted-load operand,
   // whichever side the constant is on.
   TR::Node *iconst=NULL, *conv=NULL;
   if (ificmpne->getChild(0)->getOpCodeValue() == TR::iconst)
      {
      iconst = ificmpne->getChild(0);
      conv = ificmpne->getChild(1);
      }
   else if (ificmpne->getChild(1)->getOpCodeValue() == TR::iconst)
      {
      iconst = ificmpne->getChild(1);
      conv = ificmpne->getChild(0);
      }

   if (trace) traceMsg(comp, "Failed one of the requirements\n");
   return false;
   }
8250
8251
/*********************************************************************************************
8252
* Catch very simple case of strlen16
8253
n170n BBStart <block_30> (freq 1682) (in loop 30)
8254
n177n astore <auto slot 14>[id=384:"pszTmp"] [#65 Auto] [flags 0x7 0x0 ]
8255
n176n aladd (X>=0 internalPtr sharedMemory )
8256
n172n aload <auto slot 14>[id=384:"pszTmp"] [#65 Auto] [flags 0x7 0x0 ]
8257
n175n lconst 2 (highWordZero X!=0 X>=0 )
8258
n185n ificmpne --> block_30 BBStart at n170n ()
8259
n184n su2i (X>=0 )
8260
n181n sloadi <refined-array-shadow>[id=185:"(unsigned short)"] [#61 Shadow]
8261
n176n ==>aladd
8262
n183n iconst 0 (X==0 X>=0 X<=0 )
8263
n179n BBEnd </block_30> =====
8264
*/
8265
8266
// Builds the pattern graph for the "Strlen16" idiom: a loop that advances a
// pointer by a constant 2 (aladd + astore), loads a 16-bit value through it,
// converts that value to a 32-bit integer and compares it against the constant
// 0. The graph is wired to CISCTransform2Strlen16.
//
// c    - compilation whose memory is used for the persistent allocations
// ctrl - not used by this builder
//
// Nodes receive dag ids 9 (entry) down to 0 (exit), in creation order.
TR_PCISCGraph *
makeStrlen16Graph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "Strlen16", 0, 10);

   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 9, 1, 0);
   graph->addNode(entryNode);

   // Pointer induction variable: ptr = ptr + 2
   TR_PCISCNode *ptrVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::Address, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(ptrVar);
   TR_PCISCNode *stride = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 7, 0, 0, 2);
   graph->addNode(stride);
   TR_PCISCNode *ptrAdd = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::aladd, TR::Address, graph->incNumNodes(), 6, 1, 2, entryNode, ptrVar, stride);
   graph->addNode(ptrAdd);
   TR_PCISCNode *ptrStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::astore, TR::Address, graph->incNumNodes(), 5, 1, 2, ptrAdd, ptrAdd, ptrVar);
   graph->addNode(ptrStore);

   // 16-bit indirect load through the pointer, with the address add registered
   // as a hint, followed by the conversion feeding the loop test.
   TR_PCISCNode *charLoad = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indload, TR::Int16, graph->incNumNodes(), 4, 1, 1, ptrStore, ptrVar);
   charLoad->addHint(ptrAdd);
   graph->addNode(charLoad);
   TR_PCISCNode *widen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::Int32, graph->incNumNodes(), 3, 1, 1, charLoad, charLoad);
   graph->addNode(widen);
   TR_PCISCNode *zero = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 2, 0, 0, 0);
   graph->addNode(zero);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::Int32, graph->incNumNodes(), 1, 2, 2, widen, widen, zero);
   graph->addNode(exitTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // The test's two successors: the entry node's successor and the exit node.
   exitTest->setSuccs(entryNode->getSucc(0), exitNode);
   exitTest->setIsChildDirectlyConnected();

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(ptrStore, exitTest);
   graph->setNumDagIds(10);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2Strlen16);
   // No aspects are set. The original author left this call disabled, unsure which to use:
   //graph->setAspects(storeMasks); // want astore aload for ptr incr and any size ptr deref store
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 1, 0);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
8313
8314
8315
/****************************************************************************************
8316
* Corresponding C-like pseudocode with loop with pointer increment
8317
* T* start = arr; // T is char, short, int, or long; the array can be a container of any type
8318
* T* end = arr + size; // one past the last element of arr
8319
* while(start < end)
8320
* *start++ = 0;
8321
*
8322
n16n BBStart <block_5> (freq 10000) (in loop 5) [0x00000000823bf380]
8323
n22n astore <auto slot 2>[id=3:"start"] [#49 Auto] [flags 0x7 0x0 ] [0x00000000823bf590]
8324
n21n aladd (internalPtr sharedMemory ) [0x00000000823bf538]
8325
n18n aload <auto slot 2>[id=3:"start"] [#49 Auto] [flags 0x7 0x0 ] (X>=0 sharedMemory ) [0x00000000823b
8326
n20n lconst 1 (highWordZero X!=0 X>=0 ) [0x00000000823bf4e0]
8327
n26n bstorei <refined-array-shadow>[id=7:"(char)"] [#51 Shadow] [flags 0x80000601 0x0 ] [0x00000000823bf6f0
8328
n18n ==>aload
8329
n25n bconst 0 (Unsigned X==0 X>=0 X<=0 ) [0x00000000823bf698]
8330
n31n ifacmpne --> block_5 BBStart at n16n () [0x00000000823bf8a8]
8331
n21n ==>aladd
8332
n30n aload <auto slot 0>[id=5:"end"] [#50 Auto] [flags 0x7 0x0 ] [0x00000000823bf850]
8333
n28n BBEnd </block_5> ===== [0x00000000823bf7a0]
8334
*
8335
*/
8336
// Builds the "PtrArraySet" idiom graph: an address variable is advanced by a
// constant, stored back, a value is stored indirectly through the address
// variable, and an address comparison decides whether to loop again.
//
// c    - compilation whose memory and target description are used.
// ctrl - not consulted by this builder.
//
// Returns the newly allocated pattern graph.
TR_PCISCGraph *
makePtrArraySetGraph(TR::Compilation *c, int32_t ctrl)
   {
   const bool targetIs64Bit = c->target().is64Bit();
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "PtrArraySet", 0, 10);

   // Node constructor arguments after the memory handle:
   //   opcode, data type, node id, dagId, #cfg successors, #children, then other-info / predecessor / children.
   // Node ids are handed out by incNumNodes(), so the construction order below is significant.

   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 9, 1, 0);
   graph->addNode(entryNode);

   // Address-typed variable that is advanced and dereferenced.
   TR_PCISCNode *cursor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::Address, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(cursor);

   // Constant added to the address variable on each iteration.
   TR_PCISCNode *stride = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 7, 0, 0);
   graph->addNode(stride);

   // cursor + stride; the add opcode depends on the target word size.
   TR_PCISCNode *advance = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), targetIs64Bit ? TR::aladd : TR::aiadd, TR::Address, graph->incNumNodes(), 6, 1, 2, entryNode, cursor, stride);
   graph->addNode(advance);

   // Store of the advanced address back into the address variable.
   TR_PCISCNode *cursorStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::astore, TR::Address, graph->incNumNodes(), 5, 1, 2, advance, advance, cursor);
   graph->addNode(cursorStore);

   // Value being written (variable or constant).
   TR_PCISCNode *fillValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, graph->incNumNodes(), 4, 0, 0);
   graph->addNode(fillValue);

   // Indirect store of the value through the address variable.
   TR_PCISCNode *elementStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, graph->incNumNodes(), 3, 1, 2, cursorStore, cursor, fillValue);
   graph->addNode(elementStore);

   // Address the loop test compares against (variable or constant).
   TR_PCISCNode *limit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::Address, graph->incNumNodes(), 2, 0, 0, 0);
   graph->addNode(limit);

   // Wildcard conditional branch comparing the address variable with the limit.
   TR_PCISCNode *backEdgeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::Address, graph->incNumNodes(), 1, 2, 2, elementStore, cursor, limit);
   graph->addNode(backEdgeTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // The test's two successors are the entry node's successor and the exit node.
   backEdgeTest->setSuccs(entryNode->getSucc(0), exitNode);
   backEdgeTest->setIsChildDirectlyConnected();

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(cursorStore, elementStore, backEdgeTest);
   graph->setNumDagIds(10);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2PtrArraySet);
   // No setAspects() call is made here; the original author left
   // "tgt->setAspects(storeMasks)" disabled, being unsure which aspects to request.
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 0, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
8381
8382
/****************************************************************************************
8383
Corresponding Java-like pseudocode
8384
int i, end, value;
8385
Array[ ]; // char, int, float, long, and so on
8386
while(true){
8387
Array[i] = value;
8388
iaddORisub(i, -1)
8389
ifcmpall(i, end) break;
8390
}
8391
8392
Note 1: This idiom matches both count up and down loops.
8393
Note 2: The wildcard node iaddORisub matches iadd or isub.
8394
Note 3: The wildcard node ifcmpall matches all types of if-instructions.
8395
****************************************************************************************/
8396
// Builds the "MemSet" idiom graph: one indirect array store of a value, two
// induction-variable updates through the iaddORisub wildcard, and a wildcard
// conditional branch comparing the first induction variable with the end value.
//
// c    - compilation whose memory and code generator are used.
// ctrl - forwarded unchanged to the createIdiom* helper routines.
//
// Returns the newly allocated pattern graph.
TR_PCISCGraph *
makeMemSetGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemSet", 0, 16);

   // Node constructor arguments after the memory handle:
   //   opcode, data type, node id, dagId, #cfg successors, #children, then other-info / predecessor / children.
   // Node ids are handed out by incNumNodes() (also inside the createIdiom* helpers),
   // so the order of the statements below is significant.

   // array index
   TR_PCISCNode *indVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(),11, 0, 0, 0);
   graph->addNode(indVar);

   // array index (second variable)
   TR_PCISCNode *indVar1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(),10, 0, 0, 1);
   graph->addNode(indVar1);

   // array base
   TR_PCISCNode *arrayBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(),9, 0, 0, 0);
   graph->addNode(arrayBase);

   // length
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(),8, 0, 0);
   graph->addNode(endValue);

   // set value: if the code generator does not report arrayset support, only constant nodes are matched
   TR_PCISCNode *fillValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), c->cg()->getSupportsArraySet() ? TR_variableORconst : TR_allconst, TR::NoType, graph->incNumNodes(), 7, 0, 0);
   graph->addNode(fillValue);

   // Multiplicative factor for index into non-byte arrays
   TR_PCISCNode *scale = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 6, 0, 0);
   graph->addNode(scale);

   TR_PCISCNode *index0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(),5, 0, 0, 0);
   graph->addNode(index0);

   // array header
   TR_PCISCNode *headerConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 4, 0, 0, 0);
   graph->addNode(headerConst);

   // the constant -1 shared by both induction-variable updates
   TR_PCISCNode *step = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
   graph->addNode(step);

   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(),2, 1, 0);
   graph->addNode(entryNode);

   // Loop body: address computation -> (optional conversion) -> indirect store
   //            -> update of indVar -> update of indVar1 -> loop test.
   TR_PCISCNode *elementAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, entryNode, arrayBase, index0, headerConst, scale);

   TR_PCISCNode *valueConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, elementAddr, fillValue);
   graph->addNode(valueConv);

   TR_PCISCNode *elementStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, graph->incNumNodes(), 1, 1, 2, valueConv, elementAddr, valueConv);
   graph->addNode(elementStore);

   TR_PCISCNode *indVarStore = createIdiomIOP2VarInLoop(graph, ctrl, 1, elementStore, TR_iaddORisub, indVar, step);
   TR_PCISCNode *indVar1Store = createIdiomIOP2VarInLoop(graph, ctrl, 1, indVarStore, TR_iaddORisub, indVar1, step);

   TR_PCISCNode *backEdgeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, indVar1Store, indVar, endValue);
   graph->addNode(backEdgeTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // The test's two successors are the entry node's successor and the exit node.
   backEdgeTest->setSuccs(entryNode->getSucc(0), exitNode);
   backEdgeTest->setIsChildDirectlyConnected();

   // The conversion on the stored value is flagged as an optional node.
   valueConv->setIsOptionalNode();
   valueConv->setIsChildDirectlyConnected();

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(elementStore, indVarStore, indVar1Store, backEdgeTest);
   graph->setNumDagIds(12);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2ArraySet);
   graph->setAspects(mul, 0, existAccess);
   graph->setNoAspects(call|bndchk|bitop1, 0, 0);
   graph->setMinCounts(1, 0, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
8445
8446
//////////////////////////////////////////////////////////////////////////
8447
//////////////////////////////////////////////////////////////////////////
8448
//////////////////////////////////////////////////////////////////////////
8449
8450
//*****************************************************************************************
8451
// IL code generation for filling memory
8452
// Input: ImportantNode(0) - non-byte array store
8453
// ImportantNode(1) - byte array store
8454
// ImportantNode(2) - iadd or isub for induction variable
8455
// ImportantNode(3) - exit if
8456
// ImportantNode(4) - the size of elements
8457
//*****************************************************************************************
8458
bool
8459
CISCTransform2MixedArraySet(TR_CISCTransformer *trans)
8460
{
8461
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
8462
TR::Node *trNode;
8463
TR::TreeTop *trTreeTop;
8464
TR::Block *block;
8465
TR_CISCGraph *P = trans->getP();
8466
List<TR_CISCNode> *P2T = trans->getP2T();
8467
TR::Compilation *comp = trans->comp();
8468
bool ctrl = trans->isGenerateI2L();
8469
8470
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
8471
8472
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
8473
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
8474
8475
trans->findFirstNode(&trTreeTop, &trNode, &block);
8476
if (!block) return false; // cannot find
8477
8478
if (isLoopPreheaderLastBlockInMethod(comp, block))
8479
{
8480
traceMsg(comp, "Bailing CISCTransform2MixedArraySet due to null TT - might be a preheader in last block of method\n");
8481
return false;
8482
}
8483
8484
TR::Block *target = trans->analyzeSuccessorBlock();
8485
// Currently, it allows only a single successor.
8486
if (!target) return false;
8487
8488
TR_CISCNode *addORsubCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));
8489
TR_CISCNode *cmpIfAllCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
8490
if (!cmpIfAllCISCNode) return false;
8491
TR_ASSERT(addORsubCISCNode->getOpcode() == TR::isub || addORsubCISCNode->getOpcode() == TR::iadd, "error");
8492
8493
// Check which count-up or count-down loop
8494
bool isIncrement = (addORsubCISCNode->getOpcode() == TR::isub);
8495
int lengthMod = 0;
8496
switch(cmpIfAllCISCNode->getOpcode())
8497
{
8498
case TR::ificmpgt:
8499
lengthMod = 1;
8500
// fallthrough
8501
case TR::ificmpge:
8502
if (!isIncrement) return false;
8503
break;
8504
case TR::ificmple:
8505
lengthMod = -1;
8506
// fallthrough
8507
case TR::ificmplt:
8508
if (isIncrement) return false;
8509
break;
8510
default:
8511
return false;
8512
}
8513
8514
List<TR::Node> listStores(comp->trMemory());
8515
ListAppender<TR::Node> appenderListStores(&listStores);
8516
ListIterator<TR_CISCNode> ni(trans->getP2T() + P->getImportantNode(0)->getID());
8517
TR_CISCNode *inStoreCISCNode;
8518
for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())
8519
{
8520
if (!inStoreCISCNode->isOutsideOfLoop())
8521
appenderListStores.add(inStoreCISCNode->getHeadOfTrNodeInfo()->_node);
8522
}
8523
ni.set(trans->getP2T() + P->getImportantNode(1)->getID());
8524
for (inStoreCISCNode = ni.getFirst(); inStoreCISCNode; inStoreCISCNode = ni.getNext())
8525
{
8526
if (!inStoreCISCNode->isOutsideOfLoop())
8527
appenderListStores.add(inStoreCISCNode->getHeadOfTrNodeInfo()->_node);
8528
}
8529
if (listStores.isEmpty()) return false;
8530
8531
TR::Node *indexRepNode, *variableORconstRepNode1;
8532
getP2TTrRepNodes(trans, &indexRepNode, &variableORconstRepNode1);
8533
TR::SymbolReference * indexVarSymRef = indexRepNode->getSymbolReference();
8534
if (trans->countGoodArrayIndex(indexVarSymRef) == 0) return false;
8535
8536
//
8537
// analyze each store
8538
//
8539
ListIterator<TR::Node> iteratorStores(&listStores);
8540
TR::Node *inStoreNode;
8541
TR::Node * indexNode = createLoad(indexRepNode);
8542
8543
// check if the induction variable
8544
// is being stored into the array
8545
for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())
8546
{
8547
TR::Node * valueNode = inStoreNode->getChild(1);
8548
if (valueNode->getOpCode().isLoadDirect() && valueNode->getOpCode().hasSymbolReference())
8549
{
8550
if (valueNode->getSymbolReference()->getReferenceNumber() == indexNode->getSymbolReference()->getReferenceNumber())
8551
{
8552
dumpOptDetails(comp, "arraystore tree has induction variable on rhs\n");
8553
return false;
8554
}
8555
}
8556
}
8557
8558
List<TR::Node> listArraySet(comp->trMemory());
8559
TR::Node * computeIndex = NULL;
8560
for (inStoreNode = iteratorStores.getFirst(); inStoreNode; inStoreNode = iteratorStores.getNext())
8561
{
8562
TR::Node * outputNode = inStoreNode->getChild(0)->duplicateTree();
8563
TR::Node * valueNode = convertStoreToLoad(comp, inStoreNode->getChild(1));
8564
int elementSize = inStoreNode->getSize();
8565
8566
TR::Node * lengthNode;
8567
if (!isIncrement) // count-down loop
8568
{
8569
// exit variable is zero or not
8570
bool isInitOffset0 = (variableORconstRepNode1->getOpCodeValue() == TR::iconst && (variableORconstRepNode1->getInt()-lengthMod) == 0);
8571
bool done = false;
8572
TR::Node * constm1 = TR::Node::create(indexRepNode, TR::iconst, 0, -1);
8573
TR::Node * lastValue = NULL;
8574
if (isInitOffset0)
8575
{
8576
// When the array index is zero, it will modify the address computation to "base + size of header".
8577
TR::Node *arrayheader = outputNode->getSecondChild()->getSecondChild();
8578
switch (outputNode->getSecondChild()->getOpCodeValue())
8579
{
8580
case TR::iadd:
8581
case TR::ladd:
8582
outputNode->setSecond(arrayheader);
8583
done = true;
8584
break;
8585
case TR::isub:
8586
if (arrayheader->getOpCodeValue() == TR::iconst)
8587
{
8588
arrayheader->setInt(-arrayheader->getInt());
8589
outputNode->setSecond(arrayheader);
8590
done = true;
8591
}
8592
break;
8593
case TR::lsub:
8594
if (arrayheader->getOpCodeValue() == TR::lconst)
8595
{
8596
arrayheader->setLongInt(-arrayheader->getLongInt());
8597
outputNode->setSecond(arrayheader);
8598
done = true;
8599
}
8600
break;
8601
default:
8602
break;
8603
}
8604
lengthNode = indexNode;
8605
computeIndex = constm1;
8606
}
8607
else
8608
{
8609
lastValue = convertStoreToLoad(comp, variableORconstRepNode1);
8610
if (lengthMod)
8611
{
8612
lastValue = createOP2(comp, TR::isub,
8613
lastValue,
8614
TR::Node::create(indexNode, TR::iconst, 0, lengthMod));
8615
}
8616
lengthNode = createOP2(comp, TR::isub, indexNode, lastValue);
8617
computeIndex = createOP2(comp, TR::iadd, lastValue, constm1);
8618
}
8619
lengthNode = createOP2(comp, TR::isub, lengthNode, TR::Node::create(indexNode, TR::iconst, 0, -(lengthMod+1)));
8620
8621
if (!done)
8622
{
8623
if (!lastValue) lastValue = convertStoreToLoad(comp, variableORconstRepNode1);
8624
TR::Node *termNode = createI2LIfNecessary(comp, ctrl, lastValue);
8625
TR::Node *mulNode = outputNode->getSecondChild()->getFirstChild();
8626
mulNode = mulNode->skipConversions();
8627
if (elementSize > 1)
8628
mulNode->setAndIncChild(0, termNode);
8629
else
8630
outputNode->getSecondChild()->setAndIncChild(0, termNode);
8631
}
8632
}
8633
else // count-up loop
8634
{
8635
TR::Node * lastValue = convertStoreToLoad(comp, variableORconstRepNode1);
8636
if (lengthMod)
8637
{
8638
lastValue = createOP2(comp, TR::iadd,
8639
lastValue,
8640
TR::Node::create(indexNode, TR::iconst, 0, lengthMod));
8641
}
8642
lengthNode = createOP2(comp, TR::isub, lastValue, indexNode);
8643
computeIndex = lastValue;
8644
}
8645
8646
if (elementSize > 1)
8647
lengthNode = TR::Node::create(TR::imul, 2,
8648
lengthNode,
8649
TR::Node::create(inStoreNode, TR::iconst, 0, elementSize));
8650
8651
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);
8652
8653
TR::Node * arrayset = TR::Node::create(TR::arrayset, 3, outputNode, valueNode, lengthNode);
8654
arrayset->setSymbolReference(comp->getSymRefTab()->findOrCreateArraySetSymbol());
8655
8656
listArraySet.add(TR::Node::create(TR::treetop, 1, arrayset));
8657
}
8658
8659
TR::Node * indVarUpdateNode = TR::Node::createStore(indexVarSymRef, computeIndex);
8660
TR::TreeTop * indVarUpdateTreeTop = TR::TreeTop::create(comp, indVarUpdateNode);
8661
8662
// Insert nodes and maintain the CFG
8663
TR::TreeTop *last;
8664
ListIterator<TR::Node> iteratorArraySet(&listArraySet);
8665
TR::Node *arrayset;
8666
last = trans->removeAllNodes(trTreeTop, block->getExit());
8667
last->join(block->getExit());
8668
block = trans->insertBeforeNodes(block);
8669
last = block->getLastRealTreeTop();
8670
for (arrayset = iteratorArraySet.getFirst(); arrayset; arrayset = iteratorArraySet.getNext())
8671
{
8672
TR::TreeTop *newTop = TR::TreeTop::create(comp, arrayset);
8673
last->join(newTop);
8674
last = newTop;
8675
}
8676
last->join(indVarUpdateTreeTop);
8677
indVarUpdateTreeTop->join(block->getExit());
8678
8679
trans->insertAfterNodes(block);
8680
8681
trans->setSuccessorEdge(block, target);
8682
return true;
8683
}
8684
8685
/****************************************************************************************
8686
Corresponding Java-like pseudocode
8687
int i, end, value;
8688
byte byteArray[ ];
8689
Array[ ]; // char, int, float, long, and so on
8690
while(true){
8691
Array[i] = value1;
8692
byteArray[i] = value2;
8693
iaddORisub(i, -1)
8694
ifcmpall(i, end) break;
8695
}
8696
8697
Note 1: This idiom matches both count up and down loops.
8698
Note 2: The wildcard node iaddORisub matches iadd or isub.
8699
Note 3: The wildcard node ifcmpall matches all types of if-instructions.
8700
****************************************************************************************/
8701
// Builds the "MixedMemSet" idiom graph: a non-byte indirect store and a byte
// indirect store that share one array index, followed by a single
// induction-variable update through the iaddORisub wildcard and a wildcard
// conditional branch comparing the induction variable with the end value.
//
// c    - compilation whose memory is used.
// ctrl - forwarded unchanged to the createIdiom* helper routines.
//
// Returns the newly allocated pattern graph.
TR_PCISCGraph *
makeMixedMemSetGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MixedMemSet", 0, 16);

   // Node constructor arguments after the memory handle:
   //   opcode, data type, node id, dagId, #cfg successors, #children, then other-info / predecessor / children.
   // Node ids are handed out by incNumNodes() (also inside the createIdiom* helpers),
   // so the order of the statements below is significant.

   // array index
   TR_PCISCNode *indVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(),12, 0, 0, 0);
   graph->addNode(indVar);

   // length
   TR_PCISCNode *endValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(),11, 0, 0);
   graph->addNode(endValue);

   // array base of the non-byte array
   TR_PCISCNode *wideBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(),10, 0, 0, 0);
   graph->addNode(wideBase);

   // array base of the byte array
   TR_PCISCNode *byteBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 9, 0, 0, 1);
   graph->addNode(byteBase);

   // set value for the non-byte store
   TR_PCISCNode *wideValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, graph->incNumNodes(), 8, 0, 0);
   graph->addNode(wideValue);

   // set value for the byte store
   TR_PCISCNode *byteValue = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variableORconst, TR::NoType, graph->incNumNodes(), 7, 0, 0);
   graph->addNode(byteValue);

   // Multiplicative factor for index into non-byte arrays
   TR_PCISCNode *scale = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 6, 0, 0);
   graph->addNode(scale);

   TR_PCISCNode *index0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(),5, 0, 0, 0);
   graph->addNode(index0);

   // array header
   TR_PCISCNode *headerConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 4, 0, 0, 0);
   graph->addNode(headerConst);

   // the constant -1 used by the induction-variable update
   TR_PCISCNode *step = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
   graph->addNode(step);

   // element size (passed as the scale when building the byte array address)
   TR_PCISCNode *byteScale = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 3, 1);

   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(),2, 1, 0);
   graph->addNode(entryNode);

   // Loop body, first half: non-byte address -> (optional conversion) -> non-byte store.
   TR_PCISCNode *wideAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, entryNode, wideBase, index0, headerConst, scale);

   TR_PCISCNode *wideConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, wideAddr, wideValue);
   graph->addNode(wideConv);

   TR_PCISCNode *wideStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_inbstore, TR::NoType, graph->incNumNodes(), 1, 1, 2, wideConv, wideAddr, wideConv);
   graph->addNode(wideStore);

   // Loop body, second half: byte address -> (optional i2b) -> byte store.
   TR_PCISCNode *byteAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, wideStore, byteBase, index0, headerConst, byteScale);

   TR_PCISCNode *byteConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::i2b, TR::Int8, graph->incNumNodes(), 1, 1, 1, byteAddr, byteValue);
   graph->addNode(byteConv);

   TR_PCISCNode *byteStore = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::bstorei, TR::Int8, graph->incNumNodes(), 1, 1, 2, byteConv, byteAddr, byteConv);
   graph->addNode(byteStore);

   TR_PCISCNode *indVarStore = createIdiomIOP2VarInLoop(graph, ctrl, 1, byteStore, TR_iaddORisub, indVar, step);

   TR_PCISCNode *backEdgeTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, indVarStore, indVar, endValue);
   graph->addNode(backEdgeTest);

   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // The test's two successors are the entry node's successor and the exit node.
   backEdgeTest->setSuccs(entryNode->getSucc(0), exitNode);
   backEdgeTest->setIsChildDirectlyConnected();

   // Both value conversions are flagged as optional nodes.
   wideConv->setIsOptionalNode();
   wideConv->setIsChildDirectlyConnected();
   byteConv->setIsOptionalNode();
   byteConv->setIsChildDirectlyConnected();

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   // Important nodes, in order: non-byte store, byte store, first child of the
   // induction-variable store, loop test, multiplicative factor.
   graph->setImportantNodes(wideStore, byteStore, indVarStore->getChild(0), backEdgeTest, scale);
   graph->setNumDagIds(13);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2MixedArraySet);
   graph->setAspects(mul, 0, existAccess);
   graph->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_2, 0);
   graph->setMinCounts(1, 0, 2);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitBeforeVersioning();
   return graph;
   }
8757
8758
8759
8760
8761
//////////////////////////////////////////////////////////////////////////
8762
//////////////////////////////////////////////////////////////////////////
8763
//*****************************************************************************************
8764
// IL code generation for 2 if-statement version of comparing memory (using CLCL)
8765
// Input: ImportantNode(0) - array load for src1
8766
// ImportantNode(1) - array load for src2
8767
// ImportantNode(2) - exit-if for checking the length
8768
// ImportantNode(3) - exit-if for comparing two arrays
8769
// ImportantNode(4) - increment the array index for src1
8770
// ImportantNode(5) - increment the array index for src2
8771
// ImportantNode(6) - the size of elements (NULL for byte arrays)
8772
//
8773
// Note: If we need to know the position where characters are different (flag generateArraycmplen),
8774
// we generate the CLCL instruction. Otherwise, we generate the CLC instruction.
8775
//*****************************************************************************************
8776
bool
8777
CISCTransform2ArrayCmp2Ifs(TR_CISCTransformer *trans)
8778
{
8779
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
8780
TR::Node *trNode;
8781
TR::TreeTop *trTreeTop;
8782
TR::Block *block;
8783
TR_CISCGraph *P = trans->getP();
8784
List<TR_CISCNode> *P2T = trans->getP2T();
8785
TR::Compilation *comp = trans->comp();
8786
bool ctrl = trans->isGenerateI2L();
8787
8788
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
8789
8790
trans->findFirstNode(&trTreeTop, &trNode, &block);
8791
if (!block) return false; // cannot find
8792
8793
if (isLoopPreheaderLastBlockInMethod(comp, block))
8794
{
8795
traceMsg(comp, "Bailing CISCTransform2ArrayCmp2Ifs due to null TT - might be a preheader in last block of method\n");
8796
return false;
8797
}
8798
8799
TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));
8800
TR::TreeTop *okDest = NULL; // Target treetop of the array length check.
8801
TR_CISCNode *cmpneIfAllCISCNode[2]; // CISCNodes for the arraycmp checks.
8802
TR::TreeTop *topCmpIfNonEqual[2]; // Treetops of the arraycmp checks.
8803
TR::Node *cmpIfNonEqual[2]; // Nodes of the arraycmp checks.
8804
TR::TreeTop *failDest[2]; // Target treetops of the arraycmp checks.
8805
8806
int32_t count = 0;
8807
// Extract all the CISCNodes corresponding to the two exit if-stmts
8808
// for comparing the two arrays.
8809
ListIterator <TR_CISCNode> ci;
8810
ci.set(trans->getP2T() + P->getImportantNode(3)->getID());
8811
for (TR_CISCNode *c = ci.getFirst(); c; c = ci.getNext())
8812
{
8813
if (!c->isOutsideOfLoop())
8814
{
8815
// Checks exit-if for comparing two arrays
8816
switch(c->getOpcode())
8817
{
8818
case TR::ificmpgt:
8819
case TR::ificmplt:
8820
case TR::iflcmpgt:
8821
case TR::iflcmplt:
8822
if (count >= 2) return false;
8823
cmpneIfAllCISCNode[count] = c;
8824
topCmpIfNonEqual[count] = c->getHeadOfTrNodeInfo()->_treeTop;
8825
cmpIfNonEqual[count] = c->getHeadOfTrNodeInfo()->_node;
8826
failDest[count] = c->getDestination();
8827
count++;
8828
break;
8829
default:
8830
return false;
8831
}
8832
}
8833
}
8834
if (count != 2) return false;
8835
8836
// Checks exit-if for checking the length
8837
switch(cmpIfAllCISCNode->getOpcode())
8838
{
8839
case TR::ificmpge:
8840
break;
8841
default:
8842
return false;
8843
}
8844
okDest = cmpIfAllCISCNode->getDestination();
8845
8846
//
8847
// obtain a CISCNode of each store for incrementing induction variables
8848
TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));
8849
TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));
8850
if (!storeSrc2) storeSrc2 = storeSrc1;
8851
TR_ASSERT(storeSrc1 != NULL && storeSrc2 != NULL, "error");
8852
8853
//
8854
// checking a set of all uses for each index
8855
TR_ASSERT(storeSrc1->getDagID() == storeSrc2->getDagID(), "error");
8856
8857
TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));
8858
TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));
8859
if (!src1CISCNode || !src2CISCNode) return false;
8860
TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;
8861
TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;
8862
// check the indices used in the array loads and
8863
// the store nodes
8864
//
8865
if (!indicesAndStoresAreConsistent(comp, inSrc1Node, inSrc2Node, storeSrc1, storeSrc2))
8866
{
8867
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node);
8868
return false;
8869
}
8870
TR::Node * mulFactorNode;
8871
int32_t elementSize;
8872
8873
// Get the size of elements
8874
if (!getMultiplier(trans, P->getImportantNode(6), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;
8875
if (elementSize != inSrc1Node->getSize() || elementSize != inSrc2Node->getSize())
8876
{
8877
traceMsg(comp, "CISCTransform2ArrayCmp2Ifs failed - Size Mismatch. Element Size: %d InSrc1Size: %d inSrc2Size: %d\n", elementSize, inSrc1Node->getSize(), inSrc2Node->getSize());
8878
return false; // Size is mismatch!
8879
}
8880
8881
TR::Node *src1IdxRepNode, *src2IdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *variableORconstRepNode;
8882
getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &variableORconstRepNode);
8883
if (!src2IdxRepNode) src2IdxRepNode = src1IdxRepNode;
8884
TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();
8885
if (!trans->analyzeArrayIndex(src1IdxSymRef)) return false;
8886
TR::SymbolReference * src2IdxSymRef = src2IdxRepNode->getSymbolReference();
8887
TR::Node *start1Idx, *start2Idx, *end1Idx, *end2Idx, *diff2;
8888
TR_CISCNode *arrayindex0, *arrayindex1;
8889
arrayindex0 = trans->getP()->getCISCNode(TR_arrayindex, true, 0);
8890
bool indexOf = trans->isIndexOf();
8891
if (indexOf && arrayindex0)
8892
{
8893
// more analysis for String.indexOf(Ljava/lang/String;I)I
8894
TR_CISCNode *a0;
8895
ListIterator<TR_CISCNode> pi(arrayindex0->getParents());
8896
for (a0 = pi.getFirst(); a0; a0 = pi.getNext())
8897
{
8898
if (a0->getOpcode() == TR::isub)
8899
{
8900
if (trans->getP2TRepInLoop(a0)) arrayindex0 = a0;
8901
break;
8902
}
8903
}
8904
}
8905
arrayindex1 = trans->getP()->getCISCNode(TR_arrayindex, true, 1);
8906
8907
bool useSrc1 = usedInLoopTest(comp, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, src1IdxSymRef);
8908
end2Idx = convertStoreToLoad(comp, variableORconstRepNode);
8909
start2Idx = convertStoreToLoad(comp, useSrc1 ? src1IdxRepNode : src2IdxRepNode);
8910
diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);
8911
start1Idx = convertStoreToLoad(comp, src1IdxRepNode);
8912
end1Idx = NULL;
8913
8914
if (arrayindex0) start1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;
8915
if (arrayindex1) start2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;
8916
8917
// Prepare effective addresses for arraycmplen
8918
TR::Node * input1Node = inSrc1Node->getChild(0)->duplicateTree();
8919
TR::Node * input2Node = inSrc2Node->getChild(0)->duplicateTree();
8920
TR::Node * lengthNode;
8921
lengthNode = diff2;
8922
8923
// an extra compare is going to be generated after the arrayCmp
8924
// to determine where to branch. if the arrayCmp found a mismatch
8925
// between the two array elements, the induction variable will be
8926
// updated correctly and the extra compare will test the element
8927
// at the index where the mismatch occurred.
8928
// however, if the two arrays are the same, the arrayCmp will terminate
8929
// after searching lengthNode bytes causing the extra compare to test
8930
// the index at lengthNode+1 which is incorrect.
8931
//
8932
lengthNode = TR::Node::create(TR::isub, 2, lengthNode, TR::Node::create(mulFactorNode, TR::iconst, 0, 1));
8933
8934
int shrCount = 0;
8935
TR::Node * elementSizeNode = NULL;
8936
if (elementSize > 1)
8937
{
8938
//FIXME: enable this code for 64-bit
8939
// currently disabled until all uses of lengthNode are
8940
// sign-extended correctly
8941
//
8942
TR::ILOpCodes mulOp = TR::imul;
8943
#if 0
8944
if (comp->target().is64Bit())
8945
{
8946
elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);
8947
elementSizeNode->setLongInt(elementSize);
8948
lengthNode = TR::Node::create(TR::i2l, 1, lengthNode);
8949
mulOp = TR::lmul;
8950
}
8951
else
8952
#endif
8953
elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);
8954
lengthNode = TR::Node::create(mulOp, 2,
8955
lengthNode,
8956
elementSizeNode);
8957
switch(elementSize)
8958
{
8959
case 2: shrCount = 1; break;
8960
case 4: shrCount = 2; break;
8961
case 8: shrCount = 3; break;
8962
default: TR_ASSERT(false, "error");
8963
}
8964
}
8965
8966
// Currently, it is inserted by reorderTargetNodesInBB()
8967
bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);
8968
TR::Block *compensateBlock0[2];
8969
compensateBlock0[0] = compensateBlock0[1] = NULL;
8970
TR::Block *compensateBlock1 = NULL;
8971
8972
// create two empty blocks for inserting compensation code prepared by reorderTargetNodesInBB()
8973
if (isCompensateCode)
8974
{
8975
compensateBlock0[0] = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
8976
compensateBlock0[1] = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
8977
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
8978
compensateBlock0[0]->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest[0])));
8979
compensateBlock0[1]->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest[1])));
8980
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
8981
failDest[0] = compensateBlock0[0]->getEntry();
8982
failDest[1] = compensateBlock0[1]->getEntry();
8983
okDest = compensateBlock1->getEntry();
8984
}
8985
TR_ASSERT(okDest != NULL && failDest[0] != NULL && failDest[1] != NULL, "error!");
8986
8987
TR::Node * topArraycmp;
8988
TR::TreeTop * newFirstTreeTop[2];
8989
TR::TreeTop * newLastTreeTop[2];
8990
8991
// Using the CLCL instruction
8992
lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode);
8993
TR::Node * arraycmplen = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, lengthNode);
8994
arraycmplen->setArrayCmpLen(true);
8995
arraycmplen->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());
8996
8997
TR::SymbolReference * resultSymRef = comp->getSymRefTab()->
8998
createTemporary(comp->getMethodSymbol(), TR::Int32);
8999
topArraycmp = TR::Node::createStore(resultSymRef, arraycmplen);
9000
9001
TR::Node * resultLoad = TR::Node::createLoad(topArraycmp, resultSymRef);
9002
TR::Node * equalLen = resultLoad;
9003
if (shrCount != 0)
9004
{
9005
equalLen = TR::Node::create(TR::ishr, 2,
9006
equalLen,
9007
TR::Node::create(equalLen, TR::iconst, 0, shrCount));
9008
}
9009
9010
TR::Node *tmpNode = createStoreOP2(comp, src1IdxSymRef, TR::iadd, src1IdxSymRef, equalLen, trNode);
9011
newFirstTreeTop[0] = TR::TreeTop::create(comp, tmpNode);
9012
newLastTreeTop[0] = newFirstTreeTop[0];
9013
TR::TreeTop * tmpTreeTop = NULL;
9014
9015
if (src1IdxSymRef != src2IdxSymRef)
9016
{
9017
tmpNode = createStoreOP2(comp, src2IdxSymRef, TR::iadd, src2IdxSymRef, equalLen, trNode);
9018
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9019
newLastTreeTop[0]->join(tmpTreeTop);
9020
newLastTreeTop[0] = tmpTreeTop;
9021
}
9022
9023
//
9024
// Generate 2 if-statements
9025
9026
// First One
9027
TR_CISCNode *ifChild[2];
9028
ifChild[0] = trans->getP2TInLoopAllowOptionalIfSingle(P->getImportantNode(3)->getChild(0));
9029
ifChild[1] = trans->getP2TInLoopAllowOptionalIfSingle(P->getImportantNode(3)->getChild(1));
9030
TR::DataType dataType = cmpIfNonEqual[0]->getChild(0)->getDataType();
9031
TR_ASSERT(dataType == TR::Int32 || dataType == TR::Int64, "error!");
9032
TR::SymbolReference * diffSymRef = comp->getSymRefTab()->
9033
createTemporary(comp->getMethodSymbol(), dataType);
9034
tmpNode = TR::Node::createStore(diffSymRef,
9035
TR::Node::create(dataType == TR::Int32 ? TR::isub : TR::lsub, 2,
9036
ifChild[0]->getHeadOfTrNodeInfo()->_node,
9037
ifChild[1]->getHeadOfTrNodeInfo()->_node));
9038
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9039
newLastTreeTop[0]->join(tmpTreeTop);
9040
newLastTreeTop[0] = tmpTreeTop;
9041
TR::Node * loadNode = convertStoreToLoad(comp, tmpNode);
9042
TR::Node * constNode;
9043
if (dataType == TR::Int32)
9044
{
9045
constNode = TR::Node::create(loadNode, TR::iconst, 0, 0);
9046
}
9047
else
9048
{
9049
constNode = TR::Node::create(loadNode, TR::lconst, 0, 0);
9050
constNode->setLongInt(0);
9051
}
9052
9053
tmpNode = TR::Node::createif((TR::ILOpCodes)cmpneIfAllCISCNode[0]->getOpcode(),
9054
loadNode,
9055
constNode,
9056
failDest[0]);
9057
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9058
newLastTreeTop[0]->join(tmpTreeTop);
9059
newLastTreeTop[0] = tmpTreeTop;
9060
9061
// Second One
9062
tmpNode = TR::Node::createif((TR::ILOpCodes)cmpneIfAllCISCNode[1]->getOpcode(),
9063
loadNode->duplicateTree(),
9064
constNode->duplicateTree(),
9065
failDest[1]);
9066
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9067
newFirstTreeTop[1] = tmpTreeTop;
9068
newLastTreeTop[1] = newFirstTreeTop[1];
9069
9070
// Transform CFG
9071
TR::CFG *cfg = comp->getFlowGraph();
9072
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
9073
cfg->setStructure(NULL);
9074
TR::TreeTop *last;
9075
9076
last = trans->removeAllNodes(trTreeTop, block->getExit());
9077
last->join(block->getExit());
9078
block = trans->insertBeforeNodes(block);
9079
last = block->getLastRealTreeTop();
9080
last->join(trTreeTop);
9081
trTreeTop->setNode(topArraycmp);
9082
trTreeTop->join(newFirstTreeTop[0]);
9083
newLastTreeTop[0]->join(block->getExit());
9084
9085
block = trans->insertAfterNodes(block);
9086
9087
TR::Block *if1Block = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9088
if1Block->getEntry()->join(newFirstTreeTop[1]);
9089
newLastTreeTop[1]->join(if1Block->getExit());
9090
if (orgNextTreeTop != NULL) {
9091
cfg->insertBefore(if1Block, orgNextTreeTop->getNode()->getBlock());
9092
} else {
9093
// Block returned by findFirstNode is the last BB of the method.
9094
cfg->addNode(if1Block);
9095
}
9096
cfg->join(block, if1Block);
9097
9098
trans->setSuccessorEdges(if1Block,
9099
okDest->getEnclosingBlock(),
9100
failDest[1]->getEnclosingBlock());
9101
9102
trans->setSuccessorEdges(block,
9103
if1Block,
9104
failDest[0]->getEnclosingBlock());
9105
9106
block = if1Block;
9107
if (isCompensateCode)
9108
{
9109
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
9110
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
9111
compensateBlock0[0] = trans->insertAfterNodesIdiom(compensateBlock0[0], 0, true);
9112
compensateBlock0[1] = trans->insertAfterNodesIdiom(compensateBlock0[1], 0, true);
9113
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);
9114
cfg->insertBefore(compensateBlock1, orgNextBlock);
9115
cfg->insertBefore(compensateBlock0[1], compensateBlock1);
9116
cfg->insertBefore(compensateBlock0[0], compensateBlock0[1]);
9117
cfg->join(block, compensateBlock0[0]);
9118
}
9119
9120
return true;
9121
}
9122
9123
9124
9125
//*****************************************************************************************
9126
// IL code generation for comparing memory (using CLC or CLCL)
9127
// Input: ImportantNode(0) - array load for src1
9128
// ImportantNode(1) - array load for src2
9129
// ImportantNode(2) - exit-if for checking the length
9130
// ImportantNode(3) - exit-if for comparing two arrays
9131
// ImportantNode(4) - increment the array index for src1
9132
// ImportantNode(5) - increment the array index for src2
9133
// ImportantNode(6) - the size of elements (NULL for byte arrays)
9134
// ImportantNode(7) - additional node for analyzing MEMCMPCompareTo. Not used for the others.
9135
//
9136
// Note: If we need to know the position where characters are different (flag generateArraycmplen),
9137
// we generate the CLCL instruction. Otherwise, we generate the CLC instruction.
9138
//*****************************************************************************************
9139
bool
9140
CISCTransform2ArrayCmp(TR_CISCTransformer *trans)
9141
{
9142
9143
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
9144
const bool disptrace = DISPTRACE(trans);
9145
TR::Node *trNode;
9146
TR::TreeTop *trTreeTop;
9147
TR::Block *block;
9148
TR_CISCGraph *P = trans->getP();
9149
List<TR_CISCNode> *P2T = trans->getP2T();
9150
TR::Compilation *comp = trans->comp();
9151
bool ctrl = trans->isGenerateI2L();
9152
9153
trans->findFirstNode(&trTreeTop, &trNode, &block);
9154
if (!block) return false; // cannot find
9155
9156
TR::Block *preHeader = NULL;
9157
if (isLoopPreheaderLastBlockInMethod(comp, block, &preHeader))
9158
{
9159
traceMsg(comp, "Bailing CISCTransform2ArrayCmp due to null TT - might be a preheader in last block of method\n");
9160
return false;
9161
}
9162
9163
TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));
9164
TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));
9165
if (!storeSrc2) storeSrc2 = storeSrc1;
9166
TR_ASSERT(storeSrc1 != NULL && storeSrc2 != NULL, "error");
9167
9168
9169
if (preHeader)
9170
{
9171
if (disptrace)
9172
traceMsg(comp, "found preheader to be %d\n", preHeader->getNumber());
9173
//
9174
// obtain a CISCNode of each store for incrementing induction variables
9175
9176
//check if any of the loop indices are defined between the preheader first tree and the first node found to match idiom
9177
TR::Node * inStoreSrc1= storeSrc1->getHeadOfTrNodeInfo()->_node;
9178
TR::Node * inStoreSrc2= storeSrc2->getHeadOfTrNodeInfo()->_node;
9179
9180
int32_t index1SymRefNum = inStoreSrc1->getSymbolReference()->getReferenceNumber();
9181
int32_t index2SymRefNum = inStoreSrc2->getSymbolReference()->getReferenceNumber();
9182
9183
if (disptrace)
9184
traceMsg(comp, "searching for stores to loop indices between preheader first tree %p and first matching tree %p, looking for symrefnum %d %d\n", preHeader->getFirstRealTreeTop()->getNode(),trTreeTop->getNode(),index1SymRefNum,index2SymRefNum);
9185
9186
9187
TR::Node * tempNode;
9188
for (TR::TreeTop * tt = preHeader->getFirstRealTreeTop();tt && tt != trTreeTop; tt = tt->getNextRealTreeTop())
9189
{
9190
tempNode = tt->getNode();
9191
if (tempNode->getOpCode().isStore() && tempNode->getOpCode().hasSymbolReference() &&
9192
((tempNode->getSymbolReference()->getReferenceNumber() == index1SymRefNum) ||
9193
(tempNode->getSymbolReference()->getReferenceNumber() == index2SymRefNum)))
9194
{
9195
traceMsg(comp, "Bailing CISCTransform2ArrayCmp due to unexpected store (%p) of one of the indices prior to the idiom\n",tempNode);
9196
return false;
9197
}
9198
}
9199
}
9200
9201
TR_CISCNode *cmpIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(2));
9202
TR_CISCNode *cmpneIfAllCISCNode = trans->getP2TRepInLoop(P->getImportantNode(3));
9203
ListIterator <TR_CISCNode> ci;
9204
TR_CISCNode *c;
9205
bool isDecrement = false;
9206
bool needVersioned = false;
9207
bool addLength1 = true;
9208
TR::TreeTop *failDest = NULL, *okDest = NULL;
9209
bool generateArraycmplen;
9210
bool generateArraycmpsign;
9211
bool compareTo;
9212
bool indexOf = trans->isIndexOf();
9213
9214
// The transformation can support two if-stmts: array comparison exit and one loop ending condition.
9215
TR_CISCGraph *T = trans->getT();
9216
if (T && T->getAspects()->getIfCount() > 2)
9217
{
9218
traceMsg(comp,"CISCTransform2ArrayCmp detected %d if-stmts in loop (> 2). Not transforming.\n", T->getAspects()->getIfCount());
9219
return false;
9220
}
9221
9222
// Checks exit-if for comparing two arrays
9223
switch(cmpneIfAllCISCNode->getOpcode())
9224
{
9225
case TR::ificmpne:
9226
case TR::ifbcmpne:
9227
case TR::ifscmpne:
9228
case TR::iflcmpne:
9229
case TR::ifacmpne:
9230
case TR::iffcmpne:
9231
case TR::ifdcmpne:
9232
case TR::iffcmpneu:
9233
case TR::ifdcmpneu:
9234
break;
9235
case TR::ificmpgt:
9236
case TR::ificmplt:
9237
case TR::iflcmpgt:
9238
case TR::iflcmplt:
9239
return CISCTransform2ArrayCmp2Ifs(trans); // Use 2 if-statements version
9240
default:
9241
return false;
9242
}
9243
9244
failDest = cmpneIfAllCISCNode->getDestination();
9245
9246
// We will fail this pattern if the comparison 'exit' is in fact not an exit out of the loop
9247
if (trans->isBlockInLoopBody(failDest->getNode()->getBlock()))
9248
{
9249
if (disptrace)
9250
traceMsg(comp, "CISCTransform2ArrayCmp failing transformer, ifcmpall test branch does not exit the loop.\n");
9251
return false;
9252
}
9253
9254
// Checks exit-if for checking the length
9255
switch(cmpIfAllCISCNode->getOpcode())
9256
{
9257
case TR::ificmplt:
9258
if (cmpIfAllCISCNode->isEmptyHint()) return false;
9259
c = cmpIfAllCISCNode->getHintChildren()->getListHead()->getData();
9260
if (c->getOpcode() != TR::iadd) return false;
9261
isDecrement = true;
9262
needVersioned = true;
9263
addLength1 = true;
9264
break;
9265
case TR::ificmple:{
9266
TR_CISCNode *child = cmpIfAllCISCNode->getChild(0);
9267
ci.set(child->getParents());
9268
for (c = ci.getFirst(); c; c = ci.getNext())
9269
if (c->getOpcode() == TR::iadd) break;
9270
if (!c) return false;
9271
isDecrement = true;
9272
needVersioned = true;
9273
addLength1 = true;
9274
break;}
9275
case TR::ificmpgt:
9276
isDecrement = false;
9277
needVersioned = false;
9278
addLength1 = true;
9279
break;
9280
case TR::ificmpge:
9281
isDecrement = false;
9282
needVersioned = false;
9283
addLength1 = false;
9284
break;
9285
default:
9286
return false;
9287
}
9288
9289
okDest = cmpIfAllCISCNode->getDestination();
9290
9291
//
9292
// checking a set of all uses for each index
9293
TR_ASSERT(storeSrc1->getDagID() == storeSrc2->getDagID(), "error");
9294
generateArraycmplen = false;
9295
generateArraycmpsign = false;
9296
if (storeSrc1 == storeSrc2)
9297
{
9298
if (!storeSrc1->checkDagIdInChains())
9299
{
9300
// there is an use outside of the loop.
9301
if (isDecrement)
9302
return false;
9303
else
9304
generateArraycmplen = true;
9305
}
9306
}
9307
else
9308
{
9309
if (!storeSrc1->checkDagIdInChains() || !storeSrc2->checkDagIdInChains())
9310
{
9311
// there is an use outside of the loop.
9312
if (isDecrement)
9313
return false;
9314
else
9315
generateArraycmplen = true;
9316
}
9317
}
9318
List<TR::TreeTop> compareIfs(comp->trMemory());
9319
if (true == (compareTo = trans->isCompareTo()))
9320
{
9321
if (!generateArraycmplen)
9322
{
9323
bool canConvertToArrayCmp = false;
9324
if (trans->canConvertArrayCmpSign(trans->getP2TRep(P->getImportantNode(7))->getHeadOfTrNode(),
9325
&compareIfs, &canConvertToArrayCmp))
9326
{
9327
if (!canConvertToArrayCmp)
9328
generateArraycmpsign = true;
9329
}
9330
else
9331
{
9332
generateArraycmplen = true;
9333
}
9334
}
9335
}
9336
9337
TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));
9338
TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));
9339
if (!src1CISCNode || !src2CISCNode || src1CISCNode == src2CISCNode) return false;
9340
TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;
9341
TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;
9342
9343
if (generateArraycmpsign)
9344
{
9345
if (!comp->cg()->getSupportsArrayCmpSign() ||
9346
!((inSrc1Node->getType().isIntegral() && src1CISCNode->getIlOpCode().isUnsigned()) || inSrc1Node->getType().isAddress()))
9347
{
9348
// arrayCmpLen can be reduced to arrayCmpSign, but either codegen does not support it
9349
// or we can't guarantee it works with byte-by-byte comparisons (only allow addresses and unsigned integrals)
9350
generateArraycmpsign = false;
9351
generateArraycmplen = true;
9352
}
9353
}
9354
9355
// check the indices used in the array loads and
9356
// the store nodes
9357
//
9358
if (!indicesAndStoresAreConsistent(comp, inSrc1Node, inSrc2Node, storeSrc1, storeSrc2))
9359
{
9360
dumpOptDetails(comp, "indices used in array loads %p and %p are not consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node);
9361
return false;
9362
}
9363
9364
if (!areArraysInvariant(comp, inSrc1Node, inSrc2Node, T))
9365
{
9366
traceMsg(comp, "input array bases %p and %p are not invariant, no reduction\n", inSrc1Node, inSrc2Node);
9367
return false;
9368
}
9369
9370
TR::Node * mulFactorNode;
9371
int elementSize;
9372
9373
// Get the size of elements
9374
if (!getMultiplier(trans, P->getImportantNode(6), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;
9375
9376
if (inSrc1Node->getType() != inSrc2Node->getType())
9377
{
9378
traceMsg(comp,
9379
"CISCTransform2ArrayCmp failed - Array access types differ. inSrc1: %s, inSrc2: %s\n",
9380
TR::DataType::getName(inSrc1Node->getType()),
9381
TR::DataType::getName(inSrc2Node->getType()));
9382
return false; // Size is mismatch!
9383
}
9384
9385
const uint32_t expectedSize = inSrc1Node->getType().isAddress()
9386
? TR::Compiler->om.sizeofReferenceField()
9387
: inSrc1Node->getSize();
9388
9389
if (elementSize != expectedSize)
9390
{
9391
traceMsg(comp,
9392
"CISCTransform2ArrayCmp failed - Size Mismatch. Element Size: %d, Expected Size: %d\n",
9393
elementSize,
9394
expectedSize);
9395
return false; // Size is mismatch!
9396
}
9397
9398
TR::Node *src1IdxRepNode, *src2IdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *variableORconstRepNode;
9399
getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &variableORconstRepNode);
9400
if (!src2IdxRepNode) src2IdxRepNode = src1IdxRepNode;
9401
TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();
9402
if (!trans->analyzeArrayIndex(src1IdxSymRef)) return false;
9403
TR::SymbolReference * src2IdxSymRef = src2IdxRepNode->getSymbolReference();
9404
TR::Node *start1Idx, *start2Idx, *end1Idx, *end2Idx, *diff2;
9405
TR_CISCNode *arrayindex0, *arrayindex1;
9406
arrayindex0 = trans->getP()->getCISCNode(TR_arrayindex, true, 0);
9407
if (indexOf && arrayindex0)
9408
{
9409
// more analysis for String.indexOf(Ljava/lang/String;I)I
9410
TR_CISCNode *a0;
9411
ListIterator<TR_CISCNode> pi(arrayindex0->getParents());
9412
for (a0 = pi.getFirst(); a0; a0 = pi.getNext())
9413
{
9414
if (a0->getOpcode() == TR::isub)
9415
{
9416
if (trans->getP2TRepInLoop(a0)) arrayindex0 = a0;
9417
break;
9418
}
9419
}
9420
}
9421
arrayindex1 = trans->getP()->getCISCNode(TR_arrayindex, true, 1);
9422
9423
bool useSrc1 = usedInLoopTest(comp, cmpIfAllCISCNode->getHeadOfTrNodeInfo()->_node, src1IdxSymRef);
9424
9425
TR::Node * input1Node;
9426
TR::Node * input2Node;
9427
TR::Node *startNode = NULL;
9428
TR::Node *endNode = NULL;
9429
if (isDecrement) // count-down loop
9430
{
9431
start2Idx = convertStoreToLoad(comp, variableORconstRepNode);
9432
end2Idx = convertStoreToLoad(comp, useSrc1 ? src1IdxRepNode : src2IdxRepNode);
9433
diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);
9434
end1Idx = convertStoreToLoad(comp, src1IdxRepNode);
9435
start1Idx = createOP2(comp, TR::isub, end1Idx, diff2);
9436
9437
if (disptrace)
9438
traceMsg(comp, "isDecrement start1Idx %p start2Idx %p end1Idx %p end2Idx %p\n", start1Idx, start2Idx, end1Idx, end2Idx);
9439
startNode = start2Idx->duplicateTree();
9440
endNode = useSrc1 ? end1Idx->duplicateTree() : end2Idx->duplicateTree();
9441
9442
if (arrayindex0) end1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;
9443
if (arrayindex1) end2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;
9444
input1Node = createArrayAddressTree(comp, ctrl, src1BaseRepNode, start1Idx, elementSize);
9445
input2Node = createArrayAddressTree(comp, ctrl, src2BaseRepNode, start2Idx, elementSize);
9446
}
9447
else
9448
{ // count-up loop
9449
end2Idx = convertStoreToLoad(comp, variableORconstRepNode);
9450
start2Idx = convertStoreToLoad(comp, useSrc1 ? src1IdxRepNode : src2IdxRepNode);
9451
diff2 = createOP2(comp, TR::isub, end2Idx, start2Idx);
9452
start1Idx = convertStoreToLoad(comp, src1IdxRepNode);
9453
end1Idx = needVersioned ? createOP2(comp, TR::iadd, start1Idx, diff2) : NULL;
9454
9455
if (disptrace)
9456
traceMsg(comp, "start1Idx %p start2Idx %p end1Idx %p end2Idx %p\n", start1Idx, start2Idx, end1Idx, end2Idx);
9457
startNode = useSrc1 ? start1Idx->duplicateTree() : start2Idx->duplicateTree();
9458
endNode = end2Idx->duplicateTree();
9459
9460
if (arrayindex0) start1Idx = trans->getP2TRep(arrayindex0)->getHeadOfTrNodeInfo()->_node;
9461
if (arrayindex1) start2Idx = trans->getP2TRep(arrayindex1)->getHeadOfTrNodeInfo()->_node;
9462
input1Node = inSrc1Node->getChild(0)->duplicateTree();
9463
input2Node = inSrc2Node->getChild(0)->duplicateTree();
9464
}
9465
9466
// Prepare effective addresses for arraycmp(len)
9467
TR::Node * lengthNode;
9468
if (addLength1)
9469
{
9470
lengthNode = createOP2(comp, TR::isub,
9471
diff2,
9472
TR::Node::create(src1BaseRepNode, TR::iconst, 0, -1));
9473
}
9474
else
9475
{
9476
lengthNode = diff2;
9477
}
9478
9479
int shrCount = 0;
9480
TR::Node * elementSizeNode = NULL;
9481
if (elementSize > 1)
9482
{
9483
//FIXME: enable this code for 64-bit
9484
// currently disabled until all uses of lengthNode are
9485
// sign-extended correctly
9486
//
9487
TR::ILOpCodes mulOp = TR::imul;
9488
#if 0
9489
if (comp->target().is64Bit())
9490
{
9491
elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);
9492
elementSizeNode->setLongInt(elementSize);
9493
mulOp = TR::lmul;
9494
lengthNode = TR::Node::create(TR::i2l, 1, lengthNode);
9495
}
9496
else
9497
#endif
9498
elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);
9499
lengthNode = TR::Node::create(mulOp, 2,
9500
lengthNode,
9501
elementSizeNode);
9502
switch(elementSize)
9503
{
9504
case 2: shrCount = 1; break;
9505
case 4: shrCount = 2; break;
9506
case 8: shrCount = 3; break;
9507
default: TR_ASSERT(false, "error");
9508
}
9509
}
9510
9511
TR_ASSERT(!generateArraycmplen || !generateArraycmpsign, "error");
9512
9513
// Prepare compensation code
9514
if (compareTo)
9515
{
9516
if (generateArraycmplen)
9517
{
9518
TR::Node *tmpNode;
9519
TR_CISCNode *storeResult = P->getImportantNode(7);
9520
tmpNode = trans->getP2TRep(storeResult)->getHeadOfTrNodeInfo()->_node->duplicateTree();
9521
trans->getAfterInsertionIdiomList(0)->add(tmpNode);
9522
9523
tmpNode = TR::Node::createStore(tmpNode->getSymbolReference(),
9524
TR::Node::create(tmpNode, TR::iconst, 0, 0));
9525
trans->getAfterInsertionIdiomList(1)->add(tmpNode);
9526
}
9527
}
9528
9529
bool isCompensateCode = !trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1);
9530
TR::Block *compensateBlock0 = NULL;
9531
TR::Block *compensateBlock1 = NULL;
9532
9533
// create two empty blocks for inserting compensation code prepared by reorderTargetNodesInBB()
9534
if (isCompensateCode)
9535
{
9536
compensateBlock0 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9537
compensateBlock1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9538
compensateBlock0->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest)));
9539
compensateBlock1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, okDest)));
9540
failDest = compensateBlock0->getEntry();
9541
okDest = compensateBlock1->getEntry();
9542
}
9543
TR_ASSERT(okDest != NULL && failDest != NULL, "error!");
9544
9545
TR::Node * topArraycmp;
9546
TR::TreeTop * newFirstTreeTop;
9547
TR::TreeTop * newLastTreeTop;
9548
9549
TR::Node *storeCompareToResult = NULL;
9550
9551
if (generateArraycmplen)
9552
{
9553
// Using the CLCL instruction
9554
9555
TR::Node * arraycmplen = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));
9556
arraycmplen->setArrayCmpLen(true);
9557
arraycmplen->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());
9558
9559
TR::SymbolReference * resultSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Int32);
9560
topArraycmp = TR::Node::createStore(resultSymRef, arraycmplen);
9561
9562
TR::Node * resultLoad = TR::Node::createLoad(topArraycmp, resultSymRef);
9563
TR::Node * equalLen = resultLoad;
9564
if (shrCount != 0)
9565
{
9566
equalLen = TR::Node::create(TR::ishr, 2,
9567
equalLen,
9568
TR::Node::create(equalLen, TR::iconst, 0, shrCount));
9569
}
9570
9571
TR::Node *tmpNode = createStoreOP2(comp, src1IdxSymRef, TR::iadd, src1IdxSymRef, equalLen, trNode);
9572
newFirstTreeTop = TR::TreeTop::create(comp, tmpNode);
9573
newLastTreeTop = newFirstTreeTop;
9574
TR::TreeTop * tmpTreeTop = NULL;
9575
9576
if (src1IdxSymRef != src2IdxSymRef)
9577
{
9578
tmpNode = createStoreOP2(comp, src2IdxSymRef, TR::iadd, src2IdxSymRef, equalLen, trNode);
9579
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9580
newLastTreeTop->join(tmpTreeTop);
9581
newLastTreeTop = tmpTreeTop;
9582
}
9583
9584
tmpNode = TR::Node::createif(TR::ificmpeq,
9585
lengthNode,
9586
resultLoad,
9587
okDest);
9588
tmpTreeTop = TR::TreeTop::create(comp, tmpNode);
9589
newLastTreeTop->join(tmpTreeTop);
9590
newLastTreeTop = tmpTreeTop;
9591
}
9592
else
9593
{
9594
// Using the CLC instruction
9595
TR::Node * arraycmp = TR::Node::create(TR::arraycmp, 3, input1Node, input2Node, createI2LIfNecessary(comp, trans->isGenerateI2L(), lengthNode));
9596
arraycmp->setSymbolReference(comp->getSymRefTab()->findOrCreateArrayCmpSymbol());
9597
9598
TR::Node * cmpIfNode;
9599
if (compareTo)
9600
{
9601
storeCompareToResult = trans->getP2TRep(P->getImportantNode(7))->getHeadOfTrNode();
9602
if (generateArraycmpsign)
9603
{
9604
TR_ASSERT(comp->cg()->getSupportsArrayCmpSign(), "error");
9605
arraycmp->setArrayCmpSign(true);
9606
9607
topArraycmp = TR::Node::createStore(storeCompareToResult->getSymbolReference(), arraycmp);
9608
cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,
9609
TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),
9610
okDest);
9611
}
9612
else
9613
{
9614
if (disptrace) traceMsg(comp, "ArrayCmp: Convert compareTo into equals!\n");
9615
topArraycmp = TR::Node::createStore(storeCompareToResult->getSymbolReference(),
9616
createOP2(comp, TR::isub,
9617
TR::Node::create(src1BaseRepNode, TR::iconst, 0, 1),
9618
TR::Node::create(TR::iand, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 1)), TR::Node::create(TR::ishr, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 2)), TR::Node::iconst(arraycmp, 1)))));
9619
9620
cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,
9621
TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),
9622
okDest);
9623
9624
}
9625
}
9626
else
9627
{
9628
topArraycmp = TR::Node::createStore(src1IdxSymRef, TR::Node::create(TR::iand, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 1)), TR::Node::create(TR::ishr, 2, TR::Node::create(TR::ixor, 2, arraycmp, TR::Node::iconst(arraycmp, 2)), TR::Node::iconst(arraycmp, 1))));
9629
cmpIfNode = TR::Node::createif(TR::ificmpeq, arraycmp,
9630
TR::Node::create( src1BaseRepNode, TR::iconst, 0, 0),
9631
okDest);
9632
}
9633
newFirstTreeTop = TR::TreeTop::create(comp, cmpIfNode);
9634
newLastTreeTop = newFirstTreeTop;
9635
}
9636
9637
TR::TreeTop *last;
9638
9639
if (needVersioned) // Need to version the loop to eliminate array bounds checking
9640
{
9641
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
9642
9643
// making two versions (safe and non-safe).
9644
TR::CFG *cfg = comp->getFlowGraph();
9645
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
9646
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
9647
TR::Block *chkSrc1a = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9648
TR::Block *chkSrc1b = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9649
TR::Block *chkSrc2a = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9650
TR::Block *chkSrc2b = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9651
TR::Block *fastpath = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9652
TR::Block *slowpad = block->split(trTreeTop, cfg, true);
9653
TR::Block *gotoBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
9654
9655
TR::Node *cmp, *len1, *len2;
9656
len1 = TR::Node::create(TR::arraylength, 1,
9657
convertStoreToLoad(comp, src1BaseRepNode));
9658
cmp = TR::Node::createif(TR::ifiucmpge, start1Idx->duplicateTree(), len1, slowpad->getEntry());
9659
chkSrc1a->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));
9660
cmp = TR::Node::createif(TR::ifiucmpge, end1Idx->duplicateTree(), len1->duplicateTree(), slowpad->getEntry());
9661
chkSrc1b->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));
9662
len2 = TR::Node::create(TR::arraylength, 1,
9663
convertStoreToLoad(comp, src2BaseRepNode));
9664
cmp = TR::Node::createif(TR::ifiucmpge, start2Idx->duplicateTree(), len2, slowpad->getEntry());
9665
chkSrc2a->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));
9666
cmp = TR::Node::createif(TR::ifiucmpge, end2Idx->duplicateTree(), len2->duplicateTree(), slowpad->getEntry());
9667
chkSrc2b->getEntry()->insertAfter(TR::TreeTop::create(comp, cmp));
9668
9669
TR::TreeTop * branchTreeTop = TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, failDest));
9670
gotoBlock->append(branchTreeTop);
9671
9672
cfg->setStructure(NULL);
9673
cfg->insertBefore(gotoBlock, slowpad);
9674
cfg->insertBefore(fastpath, gotoBlock);
9675
cfg->insertBefore(chkSrc2b, fastpath);
9676
cfg->insertBefore(chkSrc2a, chkSrc2b);
9677
cfg->insertBefore(chkSrc1b, chkSrc2a);
9678
cfg->insertBefore(chkSrc1a, chkSrc1b);
9679
9680
fastpath = trans->insertBeforeNodes(fastpath);
9681
last = fastpath->getLastRealTreeTop();
9682
TR::TreeTop *arrayCmpTreeTop = TR::TreeTop::create(comp, topArraycmp);
9683
last->join(arrayCmpTreeTop);
9684
arrayCmpTreeTop->join(newFirstTreeTop);
9685
newLastTreeTop->join(fastpath->getExit());
9686
fastpath = trans->insertAfterNodes(fastpath);
9687
9688
if (isCompensateCode)
9689
{
9690
cfg->setStructure(NULL);
9691
TR::TreeTop * orgNextTreeTop = fastpath->getExit()->getNextTreeTop();
9692
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
9693
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true);
9694
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);
9695
cfg->insertBefore(compensateBlock1, orgNextBlock);
9696
cfg->insertBefore(compensateBlock0, compensateBlock1);
9697
cfg->join(fastpath, compensateBlock0);
9698
}
9699
9700
fastpath->getExit()->join(gotoBlock->getEntry());
9701
trans->setSuccessorEdges(fastpath,
9702
gotoBlock,
9703
okDest->getEnclosingBlock());
9704
9705
block->getExit()->join(chkSrc1a->getEntry());
9706
cfg->addEdge(block, chkSrc1a);
9707
cfg->removeEdge(block, slowpad);
9708
trans->setColdLoopBody();
9709
}
9710
else
9711
{
9712
// making only the bound-check free version
9713
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());
9714
block = trans->insertBeforeNodes(block);
9715
block->append(TR::TreeTop::create(comp, topArraycmp));
9716
last = block->getLastRealTreeTop();
9717
last->join(newFirstTreeTop);
9718
newLastTreeTop->join(block->getExit());
9719
9720
block = trans->insertAfterNodes(block);
9721
9722
if (isCompensateCode)
9723
{
9724
TR::CFG *cfg = comp->getFlowGraph();
9725
cfg->setStructure(NULL);
9726
TR::TreeTop * orgNextTreeTop = block->getExit()->getNextTreeTop();
9727
TR::Block *orgNextBlock = orgNextTreeTop->getNode()->getBlock();
9728
compensateBlock0 = trans->insertAfterNodesIdiom(compensateBlock0, 0, true);
9729
compensateBlock1 = trans->insertAfterNodesIdiom(compensateBlock1, 1, true);
9730
cfg->insertBefore(compensateBlock1, orgNextBlock);
9731
cfg->insertBefore(compensateBlock0, compensateBlock1);
9732
cfg->join(block, compensateBlock0);
9733
}
9734
9735
trans->setSuccessorEdges(block,
9736
failDest->getEnclosingBlock(),
9737
okDest->getEnclosingBlock());
9738
}
9739
9740
if (0 && isCompensateCode)
9741
{
9742
// create control flow as below
9743
// --start preheader--
9744
// if (i reverseopcode N)
9745
// goto compensateblock0
9746
// --end preheader--
9747
// else
9748
// arraycmp
9749
// ...
9750
traceMsg(comp, "cmpifallciscnode %d ifcmpge %d\n", cmpIfAllCISCNode->getOpcode(), TR::ificmpge);
9751
TR::Node *compareNode = TR::Node::createif((TR::ILOpCodes)cmpIfAllCISCNode->getOpcode(), startNode, endNode, compensateBlock0->getEntry());
9752
TR::TreeTop *compareTree = TR::TreeTop::create(comp, compareNode);
9753
if (!preHeader)
9754
preHeader = trans->addPreHeaderIfNeeded(trans->getCurrentLoop());
9755
preHeader->append(compareTree);
9756
comp->getFlowGraph()->addEdge(preHeader, compensateBlock0);
9757
}
9758
9759
return true;
9760
}
9761
9762
bool
9763
CISCTransform2ArrayCmpCompareTo(TR_CISCTransformer *trans)
9764
{
9765
trans->setCompareTo();
9766
return CISCTransform2ArrayCmp(trans);
9767
}
9768
9769
9770
bool
9771
CISCTransform2ArrayCmpIndexOf(TR_CISCTransformer *trans)
9772
{
9773
trans->setIndexOf();
9774
return CISCTransform2ArrayCmp(trans);
9775
}
9776
9777
/****************************************************************************************
9778
Corresponding Java-like Pseudo Program
9779
int v1, v2, end;
9780
v3[ ], v4[ ]; // char, int, float, long, and so on
9781
while(true){
9782
ifcmpall (v3[v1], v4[v2] ) break;
9783
v1++;
9784
v2++;
9785
ifcmpall(v1, end) break;
9786
}
9787
9788
Note 1: Variables v1 and v2 are allowed to be identical.
9789
Note 2: The wildcard node ifcmpall matches all types of if-instructions.
9790
****************************************************************************************/
9791
// Builds the persistent pattern graph named "MemCmp" for the array comparison loop
// sketched in the pseudo program comment preceding this function.
//
// c    - compilation whose trMemory() is passed to the graph and every node constructor
// ctrl - control flags forwarded unchanged to createIdiomArrayLoadInLoop() and
//        createIdiomDecVarInLoop()
//
// Returns the newly allocated graph; CISCTransform2ArrayCmp is registered as its transformer.
TR_PCISCGraph *
9792
makeMemCmpGraph(TR::Compilation *c, int32_t ctrl)
9793
{
9794
TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmp", 0, 16);
9795
/************************************ opc id dagId #cfg #child other/pred/children */
9796
// Operand nodes: each one has its own dagId, counting down from 13 to 3.
TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v1); // array index for src1
9797
TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // array index for src2
9798
TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v3); // src1 array base
9799
TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v4); // src2 array base
9800
TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(vorc1); // length
9801
TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 0); tgt->addNode(idx0);
9802
TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 1); tgt->addNode(idx1);
9803
TR_PCISCNode *cmah0=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah0); // array header
9804
TR_PCISCNode *cmah1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 1); tgt->addNode(cmah1); // array header
9805
TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(iall); // Multiply Factor
9806
TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
9807
TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
9808
// Loop body, chained through the predecessor argument: load from v3 at idx0, conversion,
// load from v4 at idx1, conversion, then a wildcard compare of the two converted values.
TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR_indload, TR::NoType, v3, idx0, cmah0, iall);
9809
TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);
9810
TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_indload, TR::NoType, v4, idx1, cmah1, iall);
9811
TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);
9812
TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n3, n1, n3); tgt->addNode(ncmp);
9813
// Updates of v1 and v2 by the constant cm1 (-1), followed by a wildcard compare of v1 against vorc1.
TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);
9814
TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);
9815
TR_PCISCNode *ncmpge = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);
9816
TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);
9817
9818
// The index compare has two successors: the entry node's first successor (back to the top
// of the loop body) and the exit node. Successor 1 of the element compare is also the exit node.
ncmpge->setSuccs(ent->getSucc(0), n9);
9819
ncmp->setSucc(1, n9);
9820
9821
// Both conversion nodes are flagged optional
// (presumably so loops without a conversion still match - TODO confirm).
n1->setIsOptionalNode();
9822
n3->setIsOptionalNode();
9823
9824
tgt->setEntryNode(ent);
9825
tgt->setExitNode(n9);
9826
tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character
9827
// Important nodes, by position: 0=n0, 1=n2, 2=ncmpge, 3=ncmp, 4=n6, 5=n7, 6=iall.
tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall);
9828
// dagIds 0 through 13 are used by the nodes created above.
tgt->setNumDagIds(14);
9829
tgt->createInternalData(1);
9830
9831
tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
9832
tgt->setTransformer(CISCTransform2ArrayCmp);
9833
tgt->setAspects(isub|mul, existAccess, 0);
9834
tgt->setNoAspects(call|bndchk|bitop1, 0, existAccess);
9835
tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount
9836
tgt->setInhibitBeforeVersioning();
9837
tgt->setHotness(warm, false);
9838
return tgt;
9839
}
9840
9841
9842
/****************************************************************************************
9843
Corresponding Java-like Pseudo Program
9844
int v1, v2, end;
9845
v3[ ], v4[ ]; // char, int, float, long, and so on
9846
while(true){
9847
ifcmpall (v3[v1], v4[v2] ) break;
9848
v1++;
9849
v2++;
9850
ifcmpall(v1, end) break;
9851
}
9852
9853
Note 1: Variables v1 and v2 are allowed to be identical.
9854
Note 2: The wildcard node ifcmpall matches all types of if-instructions.
9855
****************************************************************************************/
9856
// Builds the persistent pattern graph named "MemCmpIndexOf".
// Its shape follows the "MemCmp" graph, except that the first array load is indexed through
// an explicit isub node (idx0 minus the constant -1), both loads use TR_inbload, and a single
// array-header constant node is shared by the two loads.
//
// c    - compilation whose trMemory() is passed to the graph and every node constructor
// ctrl - control flags forwarded unchanged to createIdiomArrayLoadInLoop() and
//        createIdiomDecVarInLoop()
//
// Returns the newly allocated graph; CISCTransform2ArrayCmpIndexOf is registered as its transformer.
TR_PCISCGraph *
9857
makeMemCmpIndexOfGraph(TR::Compilation *c, int32_t ctrl)
9858
{
9859
TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmpIndexOf", 0, 16);
9860
/************************************ opc id dagId #cfg #child other/pred/children */
9861
// Operand nodes: each one has its own dagId, counting down from 12 to 3.
TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 0); tgt->addNode(v1); // array index for src1
9862
TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 1); tgt->addNode(v2); // array index for src2
9863
TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 0); tgt->addNode(v3); // src1 array base
9864
TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 9, 0, 0, 1); tgt->addNode(v4); // src2 array base
9865
TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 8, 0, 0); tgt->addNode(vorc1); // length
9866
TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(idx0);
9867
TR_PCISCNode *idx1= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 1); tgt->addNode(idx1);
9868
TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0, 0); tgt->addNode(cmah); // array header
9869
TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 4, 0, 0); tgt->addNode(iall); // Multiply Factor
9870
TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, -1); tgt->addNode(cm1);
9871
TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
9872
// First node of the loop body: isub with children idx0 and cm1 (-1); it is used as the index
// of the first array load below.
TR_PCISCNode *a1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, ent, idx0, cm1); tgt->addNode(a1);
9873
TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, a1, TR_inbload, TR::NoType, v3, a1, cmah, iall);
9874
// Flag the head of a1's parent list as having its child directly connected.
a1->getHeadOfParents()->setIsChildDirectlyConnected();
9875
TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);
9876
TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_inbload, TR::NoType, v4, idx1, cmah, iall);
9877
TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);
9878
TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n3, n1, n3); tgt->addNode(ncmp);
9879
// Updates of v1 and v2 by the constant cm1 (-1), followed by a wildcard compare of v1 against vorc1.
TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);
9880
TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);
9881
TR_PCISCNode *ncmpge = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);
9882
TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);
9883
9884
// The index compare has two successors: the entry node's first successor (back to the top
// of the loop body) and the exit node. Successor 1 of the element compare is also the exit node.
ncmpge->setSuccs(ent->getSucc(0), n9);
9885
ncmp->setSucc(1, n9);
9886
9887
// Both conversion nodes are flagged optional
// (presumably so loops without a conversion still match - TODO confirm).
n1->setIsOptionalNode();
9888
n3->setIsOptionalNode();
9889
9890
tgt->setEntryNode(ent);
9891
tgt->setExitNode(n9);
9892
tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character
9893
// Important nodes, by position: 0=n0, 1=n2, 2=ncmpge, 3=ncmp, 4=n6, 5=n7, 6=iall.
tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall);
9894
// dagIds 0 through 12 are used by the nodes created above.
tgt->setNumDagIds(13);
9895
tgt->createInternalData(1);
9896
9897
tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
9898
tgt->setTransformer(CISCTransform2ArrayCmpIndexOf);
9899
tgt->setAspects(isub|mul, existAccess, 0);
9900
// NOTE(review): unlike the "MemCmp" graph, Size_1 is passed as the second argument here;
// presumably this rejects loops with 1-byte accesses - confirm against setNoAspects().
tgt->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_1, existAccess);
9901
tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount
9902
tgt->setInhibitBeforeVersioning();
9903
tgt->setHotness(warm, false);
9904
return tgt;
9905
}
9906
9907
9908
/****************************************************************************************
9909
Corresponding Java-like Pseudo Program
9910
int v1, v2, end, v5;
9911
v3[ ], v4[ ]; // char, int, float, long, and so on
9912
while(true){
9913
v5 = v3[v1++] - v4[v2++];
9914
if (v5 != 0) break;
9915
if (v1 >= end) break;
9916
}
9917
9918
Note 1: Variables v1 and v2 are allowed to be identical.
9919
****************************************************************************************/
9920
// Builds the persistent pattern graph named "MemCmpSpecial" for the loop sketched in the
// pseudo program comment preceding this function: the difference of two array elements is
// stored into a result variable, and the loop exits when that result is non-zero or when
// the index reaches the end value.
//
// c    - compilation whose trMemory() is passed to the graph and every node constructor
// ctrl - control flags forwarded unchanged to createIdiomArrayLoadInLoop() and
//        createIdiomDecVarInLoop()
//
// Returns the newly allocated graph; CISCTransform2ArrayCmpCompareTo is registered as its transformer.
TR_PCISCGraph *
9921
makeMemCmpSpecialGraph(TR::Compilation *c, int32_t ctrl)
9922
{
9923
TR_PCISCGraph *tgt = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "MemCmpSpecial", 0, 16);
9924
/************************************ opc id dagId #cfg #child other/pred/children */
9925
// Operand nodes: each one has its own dagId, counting down from 13 to 3.
TR_PCISCNode *v1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 13, 0, 0, 0); tgt->addNode(v1); // array index for src1
9926
TR_PCISCNode *v2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 12, 0, 0, 1); tgt->addNode(v2); // array index for src2
9927
TR_PCISCNode *v3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 11, 0, 0, 0); tgt->addNode(v3); // src1 array base
9928
TR_PCISCNode *v4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, tgt->incNumNodes(), 10, 0, 0, 1); tgt->addNode(v4); // src2 array base
9929
TR_PCISCNode *vorc1=new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, tgt->incNumNodes(), 9, 0, 0); tgt->addNode(vorc1); // length
9930
TR_PCISCNode *v5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, tgt->incNumNodes(), 8, 0, 0, 2); tgt->addNode(v5); // result
9931
TR_PCISCNode *idx0= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, tgt->incNumNodes(), 7, 0, 0, 0); tgt->addNode(idx0);
9932
TR_PCISCNode *cmah= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, tgt->incNumNodes(), 6, 0, 0, 0); tgt->addNode(cmah); // array header
9933
TR_PCISCNode *iall= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, tgt->incNumNodes(), 5, 0, 0); tgt->addNode(iall); // Multiply Factor
9934
TR_PCISCNode *cm1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 4, 0, 0, -1); tgt->addNode(cm1);
9935
TR_PCISCNode *c0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, tgt->incNumNodes(), 3, 0, 0, 0); tgt->addNode(c0);
9936
TR_PCISCNode *ent = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, tgt->incNumNodes(), 2, 1, 0); tgt->addNode(ent);
9937
// Loop body: load from v3 indexed by the variable node v1, conversion, load from v4
// indexed by idx0, conversion, subtract the two converted values, store the difference
// into v5, then exit-test v5 against the constant 0 with ificmpne.
TR_PCISCNode *n0 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, ent, TR_inbload, TR::NoType, v3, v1, cmah, iall);
9938
TR_PCISCNode *n1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n0, n0); tgt->addNode(n1);
9939
TR_PCISCNode *n2 = createIdiomArrayLoadInLoop(tgt, ctrl, 1, n1, TR_inbload, TR::NoType, v4, idx0, cmah, iall);
9940
TR_PCISCNode *n3 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, tgt->incNumNodes(), 1, 1, 1, n2, n2); tgt->addNode(n3);
9941
TR_PCISCNode *n4 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::isub, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n3, n1, n3); tgt->addNode(n4);
9942
TR_PCISCNode *n5 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, tgt->incNumNodes(), 1, 1, 2, n4, n4, v5); tgt->addNode(n5);
9943
TR_PCISCNode *ncmp= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpne, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n5, v5, c0); tgt->addNode(ncmp);
9944
// Updates of v1 and v2 by the constant cm1 (-1), followed by an ificmpge of v1 against vorc1.
TR_PCISCNode *n6 = createIdiomDecVarInLoop(tgt, ctrl, 1, ncmp, v1, cm1);
9945
TR_PCISCNode *n7 = createIdiomDecVarInLoop(tgt, ctrl, 1, n6, v2, cm1);
9946
TR_PCISCNode *ncmpge= new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, tgt->incNumNodes(), 1, 2, 2, n7, v1, vorc1); tgt->addNode(ncmpge);
9947
TR_PCISCNode *n9 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, tgt->incNumNodes(), 0, 0, 0); tgt->addNode(n9);
9948
9949
// The index compare has two successors: the entry node's first successor (back to the top
// of the loop body) and the exit node. Successor 1 of the result compare is also the exit node.
ncmpge->setSuccs(ent->getSucc(0), n9);
9950
ncmp->setSucc(1, n9);
9951
9952
// Both conversion nodes are flagged optional
// (presumably so loops without a conversion still match - TODO confirm).
n1->setIsOptionalNode();
9953
n3->setIsOptionalNode();
9954
9955
tgt->setSpecialCareNode(0, ncmp); // exit-if due to a different character
9956
tgt->setEntryNode(ent);
9957
tgt->setExitNode(n9);
9958
// Important nodes, by position: 0=n0, 1=n2, 2=ncmpge, 3=ncmp, 4=n6, 5=n7, 6=iall, 7=n5 (the result store).
tgt->setImportantNodes(n0, n2, ncmpge, ncmp, n6, n7, iall, n5);
9959
// dagIds 0 through 13 are used by the nodes created above.
tgt->setNumDagIds(14);
9960
tgt->createInternalData(1);
9961
9962
tgt->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
9963
tgt->setTransformer(CISCTransform2ArrayCmpCompareTo);
9964
tgt->setAspects(isub|mul, existAccess, 0);
9965
// NOTE(review): Size_1 is passed as the second argument here;
// presumably this rejects loops with 1-byte accesses - confirm against setNoAspects().
tgt->setNoAspects(call|bndchk|bitop1, ILTypeProp::Size_1, existAccess);
9966
tgt->setMinCounts(2, 2, 0); // minimum ifCount, indirectLoadCount, indirectStoreCount
9967
tgt->setInhibitBeforeVersioning();
9968
tgt->setHotness(warm, false);
9969
return tgt;
9970
}
9971
9972
9973
//////////////////////////////////////////////////////////////////////////
9974
//////////////////////////////////////////////////////////////////////////
9975
//////////////////////////////////////////////////////////////////////////
9976
// Utilities for BitOpMem
9977
9978
static void
9979
setSubopBitOpMem(TR::Compilation *comp, TR::Node *bitOpMem, TR_CISCNode *opCISCNode)
9980
{
9981
if (opCISCNode->getIlOpCode().isAnd())
9982
{
9983
bitOpMem->setAndBitOpMem(true);
9984
}
9985
else if (opCISCNode->getIlOpCode().isXor())
9986
{
9987
bitOpMem->setXorBitOpMem(true);
9988
}
9989
else
9990
{
9991
TR_ASSERT(opCISCNode->getIlOpCode().isOr(), "error");
9992
bitOpMem->setOrBitOpMem(true);
9993
}
9994
}
9995
9996
// Determines the pinning-array auto symbol for `base` and attaches it to the internal-pointer
// symbol written by `internalPtrStore`.
//
// comp             - compilation used to create the temporary and the tree top when needed
// internalPtrStore - store node whose symbol is cast to an internal-pointer auto symbol
// base             - node providing the array base
// appendBlock      - block that receives a store of `base` into a fresh temporary when
//                    `base` is not a direct load of an auto symbol
//
// Returns the auto symbol that was marked as the pinning array pointer.
static TR::AutomaticSymbol *
setPinningArray(TR::Compilation *comp, TR::Node *internalPtrStore, TR::Node *base, TR::Block *appendBlock)
   {
   TR::AutomaticSymbol *pinningArray = NULL;

   // The symbol reference is only inspected once the opcode is known to be a direct variable load.
   const bool baseIsDirectAutoLoad =
      base->getOpCode().isLoadVarDirect() &&
      base->getSymbolReference()->getSymbol()->isAuto();

   if (!baseIsDirectAutoLoad)
      {
      // Save the base into a new Address temporary and use that temporary as the pinning array.
      TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address);
      TR::Node *saveBase = TR::Node::createStore(tempRef, createLoad(base));
      appendBlock->append(TR::TreeTop::create(comp, saveBase));
      pinningArray = tempRef->getSymbol()->castToAutoSymbol();
      }
   else if (base->getSymbolReference()->getSymbol()->castToAutoSymbol()->isInternalPointer())
      {
      // The base is itself an internal pointer: reuse the pinning array it already refers to.
      pinningArray = base->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->getPinningArrayPointer();
      }
   else
      {
      // The base is a plain auto symbol: it serves as the pinning array directly.
      pinningArray = base->getSymbolReference()->getSymbol()->castToAutoSymbol();
      }

   pinningArray->setPinningArrayPointer();
   internalPtrStore->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->setPinningArrayPointer(pinningArray);

   if (internalPtrStore->isInternalPointer())
      internalPtrStore->setPinningArrayPointer(pinningArray);

   return pinningArray;
   }
10018
10019
//*****************************************************************************************
10020
// IL code generation for bit operations for memory to memory (dest = src1 op src2)
10021
// Input: ImportantNode(0) - array load for src1
10022
// ImportantNode(1) - array load for src2
10023
// ImportantNode(2) - array store for dest
10024
// ImportantNode(3) - a bit operation (XOR, AND, or OR)
10025
// ImportantNode(4) - increment the array index for src1
10026
// ImportantNode(5) - increment the array index for src2
10027
// ImportantNode(6) - increment the array index for dest
10028
// ImportantNode(7) - the size of elements (NULL for byte arrays)
10029
//*****************************************************************************************
10030
// This transformer will generate the following code.
10031
// if (dest.addr == src1.addr)
10032
// {
10033
// // dest and src1 are identical
10034
// bitOpMem(dest.addr, src2.addr, len); // three children (dest op= src2)
10035
// }
10036
// else if (dest.addr == src2.addr)
10037
// {
10038
// // dest and src2 are identical
10039
// bitOpMem(dest.addr, src1.addr, len); // three children (dest op= src1)
10040
// }
10041
// else if (dest.obj == src1.obj || dest.obj == src2.obj)
10042
// {
10043
// // the destination may overlap with src1 or src2.
10044
// <go to the original loop>
10045
// }
10046
// else
10047
// {
10048
// // We can guarantee the destination NEVER overlaps with src1 or src2.
10049
// bitOpMem(dest.addr, src1.addr, src2.addr, len); // four children (dest = src1 op src2)
10050
// }
10051
bool
10052
CISCTransform2BitOpMem(TR_CISCTransformer *trans)
10053
{
10054
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
10055
const bool disptrace = DISPTRACE(trans);
10056
TR::Node *trNode;
10057
TR::TreeTop *trTreeTop;
10058
TR::Block *block;
10059
TR_CISCGraph *P = trans->getP();
10060
List<TR_CISCNode> *P2T = trans->getP2T();
10061
TR::Compilation *comp = trans->comp();
10062
bool ctrl = trans->isGenerateI2L();
10063
10064
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
10065
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
10066
10067
trans->findFirstNode(&trTreeTop, &trNode, &block);
10068
if (!block) return false; // cannot find
10069
10070
if (isLoopPreheaderLastBlockInMethod(comp, block))
10071
{
10072
traceMsg(comp, "Bailing CISCTransform2BitOpMem due to null TT - might be a preheader in last block of method\n");
10073
return false;
10074
}
10075
10076
TR::Block *target = trans->analyzeSuccessorBlock();
10077
// Currently, it allows only a single successor.
10078
if (!target) return false;
10079
10080
//
10081
// obtain a CISCNode of each store for incrementing induction variables
10082
TR_CISCNode *src1CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(0));
10083
TR_CISCNode *src2CISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(1));
10084
if (!src1CISCNode || !src2CISCNode || src1CISCNode == src2CISCNode) return false;
10085
TR_CISCNode *destCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
10086
if (!destCISCNode) return false;
10087
TR_CISCNode *opCISCNode = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
10088
TR_ASSERT(opCISCNode, "error");
10089
TR::Node * inSrc1Node = src1CISCNode->getHeadOfTrNodeInfo()->_node;
10090
TR::Node * inSrc2Node = src2CISCNode->getHeadOfTrNodeInfo()->_node;
10091
TR::Node * inDestNode = destCISCNode->getHeadOfTrNodeInfo()->_node;
10092
TR::Node * inputNode1 = inSrc1Node->getChild(0);
10093
TR::Node * inputNode2 = inSrc2Node->getChild(0);
10094
TR::Node * outputNode = inDestNode->getChild(0);
10095
10096
TR::Node * mulFactorNode;
10097
int elementSize;
10098
10099
// Get the size of elements
10100
if (!getMultiplier(trans, P->getImportantNode(7), &mulFactorNode, &elementSize, inSrc1Node->getType())) return false;
10101
if (elementSize != inSrc1Node->getSize() || elementSize != inSrc2Node->getSize())
10102
{
10103
traceMsg(comp, "CISCTransform2BitOpMem failed - Size Mismatch. Element Size: %d InSrc1Size: %d inSrc2Size: %d\n", elementSize, inSrc1Node->getSize(), inSrc2Node->getSize());
10104
return false; // Size is mismatch!
10105
}
10106
10107
TR_CISCNode *storeSrc1 = trans->getP2TRepInLoop(P->getImportantNode(4));
10108
TR_CISCNode *storeSrc2 = trans->getP2TRepInLoop(P->getImportantNode(5));
10109
TR_CISCNode *storeDest = trans->getP2TRepInLoop(P->getImportantNode(6));
10110
10111
// check the indices used in the array loads and
10112
// the store nodes
10113
//
10114
List<TR::Node> storeList(comp->trMemory());
10115
TR_ASSERT(storeSrc1, "error");
10116
storeList.add(storeSrc1->getHeadOfTrNode());
10117
if (storeSrc2 && storeSrc2 != storeSrc1) storeList.add(storeSrc2->getHeadOfTrNode());
10118
if (storeDest && storeDest != storeSrc1) storeList.add(storeDest->getHeadOfTrNode());
10119
if (!isIndexVariableInList(inSrc1Node, &storeList) ||
10120
!isIndexVariableInList(inSrc2Node, &storeList) ||
10121
!isIndexVariableInList(inDestNode, &storeList))
10122
{
10123
dumpOptDetails(comp, "indices used in array loads %p, %p, and %p are not consistent with the induction varaible updates\n", inSrc1Node, inSrc2Node, inDestNode);
10124
return false;
10125
}
10126
10127
TR::Node *src1IdxRepNode, *src2IdxRepNode, *destIdxRepNode, *src1BaseRepNode, *src2BaseRepNode, *destBaseRepNode, *variableORconstRepNode;
10128
getP2TTrRepNodes(trans, &src1IdxRepNode, &src2IdxRepNode, &destIdxRepNode, &src1BaseRepNode, &src2BaseRepNode, &destBaseRepNode, &variableORconstRepNode);
10129
TR_ASSERT(src1IdxRepNode != 0, "error");
10130
TR::SymbolReference * src1IdxSymRef = src1IdxRepNode->getSymbolReference();
10131
TR::SymbolReference * src2IdxSymRef = 0;
10132
TR::SymbolReference * destIdxSymRef = 0;
10133
if (src2IdxRepNode)
10134
src2IdxSymRef = src2IdxRepNode->getSymbolReference();
10135
if (destIdxRepNode)
10136
destIdxSymRef = destIdxRepNode->getSymbolReference();
10137
if (src1IdxSymRef == destIdxSymRef) destIdxSymRef = 0;
10138
if (src1IdxSymRef == src2IdxSymRef) src2IdxSymRef = 0;
10139
if (trans->countGoodArrayIndex(src1IdxSymRef) == 0) return false;
10140
if (src2IdxSymRef && (trans->countGoodArrayIndex(src2IdxSymRef) == 0)) return false;
10141
if (destIdxSymRef && (trans->countGoodArrayIndex(destIdxSymRef) == 0)) return false;
10142
TR::Node *startSrc1Idx, *endSrc1Idx, *diff2;
10143
endSrc1Idx = convertStoreToLoad(comp, variableORconstRepNode);
10144
startSrc1Idx = convertStoreToLoad(comp, src1IdxRepNode);
10145
diff2 = createOP2(comp, TR::isub, endSrc1Idx, startSrc1Idx);
10146
TR::Node * elementSizeNode = NULL;
10147
10148
TR::Node * lengthNode = createI2LIfNecessary(comp, trans->isGenerateI2L(), diff2);
10149
if (elementSize > 1)
10150
{
10151
TR::ILOpCodes mulOp = TR::imul;
10152
if (comp->target().is64Bit())
10153
{
10154
elementSizeNode = TR::Node::create(mulFactorNode, TR::lconst);
10155
elementSizeNode->setLongInt(elementSize);
10156
mulOp = TR::lmul;
10157
}
10158
else
10159
elementSizeNode = TR::Node::create(mulFactorNode, TR::iconst, 0, elementSize);
10160
lengthNode = TR::Node::create(mulOp, 2,
10161
lengthNode,
10162
elementSizeNode);
10163
}
10164
10165
TR::Node * src1Update = TR::Node::createStore(src1IdxSymRef, endSrc1Idx->duplicateTree());
10166
TR::Node * destUpdate = NULL;
10167
if (destIdxSymRef != NULL && src1IdxSymRef != destIdxSymRef)
10168
{
10169
// If there are two induction variables, we need to maintain the other one.
10170
TR::Node * result = createOP2(comp, TR::iadd,
10171
TR::Node::createLoad(trNode, destIdxSymRef),
10172
diff2->duplicateTree());
10173
destUpdate = TR::Node::createStore(destIdxSymRef, result);
10174
}
10175
TR::Node * src2Update = NULL;
10176
if (src2IdxSymRef != NULL && src2IdxSymRef != destIdxSymRef && src2IdxSymRef != src1IdxSymRef)
10177
{
10178
// If there are three induction variables, we need to maintain the other one.
10179
TR::Node * result = createOP2(comp, TR::iadd,
10180
TR::Node::createLoad(trNode, src2IdxSymRef),
10181
diff2->duplicateTree());
10182
src2Update = TR::Node::createStore(src2IdxSymRef, result);
10183
}
10184
10185
TR::Node * bitOpMem = NULL;
10186
if (outputNode == inputNode1 || outputNode == inputNode2)
10187
{
10188
bitOpMem = TR::Node::create(TR::bitOpMem, 3,
10189
outputNode->duplicateTree(),
10190
(outputNode == inputNode1 ? inputNode2 : inputNode1)->duplicateTree(),
10191
lengthNode);
10192
bitOpMem->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());
10193
setSubopBitOpMem(comp, bitOpMem, opCISCNode);
10194
}
10195
10196
//********************
10197
// Modify actual code
10198
//********************
10199
if (bitOpMem)
10200
{ // src1 or src2 is equal to dest
10201
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, lengthNode->duplicateTree());
10202
block = trans->insertBeforeNodes(block);
10203
block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem)));
10204
}
10205
else
10206
{
10207
TR::CFG *cfg = comp->getFlowGraph();
10208
cfg->setStructure(0);
10209
TR::Block *slowpad;
10210
TR::Block *orgPrevBlock = 0;
10211
TR::Block *checkSrc1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10212
TR::Block *fastpath1 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10213
TR::Block *checkSrc2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10214
TR::Block *fastpath2 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10215
TR::Block *checkSrc3 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10216
TR::Block *checkSrc4 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10217
TR::Block *fastpath3 = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10218
TR::Block *lastpath = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10219
10220
// find orgPrevBlock and slowpad
10221
if (block->getFirstRealTreeTop() == trTreeTop)
10222
{
10223
// search the entry pad
10224
orgPrevBlock = trans->searchPredecessorOfBlock(block);
10225
}
10226
10227
slowpad = block;
10228
if (!orgPrevBlock)
10229
{
10230
orgPrevBlock = block;
10231
slowpad = block->split(trTreeTop, cfg, true);
10232
}
10233
10234
// checkSrc1: if (dest.addr != src1.addr) goto checkSrc2
10235
TR::SymbolReference *destAddrSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);
10236
TR::SymbolReference *src1AddrSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);
10237
TR::SymbolReference *src2AddrSymRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR::Address, true);
10238
TR::Node *destStore = TR::Node::createStore(destAddrSymRef, outputNode->duplicateTree());
10239
TR::Node *src1Store = TR::Node::createStore(src1AddrSymRef, inputNode1->duplicateTree());
10240
TR::Node *src2Store = TR::Node::createStore(src2AddrSymRef, inputNode2->duplicateTree());
10241
10242
setPinningArray(comp, destStore, destBaseRepNode, checkSrc1);
10243
setPinningArray(comp, src1Store, src1BaseRepNode, checkSrc1);
10244
setPinningArray(comp, src2Store, src2BaseRepNode, checkSrc1);
10245
10246
checkSrc1->append(TR::TreeTop::create(comp, destStore));
10247
checkSrc1->append(TR::TreeTop::create(comp, src1Store));
10248
checkSrc1->append(TR::TreeTop::create(comp, src2Store));
10249
checkSrc1->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpne,
10250
TR::Node::createLoad(trNode, destAddrSymRef),
10251
TR::Node::createLoad(trNode, src1AddrSymRef),
10252
checkSrc2->getEntry())));
10253
10254
// fastpath1: bitOpMem(dest, src2, length); goto lastpath;
10255
TR::Node *bitOpMem1 = TR::Node::create(TR::bitOpMem, 3,
10256
TR::Node::createLoad(trNode, destAddrSymRef),
10257
TR::Node::createLoad(trNode, src2AddrSymRef),
10258
lengthNode->duplicateTree());
10259
bitOpMem1->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());
10260
setSubopBitOpMem(comp, bitOpMem1, opCISCNode);
10261
///fastpath1 = trans->insertBeforeNodes(fastpath1);
10262
fastpath1->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem1)));
10263
fastpath1->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, lastpath->getEntry())));
10264
10265
// checkSrc2: if (dest.addr != src2.addr) goto checkSrc3
10266
checkSrc2->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpne,
10267
TR::Node::createLoad(trNode, destAddrSymRef),
10268
TR::Node::createLoad(trNode, src2AddrSymRef),
10269
checkSrc3->getEntry())));
10270
10271
// fastpath2: bitOpMem(dest, src1, length); goto lastpath;
10272
TR::Node *bitOpMem2 = TR::Node::create(TR::bitOpMem, 3,
10273
TR::Node::createLoad(trNode, destAddrSymRef),
10274
TR::Node::createLoad(trNode, src1AddrSymRef),
10275
lengthNode->duplicateTree());
10276
bitOpMem2->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());
10277
setSubopBitOpMem(comp, bitOpMem2, opCISCNode);
10278
///fastpath2 = trans->insertBeforeNodes(fastpath2);
10279
fastpath2->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem2)));
10280
fastpath2->append(TR::TreeTop::create(comp, TR::Node::create(trNode, TR::Goto, 0, lastpath->getEntry())));
10281
10282
// checkSrc3: if (dest.obj == src1.obj) goto slowpad
10283
checkSrc3->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpeq,
10284
createLoad(destBaseRepNode),
10285
createLoad(src1BaseRepNode),
10286
slowpad->getEntry())));
10287
10288
// checkSrc4: if (dest.obj == src2.obj) goto slowpad
10289
checkSrc4->append(TR::TreeTop::create(comp, TR::Node::createif(TR::ifacmpeq,
10290
createLoad(destBaseRepNode),
10291
createLoad(src2BaseRepNode),
10292
slowpad->getEntry())));
10293
10294
// fastpath3: bitOpMem(dest, src1, src2, length);
10295
// We can guarantee the destination NEVER overlaps to src1 or src2.
10296
bitOpMem = TR::Node::create(TR::bitOpMem, 4,
10297
TR::Node::createLoad(trNode, destAddrSymRef),
10298
TR::Node::createLoad(trNode, src1AddrSymRef),
10299
TR::Node::createLoad(trNode, src2AddrSymRef),
10300
lengthNode->duplicateTree());
10301
bitOpMem->setSymbolReference(comp->getSymRefTab()->findOrCreatebitOpMemSymbol());
10302
setSubopBitOpMem(comp, bitOpMem, opCISCNode);
10303
///fastpath3 = trans->insertBeforeNodes(fastpath3);
10304
fastpath3->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, bitOpMem)));
10305
10306
// Insert new blocks
10307
TR::TreeTop * orgPrevTreeTop = orgPrevBlock->getExit();
10308
TR::Node *lastOrgPrevRealNode = orgPrevBlock->getLastRealTreeTop()->getNode();
10309
TR::TreeTop * orgNextTreeTop = orgPrevTreeTop->getNextTreeTop();
10310
if (orgNextTreeTop)
10311
{
10312
TR::Block * orgNextBlock = orgNextTreeTop->getNode()->getBlock();
10313
cfg->insertBefore(lastpath, orgNextBlock);
10314
}
10315
else
10316
{
10317
cfg->addNode(lastpath);
10318
}
10319
cfg->insertBefore(fastpath3, lastpath);
10320
cfg->insertBefore(checkSrc4, fastpath3);
10321
cfg->insertBefore(checkSrc3, checkSrc4);
10322
cfg->insertBefore(fastpath2, checkSrc3);
10323
cfg->insertBefore(checkSrc2, fastpath2);
10324
cfg->insertBefore(fastpath1, checkSrc2);
10325
cfg->insertBefore(checkSrc1, fastpath1);
10326
10327
TR::Block *extraBlock = NULL;
10328
if (!trans->isEmptyBeforeInsertionList())
10329
{
10330
extraBlock = TR::Block::createEmptyBlock(trNode, comp, block->getFrequency(), block);
10331
cfg->insertBefore(extraBlock, checkSrc1);
10332
orgPrevTreeTop->join(extraBlock->getEntry());
10333
cfg->addEdge(orgPrevBlock, extraBlock);
10334
TR::Block *newBlock = trans->insertBeforeNodes(extraBlock);
10335
}
10336
else
10337
{
10338
orgPrevTreeTop->join(checkSrc1->getEntry());
10339
cfg->addEdge(orgPrevBlock, checkSrc1);
10340
}
10341
cfg->removeEdge(orgPrevBlock, slowpad);
10342
block = lastpath;
10343
10344
if (disptrace) traceMsg(comp, "CISCTransform2BitOpMem: orgPrevBlock=%d checkSrc1=%d lastpath=%d slowpad=%d orgNextTreeTop=%x\n",
10345
orgPrevBlock->getNumber(), checkSrc1->getNumber(), lastpath->getNumber(), slowpad->getNumber(), orgNextTreeTop);
10346
10347
if (lastOrgPrevRealNode->getOpCode().getOpCodeValue() == TR::Goto)
10348
{
10349
TR_ASSERT(lastOrgPrevRealNode->getBranchDestination() == slowpad->getEntry(), "Error");
10350
if (!extraBlock)
10351
lastOrgPrevRealNode->setBranchDestination(checkSrc1->getEntry());
10352
else
10353
lastOrgPrevRealNode->setBranchDestination(extraBlock->getEntry());
10354
}
10355
}
10356
10357
if (src2Update) block->append(TR::TreeTop::create(comp, src2Update));
10358
if (destUpdate) block->append(TR::TreeTop::create(comp, destUpdate));
10359
// Original value of first induction variable used in the updates of the two induction variables above
10360
// Update this one last
10361
block->append(TR::TreeTop::create(comp, src1Update));
10362
10363
trans->insertAfterNodes(block);
10364
10365
trans->setSuccessorEdge(block, target);
10366
return true;
10367
}
10368
10369
/****************************************************************************************
10370
Corresponding Java-like Pseudo Program
10371
int v1, v2, end;
10372
v3[ ], v4[ ], v5[ ];
10373
while(true){
10374
v5[v2] = v3[v1] op v4[v1]; // op will match one of AND, OR, and XOR operations.
10375
v1++;
10376
v2++;
10377
if (v1 >= end) break;
10378
}
10379
10380
Note 1: Variables v1 and v2 are allowed to be identical.
10381
****************************************************************************************/
10382
TR_PCISCGraph *
makeBitOpMemGraph(TR::Compilation *c, int32_t ctrl)
   {
   // Builds the persistent pattern graph named "BitOpMem" and registers
   // CISCTransform2BitOpMem as its transformer. Nodes are created in a fixed
   // order because each one takes its id from graph->incNumNodes().
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "BitOpMem", 0, 16);

   // TR_PCISCNode argument legend:  opc  id  dagId  #cfg  #child  other/pred/children

   // --- variables, array bases and constants (one dagId each, 16 down to 3) ---
   TR_PCISCNode *src1Index = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 16, 0, 0, 0);
   graph->addNode(src1Index);   // array index for src1
   TR_PCISCNode *src2Index = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 15, 0, 0, 1);
   graph->addNode(src2Index);   // array index for src2
   TR_PCISCNode *destIndex = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 14, 0, 0, 2);
   graph->addNode(destIndex);   // array index for dest
   TR_PCISCNode *src1Base = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 13, 0, 0, 0);
   graph->addNode(src1Base);    // src1 array base
   TR_PCISCNode *src2Base = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 12, 0, 0, 1);
   graph->addNode(src2Base);    // src2 array base
   TR_PCISCNode *destBase = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 11, 0, 0, 2);
   graph->addNode(destBase);    // dest array base
   TR_PCISCNode *loopLimit = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 10, 0, 0);
   graph->addNode(loopLimit);   // length
   TR_PCISCNode *mulFactor = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 9, 0, 0);
   graph->addNode(mulFactor);   // Multiply Factor
   TR_PCISCNode *arrayIdx0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 8, 0, 0, 0);
   graph->addNode(arrayIdx0);
   TR_PCISCNode *arrayIdx1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 7, 0, 0, 1);
   graph->addNode(arrayIdx1);
   TR_PCISCNode *arrayIdx2 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_arrayindex, TR::NoType, graph->incNumNodes(), 6, 0, 0, 2);
   graph->addNode(arrayIdx2);
   TR_PCISCNode *headerConst0 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 5, 0, 0, 0);
   graph->addNode(headerConst0);   // array header
   TR_PCISCNode *headerConst1 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 4, 0, 0, 1);
   graph->addNode(headerConst1);   // array header
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
   graph->addNode(minusOne);
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);

   // --- loop body (dagId 1): store address, two loads, bit operation, store ---
   TR_PCISCNode *storeAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, entryNode, destBase, arrayIdx0, headerConst1, mulFactor);
   TR_PCISCNode *firstLoad = createIdiomArrayLoadInLoop(graph, ctrl, 1, storeAddr, TR_indload, TR::NoType, src1Base, arrayIdx1, headerConst0, mulFactor);
   TR_PCISCNode *firstConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, firstLoad, firstLoad);
   graph->addNode(firstConv);   // optional
   TR_PCISCNode *secondLoad = createIdiomArrayLoadInLoop(graph, ctrl, 1, firstConv, TR_indload, TR::NoType, src2Base, arrayIdx2, headerConst0, mulFactor);
   TR_PCISCNode *secondConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, secondLoad, secondLoad);
   graph->addNode(secondConv);  // optional
   TR_PCISCNode *bitOp = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_bitop1, TR::NoType, graph->incNumNodes(), 1, 1, 2, secondConv, firstConv, secondConv);
   graph->addNode(bitOp);
   TR_PCISCNode *resultConv = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_conversion, TR::NoType, graph->incNumNodes(), 1, 1, 1, bitOp, bitOp);
   graph->addNode(resultConv);  // optional
   TR_PCISCNode *storeNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_indstore, TR::NoType, graph->incNumNodes(), 1, 1, 2, resultConv, storeAddr, resultConv);
   graph->addNode(storeNode);

   // --- the three variable updates, then the exit test ---
   TR_PCISCNode *update1 = createIdiomDecVarInLoop(graph, ctrl, 1, storeNode, src1Index, minusOne);
   TR_PCISCNode *update2 = createIdiomDecVarInLoop(graph, ctrl, 1, update1, src2Index, minusOne);
   TR_PCISCNode *update3 = createIdiomDecVarInLoop(graph, ctrl, 1, update2, destIndex, minusOne);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ificmpge, TR::NoType, graph->incNumNodes(), 1, 2, 2, update3, src1Index, loopLimit);
   graph->addNode(exitTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // Successors of the exit test: the entry node's first successor, and the exit node.
   exitTest->setSuccs(entryNode->getSucc(0), exitNode);

   // The three conversion nodes are marked optional.
   firstConv->setIsOptionalNode();
   secondConv->setIsOptionalNode();
   resultConv->setIsOptionalNode();

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(firstLoad, secondLoad, storeNode, bitOp, update1, update2, update3, mulFactor);
   graph->setNumDagIds(17);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2BitOpMem);
   graph->setAspects(isub|mul|sameTypeLoadStore|bitop1, existAccess, existAccess);
   graph->setNoAspects(call|bndchk, 0, 0);
   graph->setMinCounts(1, 2, 1);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setInhibitBeforeVersioning();
   graph->setHotness(warm, false);
   return graph;
   }
10437
10438
10439
10440
//////////////////////////////////////////////////////////////////////////
10441
//////////////////////////////////////////////////////////////////////////
10442
//////////////////////////////////////////////////////////////////////////
10443
// Counts the number of decimal digits (the '-' (minus) character is not counted)
10444
//
10445
// e.g. do { count ++; } while((l /= 10) != 0);
10446
//
10447
// numDigit = countdigit10(int i, void *work)
10448
// numDigit = countdigit10(long i, void *work)
10449
//
10450
// Use of work area depends on each platform. (e.g. 16 bytes for zSeries)
10451
// The work area for some platforms may be NULL.
10452
// Table of negated powers of ten: entry k holds -(10^(k+1)).
// Entries 0..8 are always present; entries 9..17 are only compiled in when
// TR_TARGET_64BIT is defined.
static const int64_t digit10Table[] =
   {
   -10L,                     //  0
   -100L,                    //  1
   -1000L,                   //  2
   -10000L,                  //  3
   -100000L,                 //  4
   -1000000L,                //  5
   -10000000L,               //  6
   -100000000L,              //  7
   -1000000000L,             //  8 (32-bit)
#if defined(TR_TARGET_64BIT)
   -10000000000L,            //  9
   -100000000000L,           // 10
   -1000000000000L,          // 11
   -10000000000000L,         // 12
   -100000000000000L,        // 13
   -1000000000000000L,       // 14
   -10000000000000000L,      // 15
   -100000000000000000L,     // 16
   -1000000000000000000L     // 17 (64-bit)
#endif
   };
10475
10476
#if 0
// NOTE: everything from here to the matching #endif is excluded from
// compilation by the `#if 0` above; nothing in the visible code refers to it.
10477
// One row of the (disabled) ppcDigit10Table below.
struct ppcDigit10TableEnt
10478
{
10479
int32_t digits;
10480
uint32_t limit; // 10^digits-1
10481
uint64_t limitLong; // 10^digits-1
10482
};
10483
10484
// For CountDecimalDigitInt, use ppcDigit10Table[32..64]
// NOTE(review): judging by the zeros32/zeros64 columns in the header comment
// below, rows look like they are indexed by a leading-zero count, and the
// trailing '*' lines up with the "incr" column; neither is confirmed by any
// code visible here -- verify before re-enabling this table.
10485
static const struct ppcDigit10TableEnt ppcDigit10Table[64 + 1] =
10486
{
10487
//digits limit limitLong zeros32 zeros64 min max limit limitLong incr
10488
//---------------------------------------------------------------------------------------------------------------------------
10489
{19, 0u, 9999999999999999999llu}, // 0 0 [0x8000000000000000,0xffffffffffffffff] 0x00000000 0x8ac7230489e7ffff *
10490
{19, 0u, 9999999999999999999llu}, // 0 1 [0x4000000000000000,0x7fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
10491
{19, 0u, 9999999999999999999llu}, // 0 2 [0x2000000000000000,0x3fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
10492
{19, 0u, 9999999999999999999llu}, // 0 3 [0x1000000000000000,0x1fffffffffffffff] 0x00000000 0x8ac7230489e7ffff
10493
{18, 0u, 999999999999999999llu}, // 0 4 [0x0800000000000000,0x0fffffffffffffff] 0x00000000 0x0de0b6b3a763ffff *
10494
{18, 0u, 999999999999999999llu}, // 0 5 [0x0400000000000000,0x07ffffffffffffff] 0x00000000 0x0de0b6b3a763ffff
10495
{18, 0u, 999999999999999999llu}, // 0 6 [0x0200000000000000,0x03ffffffffffffff] 0x00000000 0x0de0b6b3a763ffff
10496
{17, 0u, 99999999999999999llu}, // 0 7 [0x0100000000000000,0x01ffffffffffffff] 0x00000000 0x016345785d89ffff *
10497
{17, 0u, 99999999999999999llu}, // 0 8 [0x0080000000000000,0x00ffffffffffffff] 0x00000000 0x016345785d89ffff
10498
{17, 0u, 99999999999999999llu}, // 0 9 [0x0040000000000000,0x007fffffffffffff] 0x00000000 0x016345785d89ffff
10499
{16, 0u, 9999999999999999llu}, // 0 10 [0x0020000000000000,0x003fffffffffffff] 0x00000000 0x002386f26fc0ffff *
10500
{16, 0u, 9999999999999999llu}, // 0 11 [0x0010000000000000,0x001fffffffffffff] 0x00000000 0x002386f26fc0ffff
10501
{16, 0u, 9999999999999999llu}, // 0 12 [0x0008000000000000,0x000fffffffffffff] 0x00000000 0x002386f26fc0ffff
10502
{16, 0u, 9999999999999999llu}, // 0 13 [0x0004000000000000,0x0007ffffffffffff] 0x00000000 0x002386f26fc0ffff
10503
{15, 0u, 999999999999999llu}, // 0 14 [0x0002000000000000,0x0003ffffffffffff] 0x00000000 0x00038d7ea4c67fff *
10504
{15, 0u, 999999999999999llu}, // 0 15 [0x0001000000000000,0x0001ffffffffffff] 0x00000000 0x00038d7ea4c67fff
10505
{15, 0u, 999999999999999llu}, // 0 16 [0x0000800000000000,0x0000ffffffffffff] 0x00000000 0x00038d7ea4c67fff
10506
{14, 0u, 99999999999999llu}, // 0 17 [0x0000400000000000,0x00007fffffffffff] 0x00000000 0x00005af3107a3fff *
10507
{14, 0u, 99999999999999llu}, // 0 18 [0x0000200000000000,0x00003fffffffffff] 0x00000000 0x00005af3107a3fff
10508
{14, 0u, 99999999999999llu}, // 0 19 [0x0000100000000000,0x00001fffffffffff] 0x00000000 0x00005af3107a3fff
10509
{13, 0u, 9999999999999llu}, // 0 20 [0x0000080000000000,0x00000fffffffffff] 0x00000000 0x000009184e729fff *
10510
{13, 0u, 9999999999999llu}, // 0 21 [0x0000040000000000,0x000007ffffffffff] 0x00000000 0x000009184e729fff
10511
{13, 0u, 9999999999999llu}, // 0 22 [0x0000020000000000,0x000003ffffffffff] 0x00000000 0x000009184e729fff
10512
{13, 0u, 9999999999999llu}, // 0 23 [0x0000010000000000,0x000001ffffffffff] 0x00000000 0x000009184e729fff
10513
{12, 0u, 999999999999llu}, // 0 24 [0x0000008000000000,0x000000ffffffffff] 0x00000000 0x000000e8d4a50fff *
10514
{12, 0u, 999999999999llu}, // 0 25 [0x0000004000000000,0x0000007fffffffff] 0x00000000 0x000000e8d4a50fff
10515
{12, 0u, 999999999999llu}, // 0 26 [0x0000002000000000,0x0000003fffffffff] 0x00000000 0x000000e8d4a50fff
10516
{11, 0u, 99999999999llu}, // 0 27 [0x0000001000000000,0x0000001fffffffff] 0x00000000 0x000000174876e7ff *
10517
{11, 0u, 99999999999llu}, // 0 28 [0x0000000800000000,0x0000000fffffffff] 0x00000000 0x000000174876e7ff
10518
{11, 0u, 99999999999llu}, // 0 29 [0x0000000400000000,0x00000007ffffffff] 0x00000000 0x000000174876e7ff
10519
{10, 0u, 9999999999llu}, // 0 30 [0x0000000200000000,0x00000003ffffffff] 0x00000000 0x00000002540be3ff *
10520
{10, 0u, 9999999999llu}, // 0 31 [0x0000000100000000,0x00000001ffffffff] 0x00000000 0x00000002540be3ff
10521
{10, 4294967295u, 9999999999llu}, // 0 32 [0x0000000080000000,0x00000000ffffffff] 0xffffffff 0x00000002540be3ff
10522
{10, 4294967295u, 9999999999llu}, // 1 33 [0x0000000040000000,0x000000007fffffff] 0xffffffff 0x00000002540be3ff
10523
{ 9, 999999999u, 999999999llu}, // 2 34 [0x0000000020000000,0x000000003fffffff] 0x3b9ac9ff 0x000000003b9ac9ff *
10524
{ 9, 999999999u, 999999999llu}, // 3 35 [0x0000000010000000,0x000000001fffffff] 0x3b9ac9ff 0x000000003b9ac9ff
10525
{ 9, 999999999u, 999999999llu}, // 4 36 [0x0000000008000000,0x000000000fffffff] 0x3b9ac9ff 0x000000003b9ac9ff
10526
{ 8, 99999999u, 99999999llu}, // 5 37 [0x0000000004000000,0x0000000007ffffff] 0x05f5e0ff 0x0000000005f5e0ff *
10527
{ 8, 99999999u, 99999999llu}, // 6 38 [0x0000000002000000,0x0000000003ffffff] 0x05f5e0ff 0x0000000005f5e0ff
10528
{ 8, 99999999u, 99999999llu}, // 7 39 [0x0000000001000000,0x0000000001ffffff] 0x05f5e0ff 0x0000000005f5e0ff
10529
{ 7, 9999999u, 9999999llu}, // 8 40 [0x0000000000800000,0x0000000000ffffff] 0x0098967f 0x000000000098967f *
10530
{ 7, 9999999u, 9999999llu}, // 9 41 [0x0000000000400000,0x00000000007fffff] 0x0098967f 0x000000000098967f
10531
{ 7, 9999999u, 9999999llu}, // 10 42 [0x0000000000200000,0x00000000003fffff] 0x0098967f 0x000000000098967f
10532
{ 7, 9999999u, 9999999llu}, // 11 43 [0x0000000000100000,0x00000000001fffff] 0x0098967f 0x000000000098967f
10533
{ 6, 999999u, 999999llu}, // 12 44 [0x0000000000080000,0x00000000000fffff] 0x000f423f 0x00000000000f423f *
10534
{ 6, 999999u, 999999llu}, // 13 45 [0x0000000000040000,0x000000000007ffff] 0x000f423f 0x00000000000f423f
10535
{ 6, 999999u, 999999llu}, // 14 46 [0x0000000000020000,0x000000000003ffff] 0x000f423f 0x00000000000f423f
10536
{ 5, 99999u, 99999llu}, // 15 47 [0x0000000000010000,0x000000000001ffff] 0x0001869f 0x000000000001869f *
10537
{ 5, 99999u, 99999llu}, // 16 48 [0x0000000000008000,0x000000000000ffff] 0x0001869f 0x000000000001869f
10538
{ 5, 99999u, 99999llu}, // 17 49 [0x0000000000004000,0x0000000000007fff] 0x0001869f 0x000000000001869f
10539
{ 4, 9999u, 9999llu}, // 18 50 [0x0000000000002000,0x0000000000003fff] 0x0000270f 0x000000000000270f *
10540
{ 4, 9999u, 9999llu}, // 19 51 [0x0000000000001000,0x0000000000001fff] 0x0000270f 0x000000000000270f
10541
{ 4, 9999u, 9999llu}, // 20 52 [0x0000000000000800,0x0000000000000fff] 0x0000270f 0x000000000000270f
10542
{ 4, 9999u, 9999llu}, // 21 53 [0x0000000000000400,0x00000000000007ff] 0x0000270f 0x000000000000270f
10543
{ 3, 999u, 999llu}, // 22 54 [0x0000000000000200,0x00000000000003ff] 0x000003e7 0x00000000000003e7 *
10544
{ 3, 999u, 999llu}, // 23 55 [0x0000000000000100,0x00000000000001ff] 0x000003e7 0x00000000000003e7
10545
{ 3, 999u, 999llu}, // 24 56 [0x0000000000000080,0x00000000000000ff] 0x000003e7 0x00000000000003e7
10546
{ 2, 99u, 99llu}, // 25 57 [0x0000000000000040,0x000000000000007f] 0x00000063 0x0000000000000063 *
10547
{ 2, 99u, 99llu}, // 26 58 [0x0000000000000020,0x000000000000003f] 0x00000063 0x0000000000000063
10548
{ 2, 99u, 99llu}, // 27 59 [0x0000000000000010,0x000000000000001f] 0x00000063 0x0000000000000063
10549
{ 1, 9u, 9llu}, // 28 60 [0x0000000000000008,0x000000000000000f] 0x00000009 0x0000000000000009 *
10550
{ 1, 9u, 9llu}, // 29 61 [0x0000000000000004,0x0000000000000007] 0x00000009 0x0000000000000009
10551
{ 1, 9u, 9llu}, // 30 62 [0x0000000000000002,0x0000000000000003] 0x00000009 0x0000000000000009
10552
{ 1, 9u, 9llu}, // 31 63 [0x0000000000000001,0x0000000000000001] 0x00000009 0x0000000000000009
10553
{ 1, 9u, 9llu}, // 32 64 [0x0000000000000000,0xffffffffffffffff] 0x00000009 0x0000000000000009 *
10554
};
10555
#endif
10556
10557
// Returns a known-static-data symbol reference (TR::Address) for digit10Table
// when compiling for a Z target, and NULL for every other target.
// trNode is accepted but not used.
static TR::SymbolReference *
getSymrefDigit10(TR::Compilation *comp, TR::Node *trNode)
   {
   if (!comp->target().cpu.isZ())
      return NULL;

   TR::SymbolReferenceTable *symRefTab = comp->getSymRefTab();
   return symRefTab->createKnownStaticDataSymbolRef((void *)digit10Table, TR::Address);
   }
10567
10568
// Creates the node that supplies the digit table: a TR::loadaddr of the symbol
// reference returned by getSymrefDigit10(), or a TR::aconst 0 when that
// function returns NULL. trNode must be of type Int32 or Int64 (asserted).
static TR::Node *
createNodeLoadDigit10Table(TR::Compilation *comp, TR::Node *trNode)
   {
   TR_ASSERT(trNode->getDataType() == TR::Int32 || trNode->getDataType() == TR::Int64, "Unexpected datatype for trNode for CountDigits10.");

   TR::SymbolReference *tableSymRef = getSymrefDigit10(comp, trNode);
   if (tableSymRef)
      return TR::Node::createWithSymRef(trNode, TR::loadaddr, 0, tableSymRef);

   return TR::Node::create(trNode, TR::aconst, 0, 0);
   }
10576
10577
//*****************************************************************************************
10578
// IL code generation for counting digits
10579
// The IL node TR::countDigits will find the number of digits by using a binary search, which
10580
// uses the above table "digit10Table".
10581
//
10582
// Input: ImportantNode(0) - if node
10583
//*****************************************************************************************
10584
bool
10585
CISCTransform2CountDecimalDigit(TR_CISCTransformer *trans)
10586
{
10587
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
10588
const bool disptrace = DISPTRACE(trans);
10589
TR::Node *trNode;
10590
TR::TreeTop *trTreeTop;
10591
TR::Block *block;
10592
TR_CISCGraph *P = trans->getP();
10593
List<TR_CISCNode> *P2T = trans->getP2T();
10594
TR::Compilation *comp = trans->comp();
10595
bool ctrl = trans->isGenerateI2L();
10596
10597
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
10598
10599
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
10600
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
10601
10602
trans->findFirstNode(&trTreeTop, &trNode, &block);
10603
if (!block) return false; // cannot find
10604
10605
if (isLoopPreheaderLastBlockInMethod(comp, block))
10606
{
10607
traceMsg(comp, "Bailing CISCTransform2CountDecimalDigit due to null TT - might be a preheader in last block of method\n");
10608
return false;
10609
}
10610
10611
TR::Block *target = trans->analyzeSuccessorBlock();
10612
// Currently, it allows only a single successor.
10613
if (!target) return false;
10614
TR_CISCNode *ifcmp = trans->getP2TInLoopIfSingle(P->getImportantNode(0));
10615
TR_ASSERT(ifcmp, "error!");
10616
TR_CISCNode *constNode = ifcmp->getChild(1);
10617
if (!constNode->getIlOpCode().isLoadConst())
10618
{
10619
if (disptrace) traceMsg(comp, "%p is not isLoadConst().\n",constNode);
10620
return false;
10621
}
10622
10623
TR::Node *countVarRepNode, *inputVarRepNode;
10624
getP2TTrRepNodes(trans, &countVarRepNode, &inputVarRepNode);
10625
TR::SymbolReference * countVarSymRef = countVarRepNode->getSymbolReference();
10626
TR::SymbolReference * inputVarSymRef = inputVarRepNode->getSymbolReference();
10627
TR::Node *countVar, *inputVar;
10628
TR::Node *workNode, *digitNode;
10629
countVar = createLoad(countVarRepNode);
10630
inputVar = createLoad(inputVarRepNode);
10631
10632
TR_ASSERT(inputVar->getDataType() == TR::Int32 || inputVar->getDataType() == TR::Int64, "error");
10633
10634
10635
// The countDigitsEvaluator does not handle long (register pairs) on 31-bit.
10636
if (inputVar->getDataType() == TR::Int64 && (!comp->target().cpu.isPower() && comp->target().is32Bit()))
10637
{
10638
return false;
10639
}
10640
10641
TR::Node *versionNode = 0;
10642
int modificationResult = 0;
10643
switch(ifcmp->getOpcode())
10644
{
10645
case TR::ificmpeq:
10646
case TR::iflcmpeq:
10647
if (constNode->getOtherInfo() != 0)
10648
{
10649
if (disptrace) traceMsg(comp, "The exit-if is TR::if*cmpeq but the constant value is %d.\n",constNode->getOtherInfo());
10650
return false;
10651
}
10652
break;
10653
case TR::ificmplt:
10654
case TR::iflcmplt:
10655
if (constNode->getOtherInfo() != 10)
10656
{
10657
if (disptrace) traceMsg(comp, "The exit-if is TR::if*cmplt but the constant value is %d.\n",constNode->getOtherInfo());
10658
return false;
10659
}
10660
versionNode = TR::Node::createif((TR::ILOpCodes)ifcmp->getOpcode(), inputVar->duplicateTree(),
10661
constNode->getHeadOfTrNode()->duplicateTree());
10662
modificationResult = -1;
10663
break;
10664
default:
10665
if (disptrace) traceMsg(comp, "The exit-if %p is not as expected. We may be able to implement this case.\n",ifcmp);
10666
return false;
10667
}
10668
10669
//workNode = createNodeLoadDigit10Table(comp, trNode);
10670
workNode = createNodeLoadDigit10Table(comp, inputVarRepNode);
10671
10672
digitNode = TR::Node::create(trNode, TR::countDigits, 2);
10673
digitNode->setAndIncChild(0, inputVar);
10674
digitNode->setAndIncChild(1, workNode);
10675
if (modificationResult != 0)
10676
{
10677
digitNode = createOP2(comp, TR::isub, digitNode,
10678
TR::Node::create(digitNode, TR::iconst, 0, -modificationResult));
10679
}
10680
10681
TR::Node *top = TR::Node::createStore(countVarSymRef,
10682
createOP2(comp, TR::iadd, countVar, digitNode));
10683
10684
// Insert nodes and maintain the CFG
10685
if (versionNode)
10686
{
10687
List<TR::Node> guardList(comp->trMemory());
10688
guardList.add(versionNode);
10689
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, &guardList);
10690
}
10691
else
10692
{
10693
block = trans->modifyBlockByVersioningCheck(block, trTreeTop, (List<TR::Node>*)0);
10694
}
10695
10696
block = trans->insertBeforeNodes(block);
10697
block->append(TR::TreeTop::create(comp, top));
10698
trans->insertAfterNodes(block);
10699
10700
trans->setSuccessorEdge(block, target);
10701
return true;
10702
}
10703
10704
/****************************************************************************************
10705
Corresponding Java-like Pseudo Program
10706
int v1;
10707
long v2;
10708
while(true){
10709
v1++;
10710
v2 = v2 / 10;
10711
if (v2 == 0) break;
10712
}
10713
10714
Note 1: This idiom supports both the division and the multiplication versions.
10715
****************************************************************************************/
10716
TR_PCISCGraph *
makeCountDecimalDigitLongGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)
   {
   // Builds the persistent pattern graph named "CountDecimalDigitLong".
   // isDiv2Mul selects the variant whose divide-by-ten appears as
   // lmulh/lshr (plus an optional lushr/ladd) instead of a plain ldiv.
   // Nodes are created in a fixed order because ids come from incNumNodes().
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CountDecimalDigitLong", 0, 16);

   // Nodes shared by the common tail; each branch below assigns all four.
   TR_PCISCNode *entryNode = NULL;
   TR_PCISCNode *longVar = NULL;
   TR_PCISCNode *exitConst = NULL;
   TR_PCISCNode *quotient = NULL;

   // TR_PCISCNode argument legend:  opc  id  dagId  #cfg  #child  other/pred/children
   if (isDiv2Mul)
      {
      TR_PCISCNode *counter = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 8, 0, 0, 0);
      graph->addNode(counter);     // count
      longVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 7, 0, 0, 1);
      graph->addNode(longVar);     // long var
      exitConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 6, 0, 0);
      graph->addNode(exitConst);   // all constant
      TR_PCISCNode *constTwo = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, 2);
      graph->addNode(constTwo);    // iconst 2
      TR_PCISCNode *const63 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, 63);
      graph->addNode(const63);     // iconst 63 (optional)
      TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
      graph->addNode(minusOne);
      entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
      graph->addNode(entryNode);
      TR_PCISCNode *countUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, entryNode, counter, minusOne);
      TR_PCISCNode *magic = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 1, 1, 0, countUpdate);
      graph->addNode(magic);       // lconst 7378697629483820647
      TR_PCISCNode *mulHigh = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lmulh, TR::Int64, graph->incNumNodes(), 1, 1, 2, magic, longVar, magic);
      graph->addNode(mulHigh);
      TR_PCISCNode *shiftRight = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lshr, TR::Int64, graph->incNumNodes(), 1, 1, 2, mulHigh, mulHigh, constTwo);
      graph->addNode(shiftRight);
      TR_PCISCNode *signShift = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lushr, TR::Int64, graph->incNumNodes(), 1, 1, 2, shiftRight, longVar, const63);
      graph->addNode(signShift);   // optional
      quotient = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ladd, TR::Int64, graph->incNumNodes(), 1, 1, 2, signShift, shiftRight, signShift);
      graph->addNode(quotient);    // optional

      const63->setIsOptionalNode();
      signShift->setIsOptionalNode();
      signShift->setSkipParentsCheck();
      quotient->setIsOptionalNode();
      graph->setNumDagIds(9);
      graph->setAspects(isub|mul|shr);
      }
   else
      {
      TR_PCISCNode *counter = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 7, 0, 0, 0);
      graph->addNode(counter);     // count
      longVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 6, 0, 0, 1);
      graph->addNode(longVar);     // long var
      exitConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 5, 0, 0);
      graph->addNode(exitConst);   // all constant
      TR_PCISCNode *constTen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 4, 0, 0, 10);
      graph->addNode(constTen);    // lconst 10
      TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
      graph->addNode(minusOne);
      entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
      graph->addNode(entryNode);
      TR_PCISCNode *countUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, entryNode, counter, minusOne);
      quotient = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::ldiv, TR::Int64, graph->incNumNodes(), 1, 1, 2, countUpdate, longVar, constTen);
      graph->addNode(quotient);

      graph->setNumDagIds(8);
      graph->setAspects(isub|division);
      }

   // Common tail: store the quotient back, test it, and exit.
   TR_PCISCNode *storeBack = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::lstore, TR::Int64, graph->incNumNodes(), 1, 1, 2, quotient, quotient, longVar);
   graph->addNode(storeBack);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, storeBack, longVar, exitConst);
   graph->addNode(exitTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   exitTest->setSuccs(entryNode->getSucc(0), exitNode);

   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setImportantNode(0, exitTest);
   graph->setTransformer(CISCTransform2CountDecimalDigit);
   graph->setInhibitAfterVersioning();
   graph->setNoAspects(call|bndchk, existAccess, existAccess);
   graph->setMinCounts(1, 0, 0);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   return graph;
   }
10777
10778
/****************************************************************************************
10779
Corresponding Java-like Pseudo Program (Division version)
10780
int v1, v2;
10781
while(true){
10782
v1++;
10783
v2 = v2 / 10;
10784
if (v2 == 0) break;
10785
}
10786
10787
Note 1: This idiom supports both the division and the multiplication versions.
10788
****************************************************************************************/
10789
// Division is converted to multiply
10790
TR_PCISCGraph *
makeCountDecimalDigitIntGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)
   {
   // Builds the persistent pattern graph named "CountDecimalDigitInt".
   // The divide-by-ten subgraph is produced by createIdiomIDiv10InLoop();
   // isDiv2Mul is forwarded to it and decides which constants are supplied.
   // Nodes are created in a fixed order because ids come from incNumNodes().
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "CountDecimalDigitInt", 0, 16);

   // Nodes shared by the common tail; each branch below assigns all four.
   TR_PCISCNode *entryNode = NULL;
   TR_PCISCNode *intVar = NULL;
   TR_PCISCNode *exitConst = NULL;
   TR_PCISCNode *quotient = NULL;

   // TR_PCISCNode argument legend:  opc  id  dagId  #cfg  #child  other/pred/children
   if (isDiv2Mul)
      {
      TR_PCISCNode *counter = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 9, 0, 0, 0);
      graph->addNode(counter);     // count
      intVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 8, 0, 0, 1);
      graph->addNode(intVar);      // int var
      exitConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 7, 0, 0);
      graph->addNode(exitConst);   // all constant
      TR_PCISCNode *constTwo = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 6, 0, 0, 2);
      graph->addNode(constTwo);    // iconst 2
      TR_PCISCNode *const31 = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, 31);
      graph->addNode(const31);     // iconst 31 (optional)
      TR_PCISCNode *magic = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, 1717986919);
      graph->addNode(magic);       // iconst 1717986919
      TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
      graph->addNode(minusOne);
      entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
      graph->addNode(entryNode);
      TR_PCISCNode *countUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, entryNode, counter, minusOne);
      quotient = createIdiomIDiv10InLoop(graph, ctrl, true, 1, countUpdate, intVar, magic, constTwo, const31);

      graph->setAspects(isub|mul|shr);
      graph->setNumDagIds(10);
      }
   else
      {
      TR_PCISCNode *counter = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 7, 0, 0, 0);
      graph->addNode(counter);     // count
      intVar = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 6, 0, 0, 1);
      graph->addNode(intVar);      // int var
      exitConst = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_allconst, TR::NoType, graph->incNumNodes(), 5, 0, 0);
      graph->addNode(exitConst);   // constant compared by the exit test (TR_allconst)
      TR_PCISCNode *constTen = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, 10);
      graph->addNode(constTen);    // iconst 10
      TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);
      graph->addNode(minusOne);
      entryNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
      graph->addNode(entryNode);
      TR_PCISCNode *countUpdate = createIdiomDecVarInLoop(graph, ctrl, 1, entryNode, counter, minusOne);
      quotient = createIdiomIDiv10InLoop(graph, ctrl, false, 1, countUpdate, intVar, constTen, NULL, NULL);

      graph->setAspects(isub|division);
      graph->setNumDagIds(8);
      }

   // Common tail: store the quotient back, test it, and exit.
   TR_PCISCNode *storeBack = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR::istore, TR::Int32, graph->incNumNodes(), 1, 1, 2, quotient, quotient, intVar);
   graph->addNode(storeBack);
   TR_PCISCNode *exitTest = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_ifcmpall, TR::NoType, graph->incNumNodes(), 1, 2, 2, storeBack, intVar, exitConst);
   graph->addNode(exitTest);
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   exitTest->setSuccs(entryNode->getSucc(0), exitNode);
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setImportantNode(0, exitTest);
   graph->setTransformer(CISCTransform2CountDecimalDigit);
   graph->setInhibitAfterVersioning();
   graph->setNoAspects(call|bndchk, existAccess, existAccess);
   graph->setMinCounts(1, 0, 0);  // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   return graph;
   }
10843
10844
10845
10846
//////////////////////////////////////////////////////////////////////////
10847
//////////////////////////////////////////////////////////////////////////
10848
//////////////////////////////////////////////////////////////////////////
10849
// Convert long to string
10850
/* Example
10851
int v2, v3;
10852
do {
10853
int num = v3 / 10;
10854
int ch = v3 - num * 10;
10855
v1[v2] = (char) ('0' - ch);
10856
v2--;
10857
v3 = num;
10858
} while (v3 != 0);
10859
*/
10860
10861
// Creates the scratch array used as the work area for CVD(G).
//
// On Z targets a local primitive array of `size` is created in the current
// method symbol and marked as a stack-allocated array access; its symbol
// reference is returned. On every other target no array is created and NULL
// is returned.
static TR::SymbolReference *
getSymrefLocalArray(TR::Compilation *comp, int size)
   {
   // Only Z needs (and gets) the work area.
   if (!comp->target().cpu.isZ())
      return NULL;

   TR::SymbolReference *scratchArea =
      comp->getSymRefTab()->createLocalPrimArray(size, comp->getMethodSymbol(), 8);
   scratchArea->setStackAllocatedArrayAccess();
   return scratchArea;
   }
10872
10873
// Builds the node that supplies the address of the local work array.
//
// When getSymrefLocalArray() provides a symbol reference, the result is a
// loadaddr of that symbol; otherwise it is an aconst 0 placeholder. Either
// node is created relative to `trNode`.
static TR::Node *
createNodeLoadLocalArray(TR::Compilation *comp, TR::Node *trNode, int size)
   {
   TR::SymbolReference *workArea = getSymrefLocalArray(comp, size);
   if (workArea == NULL)
      return TR::Node::create(trNode, TR::aconst, 0, 0);

   return TR::Node::createWithSymRef(trNode, TR::loadaddr, 0, workArea);
   }
10880
10881
//*****************************************************************************************
10882
// IL code generation for converting integer to string (using CVD and UNPKU)
10883
// Input: ImportantNode(0) - istore node for index (V2)
10884
// ImportantNode(1) - (i/l)store node for input value (V3)
10885
// ImportantNode(2) - array store node
10886
// ImportantNode(3) - null check (optional)
10887
//*****************************************************************************************
10888
bool
10889
CISCTransform2LongToStringDigit(TR_CISCTransformer *trans)
10890
{
10891
TR_ASSERT(trans->getOffsetOperand1() == 0 && trans->getOffsetOperand2() == 0, "Not implemented yet");
10892
TR::Node *trNode;
10893
TR::TreeTop *trTreeTop;
10894
TR::Block *block;
10895
TR_CISCGraph *P = trans->getP();
10896
List<TR_CISCNode> *P2T = trans->getP2T();
10897
TR::Compilation *comp = trans->comp();
10898
bool ctrl = trans->isGenerateI2L();
10899
10900
TR_ASSERT(trans->getP()->getVersionLength() == 0, "Versioning code is not implemented yet");
10901
10902
TR_ASSERT(trans->isEmptyAfterInsertionIdiomList(0) && trans->isEmptyAfterInsertionIdiomList(1), "Not implemented yet!");
10903
if (!trans->isEmptyAfterInsertionIdiomList(0) || !trans->isEmptyAfterInsertionIdiomList(1)) return false;
10904
10905
trans->findFirstNode(&trTreeTop, &trNode, &block);
10906
if (!block) return false; // cannot find
10907
10908
if (isLoopPreheaderLastBlockInMethod(comp, block))
10909
{
10910
traceMsg(comp, "Bailing CISCTransform2LongToStringDigit due to null TT - might be a preheader in last block of method\n");
10911
return false;
10912
}
10913
10914
TR::Block *target = trans->analyzeSuccessorBlock();
10915
// Currently, it allows only a single successor.
10916
if (!target) return false;
10917
TR_CISCNode *arrayStoreCISC = trans->getP2TInLoopIfSingle(P->getImportantNode(2));
10918
if (!arrayStoreCISC) return false;
10919
TR::Node *arrayStoreAddress = arrayStoreCISC->getHeadOfTrNode()->getChild(0)->duplicateTree();
10920
10921
TR::Node *baseVarRepNode, *countVarRepNode, *inputVarRepNode;
10922
getP2TTrRepNodes(trans, &baseVarRepNode, &countVarRepNode, &inputVarRepNode);
10923
TR::SymbolReference * countVarSymRef = countVarRepNode->getSymbolReference();
10924
TR::SymbolReference * inputVarSymRef = inputVarRepNode->getSymbolReference();
10925
TR::Node *countVar, *inputVar;
10926
countVar = createLoad(countVarRepNode);
10927
inputVar = createLoad(inputVarRepNode);
10928
TR::Node *replaceParent = NULL;
10929
int childNum = -1;
10930
if (!trans->searchNodeInTrees(arrayStoreAddress, countVar, &replaceParent, &childNum))
10931
return false;
10932
10933
TR_ASSERT(inputVar->getDataType() == TR::Int32 || inputVar->getDataType() == TR::Int64, "error");
10934
10935
//
10936
// obtain a CISCNode of each store
10937
TR_CISCNode *storeV2 = trans->getP2TRepInLoop(P->getImportantNode(0));
10938
TR_CISCNode *storeV3 = trans->getP2TRepInLoop(P->getImportantNode(1));
10939
TR_ASSERT(storeV2 != NULL && storeV3 != NULL, "error");
10940
TR::Node *nullchk = 0;
10941
if (P->getImportantNode(3))
10942
{
10943
TR_CISCNode *nullchkCISC = trans->getP2TInLoopIfSingle(P->getImportantNode(3));
10944
if (nullchkCISC) nullchk = nullchkCISC->getHeadOfTrNode()->duplicateTree();
10945
}
10946
10947
//
10948
// checking a set of all uses for each index
10949
TR_ASSERT(storeV2->getDagID() == storeV3->getDagID(), "error");
10950
#if 1
10951
TR::Node *digit = TR::Node::create(TR::countDigits, 2,
10952
inputVar,
10953
createNodeLoadDigit10Table(comp, inputVarRepNode));
10954
#else
10955
TR::Node *digit = TR::Node::create(TR::countDigits, 2,
10956
inputVar,
10957
createNodeLoadDigit10Table(comp, trNode));
10958
#endif
10959
TR::Node *resultV2 = createOP2(comp, TR::isub, countVar, digit);
10960
replaceParent->setAndIncChild(childNum, createOP2(comp, TR::isub, resultV2,
10961
TR::Node::create(trNode, TR::iconst, 0, -1)));
10962
TR::Node *storeResultV3 = 0;
10963
if (!storeV3->checkDagIdInChains())
10964
{
10965
TR::DataType dataType = storeV3->getDataType();
10966
TR::Node * constNode;
10967
if (dataType == TR::Int32)
10968
{
10969
constNode = TR::Node::create(trNode, TR::iconst, 0, 0);
10970
}
10971
else
10972
{
10973
constNode = TR::Node::create(trNode, TR::lconst, 0, 0);
10974
constNode->setLongInt(0);
10975
}
10976
storeResultV3 = TR::Node::createStore(inputVarSymRef, constNode);
10977
}
10978
10979
TR::Node *l2s = TR::Node::create(trNode, TR::long2String, 4);
10980
l2s->setSymbolReference(comp->getSymRefTab()->findOrCreatelong2StringSymbol());
10981
l2s->setAndIncChild(0, inputVar);
10982
l2s->setAndIncChild(1, arrayStoreAddress);
10983
l2s->setAndIncChild(2, digit);
10984
l2s->setAndIncChild(3, createNodeLoadLocalArray(comp, trNode, 16));
10985
TR::Node *storeResultV2 = TR::Node::createStore(countVarSymRef, resultV2);
10986
10987
// Insert nodes and maintain the CFG
10988
TR::TreeTop *last;
10989
last = trans->removeAllNodes(trTreeTop, block->getExit());
10990
last->join(block->getExit());
10991
block = trans->insertBeforeNodes(block);
10992
if (nullchk) block->append(TR::TreeTop::create(comp, nullchk));
10993
block->append(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, l2s)));
10994
block->append(TR::TreeTop::create(comp, storeResultV2));
10995
if (storeResultV3) block->append(TR::TreeTop::create(comp, storeResultV3));
10996
10997
trans->insertAfterNodes(block);
10998
10999
trans->setSuccessorEdge(block, target);
11000
return true;
11001
}
11002
11003
11004
// Builds the persistent pattern graph named "LongToString" and registers
// CISCTransform2LongToStringDigit as its transformer.
//
// Nodes are created in a fixed order because each one takes its id from
// incNumNodes(); do not reorder the statements below.
//
// TR_PCISCNode constructor arguments after trMemory():
//    opcode, data type, id, dagId, #cfg, #child, then other / pred / children
TR_PCISCGraph *
makeLongToStringGraph(TR::Compilation *c, int32_t ctrl)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "LongToString", 0, 16);

   // ---- variables and constants, one dagId each (16 down to 3) ----
   TR_PCISCNode *baseArr = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 16, 0, 0, 0);      // array base
   graph->addNode(baseArr);
   TR_PCISCNode *idxVar = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 15, 0, 0, 0);       // count
   graph->addNode(idxVar);
   TR_PCISCNode *valVar = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 14, 0, 0, 1);       // long var
   graph->addNode(valVar);
   TR_PCISCNode *chVar = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 2);       // stored value
   graph->addNode(chVar);
   TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 12, 0, 0);       // length
   graph->addNode(arrayLen);
   TR_PCISCNode *hdrConst = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 11, 0, 0, 0);        // array header
   graph->addNode(hdrConst);
   TR_PCISCNode *longZero = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 10, 0, 0, 0);         // lconst 0
   graph->addNode(longZero);
   TR_PCISCNode *longTen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::lconst, TR::Int64, graph->incNumNodes(), 9, 0, 0, 10);         // lconst 10
   graph->addNode(longTen);
   TR_PCISCNode *intZero = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 8, 0, 0, 0);          // iconst 0
   graph->addNode(intZero);
   TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 7, 2); // element size
   TR_PCISCNode *intNine = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 6, 0, 0, 9);          // iconst 9
   graph->addNode(intNine);
   TR_PCISCNode *minus87 = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, -87);        // iconst -87
   graph->addNode(minus87);
   TR_PCISCNode *minus48 = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, -48);        // iconst -48
   graph->addNode(minus48);
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);         // iconst -1
   graph->addNode(minusOne);

   // ---- entry (dagId 2) ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);

   // ---- loop body (dagId 1) ----
   // chVar = 0 - l2i(valVar lrem 10)
   TR_PCISCNode *remNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::lrem, TR::Int64, graph->incNumNodes(), 1, 1, 2, entryNode, valVar, longTen);
   graph->addNode(remNode);
   TR_PCISCNode *narrowNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::l2i, TR::Int32, graph->incNumNodes(), 1, 1, 1, remNode, remNode);
   graph->addNode(narrowNode);
   TR_PCISCNode *negateNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, narrowNode, intZero, narrowNode);
   graph->addNode(negateNode);
   TR_PCISCNode *storeCh = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::istore, TR::Int32, graph->incNumNodes(), 1, 1, 2, negateNode, negateNode, chVar);
   graph->addNode(storeCh);
   // Branch on chVar > 9; the two successors are wired up after all nodes exist.
   TR_PCISCNode *cmpGtNine = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::ificmpgt, TR::NoType, graph->incNumNodes(), 1, 2, 2, storeCh, chVar, intNine);
   graph->addNode(cmpGtNine);
   TR_PCISCNode *adjust48 = createIdiomDecVarInLoop(graph, ctrl, 1, cmpGtNine, chVar, minus48);
   TR_PCISCNode *adjust87 = createIdiomDecVarInLoop(graph, ctrl, 1, adjust48, chVar, minus87);
   TR_PCISCNode *stepIdx = createIdiomIncVarInLoop(graph, ctrl, 1, adjust87, idxVar, minusOne);
   TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::NULLCHK, TR::NoType, graph->incNumNodes(), 1, 1, 1, stepIdx, baseArr);   // optional
   graph->addNode(nullChk);
   TR_PCISCNode *boundChk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::BNDCHK, TR::NoType, graph->incNumNodes(), 1, 1, 2, nullChk, arrayLen, idxVar);
   graph->addNode(boundChk);
   TR_PCISCNode *charStore = createIdiomCharArrayStoreInLoop(graph, ctrl, 1, boundChk, baseArr, idxVar, hdrConst, elemSize, chVar);
   // valVar = valVar ldiv 10; leave the loop when valVar == 0
   TR_PCISCNode *divNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::ldiv, TR::Int64, graph->incNumNodes(), 1, 1, 2, charStore, valVar, longTen);
   graph->addNode(divNode);
   TR_PCISCNode *storeVal = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::lstore, TR::Int64, graph->incNumNodes(), 1, 1, 2, divNode, divNode, valVar);
   graph->addNode(storeVal);
   TR_PCISCNode *cmpZero = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iflcmpeq, TR::NoType, graph->incNumNodes(), 1, 2, 2, storeVal, valVar, longZero);
   graph->addNode(cmpZero);

   // ---- exit (dagId 0) ----
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // ---- control-flow fix-ups ----
   cmpGtNine->setSucc(1, adjust87);
   adjust48->setSucc(0, stepIdx);
   cmpZero->setSuccs(entryNode->getSucc(0), exitNode);
   nullChk->setIsOptionalNode();

   // ---- graph-level settings ----
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(stepIdx, storeVal, charStore, nullChk);
   graph->setNumDagIds(17);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2LongToStringDigit);
   graph->setAspects(isub|iadd|bndchk|division|reminder, 0, ILTypeProp::Size_2);
   graph->setNoAspects(call, 0, 0);
   graph->setMinCounts(2, 0, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitAfterVersioning();
   return graph;
   }
11060
11061
11062
// Builds the persistent pattern graph named "IntToString" and registers
// CISCTransform2LongToStringDigit as its transformer.
//
// isDiv2Mul selects which form of the divide-by-10 is matched: when true the
// graph gets the extra constants 31, 2 and 1717986919 and requires the `shr`
// aspect; when false the constant 10 is passed as the divide operand and the
// `division` aspect is required instead.
//
// Nodes are created in a fixed order because each one takes its id from
// incNumNodes(); do not reorder the statements below.
//
// TR_PCISCNode constructor arguments after trMemory():
//    opcode, data type, id, dagId, #cfg, #child, then other / pred / children
TR_PCISCGraph *
makeIntToStringGraph(TR::Compilation *c, int32_t ctrl, bool isDiv2Mul)
   {
   TR_PCISCGraph *graph = new (PERSISTENT_NEW) TR_PCISCGraph(c->trMemory(), "IntToString", 0, 16);

   // ---- variables and constants, one dagId each (15 down to 3) ----
   TR_PCISCNode *baseArr = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_arraybase, TR::NoType, graph->incNumNodes(), 15, 0, 0, 0);      // array base
   graph->addNode(baseArr);
   TR_PCISCNode *idxVar = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 14, 0, 0, 0);       // count
   graph->addNode(idxVar);
   TR_PCISCNode *valVar = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_variable, TR::NoType, graph->incNumNodes(), 13, 0, 0, 1);       // value being converted
   graph->addNode(valVar);
   TR_PCISCNode *elemSize = createIdiomArrayRelatedConst(graph, ctrl, graph->incNumNodes(), 12, 2); // element size
   TR_PCISCNode *intTen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 11, 0, 0, 10);        // iconst 10
   graph->addNode(intTen);

   // Defaults describe the plain-division form; the multiply form overrides them.
   TR_PCISCNode *constTwo = NULL;
   TR_PCISCNode *const31 = NULL;
   TR_PCISCNode *divOperand = intTen;
   uint32_t divAspect = division;
   if (isDiv2Mul)
      {
      const31 = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 10, 0, 0, 31);     // iconst 31
      graph->addNode(const31);
      if (ctrl & CISCUtilCtl_64Bit)
         {
         constTwo = new (PERSISTENT_NEW) TR_PCISCNode(
            c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 9, 0, 0, 2);    // iconst 2
         graph->addNode(constTwo);
         }
      else
         {
         // Without the 64-bit control flag the element-size node doubles as the constant 2.
         constTwo = elemSize;
         }
      divOperand = new (PERSISTENT_NEW) TR_PCISCNode(
         c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 8, 0, 0, 1717986919); // iconst 1717986919
      graph->addNode(divOperand);
      divAspect = shr;
      }

   TR_PCISCNode *arrayLen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_quasiConst2, TR::NoType, graph->incNumNodes(), 7, 0, 0);        // length
   graph->addNode(arrayLen);
   TR_PCISCNode *hdrConst = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_ahconst, TR::NoType, graph->incNumNodes(), 6, 0, 0, 0);         // array header
   graph->addNode(hdrConst);
   TR_PCISCNode *intZero = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 5, 0, 0, 0);          // iconst 0
   graph->addNode(intZero);
   TR_PCISCNode *int48 = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 4, 0, 0, 48);         // iconst 48
   graph->addNode(int48);
   TR_PCISCNode *minusOne = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::iconst, TR::Int32, graph->incNumNodes(), 3, 0, 0, -1);         // iconst -1
   graph->addNode(minusOne);

   // ---- entry (dagId 2) ----
   TR_PCISCNode *entryNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_entrynode, TR::NoType, graph->incNumNodes(), 2, 1, 0);
   graph->addNode(entryNode);

   // ---- loop body (dagId 1) ----
   TR_PCISCNode *nullChk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::NULLCHK, TR::NoType, graph->incNumNodes(), 1, 1, 1, entryNode, baseArr);  // optional
   graph->addNode(nullChk);
   TR_PCISCNode *boundChk = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::BNDCHK, TR::NoType, graph->incNumNodes(), 1, 1, 2, nullChk, arrayLen, idxVar);
   graph->addNode(boundChk);
   TR_PCISCNode *elemAddr = createIdiomArrayAddressInLoop(graph, ctrl, 1, boundChk, baseArr, idxVar, hdrConst, elemSize);
   TR_PCISCNode *quotient = createIdiomIDiv10InLoop(graph, ctrl, isDiv2Mul, 1, elemAddr, valVar, divOperand, constTwo, const31);
   // ch = 48 - (valVar - quotient * 10)
   TR_PCISCNode *timesTen = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::imul, TR::Int32, graph->incNumNodes(), 1, 1, 2, quotient, quotient, intTen);
   graph->addNode(timesTen);
   TR_PCISCNode *remainder = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, timesTen, valVar, timesTen);
   graph->addNode(remainder);
   TR_PCISCNode *chNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::isub, TR::Int32, graph->incNumNodes(), 1, 1, 2, remainder, int48, remainder);
   graph->addNode(chNode);
   TR_PCISCNode *charStore = createIdiomCharArrayStoreBodyInLoop(graph, ctrl, 1, chNode, elemAddr, chNode);
   // valVar = quotient; then step the index and leave the loop when valVar == 0
   TR_PCISCNode *storeVal = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::istore, TR::Int32, graph->incNumNodes(), 1, 1, 2, charStore, quotient, valVar);
   graph->addNode(storeVal);
   TR_PCISCNode *stepIdx = createIdiomIncVarInLoop(graph, ctrl, 1, storeVal, idxVar, minusOne);
   TR_PCISCNode *cmpZero = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR::ificmpeq, TR::NoType, graph->incNumNodes(), 1, 2, 2, stepIdx, valVar, intZero);
   graph->addNode(cmpZero);

   // ---- exit (dagId 0) ----
   TR_PCISCNode *exitNode = new (PERSISTENT_NEW) TR_PCISCNode(
      c->trMemory(), TR_exitnode, TR::NoType, graph->incNumNodes(), 0, 0, 0);
   graph->addNode(exitNode);

   // ---- control-flow fix-ups ----
   cmpZero->setSuccs(entryNode->getSucc(0), exitNode);
   nullChk->setIsOptionalNode();

   // ---- graph-level settings ----
   graph->setEntryNode(entryNode);
   graph->setExitNode(exitNode);
   graph->setImportantNodes(stepIdx, storeVal, charStore, nullChk);
   graph->setNumDagIds(16);
   graph->createInternalData(1);

   graph->setSpecialNodeTransformer(defaultSpecialNodeTransformer);
   graph->setTransformer(CISCTransform2LongToStringDigit);
   graph->setAspects(isub|iadd|bndchk|mul|divAspect, 0, ILTypeProp::Size_2);
   graph->setNoAspects(call, 0, 0);
   graph->setMinCounts(1, 0, 1);   // minimum ifCount, indirectLoadCount, indirectStoreCount
   graph->setHotness(warm, false);
   graph->setInhibitAfterVersioning();
   return graph;
   }
11129
11130