GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/z/codegen/J9CodeGenerator.cpp

/*******************************************************************************
 * Copyright (c) 2000, 2022 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/

// On z/OS the XLC linker can't handle files with the same name at link time.
// This pragma workaround is needed: it essentially gives a different name to
// the code section (csect) for this file, so that it doesn't conflict with
// another file of the same name.

#pragma csect(CODE,"TRJ9ZCGBase#C")
#pragma csect(STATIC,"TRJ9ZCGBase#S")
#pragma csect(TEST,"TRJ9ZCGBase#T")

#include <algorithm>
#include "env/CompilerEnv.hpp"
#include "codegen/AheadOfTimeCompile.hpp"
#include "codegen/CodeGenerator.hpp"
#include "codegen/CodeGenerator_inlines.hpp"
#include "codegen/ConstantDataSnippet.hpp"
#include "codegen/Linkage_inlines.hpp"
#include "codegen/S390CHelperLinkage.hpp"
#include "codegen/S390PrivateLinkage.hpp"
#include "env/VMJ9.h"
#include "env/jittypes.h"
#include "il/Node.hpp"
#include "il/Node_inlines.hpp"
#include "z/codegen/J9SystemLinkageLinux.hpp"
#include "z/codegen/J9SystemLinkagezOS.hpp"
#include "z/codegen/S390GenerateInstructions.hpp"
#include "z/codegen/S390Recompilation.hpp"
#include "z/codegen/S390Register.hpp"
#include "z/codegen/ReduceSynchronizedFieldLoad.hpp"

#define OPT_DETAILS "O^O CODE GENERATION: "

extern void TEMPORARY_initJ9S390TreeEvaluatorTable(TR::CodeGenerator *cg);

// Forward declarations
bool nodeMightClobberAccumulatorBeforeUse(TR::Node *);

J9::Z::CodeGenerator::CodeGenerator(TR::Compilation *comp) :
      J9::CodeGenerator(comp)
   {
   /**
    * Do not add CodeGenerator initialization logic here.
    * Use the \c initialize() method instead.
    */
   }

void
J9::Z::CodeGenerator::initialize()
   {
   self()->J9::CodeGenerator::initialize();

   TR::CodeGenerator *cg = self();
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   cg->setAheadOfTimeCompile(new (cg->trHeapMemory()) TR::AheadOfTimeCompile(cg));

   // Java specific runtime helpers
   cg->symRefTab()->createSystemRuntimeHelper(TR_S390jitMathHelperConvertLongToFloat);
   cg->symRefTab()->createSystemRuntimeHelper(TR_S390induceRecompilation);

   // Enable Direct to JNI calls unless we're mimicking interpreter stack frames.
   if (!comp->getOption(TR_FullSpeedDebug))
      cg->setSupportsDirectJNICalls();

   if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDStringCaseConv))
      cg->setSupportsInlineStringCaseConversion();

   if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableFastStringIndexOf) &&
       !TR::Compiler->om.canGenerateArraylets())
      {
      cg->setSupportsInlineStringIndexOf();
      }

   if (cg->getSupportsVectorRegisters() && !comp->getOption(TR_DisableSIMDStringHashCode) &&
       !TR::Compiler->om.canGenerateArraylets())
      {
      cg->setSupportsInlineStringHashCode();
      }

   if (cg->getSupportsVectorRegisters() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z14))
      {
      cg->setSupportsInlineStringLatin1Inflate();
      }

   // See comment in `handleHardwareReadBarrier` implementation as to why we cannot support CTX under CS
   if (cg->getSupportsTM() && TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)
      {
      cg->setSupportsInlineConcurrentLinkedQueue();
      }

   // Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as
   // TRTO allocate lookup tables in persistent memory that cannot be relocated.
   if (comp->isOutOfProcessCompilation())
      {
      cg->resetSupportsArrayTranslateTRxx();
      }

   static char *disableInlineEncodeASCII = feGetEnv("TR_disableInlineEncodeASCII");
   if (comp->fej9()->isStringCompressionEnabledVM() && cg->getSupportsVectorRegisters() && !TR::Compiler->om.canGenerateArraylets() && !disableInlineEncodeASCII)
      {
      cg->setSupportsInlineEncodeASCII();
      }

   // Let's turn this on. There is more work needed in the opt
   // to catch the case where the BNDCHK is inserted after
   //
   cg->setDisableNullCheckOfArrayLength();

   // Enable the live range splitter by default.
   if (!comp->getOption(TR_DisableLiveRangeSplitter))
      comp->setOption(TR_EnableRangeSplittingGRA);

   // Disable the SS optimization that generates better SS instruction memory references.
   // Issue in Java because of the symref in the AOT case. See RTC 31738 for details.
   comp->setOption(TR_DisableSSOpts);

   // Invoke Class.newInstanceImpl() from the JIT directly
   cg->setSupportsNewInstanceImplOpt();

   // Still being set in the S390CodeGenerator constructor, as zLinux sTR requires this.
   //cg->setSupportsJavaFloatSemantics();

   // Enable this only on Java, as there is a possibility that optimizations driven by this
   // flag will generate calls to helper routines.
#if defined(J9VM_OPT_JITSERVER)
   // The TRT instruction generated by the arrayTranslateAndTestEvaluator is not relocatable. Thus, to
   // attain functional correctness we don't enable this support for remote compilations.
   if (!comp->isOutOfProcessCompilation())
#endif /* defined(J9VM_OPT_JITSERVER) */
      {
      cg->setSupportsArrayTranslateAndTest();
      }

   // Enable compaction of local stack slots, i.e. variables with non-overlapping live ranges
   // can share the same slot.
   cg->setSupportsCompactedLocals();

   // Enable implicit null checks on zLinux. On zOS, page zero is readable, so we need explicit checks.
   cg->setSupportsImplicitNullChecks(comp->target().isLinux() && cg->getHasResumableTrapHandler() && !comp->getOption(TR_DisableZImplicitNullChecks));

   // Enable monitor cache lookup for monent/monexit
   static char *disableMonitorCacheLookup = feGetEnv("TR_disableMonitorCacheLookup");
   if (!disableMonitorCacheLookup)
      comp->setOption(TR_EnableMonitorCacheLookup);

   // Enable high-resolution timer
   cg->setSupportsCurrentTimeMaxPrecision();

   // Defect 109299 : PMR 14649,999,760 / CritSit AV8426
   // Turn off use of the hardware clock on zLinux for calculating currentTimeMillis(), as the user can adjust the time on their system.
   //
   // The hardware clock, however, can be used for calculating System.nanoTime() on zLinux
   // since java/lang/System.nanoTime() returns an arbitrary number, rather than the current time
   // (see the java/lang/System.nanoTime() spec for details).
   if (comp->target().isZOS())
      cg->setSupportsMaxPrecisionMilliTime();

   // Support BigDecimal Long Lookaside versioning optimizations.
   if (!comp->getOption(TR_DisableBDLLVersioning))
      cg->setSupportsBigDecimalLongLookasideVersioning();

   // RI support
   if (comp->getOption(TR_HWProfilerDisableRIOverPrivateLinkage)
       && comp->getPersistentInfo()->isRuntimeInstrumentationEnabled()
       && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
       && comp->target().cpu.supportsFeature(OMR_FEATURE_S390_RI))
      {
      cg->setSupportsRuntimeInstrumentation();
      cg->setEnableRIOverPrivateLinkage(false); // Disable RI over private linkage, since RION/OFF will be controlled over J2I / I2J.
      }

   /*
    * "Statically" initialize the FE-specific tree evaluator functions.
    * This code only needs to execute once per JIT lifetime.
    */
   static bool initTreeEvaluatorTable = false;
   if (!initTreeEvaluatorTable)
      {
      TEMPORARY_initJ9S390TreeEvaluatorTable(cg);
      initTreeEvaluatorTable = true;
      }

   cg->getS390Linkage()->initS390RealRegisterLinkage();

   if (comp->fej9()->hasFixedFrameC_CallingConvention())
      {
      cg->setHasFixedFrameC_CallingConvention();
      }

   static bool disableIntegerToChars = (feGetEnv("TR_DisableIntegerToChars") != NULL);
   if (cg->getSupportsVectorRegisters() && !TR::Compiler->om.canGenerateArraylets() && !disableIntegerToChars && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZNEXT))
      {
      cg->setSupportsIntegerToChars();
      cg->setSupportsIntegerStringSize();
      }

   cg->setIgnoreDecimalOverflowException(false);
   }

bool
J9::Z::CodeGenerator::callUsesHelperImplementation(TR::Symbol *sym)
   {
   return sym && (!self()->comp()->getOption(TR_DisableInliningOfNatives) &&
         sym->castToMethodSymbol()->getMandatoryRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchJ9Method);
   }

TR::Linkage *
J9::Z::CodeGenerator::createLinkage(TR_LinkageConventions lc)
   {
   TR::Linkage * linkage;
   switch (lc)
      {
      case TR_CHelper:
         linkage = new (self()->trHeapMemory()) J9::Z::CHelperLinkage(self());
         break;
      case TR_Helper:
         linkage = new (self()->trHeapMemory()) J9::Z::HelperLinkage(self());
         break;

      case TR_Private:
         linkage = new (self()->trHeapMemory()) J9::Z::PrivateLinkage(self());
         break;

      case TR_J9JNILinkage:
         linkage = new (self()->trHeapMemory()) J9::Z::JNILinkage(self());
         break;

      case TR_System:
         if (self()->comp()->target().isLinux())
            linkage = new (self()->trHeapMemory()) J9::Z::zLinuxSystemLinkage(self());
         else
            linkage = new (self()->trHeapMemory()) J9::Z::zOSSystemLinkage(self());
         break;

      default:
         TR_ASSERT(0, "\nTestarossa error: Illegal linkage convention %d\n", lc);
      }

   self()->setLinkage(lc, linkage);
   return linkage;
   }
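
// Illustrative usage note (not part of the original source): callers typically
// do not invoke createLinkage() directly. Assuming the usual OMR pattern, they
// ask the code generator for a linkage by convention, which constructs it via
// createLinkage() on first use and caches it with setLinkage(), e.g.
//
//    TR::Linkage *privateLinkage = cg->getLinkage(TR_Private); // hypothetical call site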

bool
J9::Z::CodeGenerator::doInlineAllocate(TR::Node *node)
   {
   TR_OpaqueClassBlock * classInfo = 0;
   if (self()->comp()->suppressAllocationInlining()) return false;
   TR::ILOpCodes opCode = node->getOpCodeValue();

   if ((opCode!=TR::anewarray) && (opCode!=TR::newarray) && (opCode!=TR::New))
      return false;

   int32_t objectSize = self()->comp()->canAllocateInline(node, classInfo);
   if (objectSize < 0) return false;

   return true;
   }

bool
J9::Z::CodeGenerator::constLoadNeedsLiteralFromPool(TR::Node *node)
   {
   if (node->isClassUnloadingConst() || node->getType().isIntegral() || node->getType().isAddress())
      {
      return false;
      }
   else
      {
      return true; // Floats/Doubles require the literal pool
      }
   }
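
// Illustrative examples (not part of the original source): an integral constant
// such as an iconst or lconst can typically be materialized with an immediate
// form on Z (e.g. LGFI/IILF), and address and class-unloading constants are
// handled separately, so none of those need a literal pool entry here. A
// dconst/fconst has no immediate form and is loaded from the literal pool,
// which is why only floating-point constants return true.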

TR::Recompilation *
J9::Z::CodeGenerator::allocateRecompilationInfo()
   {
   TR::Compilation *comp = self()->comp();
   if (comp->getJittedMethodSymbol()->isJNI() &&
       !comp->getOption(TR_FullSpeedDebug))
      {
      traceMsg(comp, "\n====== THIS METHOD IS A VIRTUAL JNI THUNK. IT WILL NOT BE RECOMPILED ======\n");
      return NULL;
      }
   else
      {
      return TR_S390Recompilation::allocate(comp);
      }
   }

void
J9::Z::CodeGenerator::lowerTreesPreChildrenVisit(TR::Node* parent, TR::TreeTop * treeTop, vcount_t visitCount)
   {
   J9::CodeGenerator::lowerTreesPreChildrenVisit(parent, treeTop, visitCount);

   if (parent->getOpCodeValue() == TR::BCDCHK)
      {
      // Sometimes a TR::pdModifyPrecision will be inserted
      // just under the BCDCHK; we have to remove it.
      TR::Node * chkChild = parent->getFirstChild();
      if (chkChild->getOpCodeValue() == TR::pdModifyPrecision)
         {
         TR::Node * pdopNode = chkChild->getFirstChild();
         pdopNode->incReferenceCount();
         chkChild->recursivelyDecReferenceCount();
         parent->setChild(0, pdopNode);
         }
      }
   }
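
// Illustrative before/after sketch of the transformation above (assumed node
// shapes, not part of the original source):
//
//    BCDCHK                       BCDCHK
//      pdModifyPrecision    =>      pdop (e.g. pdadd)
//        pdop (e.g. pdadd)
//
// The intervening pdModifyPrecision is unhooked so that the BCDCHK directly
// guards the packed-decimal operation itself.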

void
J9::Z::CodeGenerator::lowerTreesPostChildrenVisit(TR::Node * parent, TR::TreeTop * treeTop, vcount_t visitCount)
   {
   J9::CodeGenerator::lowerTreesPostChildrenVisit(parent, treeTop, visitCount);

   // J9, Z
   //
   if (self()->codegenSupportsLoadlessBNDCheck() &&
       parent->getOpCode().isBndCheck() &&
       (parent->getFirstChild()->getOpCode().isLoadVar() ||
        parent->getSecondChild()->getOpCode().isLoadVar()))
      {
      TR::Node * memChild = parent->getFirstChild()->getOpCode().isLoadVar() ? parent->getFirstChild() : parent->getSecondChild();

      if (memChild->getVisitCount() != self()->comp()->getVisitCount() && memChild->getReferenceCount() > 1 && performTransformation(self()->comp(), "%sRematerializing memref child %p from BNDCheck node\n", OPT_DETAILS, memChild))
         {
         memChild->decReferenceCount();
         TR::Node *newNode = TR::Node::copy(memChild);
         newNode->setReferenceCount(1);
         parent->setChild(parent->findChildIndex(memChild), newNode);
         }
      }
   }
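
// Illustrative note (not part of the original source): copying the commoned
// load into a fresh single-reference node lets the bound-check evaluator fold
// the load into the compare's memory operand (a "loadless" bound check),
// instead of forcing the shared load into a register that must stay live
// across the check.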

void
J9::Z::CodeGenerator::lowerTreeIfNeeded(
      TR::Node *node,
      int32_t childNumberOfNode,
      TR::Node *parent,
      TR::TreeTop *tt)
   {
   TR::Compilation *comp = self()->comp();
   J9::CodeGenerator::lowerTreeIfNeeded(node, childNumberOfNode, parent, tt);

   if (self()->yankIndexScalingOp() &&
       (node->getOpCodeValue() == TR::aiadd || node->getOpCodeValue() == TR::aladd))
      {
      // 390 sees a lot of scaling ops getting stuck between the BNDCHK and the array read/write,
      // causing heavy AGIs. This transformation pulls the scaling op up the tree to unpin it.
      //
      // Looking for trees that look like this:
      //
      // BNDCHK / BNDCHKwithSpineCHK
      //   iiload
      //     ==>aRegLoad
      //   iiload
      //     ==>aRegLoad
      //
      // iaload
      //   aiadd <===== You are here
      //     ==>aRegLoad
      //     isub
      //       imul <=== Find this node and anchor it up above the BNDCHK
      //         ==>iiload
      //         iconst 4
      //       iconst -16

      TR::TreeTop* prevPrevTT = NULL;
      TR::TreeTop* prevTT = tt->getPrevTreeTop();

      while (prevTT &&
             (prevTT->getNode()->getOpCodeValue() == TR::iRegStore ||
              prevTT->getNode()->getOpCodeValue() == TR::aRegStore ||
              prevTT->getNode()->getOpCodeValue() == TR::asynccheck ||
              ((prevTT->getNode()->getOpCodeValue() == TR::treetop) &&
               (!prevTT->getNode()->getFirstChild()->getOpCode().hasSymbolReference() ||
                prevTT->getNode()->getFirstChild()->getOpCode().isLoad()))))
         {
         prevTT = prevTT->getPrevTreeTop();
         }

      // Pull the scaling op up above the ArrayStoreCHK, as performing the scaling op right before the store is a horrible AGI.
      if (tt->getPrevTreeTop() &&
          tt->getNode()->getOpCodeValue() == TR::ArrayStoreCHK &&
          node->getSecondChild()->getNumChildren() >= 2)
         {
         // The general tree that we are matching is:
         //    aladd <===== You are here
         //       ==>iaload
         //       lsub
         //          lmul <===== Find this node and anchor it up above the ArrayStoreCHK
         //             i2l
         //                ==>iRegLoad
         //
         // However, with internal pointers, there may or may not be an isub/lsub for the arrayheader. If there is no
         // arrayheader isub/lsub, we will see a tree as such:
         //
         //    aladd (internal ptr) <===== You are here
         //       ==>iaload
         //       lshl <===== Find this node and anchor it up above the ArrayStoreCHK
         //          i2l
         //             ==>iRegLoad
         //
         // As such, we will check the second child of the aiadd/aladd, and see if it's the mul/shift operation.
         // If not, we'll get the subsequent first child.
         TR::Node* mulNode = node->getSecondChild();

         if (mulNode->getOpCodeValue() != TR::imul && mulNode->getOpCodeValue() != TR::ishl &&
             mulNode->getOpCodeValue() != TR::lmul && mulNode->getOpCodeValue() != TR::lshl)
            mulNode = node->getSecondChild()->getFirstChild();

         if ((mulNode->getOpCodeValue() == TR::imul || mulNode->getOpCodeValue() == TR::ishl || mulNode->getOpCodeValue() == TR::lmul || mulNode->getOpCodeValue() == TR::lshl) &&
             (performTransformation(comp, "%sYank mul above ArrayStoreChk [%p] \n", OPT_DETAILS, node)))
            {
            TR::TreeTop * ttNew = TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, mulNode));
            tt->getPrevTreeTop()->insertAfter(ttNew);
            }
         }
      else if (prevTT &&
               (prevPrevTT = prevTT->getPrevTreeTop()) &&
               prevTT->getNode()->getOpCode().isBndCheck() &&
               node->getSecondChild()->getNumChildren() >= 2)
         {
         // The general tree that we are matching is:
         //    aladd <===== You are here
         //       ==>iaload
         //       lsub
         //          lmul <===== Find this node and anchor it up above the BNDCHK
         //             i2l
         //                ==>iRegLoad
         //
         // However, with internal pointers, there may or may not be an isub/lsub for the arrayheader. If there is no
         // arrayheader isub/lsub, we will see a tree as such:
         //
         //    aladd (internal ptr) <===== You are here
         //       ==>iaload
         //       lshl <===== Find this node and anchor it up above the BNDCHK
         //          i2l
         //             ==>iRegLoad
         //
         // As such, we will check the second child of the aiadd/aladd, and see if it's the mul/shift operation.
         // If not, we'll get the subsequent first child.
         TR::Node* mulNode = node->getSecondChild();

         if (mulNode->getOpCodeValue() != TR::imul && mulNode->getOpCodeValue() != TR::ishl &&
             mulNode->getOpCodeValue() != TR::lmul && mulNode->getOpCodeValue() != TR::lshl)
            mulNode = node->getSecondChild()->getFirstChild();

         TR::Node *prevNode = prevTT->getNode();
         TR::Node *bndchkIndex = prevNode->getOpCode().isSpineCheck() ?
                                    prevNode->getChild(3) :        // TR::BNDCHKwithSpineCHK
                                    prevNode->getSecondChild();    // TR::BNDCHK

         bool doIt = false;

         doIt |= ((mulNode->getOpCodeValue() == TR::imul || mulNode->getOpCodeValue() == TR::ishl) &&
                  (mulNode->getFirstChild() == bndchkIndex)); // Make sure the BNDCHK is for this ind var

         doIt |= ((mulNode->getOpCodeValue() == TR::lmul || mulNode->getOpCodeValue() == TR::lshl) &&
                  (mulNode->getFirstChild()->getOpCodeValue() == TR::i2l && // 64-bit memrefs have an extra iu2l
                   // Make sure the BNDCHKxxx is for this ind var
                   (mulNode->getFirstChild() == bndchkIndex ||
                    mulNode->getFirstChild()->getFirstChild() == bndchkIndex ||
                    (bndchkIndex->getNumChildren() >= 1 &&
                     mulNode->getFirstChild() == bndchkIndex->getFirstChild()))));

         if (doIt && performTransformation(comp, "%sYank mul [%p] \n", OPT_DETAILS, node))
            {
            TR::TreeTop * ttNew = TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, mulNode));
            prevPrevTT->insertAfter(ttNew);
            }
         }
      }

   // J9, Z
   //
   // On zseries, convert the aconst to an aloadi of aconst 0 and move it to its own new treetop
   if (comp->target().cpu.isZ() && !self()->profiledPointersRequireRelocation() &&
       node->getOpCodeValue() == TR::aconst && node->isClassUnloadingConst())
      {
      TR::Node * dummyNode = TR::Node::create(node, TR::aconst, 0);
      TR::Node *constCopy;
      TR::SymbolReference *intShadow;

      dumpOptDetails(comp, "transforming unloadable aconst %p \n", node);

      constCopy = TR::Node::copy(node);
      intShadow = self()->symRefTab()->findOrCreateGenericIntShadowSymbolReference((intptr_t)constCopy);
      intShadow->setLiteralPoolAddress();

      TR::Node::recreate(node, TR::aloadi);
      node->setNumChildren(1);
      node->setSymbolReference(intShadow);
      node->setAndIncChild(0, dummyNode);

      tt->getPrevTreeTop()->insertAfter(TR::TreeTop::create(comp, TR::Node::create(TR::treetop, 1, node)));
      node->decReferenceCount();
      parent->setAndIncChild(childNumberOfNode, node);
      }

   // J9, Z
   //
   if (comp->target().cpu.isZ() && node->getOpCodeValue() == TR::aloadi && node->isUnneededIALoad())
      {
      ListIterator<TR_Pair<TR::Node, int32_t> > listIter(&_ialoadUnneeded);
      TR_Pair<TR::Node, int32_t> *ptr;
      uintptr_t temp;
      int32_t updatedTemp;
      for (ptr = listIter.getFirst(); ptr; ptr = listIter.getNext())
         {
         temp = (uintptr_t)ptr->getValue();
         updatedTemp = (int32_t) temp;
         if (ptr->getKey() == node && temp != node->getReferenceCount())
            {
            node->setUnneededIALoad(false);
            break;
            }
         }
      }
   }

TR::S390EyeCatcherDataSnippet *
J9::Z::CodeGenerator::CreateEyeCatcher(TR::Node * node)
   {
   // 88448: The cold eyecatcher is used for padding of the endPC so that the return address for exception snippets will never equal the endPC.
   TR::S390EyeCatcherDataSnippet * eyeCatcherSnippet = new (self()->trHeapMemory()) TR::S390EyeCatcherDataSnippet(self(), node);
   _snippetDataList.push_front(eyeCatcherSnippet);
   return eyeCatcherSnippet;
   }

/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::widenUnicodeSignLeadingSeparate(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getType().isAnyUnicode(),"widenUnicodeSignLeadingSeparate is only valid for unicode types (type = %s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenUnicodeSignLeadingSeparate is only valid for aligned memory references\n");
   if (bytesToClear > 0)
      {
      TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\twidenUnicodeSignLeadingSeparate: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);
      targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgen MVC of size 2 to move unicode leading separate sign code left by %d bytes to the widened left aligned position\n",bytesToClear);
      TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);
      int32_t mvcSize = 2;
      generateSS1Instruction(self(), TR::InstOpCode::MVC, node,
                             mvcSize-1,
                             targetMR,
                             originalSignCodeMR);

      self()->genZeroLeftMostUnicodeBytes(node, reg, endByte - TR::DataType::getUnicodeSignSize(), bytesToClear, targetMR);
      }
   }
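
// Worked example (assumed values, not part of the original source): widening
// the unicode leading-separate value "+123" (002B 0031 0032 0033) by one digit
// (bytesToClear=2) first MVCs the 2-byte sign code 002B from its old position
// up to the new leftmost position, then genZeroLeftMostUnicodeBytes() fills the
// vacated 2 bytes with 0030 ('0'), giving "+0123" (002B 0030 0031 0032 0033).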

#define TR_MAX_UNPKU_SIZE 64
/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::genZeroLeftMostUnicodeBytes(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getType().isAnyUnicode(),"genZeroLeftMostUnicodeBytes is only valid for unicode types (type = %s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostUnicodeBytes is only valid for aligned memory references\n");

   bool evaluatedPaddingAnchor = false;
   TR::Node *paddingAnchor = NULL;
   if (bytesToClear > 0)
      {
      TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgenZeroLeftMostUnicodeBytes: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);

      // Zero 16 bytes (the fixed UNPKU source size) followed by a left aligned UNPKU of bytesToClear length to get 0030 repeated as the leftmost digits.
      // Less efficient than an MVC literal copy, but doesn't require any extra storage as it is in-place.
      int32_t tempSize = self()->getPackedToUnicodeFixedSourceSize();
      TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(tempSize, self()->comp());
      tempStorageReference->setTemporaryReferenceCount(1);
      TR::MemoryReference *tempMR = generateS390LeftAlignedMemoryReference(node, tempStorageReference, self(), tempSize, true, true); // enforceSSLimits=true, isNewTemp=true

      TR_ASSERT(bytesToClear <= TR_MAX_UNPKU_SIZE,"expecting bytesToClear (%d) <= TR_MAX_UNPKU_SIZE (%d)\n",bytesToClear,TR_MAX_UNPKU_SIZE);
      self()->genZeroLeftMostPackedDigits(node, NULL, tempSize, tempSize*2, tempMR);

      int32_t unpkuCount = ((bytesToClear-1)/TR_MAX_UNPKU_SIZE)+1;
      for (int32_t i = 0; i < unpkuCount; i++)
         {
         int32_t unpkuSize = std::min(bytesToClear,TR_MAX_UNPKU_SIZE);
         int32_t destOffset = i*TR_MAX_UNPKU_SIZE;
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tgen %d of %d UNPKUs with dest size of %d destOffset of %d and fixed source size %d\n",i+1,unpkuCount,unpkuSize,destOffset,tempSize);
         generateSS1Instruction(self(), TR::InstOpCode::UNPKU, node,
                                unpkuSize-1,
                                generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset, self(), endByte),
                                generateS390LeftAlignedMemoryReference(*tempMR, node, 0, self(), tempSize));
         bytesToClear-=unpkuSize;
         }
      tempStorageReference->decrementTemporaryReferenceCount();
      }
   if (!evaluatedPaddingAnchor)
      self()->processUnusedNodeDuringEvaluation(paddingAnchor);
   }
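
// Worked example of the chunking math above (not part of the original source):
// the TR_ASSERT restricts bytesToClear to <= TR_MAX_UNPKU_SIZE, so unpkuCount is
// currently always 1, but the loop is written generally. For bytesToClear=100 it
// would give unpkuCount = ((100-1)/64)+1 = 2: a 64-byte UNPKU at destOffset 0,
// then a 36-byte UNPKU at destOffset 64.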

/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::widenZonedSignLeadingSeparate(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getDataType() == TR::ZonedDecimalSignLeadingSeparate,
      "widenZonedSignLeadingSeparate is only valid for TR::ZonedDecimalSignLeadingSeparate (type=%s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenZonedSignLeadingSeparate is only valid for aligned memory references\n");
   if (bytesToClear > 0)
      {
      TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\twidenZonedSignLeadingSeparate: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);
      targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgen MVC of size 1 to move zoned leading separate sign code left by %d bytes to the widened left aligned position\n",bytesToClear);
      TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);
      int32_t mvcSize = 1;
      generateSS1Instruction(self(), TR::InstOpCode::MVC, node,
                             mvcSize-1,
                             targetMR,
                             originalSignCodeMR);
      self()->genZeroLeftMostZonedBytes(node, reg, endByte - TR::DataType::getZonedSignSize(), bytesToClear, targetMR);
      }
   }

/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::widenZonedSignLeadingEmbedded(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getDataType() == TR::ZonedDecimalSignLeadingEmbedded,
      "widenZonedSignLeadingEmbedded is only valid for TR::ZonedDecimalSignLeadingEmbedded (type=%s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenZonedSignLeadingEmbedded is only valid for aligned memory references\n");
   if (bytesToClear > 0)
      {
      TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\twidenZonedSignLeadingEmbedded: node %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);
      self()->genZeroLeftMostZonedBytes(node, reg, endByte, bytesToClear, targetMR);
      targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgen MVZ of size 1 to move leading sign code left by %d bytes to the widened left aligned position\n",bytesToClear);
      TR::MemoryReference *originalSignCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, bytesToClear, self(), endByte);
      int32_t mvzSize = 1;
      generateSS1Instruction(self(), TR::InstOpCode::MVZ, node,
                             mvzSize-1,
                             targetMR,
                             generateS390LeftAlignedMemoryReference(*originalSignCodeMR, node, 0, self(), originalSignCodeMR->getLeftMostByte()));
         {
         if (self()->traceBCDCodeGen()) traceMsg(self()->comp(),"\tgenerate OI 0xF0 to force original leading sign code at offset=bytesToClear=%d\n",bytesToClear);
         generateSIInstruction(self(), TR::InstOpCode::OI, node, originalSignCodeMR, TR::DataType::getZonedCode());
         }
      }
   }
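
// Worked byte-level example (assumed values, not part of the original source):
// widening the leading-embedded zoned value 0xC1 0xF2 ("+12") by bytesToClear=1:
//   1. genZeroLeftMostZonedBytes() writes 0xF0 ('0') into the new leftmost byte,
//   2. MVZ copies only the zone nibble of the old sign byte (0xC) onto that
//      byte, making the new leading sign byte 0xC0,
//   3. OI 0xF0 forces the old sign byte 0xC1 to the plain digit 0xF1,
// giving 0xC0 0xF1 0xF2 ("+012").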

void
J9::Z::CodeGenerator::genZeroLeftMostZonedBytes(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t bytesToClear, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getType().isAnyZoned(),"genZeroLeftMostZonedBytes is only valid for zoned types (type = %s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostZonedBytes is only valid for aligned memory references\n");
   TR::Node *paddingAnchor = NULL;
   bool evaluatedPaddingAnchor = false;
   if (bytesToClear > 0)
      {
      TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgenZeroLeftMostZonedBytes: (%s) %p, reg %s targetStorageRef #%d, endByte %d, bytesToClear %d\n",
            node->getOpCode().getName(),node,reg?self()->getDebug()->getName(reg):"0",storageRef?storageRef->getReferenceNumber():0,endByte,bytesToClear);

         {
         targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);

         generateSIInstruction(self(), TR::InstOpCode::MVI, node, targetMR, TR::DataType::getZonedZeroCode());
         if (bytesToClear > 2)
            {
            int32_t overlapMVCSize = bytesToClear-1;
            generateSS1Instruction(self(), TR::InstOpCode::MVC, node,
                                   overlapMVCSize-1,
                                   generateS390LeftAlignedMemoryReference(*targetMR, node, 1, self(), targetMR->getLeftMostByte()),
                                   generateS390LeftAlignedMemoryReference(*targetMR, node, 0, self(), targetMR->getLeftMostByte()));
            }
         }
      if (reg)
         reg->addRangeOfZeroBytes(endByte-bytesToClear, endByte);
      }

   if (!evaluatedPaddingAnchor)
      self()->processUnusedNodeDuringEvaluation(paddingAnchor);
   }
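
// Illustrative note (not part of the original source): this is the classic
// overlapping-MVC fill idiom. MVI stores a single 0xF0 ('0') byte, then an MVC
// whose source starts one byte before its destination copies left to right, so
// the 0xF0 propagates through the remaining bytesToClear-1 bytes, e.g. for
// bytesToClear=4: F0 ?? ?? ??  ->  F0 F0 F0 F0.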

bool
J9::Z::CodeGenerator::alwaysGeneratesAKnownCleanSign(TR::Node *node)
   {
   switch (node->getOpCodeValue())
      {
      case TR::ud2pd:
         return true;
      default:
         return false;
      }
   return false;
   }

bool
J9::Z::CodeGenerator::alwaysGeneratesAKnownPositiveCleanSign(TR::Node *node)
   {
   switch (node->getOpCodeValue())
      {
      case TR::ud2pd:
         return true;
      default:
         return false;
      }
   return false;
   }

TR_RawBCDSignCode
J9::Z::CodeGenerator::alwaysGeneratedSign(TR::Node *node)
   {
   switch (node->getOpCodeValue())
      {
      case TR::ud2pd:
         return raw_bcd_sign_0xc;
      default:
         return raw_bcd_sign_unknown;
      }
   return raw_bcd_sign_unknown;
   }

TR_OpaquePseudoRegister *
J9::Z::CodeGenerator::allocateOpaquePseudoRegister(TR::DataType dt)
   {
   TR_OpaquePseudoRegister *temp = new (self()->trHeapMemory()) TR_OpaquePseudoRegister(dt, self()->comp());
   self()->addAllocatedRegister(temp);
   if (self()->getDebug())
      self()->getDebug()->newRegister(temp);
   return temp;
   }

TR_OpaquePseudoRegister *
J9::Z::CodeGenerator::allocateOpaquePseudoRegister(TR_OpaquePseudoRegister *reg)
   {
   TR_OpaquePseudoRegister *temp = new (self()->trHeapMemory()) TR_OpaquePseudoRegister(reg, self()->comp());
   self()->addAllocatedRegister(temp);
   if (self()->getDebug())
      self()->getDebug()->newRegister(temp);
   return temp;
   }

TR_PseudoRegister *
J9::Z::CodeGenerator::allocatePseudoRegister(TR_PseudoRegister *reg)
   {
   TR_PseudoRegister *temp = new (self()->trHeapMemory()) TR_PseudoRegister(reg, self()->comp());
   self()->addAllocatedRegister(temp);
   if (self()->getDebug())
      self()->getDebug()->newRegister(temp);
   return temp;
   }

/**
 * OPR in this context is OpaquePseudoRegister
 */
TR_OpaquePseudoRegister *
J9::Z::CodeGenerator::evaluateOPRNode(TR::Node * node)
   {
   bool isBCD = node->getType().isBCD();
   bool isAggr = node->getType().isAggregate();
   TR_ASSERT(isBCD || isAggr,"evaluateOPRNode node %s (%p) must be BCD/Aggr type\n",node->getOpCode().getName(),node);
   TR::Register *reg = isBCD ? self()->evaluateBCDNode(node) : self()->evaluate(node);
   TR_OpaquePseudoRegister *opaquePseudoReg = reg->getOpaquePseudoRegister();
   TR_ASSERT(opaquePseudoReg,"reg must be some type of opaquePseudoRegister on node %s (%p)\n",node->getOpCode().getName(),node);
   return opaquePseudoReg;
   }

void
J9::Z::CodeGenerator::freeUnusedTemporaryBasedHint(TR::Node *node)
   {
   TR_StorageReference *hint = node->getOpCode().canHaveStorageReferenceHint() ? node->getStorageReferenceHint() : NULL;
   if (hint && hint->isTemporaryBased() && hint->getTemporaryReferenceCount() == 0)
      {
      self()->pendingFreeVariableSizeSymRef(hint->getTemporarySymbolReference());
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tfreeing (pending) unused hint symRef #%d (%s) on %s (%p)\n",
            hint->getReferenceNumber(),
            self()->getDebug()->getName(hint->getTemporarySymbol()),
            node->getOpCode().getName(),
            node);
      }
   }

bool
J9::Z::CodeGenerator::storageReferencesMatch(TR_StorageReference *ref1, TR_StorageReference *ref2)
   {
   bool refMatch = false;
   if (ref1->isNodeBased() && (ref1->getNode()->getOpCode().isLoadVar() || ref1->getNode()->getOpCode().isStore()) &&
       ref2->isNodeBased() && (ref2->getNode()->getOpCode().isLoadVar() || ref2->getNode()->getOpCode().isStore()) &&
       self()->loadOrStoreAddressesMatch(ref1->getNode(), ref2->getNode()))
      {
      if (ref1->getNode()->getSize() != ref2->getNode()->getSize())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tnode based storageRefs match = false : ref1 (#%d) and ref2 (#%d) addresses match but node1 %s (%p) size=%d != node2 %s (%p) size=%d\n",
               ref1->getReferenceNumber(),ref2->getReferenceNumber(),
               ref1->getNode()->getOpCode().getName(),ref1->getNode(),ref1->getNode()->getSize(),
               ref2->getNode()->getOpCode().getName(),ref2->getNode(),ref2->getNode()->getSize());
         refMatch = false;
         }
      else
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tnode based storageRefs match = true : ref1 (#%d) %s (%p) == ref2 (#%d) %s (%p)\n",
               ref1->getReferenceNumber(),ref1->getNode()->getOpCode().getName(),ref1->getNode(),
               ref2->getReferenceNumber(),ref2->getNode()->getOpCode().getName(),ref2->getNode());
         refMatch = true;
         }
      }
   else if (ref1->isTemporaryBased() &&
            ref2->isTemporaryBased() &&
            ref1->getSymbolReference() == ref2->getSymbolReference())
      {
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\ttemp based storageRefs match = true : ref1 (#%d) == ref2 (#%d) match\n",ref1->getReferenceNumber(),ref2->getReferenceNumber());
      refMatch = true;
      }
   return refMatch;
   }

void
J9::Z::CodeGenerator::processUnusedStorageRef(TR_StorageReference *ref)
   {
   if (ref == NULL || !ref->isNodeBased())
      return;

   if (ref->getNodeReferenceCount() == 0)
      return;

   TR::Node *refNode = ref->getNode();
   TR::Node *addrChild = NULL;
   if (refNode->getOpCode().isIndirect() ||
       (ref->isConstantNodeBased() && refNode->getNumChildren() > 0))
      {
      addrChild = refNode->getFirstChild();
      }

   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tprocessUnusedStorageRef ref->node %s (%p) with addrChild %s (%p)\n",
         refNode->getOpCode().getName(),refNode,addrChild?addrChild->getOpCode().getName():"NULL",addrChild);

   if (addrChild)
      {
      TR_ASSERT(addrChild->getType().isAddress(),"addrChild %s (%p) not an address type\n",addrChild->getOpCode().getName(),addrChild);
      if (ref->getNodeReferenceCount() == 1)
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\t\tstorageRef->nodeRefCount %d == 1 so processUnusedAddressNode %s (%p) (refCount %d)\n",
               ref->getNodeReferenceCount(),addrChild->getOpCode().getName(),addrChild,addrChild->getReferenceCount());
         self()->processUnusedNodeDuringEvaluation(addrChild);
         }
      else if (self()->traceBCDCodeGen())
         {
         traceMsg(self()->comp(),"\t\tstorageRef->nodeRefCount %d > 1 so do not decRefCounts of unusedAddressNode %s (%p) (refCount %d)\n",
            ref->getNodeReferenceCount(),addrChild->getOpCode().getName(),addrChild,addrChild->getReferenceCount());
         }
      }

   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tdec storageRef->nodeRefCount %d->%d\n",
         ref->getNodeReferenceCount(),ref->getNodeReferenceCount()-1);

   ref->decrementNodeReferenceCount();
   }

TR_PseudoRegister *
J9::Z::CodeGenerator::allocatePseudoRegister(TR::DataType dt)
   {
   TR_PseudoRegister *temp = new (self()->trHeapMemory()) TR_PseudoRegister(dt, self()->comp());
   self()->addAllocatedRegister(temp);
   if (self()->getDebug())
      self()->getDebug()->newRegister(temp);
   return temp;
   }

#define TR_ACCUMULATOR_NODE_BUDGET 50

/// canUseSingleStoreAsAnAccumulator does not use visitCounts (as they are
/// already in use at this point) but instead the slightly less exact
/// getRegister() == NULL checks.
///
/// In a pathological case, such as doubly commoned nodes under the same store,
/// there is a potential for an exponential number of nodes to be visited. To
/// guard against this, maintain a count of nodes visited under one store and
/// compare against the budget below.
///
/// \note Today, it should be relatively easy to insert a Checklist, which
/// addresses the concern about visit counts above.
template <class TR_AliasSetInterface>
bool
J9::Z::CodeGenerator::canUseSingleStoreAsAnAccumulator(TR::Node *parent, TR::Node *node, TR::Node *store, TR_AliasSetInterface &storeAliases, TR::list<TR::Node*> *conflictingAddressNodes, bool justLookForConflictingAddressNodes, bool isChainOfFirstChildren, bool mustCheckAllNodes)
   {
   TR::Compilation *comp = self()->comp();

   // A note on isChainOfFirstChildren:
   // In RTC 75858, we saw the following trees for the following COBOL statements, where X is packed decimal:
   //   COMPUTE X = X - 2.
   //   COMPUTE X = 999 - X.
   //
   // pdstore "X"
   //   pdsub
   //     pdconst +999
   //     pdsub
   //       pdload "X"
   //       pdconst 2
   //
   // In this case, canUseSingleStoreAsAnAccumulator is returning true because the pdload of X is the first child of its parent, but it's missing
   // the fact that the parent pdsub is itself a second child. This is resulting in the value of X getting clobbered with +999.
   //
   // To solve this, isChainOfFirstChildren is used. It is set to true initially, and it will only remain true when called for a node's first child
   // if it was already true. In the example above, it would be true for the pdsub and the pdconst +999 and false for any other nodes.
   LexicalTimer foldTimer("canUseSingleStore", comp->phaseTimer());

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\t\texamining node %s (%p) (usage/budget = %d/%d)\n",node->getOpCode().getName(),node,self()->getAccumulatorNodeUsage(),TR_ACCUMULATOR_NODE_BUDGET);

   if (self()->getAccumulatorNodeUsage() > TR_ACCUMULATOR_NODE_BUDGET)
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\ta^a : disallow useAccum=false as node budget %d exceeded for store %s (%p)\n",
            TR_ACCUMULATOR_NODE_BUDGET,store->getOpCode().getName(),store);
      return false;
      }

   if (!mustCheckAllNodes)
      {
      if (self()->endAccumulatorSearchOnOperation(node))
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\tallow -- found node %s (%p) with endSearch = yes\n",node->getOpCode().getName(),node);
         if (conflictingAddressNodes->empty())
            {
            return true;
            }
         else
            {
            // Do not have to worry about overlaps, but still must descend to look for conflictingAddressNodes
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\tconflictingAddressNodes list is not empty so continue searching for conflictingAddressNodes\n");
            justLookForConflictingAddressNodes = true;
            }
         }
      else if (!justLookForConflictingAddressNodes && nodeMightClobberAccumulatorBeforeUse(node))
         {
         // RTC 75966: In general, we want to check all nodes until we hit a node for which endAccumulatorSearchOnOperation is true
         // (eg. zd2pd; we won't accumulate across a type change). However, if we have already done something that might clobber the
         // destination, we still need to search all nodes. So, mustCheckAllNodes is initially false but will be set to true when we
         // first encounter any node for which endAccumulatorSearchOnOperation is false. If we've already hit such a node, and we're
         // continuing the search to find conflicting address nodes, then mustCheckAllNodes can remain false.
         //
         // pdstore "a"
         //   pdsub
         //     pdconst
         //     zd2pd
         //       zdload "a"
         //
         // Previously, the code would hit the zd2pd and stop, incorrectly accumulating into "a" and potentially clobbering "a" before
         // the pdload was evaluated. Now, we'll set mustCheckAllNodes to true when we hit the pdsub, and the code that won't let us
         // accumulate because the pdload "a" isn't on a chain of first children will kick in, and we won't accumulate to "a".
         if (!mustCheckAllNodes && self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tFound a node that could clobber the accumulator before use; must check all children\n");

         mustCheckAllNodes = true;
         }
      }

   TR::Node *nodeForAliasing = NULL;
   if (!justLookForConflictingAddressNodes)
      {
      // An already evaluated OpaquePseudoRegister may have had its storageReference updated to point to
      // memory different from that on the node itself (e.g. updated by skipCopyOnStore checks in pdstoreEvaluator
      // or to a temp by ssrClobberEvaluate).
      // It is this updated memory that will be used to generate the actual instructions/memoryReferences; therefore it is
      // this memory that must be used for the overlap tests.
      if (node->getOpaquePseudoRegister())
         {
         TR_StorageReference *storageRef = node->getOpaquePseudoRegister()->getStorageReference();
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tfound evaluated reg %s : storageRef #%d ",self()->getDebug()->getName(node->getOpaquePseudoRegister()),storageRef->getReferenceNumber());
         if (storageRef->isTemporaryBased())
            {
            if (self()->traceBCDCodeGen()) traceMsg(comp,"(tempBased)\n");
            TR::SymbolReference *tempSymRef = storageRef->getTemporarySymbolReference();
            // The rest of the code below expects a node but there is not one for tempBased storageRefs, so construct/reuse one on the fly
            if (_dummyTempStorageRefNode == NULL)
               {
               _dummyTempStorageRefNode = TR::Node::createWithSymRef(node, comp->il.opCodeForDirectLoad(node->getDataType()), 0, tempSymRef);
               }
            else
               {
               TR::Node::recreate(_dummyTempStorageRefNode, comp->il.opCodeForDirectLoad(node->getDataType()));
               _dummyTempStorageRefNode->setSymbolReference(tempSymRef);
               }
            if (node->getType().isBCD())
               _dummyTempStorageRefNode->setDecimalPrecision(node->getDecimalPrecision());
            else
               TR_ASSERT(false,"unexpected type on node %s (%p)\n",node->getOpCode().getName(),node);
            nodeForAliasing = _dummyTempStorageRefNode;
            }
         else if (storageRef->isNonConstantNodeBased())
            {
            if (self()->traceBCDCodeGen()) traceMsg(comp,"(nodeBased storageRefNode %s (%p))\n",storageRef->getNode()->getOpCode().getName(),storageRef->getNode());
            TR_ASSERT(storageRef->getNode()->getOpCode().hasSymbolReference(),"storageRef node %s (%p) should have a symRef\n",
               storageRef->getNode()->getOpCode().getName(),storageRef->getNode());
            nodeForAliasing = storageRef->getNode();
            }
         else
            {
            if (self()->traceBCDCodeGen()) traceMsg(comp,"(constNodeBased storageRefNode %s (%p))\n",storageRef->getNode()->getOpCode().getName(),storageRef->getNode());
            TR_ASSERT(storageRef->isConstantNodeBased(),"expecting storageRef #%d to be constant node based\n",storageRef->getReferenceNumber());
            }
         }
      else if (node->getOpCodeValue() != TR::loadaddr && // no aliasing implications to a simple loadaddr (it is not a deref)
               node->getOpCode().hasSymbolReference())
         {
         nodeForAliasing = node;
         }
      }

   TR::SymbolReference *symRefForAliasing = NULL;
   if (nodeForAliasing)
      symRefForAliasing = nodeForAliasing->getSymbolReference();

   if (self()->traceBCDCodeGen() && nodeForAliasing && symRefForAliasing)
      traceMsg(comp,"\t\tgot nodeForAliasing %s (%p), symRefForAliasing #%d\n",
         nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing?symRefForAliasing->getReferenceNumber():-1);

   bool useAliasing = true;
   if (self()->traceBCDCodeGen() && useAliasing && !storeAliases.isZero(comp) && symRefForAliasing)
      {
      if (comp->getOption(TR_TraceAliases) && !symRefForAliasing->getUseDefAliases().isZero(comp))
         {
         traceMsg(comp, "\t\t\taliases for #%d: ",symRefForAliasing->getReferenceNumber());
         TR::SparseBitVector aliases(comp->allocator());
         symRefForAliasing->getUseDefAliases().getAliases(aliases);
         (*comp) << aliases << "\n";
         }
      traceMsg(comp,"\t\t\tsymRefForAliasing #%d isSet in storeAliases = %s\n",
         symRefForAliasing->getReferenceNumber(),storeAliases.contains(symRefForAliasing->getReferenceNumber(), comp) ? "yes":"no");
      }

   if (symRefForAliasing &&
       loadAndStoreMayOverlap(store, store->getSize(), nodeForAliasing, nodeForAliasing->getSize(), storeAliases)) // if aliases are present node can be of any node type (a call for example)
      {
      // Allow expressions like a=a+b but not a=b+a
      if (parent &&
          nodeForAliasing->getOpCode().isLoadVar() &&
          (parent->getOpCode().isBasicPackedArithmetic()) &&
          parent->getFirstChild() == nodeForAliasing &&
          isChainOfFirstChildren &&
          self()->loadOrStoreAddressesMatch(store, nodeForAliasing))
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\tallow hint (loadVar case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",
               parent->getOpCode().getName(),parent,
               store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,
               nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);

         return true;
         }
      else if (parent &&
               node->getOpaquePseudoRegister() &&
               nodeForAliasing->getOpCode().isStore() &&
               (parent->getOpCode().isBasicPackedArithmetic()) &&
               parent->getFirstChild() == node &&
               isChainOfFirstChildren &&
               self()->loadOrStoreAddressesMatch(store, nodeForAliasing))
         {
         // zdstoreA #y
         //   zdTrMultipleA
         //     zdload #y
         //
         // zdstoreB #y          <- store
         //   zdTrMultipleB      <- parent
         //     ==>zdTrMultipleA <- node with nodeForAliasing zdstoreA
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\tallow hint (storeVar case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",
               parent->getOpCode().getName(),parent,
               store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,
               nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);

         return true;
         }
      // Catch this case:
      //   pdstore #y
      //     pdshr
      //       pdload #y
      // where the store is to the leading bytes of the load. See RTC 95073.
      else if (self()->isAcceptableDestructivePDShiftRight(store, nodeForAliasing))
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\tallow hint (pdshr in place case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",
               parent->getOpCode().getName(),parent,
               store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,
               nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);

         return true;
         }
      else if (self()->isAcceptableDestructivePDModPrecision(store, nodeForAliasing))
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\tallow hint (pdMod in place case) %s (%p) -- store %s #%d (%p) location = nodeForAliasing %s #%d (%p) location\n",
               parent->getOpCode().getName(),parent,
               store->getOpCode().getName(),store->getSymbolReference()->getReferenceNumber(),store,
               nodeForAliasing->getOpCode().getName(),symRefForAliasing->getReferenceNumber(),nodeForAliasing);

         return true;
         }
      else
         {
         if (useAliasing && // checking useAliasing here because in the no info case the above loadAndStoreMayOverlap already did the pattern match
             self()->storageMayOverlap(store, store->getSize(), nodeForAliasing, nodeForAliasing->getSize()) == TR_NoOverlap)
            {
            // Get a second opinion -- the aliasing says the operations overlap, but perhaps it is too conservative,
            // so do a pattern-matching-based test to see if the operations are actually disjoint.
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\tcheck children -- useAccum=true aliasing test failed but pattern match passed for nodeForAliasing %s (%p) with symRefForAliasing #%d\n",
                  nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing->getReferenceNumber());
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\tdisallow -- useAccum=false for nodeForAliasing %s (%p) with symRefForAliasing #%d\n",
                  nodeForAliasing->getOpCode().getName(),nodeForAliasing,symRefForAliasing->getReferenceNumber());
            return false;
            }
         }
      }

   // No need to descend below a load if loadAndStoreMayOverlap has already returned false -- we have our answer and there
   // is no overlap -- unless mustCheckAllNodes is true (something higher up could clobber the accumulator before it's used,
   // so make sure no one uses it). Never any need to descend below a node that's already been evaluated.
   if (node->getOpCode().isLoad())
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\t\t%s -- found load %s (%p) under store %s (%p)\n", (mustCheckAllNodes ? "check children" : "allow"),
            node->getOpCode().getName(),node,store->getOpCode().getName(),store);
      if (!mustCheckAllNodes)
         return true;
      }
   else if (node->getRegister())
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\t\tallow -- found base case evaluated reg %s on node %s (%p) under store %s (%p)\n",
            self()->getDebug()->getName(node->getRegister()),node->getOpCode().getName(),node,store->getOpCode().getName(),store);
      return true;
      }
   // Check conflicting address nodes on the parent node too
   if (self()->foundConflictingNode(node, conflictingAddressNodes))
      {
      // If the same unevaluated BCD/Aggr node is present in the address child and the value child then prevent the accum flag from being set.
      // The problem is that if the store is used as an accumulator then there will be a circular evaluation, as the value child will have to
      // evaluate the address child in order to get the accumulated store address.
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\t\ta^a: disallow -- useAccum=false because node %s (%p) was found commoned from address tree on %s (%p)\n",
            node->getOpCode().getName(),node,store->getOpCode().getName(),store);
      return false;
      }

   for (int32_t i = node->getNumChildren() - 1; i >= 0; --i) // recurse from original node and not nodeForAliasing
      {
      TR::Node *child = node->getChild(i);
      if (self()->foundConflictingNode(child, conflictingAddressNodes))
         {
         // If the same unevaluated BCD/Aggr node is present in the address child and the value child then prevent the accum flag from being set.
         // The problem is that if the store is used as an accumulator then there will be a circular evaluation, as the value child will have to
         // evaluate the address child in order to get the accumulated store address.
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\t\ta^a: disallow -- useAccum=false because node %s (%p) was found commoned from address tree on %s (%p)\n",
               child->getOpCode().getName(),child,store->getOpCode().getName(),store);
         return false;
         }
      else
         {
         // If so far we have an unbroken chain of first children, the chain continues if this node is the value child.
         // If this isn't the value child (eg. second operand of an arith op), or the chain was broken, then we definitely
         // can't continue the chain.
         bool continueChainOfFirstChildren = false;
         if (child == node->getValueChild() && isChainOfFirstChildren)
            continueChainOfFirstChildren = true;

         self()->incAccumulatorNodeUsage();
         if (!canUseSingleStoreAsAnAccumulator(node, child, store, storeAliases, conflictingAddressNodes, justLookForConflictingAddressNodes, continueChainOfFirstChildren, mustCheckAllNodes))
            {
            if (!justLookForConflictingAddressNodes && self()->endHintOnOperation(node))
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\t\t\ta^a: endHint mismatch -- node %s (%p)\n",node->getOpCode().getName(),node);
               }
            return false;
            }
         }
      }

   return true;
   }
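
// Illustrative allowed/disallowed shapes for the loadVar case above (assumed
// trees, not part of the original source). For a = a + b the pdload of "a" sits
// on an unbroken chain of first children under the pdstore of "a", so "a" can
// safely be used as the accumulator; for a = b + a it is a second child, so
// accumulating would clobber "a" before it is read:
//
//    pdstore "a"                 pdstore "a"
//      pdadd                       pdadd
//        pdload "a"   (allowed)      pdload "b"   (disallowed)
//        pdload "b"                  pdload "a"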

// Z
bool
J9::Z::CodeGenerator::isAcceptableDestructivePDShiftRight(TR::Node *storeNode, TR::Node *nodeForAliasing)
   {
   TR::Node *shiftNode = NULL;
   TR::Node *loadNode = NULL;

   if (storeNode->getOpCodeValue() != TR::pdstore && storeNode->getOpCodeValue() != TR::pdstorei)
      return false;

   if (storeNode->getValueChild()->getOpCodeValue() == TR::pdshr)
      shiftNode = storeNode->getValueChild();

   if (!shiftNode)
      return false;

   if (shiftNode->getDecimalRound() != 0)
      return false;

   if (shiftNode->getChild(0)->getOpCode().isLoadVar())
      loadNode = shiftNode->getChild(0);

   if (!loadNode)
      return false;

   if (nodeForAliasing && loadNode != nodeForAliasing)
      return false;

   return self()->loadOrStoreAddressesMatch(storeNode, loadNode);
   }
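
// Illustrative IL shape accepted above (assumed operands, not part of the
// original source):
//
//    pdstore "x"            <- storeNode
//      pdshr (round=0)      <- shiftNode
//        pdload "x"         <- loadNode, same address as the store
//        iconst 2              (shift amount)
//
// An in-place right shift only writes the leading bytes of "x" (see the RTC
// 95073 comment in canUseSingleStoreAsAnAccumulator), so the store can reuse
// the load's storage as an accumulator.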

/// Example IL:
///
///    pdstorei s=5
///      addr+3
///      pdModPrec s=5
///        pdX (with nodeForAliasing address : addr) s=8
///
/// The above IL is truncating the sourceNode (pdX) and storing the result back
/// to the same field right aligned. In this case it is ok to accumulate, as an
/// exact right aligned subfield of the source is being operated on.
///
bool
J9::Z::CodeGenerator::isAcceptableDestructivePDModPrecision(TR::Node *storeNode, TR::Node *nodeForAliasing)
   {
   // Currently disabled, as this leads to a completely overlapping MVC that is even slower than going through a temp;
   // it should be re-enabled when redundant MVC removal is complete.
   return false;

   if (storeNode->getOpCodeValue() != TR::pdstore && storeNode->getOpCodeValue() != TR::pdstorei)
      return false;

   if (!nodeForAliasing->getOpCode().isIndirect())
      return false;

   if (storeNode->getValueChild()->getOpCodeValue() != TR::pdModifyPrecision)
      return false;

   TR::Node *modPrecNode = storeNode->getValueChild();
   TR::Node *sourceNode = modPrecNode->getFirstChild();

   bool matchSourceAndAliasingNode = false;
   if (sourceNode == nodeForAliasing)
      {
      matchSourceAndAliasingNode = true;
      }
   else if (sourceNode->getOpaquePseudoRegister() &&
            sourceNode->getOpaquePseudoRegister()->getStorageReference()->isNonConstantNodeBased() &&
            sourceNode->getOpaquePseudoRegister()->getStorageReference()->getNode() == nodeForAliasing)
      {
      matchSourceAndAliasingNode = true;
      }

   if (!matchSourceAndAliasingNode)
      return false;

   int32_t storePrec = storeNode->getDecimalPrecision();
   int32_t modPrec = modPrecNode->getDecimalPrecision();
   int32_t sourcePrec = sourceNode->getDecimalPrecision();

   if (storePrec != modPrec)
      return false;

   if (sourceNode->getSize() != nodeForAliasing->getSize())
      return false;

   if (modPrec >= sourcePrec) // only handling truncations and this is not a truncation
      return false;

   int32_t truncatedBytes = nodeForAliasing->getSize() - storeNode->getSize();

   return self()->validateAddressOneToAddressOffset(truncatedBytes,
                                                    nodeForAliasing->getFirstChild(),
                                                    nodeForAliasing->getSymbolReference()->getOffset(),
                                                    storeNode->getFirstChild(),
                                                    storeNode->getSymbolReference()->getOffset(),
                                                    NULL, // _baseLoadsThatAreNotKilled = NULL (not tracking here)
                                                    self()->traceBCDCodeGen());
   }
1344
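// Worked example (sketch) matching the IL in the comment above: the source is 8 bytes at
// addr and the truncated result is 5 bytes, so truncatedBytes = 8 - 5 = 3 and the store
// must land at addr+3 for the result to be an exact right aligned subfield of the source.
// That is exactly what validateAddressOneToAddressOffset(3, addr, ..., addr+3, ...) checks.
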
1345
// Z
1346
bool
1347
J9::Z::CodeGenerator::validateAddressOneToAddressOffset(int32_t expectedOffset,
1348
TR::Node *addr1,
1349
int64_t addr1ExtraOffset,
1350
TR::Node *addr2,
1351
int64_t addr2ExtraOffset,
1352
TR::list<TR::Node*> *_baseLoadsThatAreNotKilled,
1353
bool trace) // _baseLoadsThatAreNotKilled can be NULL
1354
{
1355
TR_ASSERT(addr1->getType().isAddress(),"addr1 %s (%p) must an address type\n",addr1->getOpCode().getName(),addr1);
1356
TR_ASSERT(addr2->getType().isAddress(),"addr2 %s (%p) must an address type\n",addr2->getOpCode().getName(),addr2);
1357
1358
bool canGetOffset = false;
1359
int32_t addrOffset = 0;
1360
self()->getAddressOneToAddressTwoOffset(&canGetOffset, addr1, addr1ExtraOffset, addr2, addr2ExtraOffset, &addrOffset, _baseLoadsThatAreNotKilled, trace);
1361
if (!canGetOffset)
1362
{
1363
if (trace)
1364
traceMsg(self()->comp(),"\tvalidateAddressOneToAddressOffset = false : could not compute offset between addr1 %s (%p) (+%lld) and addr2 %s (%p) (+%lld)\n",
1365
addr1->getOpCode().getName(),addr1,addr1ExtraOffset,addr2->getOpCode().getName(),addr2,addr2ExtraOffset);
1366
return false;
1367
}
1368
1369
// some examples:
1370
// pdstorei (highDigitsStore or lowDigitsStore) p=15,s=8
1371
// addr1
1372
// ...
1373
// The addr2 could be from an MVO:
1374
// tt
1375
// mvo
1376
// dstAddr
1377
// addr2 = addr1 + expectedOffset (3)
1378
//
1379
if (addrOffset != expectedOffset)
1380
{
1381
if (trace)
1382
traceMsg(self()->comp(),"\tvalidateAddressOneToAddressOffset = false : addrOffset %d not the expected value of %d between addr1 %s (%p) (+%lld) and addr2 %s (%p) (+%lld)\n",
1383
addrOffset,expectedOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,addr2->getOpCode().getName(),addr2,addr2ExtraOffset);
1384
return false;
1385
}
1386
return true;
1387
}
1388
1389
// _baseLoadsThatAreNotKilled is used when the caller is doing its own tracking of loads that are not killed between treetops.
// For these loads syntactic address matching of the loads is allowed even if the node pointers themselves are not the same.
// That is
//
// load1 "A"
//
// intervening treetops checked by caller not to kill "A"
//
// load2 "A"
//
// load1 and load2 can be matched for symRef and other properties as the caller has checked that "A" is not killed in between the loads
//
// resultOffset = address2 - address1 or equivalently address2 = address1 + resultOffset
//
// Z
void
J9::Z::CodeGenerator::getAddressOneToAddressTwoOffset(bool *canGetOffset,
                                                      TR::Node *addr1,
                                                      int64_t addr1ExtraOffset,
                                                      TR::Node *addr2,
                                                      int64_t addr2ExtraOffset,
                                                      int32_t *offset,
                                                      TR::list<TR::Node*> *_baseLoadsThatAreNotKilled,
                                                      bool trace) // _baseLoadsThatAreNotKilled can be NULL
   {
   TR::Compilation *comp = self()->comp();
   int64_t offset64 = 0;
   *canGetOffset = false;
   *offset = 0;
   bool foundOffset = false;

   if (!foundOffset &&
       self()->addressesMatch(addr1, addr2))
      {
      foundOffset = true;
      offset64 = (addr2ExtraOffset - addr1ExtraOffset);
      if (trace)
         traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : node matches case\n",
            addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,offset64);
      }

   if (!foundOffset &&
       self()->isSupportedAdd(addr1) &&
       self()->isSupportedAdd(addr2) &&
       self()->addressesMatch(addr1->getFirstChild(), addr2->getFirstChild()))
      {
      if (addr1->getSecondChild()->getOpCode().isIntegralConst() &&
          addr2->getSecondChild()->getOpCode().isIntegralConst())
         {
         foundOffset = true;
         int64_t addr1Offset = addr1->getSecondChild()->get64bitIntegralValue() + addr1ExtraOffset;
         int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
         offset64 = (addr2Offset - addr1Offset);
         if (trace)
            traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : both adds case\n",
               addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,addr1ExtraOffset,offset64);
         }
      }

   // =>i2a
   //
   // aiadd
   //    =>i2a
   //    iconst 8
   //
   if (!foundOffset &&
       self()->isSupportedAdd(addr2) &&
       addr2->getSecondChild()->getOpCode().isIntegralConst() &&
       self()->addressesMatch(addr1, addr2->getFirstChild()))
      {
      foundOffset = true;
      int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
      offset64 = addr2Offset;
      if (trace)
         traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = addr1 %s (%p) + offset (%lld) : 2nd add case\n",
            addr2->getOpCode().getName(),addr2,addr2ExtraOffset,addr1->getOpCode().getName(),addr1,offset64);
      }

   if (!foundOffset &&
       _baseLoadsThatAreNotKilled &&
       !_baseLoadsThatAreNotKilled->empty() &&
       self()->isSupportedAdd(addr1) &&
       self()->isSupportedAdd(addr2) &&
       addr1->getSecondChild()->getOpCode().isIntegralConst() &&
       addr2->getSecondChild()->getOpCode().isIntegralConst())
      {
      TR::Node *baseLoad1 = self()->getAddressLoadVar(addr1->getFirstChild(), trace);
      TR::Node *baseLoad2 = self()->getAddressLoadVar(addr2->getFirstChild(), trace);

      if (baseLoad1 != NULL && baseLoad2 != NULL &&
          (std::find(_baseLoadsThatAreNotKilled->begin(),_baseLoadsThatAreNotKilled->end(), baseLoad1) !=
           _baseLoadsThatAreNotKilled->end()) &&
          self()->directLoadAddressMatch(baseLoad1, baseLoad2, trace))
         {
         foundOffset = true;
         int64_t addr1Offset = addr1->getSecondChild()->get64bitIntegralValue() + addr1ExtraOffset;
         int64_t addr2Offset = addr2->getSecondChild()->get64bitIntegralValue() + addr2ExtraOffset;
         offset64 = (addr2Offset - addr1Offset);
         if (trace)
            traceMsg(comp,"\t\t(addr2 %s (%p) + %lld) = (addr1 %s (%p) + %lld) + offset (%lld) : baseLoad1 %s (%p) in notKilledList, both adds case\n",
               addr2->getOpCode().getName(),addr2,addr2ExtraOffset,
               addr1->getOpCode().getName(),addr1,addr1ExtraOffset,
               offset64,
               baseLoad1->getOpCode().getName(),baseLoad1);
         }
      }

   if (!foundOffset ||
       self()->isOutOf32BitPositiveRange(offset64, trace))
      {
      return;
      }

   *canGetOffset = true;
   *offset = (int32_t)offset64;

   return;
   }

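// Illustrative sketch (not from the original source) of the "both adds case" above:
//
//    addr1: aiadd                    addr2: aiadd
//              aload "base"                    ==>aload "base"   <- matching first children
//              iconst 8                        iconst 20
//
// offset64 = (20 + addr2ExtraOffset) - (8 + addr1ExtraOffset); the offset is only
// reported back to the caller when it also fits in the positive 32-bit range.
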
TR::Node *
J9::Z::CodeGenerator::getAddressLoadVar(TR::Node *node, bool trace)
   {
   // allow a non-truncating address cast from integral types
   if ((node->getOpCodeValue() == TR::i2a || node->getOpCodeValue() == TR::l2a) &&
       (node->getSize() == node->getFirstChild()->getSize()))
      {
      node = node->getFirstChild();
      }

   if (node->getOpCode().isLoadVar())
      return node;
   else
      return NULL;
   }

void
J9::Z::CodeGenerator::addStorageReferenceHints(TR::Node *node)
   {
   TR::list<TR::Node*> leftMostNodesList(getTypedAllocator<TR::Node*>(self()->comp()->allocator()));
   TR::StackMemoryRegion stackMemoryRegion(*self()->trMemory());

   self()->markStoreAsAnAccumulator(node);

   TR::Node *bestNode = NULL;
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\n--start-- examining cg treeTop %s (%p)\n",node->getOpCode().getName(),node);
   int32_t storeSize = 0;
   self()->examineNode(NULL, node, bestNode, storeSize, leftMostNodesList);
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"--end-- examining cg treeTop %s (%p)\n\n",node->getOpCode().getName(),node);
   }

// Z
void
J9::Z::CodeGenerator::examineNode(
      TR::Node *parent,
      TR::Node *node,
      TR::Node *&bestNode,
      int32_t &storeSize,
      TR::list<TR::Node*> &leftMostNodesList)
   {
   TR::Compilation *comp = self()->comp();
   TR::Node *checkNode = node;
   bool isAccumStore = node->getOpCode().canUseStoreAsAnAccumulator();
   bool isLoad = node->getOpCode().isLoad();
   bool endHintOnNode = self()->endHintOnOperation(node) || isLoad;
   bool isConversionToNonAggrOrNonBCD = node->getOpCode().isBCDToNonBCDConversion();

   if (isAccumStore)
      storeSize = node->getSize();

   if (!node->hasBeenVisitedForHints()) // check other nodes using hasBeenVisitedForHints
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tvisiting node - %s (%p), bestNode - %s (%p) (endHintOnNode=%s)\n",
            node->getOpCode().getName(),node,bestNode?bestNode->getOpCode().getName():"NULL",bestNode,endHintOnNode?"true":"false");

      node->setHasBeenVisitedForHints();

      bool nodeCanHaveHint = node->getOpCode().canHaveStorageReferenceHint();
      bool isInterestingStore = nodeCanHaveHint || isAccumStore || isConversionToNonAggrOrNonBCD;
      bool isNonOverflowPDShift = node->getOpCode().isPackedShift() && node->getOpCodeValue() != TR::pdshlOverflow;
      bool isSafeWideningConversion =
         TR::ILOpCode::isPackedConversionToWiderType(node->getOpCodeValue()) && node->getDecimalPrecision() <= node->getFirstChild()->getDecimalPrecision();

      if (isInterestingStore &&
          (isNonOverflowPDShift ||
           isSafeWideningConversion ||
           node->getOpCodeValue() == TR::pdModifyPrecision) &&
          (node->getFirstChild()->getReferenceCount() == 1))
         {
         // pdshl/pdModPrec nodes take care of zeroing the top nibble in the pad byte for the final shifted value (so we can skip clearing
         // the nibble in the intermediate arithmetic result).
         // Non-widening pd2zd nodes only select the exact number of digits so the top nibble will be ignored for even precision values.
         // If the child has a refCount > 1 then subsequent uses may not also have a pdshl/pdModPrec/pd2zd parent so we must conservatively clear the nibble right
         // after the arithmetic operation.
         // TODO: if all subsequent uses are also under truncating pdshl/pdModPrec nodes then the clearing can also be skipped -- but finding this out will
         //       require more analysis
         node->getFirstChild()->setSkipPadByteClearing(true);
         }

      if (nodeCanHaveHint &&
          bestNode &&
          node->getStorageReferenceSize() > bestNode->getStorageReferenceSize() && // end hint before
          endHintOnNode && // end hint after
          !leftMostNodesList.empty())
         {
         // when the current node will end a hint before and after then tag any nodes above this node with the store hint so it can store into the final receiver
         // pdstore      <- hint
         //    pdshr     <- tag this list node with the pdstore hint
         //       ud2pd  <- node (endHintOnNode=true and ud2pd size > store size) -- alloc a new temp
         //
         // when the current node only ends the hint after (such as a zd2pd) then delay calling processNodeList so the zd2pd will also get the store hint
         // pdstore      <- hint
         //    pdshr     <- tag this list node with the pdstore hint
         //       zd2pd  <- node (endHintOnNode=true and zd2pd size <= store size) <- tag this list node with the pdstore hint
         //
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tendHintOnNode=true so call processNodeList before examining ending hint node %p\n",node);
         // processNodeList will reset storeSize so save and restore the current storeSize value so it will persist for the current node
         // pd2ud     <- sets storeSize to 16
         //    zd2pd  <- node (should also use storeSize=16)
         // by persisting it for the zd2pd node this operation can initialize up to 16 bytes for its parent
         int32_t savedStoreSize = storeSize;
         self()->processNodeList(bestNode, storeSize, leftMostNodesList);
         storeSize = savedStoreSize;
         }

      TR::ILOpCodes opCode = node->getOpCodeValue();
      if (isInterestingStore)
         {
         // TODO: if a pdstore is to an auto then continually increase the size of this auto so it is the biggest on the left
         //       most subtree (i.e. force it to be the bestNode)
         if ((bestNode == NULL) ||
             (node->getStorageReferenceSize() > bestNode->getStorageReferenceSize()) ||
             (self()->nodeRequiresATemporary(node) && bestNode->getOpCode().isStore() && !self()->isAcceptableDestructivePDShiftRight(bestNode, NULL /* let the function find the load node */)))
            {
            if (!isAccumStore || node->useStoreAsAnAccumulator())
               {
               bestNode = node;
               if (self()->traceBCDCodeGen())
                  {
                  if (isAccumStore)
                     traceMsg(comp,"\t\tfound new store (canUse = %s) bestNode - %s (%p) with actual size %d and storageRefResultSize %d\n",
                        bestNode->useStoreAsAnAccumulator() ? "yes":"no", bestNode->getOpCode().getName(),bestNode, bestNode->getSize(),bestNode->getStorageReferenceSize());
                  else
                     traceMsg(comp,"\t\tfound new non-store bestNode - %s (%p) (isConversionToNonAggrOrNonBCD=%s, isForcedTemp=%s) with actual size %d and storageRefResultSize %d\n",
                        bestNode->getOpCode().getName(),bestNode,isConversionToNonAggrOrNonBCD?"yes":"no",self()->nodeRequiresATemporary(node)?"yes":"no",bestNode->getSize(),bestNode->getStorageReferenceSize());
                  }
               }
            }

         if (!isAccumStore && !isConversionToNonAggrOrNonBCD && !isLoad) // don't add stores or bcd2x or load nodes to the list
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\tadd node - %s (%p) to list\n",node->getOpCode().getName(),node);
            leftMostNodesList.push_front(node);
            }
         }
      // end hints on some nodes so
      // 1) the same storageReference isn't used for both sides of a zd2pd or pd2zd conversion
      // 2) a storageReference for a commoned node is not used 'across' a conversion:
      // pdadd
      //    i2pd        :: end hint here so the commoned pdshr storageReference is not used for the i2pd/pdadd subexpression
      //       iadd
      //          pd2i  :: start new hint here
      //             ==>pdshr
      if (endHintOnNode)
         {
         self()->processNodeList(bestNode, storeSize, leftMostNodesList);
         switch (node->getOpCodeValue())
            {
            case TR::pd2ud:
            case TR::pd2udsl:
            case TR::pd2udst:
               storeSize = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision()); // i.e. the size of the result in packed bytes (node->getSize is in ud bytes)
               break;
            default:
               break;
            }

         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tendHintOnNode=true for node - %s (%p) setting storeSize to %d\n",node->getOpCode().getName(),node,storeSize);
         }

      // visit the value child first for indirect stores so the possible store hint is not lost on the address child
      if (node->getOpCode().isStoreIndirect())
         {
         int32_t valueChildIndex = node->getOpCode().isIndirect() ? 1 : 0;
         self()->examineNode(node, node->getChild(valueChildIndex), bestNode, storeSize, leftMostNodesList);
         for (int32_t i = 0; i < node->getNumChildren(); i++)
            {
            if (i != valueChildIndex)
               self()->examineNode(node, node->getChild(i), bestNode, storeSize, leftMostNodesList);
            }
         }
      else
         {
         for (int32_t i = 0; i < node->getNumChildren(); i++)
            self()->examineNode(node, node->getChild(i), bestNode, storeSize, leftMostNodesList);
         }
      }
   else
      {
      checkNode = parent;
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tnot descending node - %s (%p) because it has been visited already\n",node->getOpCode().getName(),node);
      TR_OpaquePseudoRegister *reg = node->getOpCodeValue() == TR::BBStart ? NULL : node->getOpaquePseudoRegister();
      if (reg)
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tnode - %s (%p) with reg %s is an already evaluated bcd node with refCount=%d\n",
               node->getOpCode().getName(),node,self()->getDebug()->getName(static_cast<TR::Register*>(reg)),node->getReferenceCount());

         if (!reg->getStorageReference()->isTemporaryBased())
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d is not a temp so do not update bestNode with node - %s (%p) but do reset reg %s isInit to false\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,self()->getDebug()->getName(reg));
            // setting to false here forces the commoned expression to re-initialize the register to the new hint for one of two reasons:
            // 1) functionally required for non-temps as these storage references can not be clobbered (they are program variables or constants)
            // 2) for perf to avoid a clobber evaluate (temp to temp move) of the already initialized reg -- instead begin using the store hint and leave the temp alone
            reg->setIsInitialized(false);
            }
         else if (bestNode && bestNode->getOpCode().isStore() && node->getReferenceCount() >= 1) // use >= 1 so useNewStoreHint can always be used for ZAP widening on initializations
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d with a store bestNode so do not update bestNode with node - %s (%p) refCount=%d\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,node->getReferenceCount());
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\t\treg storageRef #%d is a final-use (refCount==1) temp so set bestNode to node - %s (%p) reg->isInit=%s (and reuse temp storageRef)\n",
                  reg->getStorageReference()->getReferenceNumber(),node->getOpCode().getName(),node,reg->isInitialized()?"yes":"no");
            if (bestNode)
               storeSize = bestNode->getSize();
            bestNode = node;
            }
         }

      if ((leftMostNodesList.empty()) ||                 // just finished with a left most path but there are no nodes to tag with a hint
          (checkNode == leftMostNodesList.front()))      // just finished with a left most path and there are nodes to tag with the hint
         {
         if (self()->traceBCDCodeGen())
            {
            traceMsg(comp,"\t\tdetected the end of a left most path because ");
            if ((!leftMostNodesList.empty()) && (checkNode == leftMostNodesList.front()))
               traceMsg(comp,"checkNode - %s (%p) matches head of list %p\n",checkNode?checkNode->getOpCode().getName():"NULL",checkNode,leftMostNodesList.front());
            else if (leftMostNodesList.empty()) // i.e. bestNode is your only node so you haven't seen any other higher up nodes to add to the list
               traceMsg(comp,"bestNode - %s (%p) is set and the head of list is NULL for node - %s (%p)\n",
                  (bestNode ? bestNode->getOpCode().getName():"NULL"),bestNode,node->getOpCode().getName(),node);
            else
               traceMsg(comp,"of an unknown reason for node - %s (%p) (FIXME - add a reason)\n",node->getOpCode().getName(),node);
            }
         if (leftMostNodesList.empty())
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tleftMostNodesList is empty so clear bestNode - %s (%p->NULL) for current node - %s (%p)\n",
                  bestNode?bestNode->getOpCode().getName():"NULL",bestNode,node->getOpCode().getName(),node);
            bestNode = NULL;
            storeSize = 0;
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tcalling processNodeList with bestNode - %s (%p) because leftMostNodesList is not empty for current node - %s (%p)\n",
                  bestNode?bestNode->getOpCode().getName():"NULL",bestNode,node->getOpCode().getName(),node);
            self()->processNodeList(bestNode, storeSize, leftMostNodesList);
            }
         }
      }

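// Illustrative sketch (not from the original source) of the hint-tagging walk above:
// for a tree such as
//
//    pdstore "a" s=8          <- accum store becomes bestNode
//       pdadd                 <- added to leftMostNodesList
//          pdshl              <- added to leftMostNodesList
//             ...
//          pdconst
//
// the left-most walk ends at the bottom of the pdadd/pdshl chain and processNodeList
// then tags both list nodes with a storage reference hint based on the "a" store, so
// the intermediate results can be accumulated directly into "a"'s storage instead of
// going through a temporary.
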
// Z
void
J9::Z::CodeGenerator::processNodeList(
      TR::Node *&bestNode,
      int32_t &storeSize,
      TR::list<TR::Node*> &leftMostNodesList)
   {
   TR::Compilation *comp = self()->comp();

   if (bestNode)
      {
      bool keepTrackOfSharedNodes = false;
      TR::SymbolReference *memSlotHint = NULL;
      TR_StorageReference *storageRefHint = NULL;
      if (bestNode->getOpaquePseudoRegister())
         {
         TR_OpaquePseudoRegister *reg = bestNode->getOpaquePseudoRegister();
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tbestNode - %s (%p) already has a register (%s) so use reg->getStorageReference #%d and %s\n",
               bestNode->getOpCode().getName(),bestNode,self()->getDebug()->getName(reg),reg->getStorageReference()->getReferenceNumber(),
               self()->getDebug()->getName(reg->getStorageReference()->getSymbol()));
         if (reg->getStorageReference()->isTemporaryBased() &&
             storeSize > reg->getLiveSymbolSize())
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\treg->getStorageReference #%d is tempBased and requested storeSize %d > regLiveSymSize %d so increase tempSize\n",
                  reg->getStorageReference()->getReferenceNumber(),storeSize,reg->getLiveSymbolSize());
            reg->increaseTemporarySymbolSize(storeSize-reg->getLiveSymbolSize());
            }
         storageRefHint = reg->getStorageReference();
         }
      else if (bestNode->getOpCode().isStore())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\t\tbestNode - %s (%p) is a store so create a new node based storage reference #%d\n",
               bestNode->getOpCode().getName(),bestNode,bestNode->getSymbolReference()->getReferenceNumber());
         storageRefHint = TR_StorageReference::createNodeBasedHintStorageReference(bestNode, comp);
         }
      else
         {
         if (!leftMostNodesList.empty())
            {
            int32_t bestNodeSize = bestNode->getStorageReferenceSize();
            int32_t tempSize = std::max(storeSize, bestNodeSize);
            if (self()->traceBCDCodeGen())
               {
               traceMsg(comp,"\t\tbestNode - %s (%p) is a BCD arithmetic or conversion op (isBCDToNonBCDConversion %s) and list is not empty so allocate a new temporary based storage reference\n",
                  bestNode->getOpCode().getName(),bestNode,bestNode->getOpCode().isBCDToNonBCDConversion()?"yes":"no");
               traceMsg(comp,"\t\tsize of temp is max(storeSize,bestNodeSize) = max(%d,%d) = %d\n", storeSize, bestNodeSize, tempSize);
               }
            storageRefHint = TR_StorageReference::createTemporaryBasedStorageReference(tempSize, comp);
            if (tempSize == bestNodeSize)
               {
               keepTrackOfSharedNodes=true;
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\t\tsetting keepTrackOfSharedNodes=true because hintSize is based on a non-store operation (bestNode %s - %p)\n",
                     bestNode->getOpCode().getName(),bestNode);
               }
            }
         else if (self()->traceBCDCodeGen())
            {
            traceMsg(comp,"\t\tbestNode %p is a BCD arithmetic or conversion op but list is empty so do not allocate a new temporary based storage reference\n",bestNode);
            }
         }
      for (auto listIt = leftMostNodesList.begin(); listIt != leftMostNodesList.end(); ++listIt)
         {
         TR_ASSERT(!(*listIt)->getOpCode().isStore(),"stores should not be in the list\n");
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\ttag (*listIt) - %s (%p) with storageRefHint #%d (%s)\n",
               (*listIt)->getOpCode().getName(),*listIt,storageRefHint->getReferenceNumber(),self()->getDebug()->getName(storageRefHint->getSymbol()));
         (*listIt)->setStorageReferenceHint(storageRefHint);
         if (keepTrackOfSharedNodes)
            storageRefHint->addSharedNode(*listIt);

         // If a child node has lower precision than the storage hint make sure its skipPadByteClearing is off
         if (TR::ILOpCode::isPackedConversionToWiderType((*listIt)->getOpCodeValue()))
            {
            TR::Node *firstChild = (*listIt)->getFirstChild();
            if (firstChild->chkSkipPadByteClearing() &&
                storageRefHint->getSymbolSize() > TR::DataType::getSizeFromBCDPrecision((*listIt)->getDataType(), firstChild->getDecimalPrecision()))
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\tUnset skipPadByteClearing on node %s (%p): storage ref hint has size %d and converted node has size %d\n",
                     firstChild->getOpCode().getName(),firstChild,storageRefHint->getSymbolSize(),TR::DataType::getSizeFromBCDPrecision((*listIt)->getDataType(), firstChild->getDecimalPrecision()));
               firstChild->setSkipPadByteClearing(false);
               }
            }
         }
      }

   storeSize = 0;
   bestNode = NULL;
   leftMostNodesList.clear();
   }

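// Worked example (sketch): with storeSize=16 carried over from a pd2ud and a non-store
// bestNode whose storageRefResultSize is 8, the temporary above is created with
// tempSize = max(16, 8) = 16, and keepTrackOfSharedNodes stays false because the hint
// size came from the store size and not from the non-store bestNode itself.
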
// Z
void
J9::Z::CodeGenerator::markStoreAsAnAccumulator(TR::Node *node)
   {
   TR::Compilation *comp = self()->comp();
   LexicalTimer foldTimer("markStoreAsAccumulator", comp->phaseTimer());

   if (!node->getOpCode().isStore())
      return;

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"markStoreAsAnAccumulator for node %s (%p) - useAliasing=%s\n",node->getOpCode().getName(),node,"yes");

   TR::list<TR::Node*> conflictingAddressNodes(getTypedAllocator<TR::Node*>(comp->allocator()));

   if (node->getOpCode().canUseStoreAsAnAccumulator())
      {
      TR_UseDefAliasSetInterface aliases = node->getSymbolReference()->getUseDefAliases();

      if (self()->traceBCDCodeGen())
         {
         traceMsg(comp, "\nUseAsAnAccumulator check for store %s (%p) #%d",node->getOpCode().getName(),node,node->getSymbolReference()->getReferenceNumber());
         if (comp->getOption(TR_TraceAliases) && !aliases.isZero(comp))
            {
            traceMsg(comp, ", storeAliases : ");
            TR::SparseBitVector printAliases(comp->allocator());
            aliases.getAliases(printAliases);
            (*comp) << printAliases;
            }
         traceMsg(comp,"\n");
         }

      if (node->getOpCode().isIndirect())
         {
         conflictingAddressNodes.clear();
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tlook for conflicting nodes in address subtree starting at %s (%p)\n",node->getFirstChild()->getOpCode().getName(),node->getFirstChild());
         self()->collectConflictingAddressNodes(node, node->getFirstChild(), &conflictingAddressNodes);
         }

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\n\texamine nodes in value subtree starting at %s [%s]\n",node->getValueChild()->getOpCode().getName(),node->getValueChild()->getName(comp->getDebug()));

      self()->setAccumulatorNodeUsage(0);
      // parent=NULL, justLookForConflictingAddressNodes=false, isChainOfFirstChildren=true, mustCheckAllNodes=false
      bool canUse = self()->canUseSingleStoreAsAnAccumulator(NULL, node->getValueChild(), node, aliases, &conflictingAddressNodes, false, true, false);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tfinal accumulatorNodeUsage = %d/%d\n",self()->getAccumulatorNodeUsage(),TR_ACCUMULATOR_NODE_BUDGET);
      self()->setAccumulatorNodeUsage(0);

      if (canUse &&
          performTransformation(comp, "%sset new UseStoreAsAnAccumulator=true on %s [%s]\n", OPT_DETAILS, node->getOpCode().getName(),node->getName(comp->getDebug())))
         {
         node->setUseStoreAsAnAccumulator(canUse);
         }
      }
   }

/// If true, this node's operation might overwrite an accumulator by evaluating one child before loading
/// the value from another, if we choose to accumulate. (Accumulation may still be safe, but we'll need
/// to investigate all child nodes to be sure.)
/// eg.
///
/// pdstore "a"
///    pdsub
///       pdconst
///       zd2pd
///          zdload "a"
///
/// Accumulating to "a" is incorrect here because the pdconst will get evaluated into "a" before the
/// zdload is evaluated, so when we encounter the pdsub, we need to check all children.
///
bool nodeMightClobberAccumulatorBeforeUse(TR::Node *node)
   {
   TR_ASSERT(node != NULL, "NULL node in nodeMightClobberAccumulatorBeforeUse\n");

   if (!node->getType().isBCD())
      return false;

   if (node->getOpCode().isAnyBCDArithmetic())
      return true;

   if (node->getNumChildren() == 1)
      return false;

   if (node->getOpCode().isShift()
       || node->getOpCode().isConversion()
       || node->getOpCode().isSetSign()
       || node->getOpCode().isSetSignOnNode()
       || node->getOpCode().isExponentiation())
      return false;

   return true;
   }

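// Sketch of the safe direction (not from the original source): a unary chain such as
//
//    pdstore "a"
//       pdModPrec
//          zd2pd
//             zdload "a"
//
// has only single-child operations below the store, so nothing can be evaluated into
// "a" before the zdload is consumed; nodeMightClobberAccumulatorBeforeUse returns false
// for each node in the chain and the chain-of-first-children walk in
// canUseSingleStoreAsAnAccumulator can proceed without checking all children.
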
void
J9::Z::CodeGenerator::correctBadSign(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, TR::MemoryReference *memRef)
   {
   if (reg && reg->hasKnownBadSignCode())
      {
      int32_t sign = 0xf; // can choose any valid sign here but 0xf will be the cheapest to set
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tcorrectBadSign node %p: reg %s hasKnownBadSignCode()=true so force sign to a valid sign 0x%x\n",node,self()->getDebug()->getName(reg),sign);
      self()->genSignCodeSetting(node, NULL, endByte, generateS390RightAlignedMemoryReference(*memRef, node, 0, self()), sign, reg, 0, false); // numericNibbleIsZero=false
      }
   }

int32_t
J9::Z::CodeGenerator::genSignCodeSetting(TR::Node *node, TR_PseudoRegister *targetReg, int32_t endByte, TR::MemoryReference *targetMR, int32_t sign, TR_PseudoRegister *srcReg, int32_t digitsToClear, bool numericNibbleIsZero)
   {
   TR::CodeGenerator *cg = self();
   TR::Compilation *comp = cg->comp();
   int32_t digitsCleared = 0;
   int32_t signCodeOffset = TR::DataType::getSignCodeOffset(node->getDataType(), endByte);

   TR_ASSERT(sign == TR::DataType::getIgnoredSignCode() || (sign >= TR::DataType::getFirstValidSignCode() && sign <= TR::DataType::getLastValidSignCode()),"unexpected sign of 0x%x in genSignCodeSetting\n",sign);

   if (sign == TR::DataType::getIgnoredSignCode())
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tgenSignCodeSetting: node=%p, sign==ignored case srcReg %s, targetReg %s, srcReg->isSignInit %d, targetReg->isSignInit %d\n",
            node,srcReg?cg->getDebug()->getName(srcReg):"NULL",targetReg?cg->getDebug()->getName(targetReg):"NULL",srcReg?srcReg->signStateInitialized():0,targetReg?targetReg->signStateInitialized():0);
      if (targetReg != srcReg)
         {
         if (targetReg)
            {
            if (srcReg)
               {
               targetReg->transferSignState(srcReg, true); // digitsLost=true -- conservatively set as this may be part of a truncation
               }
            else
               {
               targetReg->setHasKnownBadSignCode();
               if (cg->traceBCDCodeGen())
                  traceMsg(comp,"\tsign==ignored case and srcReg==NULL so setHasKnownBadSignCode=true on targetReg %s\n",cg->getDebug()->getName(targetReg));
               }
            }
         }
      return digitsCleared;
      }

   int32_t srcSign = TR::DataType::getInvalidSignCode();
   if (srcReg)
      {
      if (srcReg->hasKnownOrAssumedSignCode())
         srcSign = srcReg->getKnownOrAssumedSignCode();
      else if (srcReg->hasTemporaryKnownSignCode())
         srcSign = srcReg->getTemporaryKnownSignCode();
      }

   sign = (sign&0xF);
   bool isEffectiveNop = (srcSign == sign);

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\tgenSignCodeSetting: node=%p, endByte=%d, sign=0x%x, signCodeOffset=%d, srcReg=%s, digitsToClear=%d, numericNibbleIsZero=%s (srcSign=0x%x, hasCleanSign=%s, hasPrefSign=%s, isEffectiveNop=%s)\n",
         node,endByte,sign,signCodeOffset,srcReg ? self()->getDebug()->getName(srcReg):"NULL",digitsToClear,numericNibbleIsZero ?"yes":"no",
         srcSign,srcReg && srcReg->hasKnownOrAssumedCleanSign()?"true":"false",
         srcReg && srcReg->hasKnownOrAssumedPreferredSign()?"true":"false",isEffectiveNop?"yes":"no");

   if (isEffectiveNop)
      {
      if (srcReg && targetReg)
         targetReg->transferSignState(srcReg, true); // digitsLost=true -- conservatively set as this may be part of a truncation
      if (targetReg && targetReg->signStateInitialized() == false) // when srcSign is from getTemporaryKnownSignCode()
         targetReg->setKnownSignCode(srcSign);
      return digitsCleared;
      }

   TR::MemoryReference *signCodeMR = generateS390LeftAlignedMemoryReference(*targetMR, node, 0, cg, endByte-signCodeOffset);

   // If the sign code is 0xc, 0xd, 0xe or 0xf then the top two bits are already set so an initial OI is not required and only an NI is required for some sign values
   bool topTwoBitsSet = false;
   bool knownSignIs0xC = false;
   bool knownSignIs0xF = false;
   if (srcReg)
      {
      topTwoBitsSet = srcReg->hasKnownOrAssumedCleanSign() || srcReg->hasKnownOrAssumedPreferredSign();
      if (srcSign != TR::DataType::getInvalidSignCode())
         {
         if (srcSign >= 0xc && srcSign <= 0xf)
            topTwoBitsSet = true;
         knownSignIs0xC = (srcSign == 0xc);
         knownSignIs0xF = (srcSign == 0xf);
         }
      }

   TR::DataType dt = node->getDataType();
   TR_ASSERT(dt == TR::PackedDecimal || dt == TR::ZonedDecimal || dt == TR::ZonedDecimalSignLeadingEmbedded,
      "genSignCodeSetting only valid for embedded sign types and not type %s\n",dt.toString());
   bool isPacked = (dt == TR::PackedDecimal);

   intptr_t litPoolOffset;
   switch (dt)
      {
      case TR::PackedDecimal:
      case TR::ZonedDecimal:
      case TR::ZonedDecimalSignLeadingEmbedded:
         {
         if (isPacked && digitsToClear >= 3)
            {
            int32_t bytesToSet = (digitsToClear+1)/2;
            int32_t leftMostByte = 0;
            TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;
            switch (bytesToSet)
               {
               case 2:
               case 3:
                  op = TR::InstOpCode::MVHHI;
                  digitsCleared = 3;
                  leftMostByte = 2;
                  break;
               case 4:
               case 5:
               case 6:
               case 7:
                  op = TR::InstOpCode::MVHI;
                  digitsCleared = 7;
                  leftMostByte = 4;
                  break;
               default:
                  TR_ASSERT(bytesToSet >= 8,"unexpected bytesToSet value (%d) -- should be >= 8\n",bytesToSet);
                  op = TR::InstOpCode::MVGHI;
                  digitsCleared = 15;
                  leftMostByte = 8;
                  break;
               }
            signCodeMR->setLeftMostByte(leftMostByte);
            generateSILInstruction(cg, op, node, signCodeMR, sign);
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\t\tusing %d byte move imm (%s) for sign setting : set digitsCleared=%d\n",
                  leftMostByte,leftMostByte==8?"MVGHI":(leftMostByte==4)?"MVHI":"MVHHI",digitsCleared);
            }
         else if (numericNibbleIsZero || digitsToClear >= 1)
            {
            generateSIInstruction(cg, TR::InstOpCode::MVI, node, signCodeMR, isPacked ? sign : sign << 4);
            digitsCleared = 1;
            if (self()->traceBCDCodeGen()) traceMsg(comp,"\t\tusing MVI for sign setting : set digitsCleared=1\n");
            }
         else
            {
            if (knownSignIs0xF)
               {
               generateSIInstruction(cg, TR::InstOpCode::NI, node, signCodeMR, isPacked ? (0xF0 | sign) : (0x0F | (sign<<4)));
               }
            else if (topTwoBitsSet && sign == 0xc)
               {
               generateSIInstruction(cg, TR::InstOpCode::NI, node, signCodeMR, isPacked ? 0xFC : 0xCF);
               }
            else if (knownSignIs0xC && sign == 0xd)
               {
               generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x01 : 0x10);
               }
            else if (sign == 0xf)
               {
               generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x0F : 0xF0);
               }
            else
               {
               generateSIInstruction(cg, TR::InstOpCode::OI, node, signCodeMR, isPacked ? 0x0F : 0xF0);
               generateSIInstruction(cg, TR::InstOpCode::NI, node, generateS390LeftAlignedMemoryReference(*signCodeMR,
                                                                                                          node,
                                                                                                          0,
                                                                                                          cg,
                                                                                                          signCodeMR->getLeftMostByte()),
                                     isPacked ? (0xF0 | sign) : (0x0F | (sign<<4)));
               }
            }
         }
         break;
      default:
         TR_ASSERT(false,"dt %s not handled yet in genSignCodeSetting\n",node->getDataType().toString());
      }

   if (targetReg)
      targetReg->setKnownSignCode(sign);

   return digitsCleared;
   }

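// Worked example (sketch) of the generic OI+NI pair above for packed data: to force an
// arbitrary sign s into the low nibble, OI 0x0F first sets the nibble to 0xF, then
// NI (0xF0 | s) clears exactly the bits of 0xF that are not in s while the 0xF0 mask
// preserves the left digit. For s = 0xD on a byte ending in nibble ?:
//    ...x? : OI 0x0F -> ...xF, NI 0xFD -> ...xD
// The single-instruction special cases (NI 0xFC when the top two sign bits are known
// set, OI 0x01 for a known 0xC -> 0xD, OI 0x0F for 0xF) avoid one of the two updates.
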
/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::widenBCDValue(TR::Node *node, TR_PseudoRegister *reg, int32_t startByte, int32_t endByte, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getType().isBCD(),"widenBCDValue is only valid for BCD types (type=%s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenBCDValue is only valid for aligned memory references\n");
   TR_ASSERT(endByte >= startByte,"endByte (%d) >= startByte (%d) in widenBCDValue\n",endByte,startByte);

   int32_t bytesToClear = endByte - startByte;
   if (bytesToClear > 0)
      {
      switch (node->getDataType())
         {
         case TR::PackedDecimal:
            self()->genZeroLeftMostPackedDigits(node, reg, endByte, bytesToClear*2, targetMR);
            break;
         case TR::ZonedDecimal:
         case TR::ZonedDecimalSignTrailingSeparate:
            self()->genZeroLeftMostZonedBytes(node, reg, endByte, bytesToClear, targetMR);
            break;
         case TR::ZonedDecimalSignLeadingEmbedded:
            self()->widenZonedSignLeadingEmbedded(node, reg, endByte, bytesToClear, targetMR);
            break;
         case TR::ZonedDecimalSignLeadingSeparate:
            self()->widenZonedSignLeadingSeparate(node, reg, endByte, bytesToClear, targetMR);
            break;
         case TR::UnicodeDecimal:
         case TR::UnicodeDecimalSignTrailing:
            self()->genZeroLeftMostUnicodeBytes(node, reg, endByte, bytesToClear, targetMR);
            break;
         case TR::UnicodeDecimalSignLeading:
            self()->widenUnicodeSignLeadingSeparate(node, reg, endByte, bytesToClear, targetMR);
            break;
         default:
            TR_ASSERT(false,"unsupported dataType %s in widenBCDValue\n",node->getDataType().toString());
         }
      }
   }

/**
 * Input reg can be NULL (when called for a store node or other type that does not return a register)
 */
void
J9::Z::CodeGenerator::widenBCDValueIfNeeded(TR::Node *node, TR_PseudoRegister *reg, int32_t startByte, int32_t endByte, TR::MemoryReference *targetMR)
   {
   TR_ASSERT(node->getType().isBCD(),"widenBCDValueIfNeeded is only valid for BCD types (type=%s)\n",node->getDataType().toString());
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"widenBCDValueIfNeeded is only valid for aligned memory references\n");
   TR_ASSERT(endByte >= startByte,"endByte (%d) >= startByte (%d) in widenBCDValueIfNeeded\n",endByte,startByte);

   int32_t bytesToClear = endByte - startByte;
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\twidenBCDValueIfNeeded for node %s (%p) : %d->%d (%d bytes)\n",node->getOpCode().getName(),node,startByte,endByte,bytesToClear);
   if (bytesToClear > 0)
      {
      if (reg && reg->trackZeroDigits())
         self()->clearByteRangeIfNeeded(node, reg, targetMR, startByte, endByte);
      else
         self()->widenBCDValue(node, reg, startByte, endByte, targetMR);
      }
   }

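// For example (sketch): widening a packed value from 4 to 6 bytes with a register that
// tracks zero digits routes through clearByteRangeIfNeeded so bytes already known to be
// zero are not cleared a second time; without tracking (or with reg==NULL), widenBCDValue
// unconditionally clears bytes 4->6.
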
void
J9::Z::CodeGenerator::genZeroLeftMostDigitsIfNeeded(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t digitsToClear, TR::MemoryReference *targetMR, bool widenOnLeft)
   {
   TR_ASSERT(reg->trackZeroDigits(),"genZeroLeftMostDigitsIfNeeded only valid for types where trackZeroDigits=true (dt %s)\n",reg->getDataType().toString());
   TR_ASSERT(endByte > 0,"genZeroLeftMostDigitsIfNeeded: endByte %d should be > 0\n",endByte);
   TR_ASSERT(digitsToClear >= 0,"genZeroLeftMostDigitsIfNeeded: digitsToClear %d should be >= 0\n",digitsToClear);
   TR_ASSERT(reg->getDataType() == node->getDataType(),"reg dt (%s) should match node dt (%s) in genZeroLeftMostDigitsIfNeeded\n",reg->getDataType().toString(),node->getDataType().toString());

   if (digitsToClear <= 0)
      return;

   TR_StorageReference *storageReference = reg->getStorageReference();
   TR_ASSERT(storageReference,"storageReference should be non-null at this point\n");
   int32_t endDigit = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), endByte);
   int32_t startDigit = endDigit-digitsToClear;
   // -1 is the sign code position and it can be cleared. The caller is responsible for generating code to set a new and valid sign code.
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tgenZeroLeftMostDigitsIfNeeded %s #%d for node %p: digitsToClear = %d, endByte = %d (digit range is %d->%d), widenOnLeft=%s\n",
         self()->getDebug()->getName(reg),storageReference->getReferenceNumber(),node,digitsToClear,endByte,startDigit,endDigit,widenOnLeft?"yes":"no");
   TR_ASSERT(startDigit >= -1,"genZeroLeftMostDigitsIfNeeded: startDigit %d should be >= -1\n",startDigit);

   // If requested (widenOnLeft=true) then attempt to clear up to the live symbol size to save separate clears being needed later on.
   // This would not be legal, for example, when this routine is called to clear an intermediate digit range only
   // where some left most digits have to be preserved -- such as in pdshlEvaluator (via clearAndSetSign) when the moved over sign code is cleared.
   int32_t actualDigitsToClear = reg->getDigitsToClear(startDigit, endDigit);
   int32_t origEndDigit = endDigit;
   // only respect widenOnLeft if the actualDigitsToClear exceeds the widenOnLeftThreshold
   int32_t widenOnLeftThreshold = 0;
   if (node->getType().isAnyPacked())
      {
      // for the half byte type do not increase a single digit clear (i.e. avoid NI -> XC/NI -- just do the NI and leave the XC until later if needed)
      widenOnLeftThreshold = 1;
      }
   else if (node->getType().isAnyZoned() || node->getType().isAnyUnicode())
      {
      // the full byte types use an MVC for the clear so always attempt to widen on the left
      widenOnLeftThreshold = 0;
      }
   else
      {
      TR_ASSERT(false,"unsupported datatype %s in genZeroLeftMostDigitsIfNeededA\n",node->getDataType().toString());
      }
   if (widenOnLeft &&
       actualDigitsToClear > widenOnLeftThreshold &&
       reg->getLiveSymbolSize() > endByte)
      {
      int32_t origEndByte = endByte;
      endByte = reg->getLiveSymbolSize();
      endDigit = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), endByte);
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\ttargetMR->getStorageReference() #%d liveSymSize %d > endByte %d so increase endByte %d->%d (endDigit %d->%d) and retrieve the actualDigitsToClear based on this new endDigit\n",
            targetMR->getStorageReference()->getReferenceNumber(),reg->getLiveSymbolSize(),origEndByte,origEndByte,endByte,origEndDigit,endDigit);
      }

   if (origEndDigit != endDigit)
      actualDigitsToClear = reg->getDigitsToClear(startDigit, endDigit);

   if (actualDigitsToClear)
      {
      int32_t offset = reg->getByteOffsetFromLeftForClear(startDigit, endDigit, actualDigitsToClear, endByte); // might modify actualDigitsToClear
      switch (node->getDataType())
         {
         case TR::PackedDecimal:
            self()->genZeroLeftMostPackedDigits(node,
                                                reg,
                                                endByte,
                                                actualDigitsToClear,
                                                targetMR,
                                                offset);
            break;
         case TR::ZonedDecimal:
            self()->genZeroLeftMostZonedBytes(node,
                                              reg,
                                              endByte-offset,
                                              actualDigitsToClear,
                                              targetMR);
            break;
         default:
            TR_ASSERT(false,"unsupported datatype %s in genZeroLeftMostDigitsIfNeededB\n",node->getDataType().toString());
            break;
         }
      }
   else
      {
      self()->processUnusedNodeDuringEvaluation(NULL);
      }
   }

void
J9::Z::CodeGenerator::clearByteRangeIfNeeded(TR::Node *node, TR_PseudoRegister *reg, TR::MemoryReference *targetMR, int32_t startByte, int32_t endByte, bool widenOnLeft)
   {
   TR_ASSERT(startByte <= endByte,"clearByteRangeIfNeeded: invalid range of %d->%d\n",startByte,endByte);
   if (startByte >= endByte)
      return;
   int32_t clearDigits = TR::DataType::bytesToDigits(node->getDataType(), endByte-startByte);
   return self()->genZeroLeftMostDigitsIfNeeded(node, reg, endByte, clearDigits, targetMR, widenOnLeft);
   }

void
J9::Z::CodeGenerator::genZeroLeftMostPackedDigits(TR::Node *node, TR_PseudoRegister *reg, int32_t endByte, int32_t digitsToClear, TR::MemoryReference *targetMR, int32_t memRefOffset)
   {
   TR_ASSERT(targetMR->rightAlignMemRef() || targetMR->leftAlignMemRef(),"genZeroLeftMostPackedDigits is only valid for aligned memory references\n");

   TR_StorageReference *storageRef = reg ? reg->getStorageReference() : NULL;
   targetMR = reuseS390LeftAlignedMemoryReference(targetMR, node, storageRef, self(), endByte);

   if (digitsToClear)
      {
      int32_t fullBytesToClear = digitsToClear/2;
      if (self()->traceBCDCodeGen())
         traceMsg(self()->comp(),"\tgenZeroLeftMostPackedDigits: node %p, reg %s targetMemSlot #%d, endByte %d, digitsToClear %d (fullBytesToClear %d), memRefOffset %d\n",
            node,reg?self()->getDebug()->getName(reg):"0",reg?reg->getStorageReference()->getReferenceNumber():0,endByte,digitsToClear,fullBytesToClear,memRefOffset);
      if (fullBytesToClear)
         {
         int32_t destOffset = 0;
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\t\tgen XC with size %d and mr offset %d (destOffset %d + memRefOffset %d)\n",fullBytesToClear,destOffset+memRefOffset,destOffset,memRefOffset);
         generateSS1Instruction(self(), TR::InstOpCode::XC, node,
                                fullBytesToClear-1,
                                generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset+memRefOffset, self(), targetMR->getLeftMostByte()), // left justified
                                generateS390LeftAlignedMemoryReference(*targetMR, node, destOffset+memRefOffset, self(), targetMR->getLeftMostByte())); // left justified
         }
      if (digitsToClear&0x1)
         {
         int32_t destOffset = 0;
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"\tgen NI for odd clear digits with mr offset %d (fullBytesToClear %d + destOffset %d + memRefOffset %d)\n",fullBytesToClear+destOffset+memRefOffset,fullBytesToClear,destOffset,memRefOffset);
         generateSIInstruction(self(), TR::InstOpCode::NI, node,
                               generateS390LeftAlignedMemoryReference(*targetMR, node, fullBytesToClear+destOffset+memRefOffset, self(), targetMR->getLeftMostByte()),
                               0x0F);
         }
      int32_t endDigit = (endByte*2)-(memRefOffset*2)-1; // -1 for the sign code
      if (reg)
         reg->addRangeOfZeroDigits(endDigit-digitsToClear, endDigit);
      }
   }

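// Worked example (sketch): clearing digitsToClear=5 left-most packed digits clears
// fullBytesToClear = 5/2 = 2 whole bytes with one destructive XC (length encoded as
// fullBytesToClear-1), then the odd remaining digit is cleared with NI 0x0F on the next
// byte, zeroing its left digit while keeping the right one. The cleared digit range is
// then recorded via addRangeOfZeroDigits so later widenings can skip redundant clears.
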
void
J9::Z::CodeGenerator::initializeStorageReference(TR::Node *node,
                                                 TR_OpaquePseudoRegister *destReg,
                                                 TR::MemoryReference *destMR,
                                                 int32_t destSize,
                                                 TR::Node *srcNode,
                                                 TR_OpaquePseudoRegister *srcReg,
                                                 TR::MemoryReference *sourceMR,
                                                 int32_t sourceSize,
                                                 bool performExplicitWidening,
                                                 bool alwaysLegalToCleanSign,
                                                 bool trackSignState)
   {
   TR::Compilation *comp = self()->comp();
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\tinitializeStorageReference for %s (%p), destReg %s, srcReg %s, sourceSize %d, destSize %d, performExplicitWidening=%s, trackSignState=%s\n",
         node->getOpCode().getName(),node,
         destReg ? self()->getDebug()->getName(destReg):"NULL",srcReg ? self()->getDebug()->getName(srcReg):"NULL",sourceSize,destSize,performExplicitWidening?"yes":"no",trackSignState?"yes":"no");

   TR_ASSERT(srcReg,"expecting a non-null srcReg in initializeStorageReference\n");
   TR_ASSERT(srcReg->getStorageReference(),"expecting a non-null srcReg->storageRef in initializeStorageReference\n");

   TR::CodeGenerator *cg = self();
   // if a non-null destReg does not have a memory slot set then the addRangeOfZeroBytes/addRangeOfZeroDigits calls will
   // not be able to query the symbol size
   TR_ASSERT(!destReg || destReg->getStorageReference(),"a non-null destReg must have a storageReference set\n");
   bool isBCD = node->getType().isBCD();
   TR_ASSERT(!isBCD || sourceSize <= TR_MAX_MVC_SIZE,"sourceSize %d > max %d for node %p\n",sourceSize,TR_MAX_MVC_SIZE,node);
   TR_ASSERT(!isBCD || destSize <= TR_MAX_MVC_SIZE,"destSize %d > max %d for node %p\n",destSize,TR_MAX_MVC_SIZE,node);
   TR_PseudoRegister *srcPseudoReg = srcReg->getPseudoRegister();
   TR_PseudoRegister *destPseudoReg = destReg ? destReg->getPseudoRegister() : NULL;

   // widenings and truncations are only supported for pseudoRegisters
   TR_ASSERT(srcPseudoReg || destSize == sourceSize,"destSize %d != sourceSize %d for opaquePseudoReg on node %p\n",destSize,sourceSize,node);
   TR_ASSERT(destPseudoReg == NULL || srcPseudoReg == NULL || (srcPseudoReg && destPseudoReg),"both src and dest must be pseudoRegisters for node %p\n",node);
   TR_ASSERT(!isBCD || srcPseudoReg,"srcPseudoReg should be set for BCD node %p\n",node);

   if (sourceMR == NULL)
      {
      sourceMR = isBCD ?
         generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg) :
         generateS390MemRefFromStorageRef(srcNode, srcReg->getStorageReference(), cg);
      }

   int32_t mvcSize = std::min(sourceSize, destSize);
   TR_StorageReference *dstStorageRef = destMR->getStorageReference();
   TR_StorageReference *srcStorageRef = sourceMR->getStorageReference();
   TR_ASSERT(dstStorageRef,"dstStorageRef should be non-NULL\n");
   TR_ASSERT(srcStorageRef,"srcStorageRef should be non-NULL\n");

   if (!self()->storageReferencesMatch(dstStorageRef, srcStorageRef))
      {
      int32_t bytesToClear = (destSize > sourceSize) ? srcReg->getBytesToClear(sourceSize, destSize) : 0;
      bool srcCastedToBCD = srcReg->getStorageReference()->isNodeBased() && srcReg->getStorageReference()->getNode()->castedToBCD();

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\tnode %p : srcReg %s (hasBadSign %s) on srcNode %p has bytes %d->%d %salready clear (bytesToClear=%d), srcCastedToBCD=%d\n",
            node,self()->getDebug()->getName(srcReg),srcPseudoReg ? (srcPseudoReg->hasKnownBadSignCode()?"yes":"no") : "no",
            srcNode,sourceSize,destSize,bytesToClear==0?"":"not ",bytesToClear,srcCastedToBCD);

      if (destSize > sourceSize &&
          bytesToClear == 0)
         {
         mvcSize = destSize;
         if (destReg)
            destReg->addRangeOfZeroBytes(sourceSize,destSize);
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tincrease mvcSize %d->%d to account for the already cleared %d bytes\n",sourceSize,mvcSize,destSize-sourceSize);
         }

      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\t\tgen MVC/memcpy to initialize storage reference with size = %d\n",mvcSize);

      TR::MemoryReference *initDstMR = NULL;
      TR::MemoryReference *initSrcMR = NULL;
      if (isBCD)
         {
         initDstMR = generateS390RightAlignedMemoryReference(*destMR, node, 0, cg);
         initSrcMR = generateS390RightAlignedMemoryReference(*sourceMR, srcNode, 0, cg);
         }
      else
         {
         initDstMR = generateS390MemoryReference(*destMR, 0, cg);
         initSrcMR = generateS390MemoryReference(*sourceMR, 0, cg);
         }
      self()->genMemCpy(initDstMR, node, initSrcMR, srcNode, mvcSize);
      }

   if (isBCD && performExplicitWidening && (destSize > sourceSize))
      self()->widenBCDValueIfNeeded(node, destPseudoReg, sourceSize, destSize, destMR);

   if (destPseudoReg)
      {
      TR_ASSERT(srcPseudoReg,"srcPseudoReg must be non-NULL if destPseudoReg is non-NULL on node %p\n",node);
      // the destReg can be refined further by the caller but for now set it to a conservative value
      int32_t targetPrecision = 0;
      if (destSize >= sourceSize)
         targetPrecision = srcPseudoReg->getDecimalPrecision();
      else
         targetPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize);
      destPseudoReg->setDecimalPrecision(targetPrecision);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tset destReg targetPrecision to %d (from %s for node dt %s)\n",
            targetPrecision,destSize >= sourceSize?"srcReg precision":"destSize",node->getDataType().toString());
      }
   if (destReg)
      destReg->setIsInitialized();
   }

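// Worked example (sketch): initializing a 6-byte destination from a 4-byte packed source
// copies mvcSize = min(4, 6) = 4 bytes; if the source register already reports bytes
// 4->6 as clear (bytesToClear == 0) the copy is widened to 6 bytes instead, and otherwise
// widenBCDValueIfNeeded (when performExplicitWidening=true) clears bytes 4->6 after the
// copy so the widened left-most digits are zero.
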
TR_StorageReference *
J9::Z::CodeGenerator::initializeNewTemporaryStorageReference(TR::Node *node,
                                                             TR_OpaquePseudoRegister *destReg,
                                                             int32_t destSize,
                                                             TR::Node *srcNode,
                                                             TR_OpaquePseudoRegister *srcReg,
                                                             int32_t sourceSize,
                                                             TR::MemoryReference *sourceMR,
                                                             bool performExplicitWidening,
                                                             bool alwaysLegalToCleanSign,
                                                             bool trackSignState)
   {
   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"\tinitializeNewTemporaryStorageReference for node %p, destReg %s, srcNode %p, srcReg %s (with size %d), sourceSize %d, destSize %d\n",
         node,destReg ? self()->getDebug()->getName(destReg):"NULL",srcNode,srcReg ? self()->getDebug()->getName(srcReg):"NULL",srcReg?srcReg->getSize():0,sourceSize,destSize);

   TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, self()->comp());
   if (destReg)
      destReg->setStorageReference(tempStorageReference, node);
   else
      tempStorageReference->setTemporaryReferenceCount(1);

   TR_ASSERT(srcReg,"expecting a non-null srcReg in initializeNewTemporaryStorageReference for srcNode %p\n",srcNode);

   TR::MemoryReference *destMR = NULL;
   if (srcReg->getPseudoRegister())
      destMR = generateS390RightAlignedMemoryReference(node, tempStorageReference, self(), true, true); // enforceSSLimits=true, isNewTemp=true
   else
      destMR = generateS390MemRefFromStorageRef(node, tempStorageReference, self());

   self()->initializeStorageReference(node,
                                      destReg,
                                      destMR,
                                      destSize,
                                      srcNode,
                                      srcReg,
                                      sourceMR,
                                      sourceSize,
                                      performExplicitWidening,
                                      alwaysLegalToCleanSign,
                                      trackSignState);
   if (destReg == NULL)
      tempStorageReference->setTemporaryReferenceCount(0);
   return tempStorageReference;
   }

TR_OpaquePseudoRegister *
J9::Z::CodeGenerator::privatizePseudoRegister(TR::Node *node, TR_OpaquePseudoRegister *reg, TR_StorageReference *storageRef, size_t sizeOverride)
   {
   TR::CodeGenerator *cg = self();
   TR::Compilation *comp = cg->comp();
   size_t regSize = reg->getSize();
   if (self()->traceBCDCodeGen())
      {
      if (sizeOverride != 0 && sizeOverride != regSize)
         traceMsg(comp,"\tsizeOverride=%d : use this as the size for privatizing reg %s (instead of regSize %d)\n",sizeOverride,cg->getDebug()->getName(reg),reg->getSize());
      else
         traceMsg(comp,"\tsizeOverride=0 : use reg %s regSize %d as the size for privatizing\n",cg->getDebug()->getName(reg),reg->getSize());
      }
   size_t size = sizeOverride == 0 ? regSize : sizeOverride;
   bool isBCD = node->getType().isBCD();
   TR_StorageReference *tempStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(size, comp);
   tempStorageReference->setIsSingleUseTemporary();
   TR::MemoryReference *origSrcMR = NULL;
   TR::MemoryReference *copyMR = NULL;
   if (isBCD)
      {
      origSrcMR = generateS390RightAlignedMemoryReference(node, storageRef, cg);
      copyMR = generateS390RightAlignedMemoryReference(node, tempStorageReference, cg, true, true); // enforceSSLimits=true, isNewTemp=true
      }
   else
      {
      origSrcMR = generateS390MemRefFromStorageRef(node, storageRef, cg);
      copyMR = generateS390MemRefFromStorageRef(node, tempStorageReference, cg); // enforceSSLimits=true
      }

   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\ta^a : gen memcpy of size = %d to privatize node %s (%p) with storageRef #%d (%s) to #%d (%s) on line_no=%d\n",
         size,node->getOpCode().getName(),node,
         storageRef->getReferenceNumber(),self()->getDebug()->getName(storageRef->getSymbol()),
         tempStorageReference->getReferenceNumber(),self()->getDebug()->getName(tempStorageReference->getSymbol()),
         comp->getLineNumber(node));

   // allocate a new register so any storageRef dependent state (like leftAlignedZeroDigits) is cleared (as the memcpy isn't going to transfer these over to copyMR)
   TR_OpaquePseudoRegister *tempRegister = isBCD ? cg->allocatePseudoRegister(reg->getPseudoRegister()) : cg->allocateOpaquePseudoRegister(reg);
   tempRegister->setStorageReference(tempStorageReference, NULL); // node==NULL as the temp refCounts are explicitly being managed as the temp will only live for this evaluator
   tempRegister->setIsInitialized();

   cg->genMemCpy(copyMR, node, origSrcMR, node, size);

   return tempRegister;
   }

TR_OpaquePseudoRegister*
J9::Z::CodeGenerator::privatizePseudoRegisterIfNeeded(TR::Node *parent, TR::Node *child, TR_OpaquePseudoRegister *childReg)
   {
   TR::Compilation *comp = self()->comp();
   TR_OpaquePseudoRegister *outReg = childReg;
   TR_StorageReference *hint = parent->getStorageReferenceHint();
   if (hint && hint->isNodeBased())
      {
      TR::Node *hintNode = hint->getNode();
      TR_StorageReference *childStorageRef = childReg->getStorageReference();
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tprivatizePseudoRegisterIfNeeded for %s (%p) with hint %s (%p) (isInMemoryCopyProp=%s) and child %s (%p) (child storageRef isNonConstNodeBased=%s)\n",
            parent->getOpCode().getName(),parent,
            hintNode->getOpCode().getName(),hintNode,hintNode->isInMemoryCopyProp()?"yes":"no",
            child->getOpCode().getName(),child,
            childStorageRef ? (childStorageRef->isNonConstantNodeBased() ? "yes":"no") : "null");
      if (childStorageRef &&
          childStorageRef->isNonConstantNodeBased() &&
          hintNode->getOpCode().hasSymbolReference())
         {
         TR::Node *childStorageRefNode = childStorageRef->getNode();
         // see comment in pdstoreEvaluator for isUsingStorageRefFromAnotherStore and childRegHasDeadOrIgnoredBytes
         bool isUsingStorageRefFromAnotherStore = childStorageRefNode->getOpCode().isStore() && childStorageRefNode != hintNode;
         bool childRegHasDeadOrIgnoredBytes = childReg->getRightAlignedIgnoredBytes() > 0;
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tisInMemoryCopyProp=%s, isUsingStorageRefFromAnotherStore=%s, childRegHasDeadOrIgnoredBytes=%s : childStorageRef %s (%p), hintNode %s (%p)\n",
               hintNode->isInMemoryCopyProp() ? "yes":"no",
               isUsingStorageRefFromAnotherStore ? "yes":"no",
               childRegHasDeadOrIgnoredBytes ? "yes":"no",
               childStorageRefNode->getOpCode().getName(),childStorageRefNode,
               hintNode->getOpCode().getName(),hintNode);
         if (hintNode->isInMemoryCopyProp() || isUsingStorageRefFromAnotherStore || childRegHasDeadOrIgnoredBytes)
            {
            bool useAliasing = true;
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tcheck overlap between store hint %s (%p) and childStorageRefNode %s (%p)\n",
                  hintNode->getOpCode().getName(),hintNode,childStorageRefNode->getOpCode().getName(),childStorageRefNode);
            if (self()->loadAndStoreMayOverlap(hintNode,
                                               hintNode->getSize(),
                                               childStorageRefNode,
                                               childStorageRefNode->getSize()))
               {
               bool needsPrivatization = true;
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\toverlap=true (from %s test) -- privatize the source memref to a temp memref\n",useAliasing?"aliasing":"pattern");
               if (useAliasing && // checking useAliasing here because in the no info case the above loadAndStoreMayOverlap already did the pattern match
                   self()->storageMayOverlap(hintNode, hintNode->getSize(), childStorageRefNode, childStorageRefNode->getSize()) == TR_NoOverlap)
                  {
                  // get a second opinion -- the aliasing says the operations overlap but perhaps it is too conservative
                  // so do a pattern matching based test to see if the operations are actually disjoint
                  if (self()->traceBCDCodeGen())
                     traceMsg(comp,"\t\t but overlap=false (from 2nd opinion pattern test) -- set needsPrivatization to false\n");
                  needsPrivatization = false;
                  }

               if (needsPrivatization)
                  {
                  if (self()->traceBCDCodeGen())
                     {
                     if (hintNode->isInMemoryCopyProp())
                        traceMsg(comp,"\ta^a : privatize needed due to isInMemoryCopyProp hintNode %s (%p) on line_no=%d (privatizeCase)\n",
                           hintNode->getOpCode().getName(),hintNode,comp->getLineNumber(hintNode));
                     if (isUsingStorageRefFromAnotherStore)
                        traceMsg(comp,"\ta^a : privatize needed due to isUsingStorageRefFromAnotherStore childStorageRefNode %s (%p) on line_no=%d (privatizeCase)\n",
                           childStorageRefNode->getOpCode().getName(),childStorageRefNode,comp->getLineNumber(hintNode));
                     if (childRegHasDeadOrIgnoredBytes)
                        traceMsg(comp,"\ta^a : privatize needed due to childRegHasDeadOrIgnoredBytes valueReg %s child %s (%p) on line_no=%d (privatizeCase)\n",
                           self()->getDebug()->getName(childReg),child->getOpCode().getName(),child,comp->getLineNumber(hintNode));
                     }

                  outReg = self()->privatizePseudoRegister(child, childReg, childStorageRef);
                  TR_ASSERT(!comp->getOption(TR_EnablePerfAsserts),"gen overlap copy for hintNode %s (%p) on line_no=%d (privatePseudoCase)\n",
                     hintNode->getOpCode().getName(),hintNode,comp->getLineNumber(hintNode));
                  }
               }
            else
               {
               if (self()->traceBCDCodeGen())
                  traceMsg(comp,"\toverlap=false (from %s test) -- do not privatize the source memref\n",useAliasing?"aliasing":"pattern");
               }
            }
         else
            {
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"y^y : temp copy saved isInMemoryCopyProp = false on %s (%p) (privatizeCase)\n",hintNode->getOpCode().getName(),hintNode);
            }
         }
      }
   return outReg;
   }

TR_PseudoRegister*
J9::Z::CodeGenerator::privatizeBCDRegisterIfNeeded(TR::Node *parent, TR::Node *child, TR_OpaquePseudoRegister *childReg)
   {
   TR_OpaquePseudoRegister *reg = self()->privatizePseudoRegisterIfNeeded(parent, child, childReg);
   TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();
   TR_ASSERT(pseudoReg,"pseudoReg should not be NULL after privatizing of child %p\n",child);
   return pseudoReg;
   }

TR_StorageReference *
J9::Z::CodeGenerator::privatizeStorageReference(TR::Node *node, TR_OpaquePseudoRegister *reg, TR::MemoryReference *memRef)
   {
   TR::Compilation *comp = self()->comp();

   // Copy a node-based storageReference with a refCount > 1 to a temporary as the underlying symRef may be killed before the next commoned reference
   // to the node.
   // The flag skipCopyOnLoad is set in lowerTrees to prevent unnecessary copies when the symRef is known not to be killed for any commoned reference.
   TR_StorageReference *storageRef = reg->getStorageReference();
   TR_StorageReference *tempStorageRef = NULL;
   bool isPassThruCase = node != storageRef->getNode();
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"privatizeStorageReference: %s (%p) refCount %d :: storageRef #%d, storageRefNode %s (%p) nodeRefCount %d, isNodeBased %s\n",
         node->getOpCode().getName(),
         node,
         node->getReferenceCount(),
         storageRef->getReferenceNumber(),
         storageRef->getNode()?storageRef->getNode()->getOpCode().getName():"NULL",
         storageRef->getNode(),
         storageRef->isNodeBased()?storageRef->getNodeReferenceCount():-99,
         storageRef->isNodeBased()?"yes":"no");

   bool force = comp->getOption(TR_ForceBCDInit) && node->getOpCode().isBCDLoad();
   if (force ||
       (storageRef->isNodeBased() &&
        node->getReferenceCount() > 1 &&
        !node->skipCopyOnLoad()))
      {
      if (self()->traceBCDCodeGen())
         {
         traceMsg(comp,"\tnode %p (%s) with skipCopyOnLoad=false does need to be privatized for node based storageRef node %p (%s-based) (force=%s)\n",
            node,node->getOpCode().getName(),storageRef->getNode(),storageRef->getNode()->getOpCode().isStore()?"store":"load",force?"yes":"no");
         traceMsg(comp,"\tb^b : gen memcpy of size = %d to privatizeStorageReference node %s (%p) with storageRef #%d (%s) on line_no=%d\n",
            reg->getSize(),node->getOpCode().getName(),node,
            storageRef->getReferenceNumber(),self()->getDebug()->getName(storageRef->getSymbol()),
            comp->getLineNumber(node));
         }

      if (force && storageRef->getNodeReferenceCount() == 1)
         storageRef->incrementNodeReferenceCount(); // prevent nodeRefCount underflow (dec'd for init and on setStorageRef call)

      if (memRef == NULL)
         {
         if (reg->getPseudoRegister())
            memRef = generateS390RightAlignedMemoryReference(node, storageRef, self());
         else
            memRef = generateS390MemRefFromStorageRef(node, storageRef, self());
         }

      if (reg->getSize() == 0)
         {
         TR_ASSERT(false,"register should have its size initialized before calling privatizeStorageReference\n");

         if (reg->getPseudoRegister())
            reg->getPseudoRegister()->setDecimalPrecision(node->getDecimalPrecision());
         else
            reg->setSize(node->getSize());
         }
      tempStorageRef = self()->initializeNewTemporaryStorageReference(node, reg, reg->getSize(), node, reg, reg->getSize(), memRef, false, false, false); // performExplicitWidening=false, alwaysLegalToCleanSign=false, trackSignState=false
      }
   else if (self()->traceBCDCodeGen())
      {
      traceMsg(comp,"\t%s (%p) does NOT need to be privatized because isTemp (%s) and/or refCount %d <= 1 and/or skipCopyOnLoad=true (flag is %s)\n",
         node->getOpCode().getName(),node,storageRef->isTemporaryBased()?"yes":"no",node->getReferenceCount(),node->skipCopyOnLoad()?"true":"false");
      }
   return tempStorageRef;
   }

/**
 * A binary coded decimal value may have had its storageReference size reduced
 * (by a pdshr for example) and/or have implied left most zeroes. This routine
 * will ensure the storageReference is at least resultSize and zero digits are
 * explicitly generated up to and including clearSize. This full materialization
 * is required in several cases such as before calls or when used in an
 * instruction that requires a fixed size temp (like UNPKU in pd2ud or
 * CVB/CVBG).
 */
TR::MemoryReference *
J9::Z::CodeGenerator::materializeFullBCDValue(TR::Node *node,
                                              TR_PseudoRegister *&reg,
                                              int32_t resultSize,
                                              int32_t clearSize,
                                              bool updateStorageReference,
                                              bool alwaysEnforceSSLimits)
   {
   TR::Compilation *comp = self()->comp();

   int32_t regSize = reg->getSize();
   if (self()->traceBCDCodeGen())
      traceMsg(comp,"\tmaterializeFullBCDValue evaluated %s (%p) (nodeSize %d, requested resultSize %d) to reg %s (regSize %d), clearSize=%d, updateStorageReference=%s\n",
         node->getOpCode().getName(),node,node->getStorageReferenceSize(),resultSize,self()->getDebug()->getName(reg),regSize,clearSize,updateStorageReference?"yes":"no");

   TR_ASSERT(clearSize >= 0,"invalid clearSize %d for node %p\n",clearSize,node);
   if (clearSize == 0)
      {
      clearSize = resultSize;
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tspecific clearSize not requested : set clearSize=resultSize=%d\n",resultSize);
      }
   else
      {
      // enforce this condition : regSize <= clearSize <= resultSize
      TR_ASSERT(clearSize <= resultSize,"clearSize %d should be <= resultSize %d on node %p\n",clearSize,resultSize,node);
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tupdate clearSize %d to max(clearSize, regSize) = max(%d,%d) = %d\n",clearSize,clearSize,regSize,std::max(clearSize, regSize));
      clearSize = std::max(clearSize, regSize);
      }

   TR::MemoryReference *memRef = NULL;
   if (regSize < resultSize &&
       reg->getLiveSymbolSize() >= resultSize &&
       reg->getBytesToClear(regSize, clearSize) == 0)
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\tbytes regSize->clearSize (%d->%d) are already clear -- no work to do to materializeFullBCDValue\n",regSize,clearSize);
      memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), alwaysEnforceSSLimits);
      }
   else if (regSize < resultSize)
      {
      if (self()->traceBCDCodeGen())
         traceMsg(comp,"\treg->getSize() < resultSize (%d < %d) so check liveSymSize on reg\n",regSize,resultSize);
      int32_t liveSymSize = reg->getLiveSymbolSize();
      int32_t bytesToClear = clearSize-regSize;
      bool enforceSSLimitsForClear = alwaysEnforceSSLimits || bytesToClear > 1;

      if (reg->isInitialized() &&
          reg->getStorageReference()->isReadOnlyTemporary() &&
          liveSymSize > regSize &&
          reg->getBytesToClear(regSize, clearSize) > 0)
         {
         // 1 pd2i
         // 1   pdModPrec p=3,s=2 <- (node) passThrough + initialized (setAsReadOnly due to lazy clobber evaluate)
         // 2     pdX p=8,s=5     <- initialized and refCount > 1 (used again)
         //
         // Have to clobber evaluate in this case so the clearing of firstRegSize (2) to sourceSize (8) does not destroy
         // the 6 upper bytes required by the commoned reference to pdX
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         TR_OpaquePseudoRegister *opaqueReg = self()->ssrClobberEvaluate(node, memRef);
         reg = opaqueReg->getPseudoRegister();
         TR_ASSERT(reg,"reg should be set for node %p\n",node);
         }

      if (reg->isInitialized() && reg->trackZeroDigits() && liveSymSize >= resultSize)
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\treg->getLiveSymbolSize() >= resultSize (%d >= %d) so call clearByteRangeIfNeeded\n",liveSymSize,resultSize);
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         self()->clearByteRangeIfNeeded(node, reg, memRef, regSize, clearSize);
         }
      else if (reg->isInitialized() && reg->trackZeroDigits() && reg->getStorageReference()->isTemporaryBased())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\treg->getLiveSymbolSize() < resultSize (%d < %d) so call increaseTemporarySymbolSize but first check for already cleared bytes\n",liveSymSize,resultSize);
         //int32_t bytesToClear = clearSize-regSize; // e.g. clearSize=16, regSize=3 so bytesToClear=13, liveSymSize=15
         int32_t alreadyClearedBytes = 0;
         int32_t endByteForClearCheck = 0;
         if (clearSize > liveSymSize)              // 16 > 15
            endByteForClearCheck = liveSymSize;    // endByteForClearCheck = 15
         else
            endByteForClearCheck = clearSize;

         if (reg->getBytesToClear(regSize, endByteForClearCheck) == 0) // increaseTemporarySymbolSize resets leftAlignedZeroDigits so check cleared bytes first
            alreadyClearedBytes = endByteForClearCheck-regSize;        // endByteForClearCheck=15,regSize=3 so alreadyClearedBytes=12

         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tfound %d alreadyClearedBytes : adjust bytesToClear %d -> %d\n",alreadyClearedBytes,bytesToClear,bytesToClear-alreadyClearedBytes);
         bytesToClear-=alreadyClearedBytes; // bytesToClear = bytesToClear-alreadyClearedBytes = 13-12 = 1
         if (bytesToClear < 0)
            {
            TR_ASSERT(false,"bytesToClear should always be >=0 and not %d\n",bytesToClear);
            bytesToClear = clearSize-regSize;
            }
         int32_t savedLeftAlignedZeroDigits = reg->getLeftAlignedZeroDigits();
         reg->increaseTemporarySymbolSize(resultSize - liveSymSize); // also resets leftAlignedZeroDigits

         // create memRef after temp size increase so correct TotalSizeForAlignment is set
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         int32_t startByte = clearSize-bytesToClear;
         int32_t endByte = clearSize;
         self()->widenBCDValue(node, reg, startByte, endByte, memRef);
         if (clearSize == resultSize)
            {
            // bytesToClear may have been reduced to less than resultSize-regSize if the source already had some cleared bytes
            // in this case the already cleared bytes should also be transferred to the size increased temporary
            int32_t newLeftAlignedZeroDigits = TR::DataType::bytesToDigits(reg->getDataType(), resultSize-regSize); // (16-3)*2 = 26
            if (TR::DataType::getDigitSize(reg->getDataType()) == HalfByteDigit && reg->isEvenPrecision() && reg->isLeftMostNibbleClear())
               newLeftAlignedZeroDigits++;
            reg->setLeftAlignedZeroDigits(newLeftAlignedZeroDigits);
            if (self()->traceBCDCodeGen())
               traceMsg(comp,"\tset leftAlignedZeroDigits to %d after temporarySymbolSize increase\n",newLeftAlignedZeroDigits);
            }
         else // if not clearing all the new bytes then the zero digits will not be left aligned
            {
            // TODO: when actual zero ranges are tracked can transfer the range on the reg from before the increaseTemporarySymbolSize
            // to now in the clearSize < resultSize case
            if (self()->traceBCDCodeGen() && savedLeftAlignedZeroDigits > 0)
               traceMsg(comp,"x^x : missed transferring savedLeftAlignedZeroDigits %d on matFull, node %p\n",savedLeftAlignedZeroDigits,node);
            }
         }
      else
         {
         if (self()->traceBCDCodeGen())
            traceMsg(comp,"\tstorageReference #%d is not tempBased (or is not packed) and reg->getLiveSymbolSize() < resultSize (%d < %d) so alloc a new temporary reference\n",
               reg->getStorageReference()->getReferenceNumber(),liveSymSize,resultSize);
         memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), enforceSSLimitsForClear);
         TR_PseudoRegister *destReg = NULL;
         if (updateStorageReference)
            destReg = reg;
         bool clearWidenedBytes = clearSize == resultSize;
         TR_StorageReference *tempStorageRef = self()->initializeNewTemporaryStorageReference(node,
                                                                                              destReg,
                                                                                              resultSize,
                                                                                              node,
                                                                                              reg,
                                                                                              reg->getSize(),
                                                                                              memRef,
                                                                                              clearWidenedBytes, // performExplicitWidening
                                                                                              false,             // alwaysLegalToCleanSign
                                                                                              false);            // trackSignState=false
         if (destReg == NULL)
            tempStorageRef->setTemporaryReferenceCount(1);

         // pass in isNewTemp=true for the memref gen below so any deadBytes on the node's register are *not* counted for this new temporary
         // (these deadBytes should only be counted for the source memRef created just above)
         memRef = generateS390RightAlignedMemoryReference(node, tempStorageRef, self(), true, true); // enforceSSLimits=true, isNewTemp=true

         if (!clearWidenedBytes && clearSize > regSize)
            self()->widenBCDValue(node, destReg, regSize, clearSize, memRef);

         if (destReg == NULL)
            tempStorageRef->setTemporaryReferenceCount(0);
         self()->pendingFreeVariableSizeSymRef(tempStorageRef->getTemporarySymbolReference()); // free after this treetop has been evaluated if the refCount is still 0 at that point
         }
      }
   memRef = reuseS390RightAlignedMemoryReference(memRef, node, reg->getStorageReference(), self(), alwaysEnforceSSLimits);
   return memRef;
   }

bool topBitIsZero(uint8_t byte)
   {
   return (byte & 0x80) == 0;
   }

bool topBitIsOne(uint8_t byte)
   {
   return (byte & 0x80) == 0x80;
   }

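// Reader's note (summarizing the existing behavior, not changing it): zeroTable
// and oneTable below are memcmp reference patterns. A literal whose first N
// bytes match zeroTable (or oneTable) starts with a run of N 0x00 (or 0xFF)
// bytes, which is exactly what the zero/sign extension checks in checkMVHI and
// checkMVGHI need.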
#define TR_TWO_BYTE_TABLE_SIZE 17
static uint8_t zeroTable[TR_TWO_BYTE_TABLE_SIZE] =
   {
   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
   };

static uint8_t oneTable[TR_TWO_BYTE_TABLE_SIZE] =
   {
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
   };

TR::MemoryReference *getNextMR(TR::MemoryReference *baseMR, TR::Node *node, intptr_t offset, size_t destLeftMostByte, bool isBCD, TR::CodeGenerator *cg)
   {
   if (isBCD)
      return generateS390LeftAlignedMemoryReference(*baseMR, node, offset, cg, destLeftMostByte);
   else
      return generateS390MemoryReference(*baseMR, offset, cg);
   }

bool checkMVHI(char *lit, int32_t offset)
   {
   if (memcmp(lit+offset,zeroTable,2) == 0 && topBitIsZero(lit[offset+2])) // zero extend 0x7FFF to lit value of 0x00007FFF
      return true;
   else if (memcmp(lit+offset,oneTable,2) == 0 && topBitIsOne(lit[offset+2])) // sign extend 0xFFFF to lit value of 0xFFFFFFFF
      return true;
   else
      return false;
   }

bool checkMVGHI(char *lit, int32_t offset)
   {
   if (memcmp(lit+offset,zeroTable,6) == 0 && topBitIsZero(lit[offset+6])) // zero extend 0x7FFF to lit value of 0x00000000 00007FFF
      return true;
   else if (memcmp(lit+offset,oneTable,6) == 0 && topBitIsOne(lit[offset+6])) // sign extend 0xFFFF to lit value of 0xFFFFFFFF FFFFFFFF
      return true;
   else
      return false;
   }

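// Illustrative walk-through (values chosen for this comment only): for a 4 byte
// literal lit = { 0x00, 0x00, 0x7F, 0xFF }, checkMVHI(lit,0) sees two leading
// zero bytes and a clear top bit in lit[2], so the full value 0x00007FFF is
// reproducible by a single MVHI whose 16 bit immediate 0x7FFF zero extends.
// Likewise lit = { 0xFF, 0xFF, 0x80, 0x00 } passes because the immediate 0x8000
// sign extends to 0xFFFF8000. checkMVGHI applies the same idea over 6 leading
// bytes for the 8 byte MVGHI case.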
void genMVI(TR::MemoryReference *destMR, TR::Node *node, uint8_t value, TR::CodeGenerator *cg)
   {
   if (cg->traceBCDCodeGen())
      traceMsg(cg->comp(),"\tgen MVI 0x%02x\n",value);
   generateSIInstruction(cg, TR::InstOpCode::MVI, node, destMR, value);
   }

void genMVHHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)
   {
   if (cg->traceBCDCodeGen())
      traceMsg(cg->comp(),"\tgen MVHHI 0x%04x\n",(uint16_t)value);
   generateSILInstruction(cg, TR::InstOpCode::MVHHI, node, destMR, value);
   }

void genMVHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)
   {
   if (cg->traceBCDCodeGen())
      traceMsg(cg->comp(),"\tgen MVHI 0x%04x\n",(uint16_t)value);
   generateSILInstruction(cg, TR::InstOpCode::MVHI, node, destMR, value);
   }

void genMVGHI(TR::MemoryReference *destMR, TR::Node *node, int16_t value, TR::CodeGenerator *cg)
   {
   if (cg->traceBCDCodeGen())
      traceMsg(cg->comp(),"\tgen MVGHI 0x%04x\n",(uint16_t)value);
   generateSILInstruction(cg, TR::InstOpCode::MVGHI, node, destMR, value);
   }

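// Note on the callers below: each helper emits a single move immediate (SI or
// SIL format), and the callers pass (lit[i]<<8)|lit[i+1], i.e. the two low
// order (rightmost) bytes of the extended value, because MVHI/MVGHI regenerate
// the high order bytes via the sign/zero extension that checkMVHI/checkMVGHI
// have already validated.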
/**
 * This method must be kept in sync with cases handled by useMoveImmediateCommon below
 */
bool
J9::Z::CodeGenerator::canCopyWithOneOrTwoInstrs(char *lit, size_t size)
   {
   if (size < 1 || size >= TR_TWO_BYTE_TABLE_SIZE)
      {
      return false;
      }

   bool canCopy = false;
   switch (size)
      {
      case 0:
         canCopy = false;
         break;
      case 1: // MVI
      case 2: // MVI/MVI or MVHHI
      case 3: // MVHHI/MVI
         canCopy = true;
         break;
      case 4: // MVHHI/MVHHI (always) or MVHI (value <= 0x7FFF)
         canCopy = true;
         break;
      case 5: // MVHI/MVI (MVHI 0,1,2,3 bytes value <= 0x7FFF) or MVI/MVHI (MVHI 1,2,3,4 bytes value <= 0x7FFF)
         if (checkMVHI(lit,0) || checkMVHI(lit,1))
            canCopy = true;
         break;
      case 6: // MVHI/MVHHI (MVHI 0,1,2,3 bytes value <= 0x7FFF) or MVHHI/MVHI (MVHI 2,3,4,5 bytes value <= 0x7FFF)
         if (checkMVHI(lit,0) || checkMVHI(lit,2))
            canCopy = true;
         break;
      case 7:
         canCopy = false;
         break;
      case 8: // MVGHI (value <= 0x7FFF) or MVHI/MVHI (e.g. 0x00007FFF FFFFffff or vice-versa)
         if (checkMVGHI(lit,0))
            canCopy = true;
         else if (checkMVHI(lit,0) && checkMVHI(lit,4))
            canCopy = true;
         break;
      case 9:  // MVGHI/MVI (MVGHI <= 0x7FFF)
      case 10: // MVGHI/MVHHI (MVGHI <= 0x7FFF)
         if (checkMVGHI(lit,0))
            canCopy = true;
         break;
      case 11:
         canCopy = false;
         break;
      case 12: // MVGHI/MVHI (MVGHI and MVHI value both <= 0x7FFF)
         if (checkMVGHI(lit,0) && checkMVHI(lit,8))
            {
            canCopy = true;
            }
         break;
      case 13:
      case 14:
      case 15:
         canCopy = false;
         break;
      case 16: // MVGHI/MVGHI (both MVGHI values <= 0x7FFF)
         if (checkMVGHI(lit,0) && checkMVGHI(lit,8))
            {
            canCopy = true;
            }
         break;
      default:
         canCopy = false;
         break;
      }
   return canCopy;
   }

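// For example (illustrative values only): a size 8 literal 0x0000000000001234
// is copyable with one MVGHI (checkMVGHI succeeds on the six leading zero
// bytes), a size 8 literal 0x0000123400005678 needs the MVHI/MVHI pair, and a
// size 8 literal 0x1234567800000000 matches neither pattern, so canCopy stays
// false and the general copy path is used instead.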
/**
 * This method must be kept in sync with cases handled by canCopyWithOneOrTwoInstrs above
 */
bool
J9::Z::CodeGenerator::useMoveImmediateCommon(TR::Node *node,
                                             char *srcLiteral,
                                             size_t srcSize,
                                             TR::Node *srcNode,
                                             size_t destSize,
                                             intptr_t destBaseOffset,
                                             size_t destLeftMostByte,
                                             TR::MemoryReference *inputDestMR)
   {
   TR::CodeGenerator *cg = self();
   size_t size = destSize;
   char *lit = srcLiteral;
   bool isBCD = node->getType().isBCD();

   TR::MemoryReference *destMR = getNextMR(inputDestMR, node, destBaseOffset, destLeftMostByte, isBCD, cg);

   switch (size)
      {
      case 0:
         TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);
         break;
      case 1: // MVI
         genMVI(destMR, node, lit[0], cg);
         break;
      case 2: // MVI/MVI or MVHHI
         {
         genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);
         break;
         }
      case 3: // MVHHI/MVI
         genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);
         genMVI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, lit[2], cg);
         break;
      case 4: // MVHHI/MVHHI (always) or MVHI (value <= 0x7FFF)
         if (checkMVHI(lit,0))
            {
            genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);
            }
         else
            {
            genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);
            genMVHHI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, (lit[2]<<8)|lit[3], cg);
            }
         break;
      case 5:
         if (checkMVHI(lit,0))
            {
            // MVHI/MVI (MVHI 0,1,2,3 bytes value <= 0x7FFF)
            genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);
            genMVI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, lit[4], cg);
            }
         else
            {
            // MVI/MVHI (MVHI 1,2,3,4 bytes value <= 0x7FFF)
            TR_ASSERT(checkMVHI(lit,1),"checkMVHI should be true\n");
            genMVI(destMR, node, lit[0], cg);
            genMVHI(getNextMR(destMR, node, 1, destLeftMostByte, isBCD, cg), node, (lit[3]<<8)|lit[4], cg);
            }
         break;
      case 6:
         if (checkMVHI(lit,0))
            {
            // MVHI/MVHHI (MVHI 0,1,2,3 bytes value <= 0x7FFF)
            genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);
            genMVHHI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, (lit[4]<<8)|lit[5], cg);
            }
         else
            {
            // MVHHI/MVHI (MVHI 2,3,4,5 bytes value <= 0x7FFF)
            TR_ASSERT(checkMVHI(lit,2),"checkMVHI should be true\n");
            genMVHHI(destMR, node, (lit[0]<<8)|lit[1], cg);
            genMVHI(getNextMR(destMR, node, 2, destLeftMostByte, isBCD, cg), node, (lit[4]<<8)|lit[5], cg);
            }
         break;
      case 7:
         TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);
         break;
      case 8: // MVGHI (value <= 0x7FFF)
         if (checkMVGHI(lit,0))
            {
            genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);
            }
         else
            {
            TR_ASSERT(checkMVHI(lit,0) && checkMVHI(lit,4),"checkMVHI+checkMVHI should be true\n");
            genMVHI(destMR, node, (lit[2]<<8)|lit[3], cg);
            genMVHI(getNextMR(destMR, node, 4, destLeftMostByte, isBCD, cg), node, (lit[6]<<8)|lit[7], cg);
            }
         break;
      case 9: // MVGHI/MVI (MVGHI <= 0x7FFF)
         genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);
         genMVI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, lit[8], cg);
         break;
      case 10: // MVGHI/MVHHI (MVGHI <= 0x7FFF)
         genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);
         genMVHHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[8]<<8)|lit[9], cg);
         break;
      case 11:
         TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);
         break;
      case 12: // MVGHI/MVHI (MVGHI and MVHI value both <= 0x7FFF)
         genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);
         genMVHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[10]<<8)|lit[11], cg);
         break;
      case 13:
      case 14:
      case 15:
         TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);
         break;
      case 16: // MVGHI/MVGHI (both MVGHI values <= 0x7FFF)
         genMVGHI(destMR, node, (lit[6]<<8)|lit[7], cg);
         genMVGHI(getNextMR(destMR, node, 8, destLeftMostByte, isBCD, cg), node, (lit[14]<<8)|lit[15], cg);
         break;
      default:
         TR_ASSERT(false,"copySize %d not supported on node %p\n",size,node);
         break;
      }

   return true;
   }

bool
J9::Z::CodeGenerator::inlineSmallLiteral(size_t srcSize, char *srcLiteral, size_t destSize, bool trace)
   {
   TR::Compilation *comp = self()->comp();

   bool inlineLiteral = false;
   if (srcSize != destSize)
      {
      inlineLiteral = false;
      if (trace)
         traceMsg(comp,"\t\tinlineLiteral=false : srcSize %d != destSize %d\n",srcSize,destSize);
      }
   else if (srcSize == 1)
      {
      inlineLiteral = true;
      if (trace)
         traceMsg(comp,"\t\tinlineLiteral=true : srcSize == 1 (destSize %d)\n",destSize);
      }
   else if (destSize <= 2)
      {
      inlineLiteral = true;
      if (trace)
         traceMsg(comp,"\t\tinlineLiteral=true : destSize %d <= 2 (srcSize %d)\n",destSize,srcSize);
      }
   else if (self()->canCopyWithOneOrTwoInstrs(srcLiteral, srcSize))
      {
      inlineLiteral = true;
      if (trace)
         traceMsg(comp,"\t\tinlineLiteral=true : canCopyWithOneOrTwoInstrs = true (srcSize %d, destSize %d)\n",srcSize,destSize);
      }
   else
      {
      inlineLiteral = false;
      if (trace)
         traceMsg(comp,"\t\tinlineLiteral=false : unhandled case (srcSize %d, destSize %d)\n",srcSize,destSize);
      }
   return inlineLiteral;
   }

bool
J9::Z::CodeGenerator::checkFieldAlignmentForAtomicLong()
   {
   TR_OpaqueClassBlock * classBlock = self()->comp()->fej9()->getSystemClassFromClassName("java/util/concurrent/atomic/AtomicLong", 38, true);

   // TR_J9SharedCacheVM::getSystemClassFromClassName can return 0 when it's impossible to relocate a J9Class later for AOT loads.
   if (!classBlock)
      return false;

   char * fieldName = "value";
   int32_t fieldNameLen = 5;
   char * fieldSig = "J";
   int32_t fieldSigLen = 1;
   int32_t intOrBoolOffset = self()->fe()->getObjectHeaderSizeInBytes() + self()->fej9()->getInstanceFieldOffset(classBlock, fieldName, fieldNameLen, fieldSig, fieldSigLen);
   return (intOrBoolOffset & 0x3) == 0;
   }

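// A minimal worked example of the test above (offsets are illustrative, not
// taken from any particular VM build): with a 16 byte object header and "value"
// at instance field offset 8, intOrBoolOffset is 24 and 24 & 0x3 == 0, so the
// long field is considered suitably aligned for the inlined atomic operations
// on AtomicLong.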
TR_PseudoRegister *
J9::Z::CodeGenerator::evaluateBCDNode(TR::Node * node)
   {
   TR_ASSERT(node->getType().isBCD(),"evaluateBCDNode only valid for binary coded decimal types\n");
   bool isFirstTime = node->getRegister() == NULL;
   TR::Register *reg = self()->evaluate(node);
   TR_PseudoRegister *pseudoReg = reg->getPseudoRegister();
   TR_ASSERT(pseudoReg,"pseudoReg should not be NULL after evaluation of node %p\n",node);
   if (isFirstTime)
      {
      if (node->getOpCode().canHaveStorageReferenceHint() &&
          node->getStorageReferenceHint() &&
          node->getStorageReferenceHint()->isTemporaryBased())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"evaluateBCDNode: found temp based hint #%d on %s (%p)\n",
               node->getStorageReferenceHint()->getReferenceNumber(),
               node->getOpCode().getName(),
               node);
         node->getStorageReferenceHint()->removeSharedNode(node);
         }
      // to prevent refCount underflow on the padding address node can only use this tree on the first reference to a node
      if (node->getOpCode().canHavePaddingAddress())
         {
         if (self()->traceBCDCodeGen())
            traceMsg(self()->comp(),"evaluateBCDNode: set UsedPaddingAnchorAddress flag to true on %s (%p)\n",
               node->getOpCode().getName(),
               node);
         }
      }
   // TR_ASSERT(pseudoReg->signStateInitialized(),"sign state for node %p register not initialized\n",node);
   return pseudoReg;
   }

void
J9::Z::CodeGenerator::addAllocatedRegister(TR_PseudoRegister * temp)
   {
   uint32_t idx = _registerArray.add(temp);
   temp->setIndex(idx);
   self()->startUsingRegister(temp);
   }

/**
 * These routines return the minimum precision and size values for a packed arithmetic node so the corresponding
 * hardware instruction (AP,SP,MP,DP) can be legally encoded
 */
uint32_t
J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul, TR_PseudoRegister *multiplicand, TR_PseudoRegister *multiplier)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return multiplicand->getSize() + multiplier->getSize();
   }

uint32_t
J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return pdmul->getFirstChild()->getSize() + pdmul->getSecondChild()->getSize();
   }

uint32_t
J9::Z::CodeGenerator::getPDMulEncodedSize(TR::Node *pdmul, int32_t exponent)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedSize only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return pdmul->getFirstChild()->getSize() * exponent;
   }

int32_t
J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul, TR_PseudoRegister *multiplicand, TR_PseudoRegister *multiplier)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul, multiplicand, multiplier));
   }

int32_t
J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul));
   }

int32_t
J9::Z::CodeGenerator::getPDMulEncodedPrecision(TR::Node *pdmul, int32_t exponent)
   {
   TR_ASSERT(pdmul->getType().isAnyPacked(), "getPDMulEncodedPrecision only valid for packed types and not type %s\n",pdmul->getDataType().toString());
   return TR::DataType::byteLengthToPackedDecimalPrecisionFloor(self()->getPDMulEncodedSize(pdmul, exponent));
   }

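// Worked example (values for illustration only): for a pdmul of a 3 byte
// multiplicand and a 2 byte multiplier the encoded size is 3 + 2 = 5 bytes, and
// byteLengthToPackedDecimalPrecisionFloor(5) = 9 digits. Sizing by the sum of
// the operands matches the MP requirement that the first operand include a
// leftmost zero byte for every byte of the multiplier, so the full product fits.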
/**
 * Motivating example for the packedAddSubSize
 *   pdsub  p=3,s=2   // correct answer is 12111-345 truncated to 3 digits = (11)766
 *     pdload p=5,s=3 // 12111
 *     pdload p=3,s=3 // 345
 * If an SP of size=2 is used then the answer will be 111-345 = -234 instead of 766 as SP/AP are destructive operations
 * so for AP/SP the encoded firstOp/result size must be at least as big as the first operand.
 */
uint32_t
J9::Z::CodeGenerator::getPDAddSubEncodedSize(TR::Node *node)
   {
   TR_ASSERT(node->getType().isAnyPacked() && node->getFirstChild()->getType().isAnyPacked(),"getPackedAddSubSize only valid for packed types\n");
   return std::max(node->getSize(), node->getFirstChild()->getSize());
   }

int32_t
J9::Z::CodeGenerator::getPDAddSubEncodedPrecision(TR::Node *node)
   {
   TR_ASSERT(node->getType().isAnyPacked() && node->getFirstChild()->getType().isAnyPacked(),"getPackedAddSubPrecision only valid for packed types\n");
   return std::max(node->getDecimalPrecision(), node->getFirstChild()->getDecimalPrecision());
   }

uint32_t
J9::Z::CodeGenerator::getPDAddSubEncodedSize(TR::Node *node, TR_PseudoRegister *firstReg)
   {
   TR_ASSERT(node->getType().isAnyPacked() && firstReg->getDataType().isAnyPacked(),"getPackedAddSubSize only valid for packed types\n");
   return std::max<uint32_t>(node->getSize(), firstReg->getSize());
   }

int32_t
J9::Z::CodeGenerator::getPDAddSubEncodedPrecision(TR::Node *node, TR_PseudoRegister *firstReg)
   {
   TR_ASSERT(node->getType().isAnyPacked() && firstReg->getDataType().isAnyPacked(),"getPackedAddSubPrecision only valid for packed types\n");
   return std::max<int32_t>(node->getDecimalPrecision(), firstReg->getDecimalPrecision());
   }

bool
J9::Z::CodeGenerator::supportsPackedShiftRight(int32_t resultPrecision, TR::Node *shiftSource, int32_t shiftAmount)
   {
   bool isSupported = false;
   int32_t maxPrecision = TR::DataType::getMaxPackedDecimalPrecision();
   int32_t sourceDigits = shiftSource->getDecimalPrecision();
   int32_t shiftedPrecision = sourceDigits - shiftAmount;
   if (resultPrecision <= maxPrecision)
      {
      isSupported = true; // fits in an MVO or SRP (and all SS2/SS3 instructions)
      }
   else if (shiftedPrecision <= maxPrecision)
      {
      isSupported = true; // fits in an MVO or SRP (and all SS2/SS3 instructions)
      }
   else if (isEven(shiftAmount))
      {
      isSupported = true; // uses MVN to move just the sign code so no restriction on length
      }

   if (self()->traceBCDCodeGen())
      traceMsg(self()->comp(),"%ssupportsPackedShiftRight = %s : shiftSource %s (%p) p=%d by shiftAmount=%d -> shiftedPrec=%d (resultPrec %d) on line_no=%d (offset=%06X)\n",
         isSupported?"":"t^t : ",isSupported?"yes":"no",shiftSource->getOpCode().getName(),shiftSource,
         sourceDigits,shiftAmount,shiftedPrecision,resultPrecision,
         self()->comp()->getLineNumber(shiftSource),self()->comp()->getLineNumber(shiftSource));

   return isSupported;
   }

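// Illustrative cases (assuming the usual 31 digit maximum packed precision): a
// 35 digit source shifted right by 4 gives shiftedPrecision 31, which fits an
// SRP; shifted right by 2 the result is 33 digits, too wide for an SRP, but the
// even shift amount allows the MVN-on-the-sign-code path; shifted right by 3
// with a result precision above 31, none of the tests pass and the shift is not
// supported here.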
int32_t
J9::Z::CodeGenerator::getPDDivEncodedPrecision(TR::Node *node)
   {
   TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,
      "getPackedDividendPrecision only valid for pddiv/pdrem\n");
   return self()->getPDDivEncodedPrecisionCommon(node,
                                                 node->getFirstChild()->getDecimalPrecision(),
                                                 node->getSecondChild()->getDecimalPrecision(),
                                                 node->getSecondChild()->isEvenPrecision());
   }

int32_t
J9::Z::CodeGenerator::getPDDivEncodedPrecision(TR::Node *node, TR_PseudoRegister *dividendReg, TR_PseudoRegister *divisorReg)
   {
   TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,
      "getPackedDividendPrecision only valid for pddiv/pdrem\n");
   return self()->getPDDivEncodedPrecisionCommon(node,
                                                 dividendReg->getDecimalPrecision(),
                                                 divisorReg->getDecimalPrecision(),
                                                 divisorReg->isEvenPrecision());
   }

int32_t
J9::Z::CodeGenerator::getPDDivEncodedPrecisionCommon(TR::Node *node, int32_t dividendPrecision, int32_t divisorPrecision, bool isDivisorEvenPrecision)
   {
   int32_t basePrecision = dividendPrecision;
   int32_t quotientAdjust = 1; // always subtract off second sign code when computing the quotient precision
   if (isDivisorEvenPrecision)
      quotientAdjust++; // adjust for the pad nibble
   return basePrecision+divisorPrecision+quotientAdjust;
   }

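// Worked example (illustrative): a precision 7 dividend divided by a precision 4
// (even precision) divisor is encoded with a first operand precision of
// 7 + 4 + 2 = 13: divisorPrecision extra digits for the remainder that DP
// leaves behind, one for the second sign code, and one for the pad nibble an
// even precision divisor requires.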
uint32_t
J9::Z::CodeGenerator::getPDDivEncodedSize(TR::Node *node)
   {
   TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,
      "getPDDivEncodedSize only valid for pddiv/pdrem\n");
   return TR::DataType::packedDecimalPrecisionToByteLength(self()->getPDDivEncodedPrecision(node));
   }

uint32_t
J9::Z::CodeGenerator::getPDDivEncodedSize(TR::Node *node, TR_PseudoRegister *dividendReg, TR_PseudoRegister *divisorReg)
   {
   TR_ASSERT(node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,
      "getPDDivEncodedSize only valid for pddiv/pdrem\n");
   return TR::DataType::packedDecimalPrecisionToByteLength(self()->getPDDivEncodedPrecision(node, dividendReg, divisorReg));
   }

bool
J9::Z::CodeGenerator::canGeneratePDBinaryIntrinsic(TR::ILOpCodes opCode, TR::Node * op1PrecNode, TR::Node * op2PrecNode, TR::Node * resultPrecNode)
   {
   if (!op2PrecNode->getOpCode().isLoadConst() || !op1PrecNode->getOpCode().isLoadConst() || !resultPrecNode->getOpCode().isLoadConst())
      return false;

   int32_t max = TR::DataType::getMaxPackedDecimalPrecision();

   int32_t op1Prec = op1PrecNode->getInt();
   int32_t op2Prec = op2PrecNode->getInt();
   int32_t resultPrec = resultPrecNode->getInt();

   if (op1Prec > max || op2Prec > max || resultPrec > max)
      return false;

   int32_t op1Size = TR::DataType::packedDecimalPrecisionToByteLength(op1Prec);
   int32_t op2Size = TR::DataType::packedDecimalPrecisionToByteLength(op2Prec);
   int32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(resultPrec);

   switch (opCode)
      {
      case TR::pdadd:
      case TR::pdsub:
      case TR::pdmul:
         if (op2Prec > 15)
            return false;
         if (resultSize < (op1Size + op2Size))
            return false;
         break;
      case TR::pddiv:
      case TR::pdrem:
         if (op2Size >= op1Size)
            return false;
         if (op2Prec > 15 || op1Prec > 31 || (op1Prec-op2Prec) > 29)
            return false;
         break;
      default:
         TR_ASSERT(0, "not implemented yet");
         return false;
      }

   return true;
   }

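// For example (illustrative precisions): pdadd with op1Prec=10, op2Prec=5 and
// resultPrec=16 is accepted: every precision is within the 31 digit maximum,
// op2Prec <= 15, and the 9 byte result can hold op1Size + op2Size = 6 + 3
// bytes. pddiv with op1Prec=10 and op2Prec=10 is rejected because the divisor
// would be as large as the dividend (op2Size >= op1Size).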
void
J9::Z::CodeGenerator::incRefCountForOpaquePseudoRegister(TR::Node * node)
   {
   if (node->getOpaquePseudoRegister())
      {
      TR_OpaquePseudoRegister *reg = node->getOpaquePseudoRegister();
      TR_StorageReference *ref = reg->getStorageReference();
      if (ref && ref->isNodeBased() && ref->getNodeReferenceCount() > 0)
         {
         if (self()->traceBCDCodeGen())
            self()->comp()->getDebug()->trace("\tnode %s (%p) with storageRef #%d (%s): increment nodeRefCount %d->%d when artificially incrementing ref count\n",
               node->getOpCode().getName(),node,ref->getReferenceNumber(),self()->comp()->getDebug()->getName(ref->getSymbol()),ref->getNodeReferenceCount(),ref->getNodeReferenceCount()+1);
         ref->incrementNodeReferenceCount();
         }
      }
   }

TR::Instruction* J9::Z::CodeGenerator::generateVMCallHelperSnippet(TR::Instruction* cursor, TR::LabelSymbol* vmCallHelperSnippetLabel)
   {
   TR::Compilation* comp = self()->comp();

   // Associate all generated instructions with the first node
   TR::Node* node = comp->getStartTree()->getNode();

   cursor = generateS390LabelInstruction(self(), TR::InstOpCode::label, node, vmCallHelperSnippetLabel, cursor);

   TR::Instruction* vmCallHelperSnippetLabelInstruction = cursor;

   // Store all arguments to the stack for access by the interpreted method
   J9::Z::PrivateLinkage *privateLinkage = static_cast<J9::Z::PrivateLinkage *>(self()->getLinkage());
   cursor = static_cast<TR::Instruction*>(privateLinkage->saveArguments(cursor, false, true));

   // Load the EP register with the address of the next instruction
   cursor = generateRRInstruction(self(), TR::InstOpCode::BASR, node, self()->getEntryPointRealRegister(), self()->machine()->getRealRegister(TR::RealRegister::GPR0), cursor);

   TR::Instruction* basrInstruction = cursor;

   // Displacement will be updated later once we know the offset
   TR::MemoryReference* j9MethodAddressMemRef = generateS390MemoryReference(self()->getEntryPointRealRegister(), 0, self());

   // Load the address of the J9Method corresponding to this JIT compilation
   cursor = generateRXInstruction(self(), TR::InstOpCode::getLoadOpCode(), node, self()->machine()->getRealRegister(TR::RealRegister::GPR1), j9MethodAddressMemRef, cursor);

   // Displacement will be updated later once we know the offset
   TR::MemoryReference* vmCallHelperAddressMemRef = generateS390MemoryReference(self()->getEntryPointRealRegister(), 0, self());

   // Load the address of the VM call helper
   cursor = generateRXInstruction(self(), TR::InstOpCode::getLoadOpCode(), node, self()->getEntryPointRealRegister(), vmCallHelperAddressMemRef, cursor);

   // Call the VM call helper
   cursor = generateS390BranchInstruction(self(), TR::InstOpCode::BCR, node, TR::InstOpCode::COND_BCR, self()->getEntryPointRealRegister(), cursor);

   const int32_t offsetFromEPRegisterValueToVMCallHelperAddress = CalcCodeSize(basrInstruction->getNext(), cursor);

   vmCallHelperAddressMemRef->setOffset(offsetFromEPRegisterValueToVMCallHelperAddress);

   TR::ResolvedMethodSymbol* methodSymbol = comp->getJittedMethodSymbol();

   TR::SymbolReference* helperSymRef = self()->symRefTab()->findOrCreateRuntimeHelper(TR_j2iTransition);

   // AOT relocation for the helper address
   TR::S390EncodingRelocation* encodingRelocation = new (self()->trHeapMemory()) TR::S390EncodingRelocation(TR_AbsoluteHelperAddress, helperSymRef);

   AOTcgDiag3(comp, "Add encodingRelocation = %p reloType = %p symbolRef = %p\n", encodingRelocation, encodingRelocation->getReloType(), encodingRelocation->getSymbolReference());

   const intptr_t vmCallHelperAddress = reinterpret_cast<intptr_t>(helperSymRef->getMethodAddress());

   // Encode the address of the VM call helper
   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, UPPER_4_BYTES(vmCallHelperAddress), cursor);
      cursor->setEncodingRelocation(encodingRelocation);

      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, LOWER_4_BYTES(vmCallHelperAddress), cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, vmCallHelperAddress, cursor);
      cursor->setEncodingRelocation(encodingRelocation);
      }

   const int32_t offsetFromEPRegisterValueToJ9MethodAddress = CalcCodeSize(basrInstruction->getNext(), cursor);

   j9MethodAddressMemRef->setOffset(offsetFromEPRegisterValueToJ9MethodAddress);
   TR::SymbolReference *methodSymRef = new (self()->trHeapMemory()) TR::SymbolReference(self()->symRefTab(), methodSymbol);
   encodingRelocation = new (self()->trHeapMemory()) TR::S390EncodingRelocation(TR_RamMethod, methodSymRef);

   AOTcgDiag2(comp, "Add encodingRelocation = %p reloType = %p\n", encodingRelocation, encodingRelocation->getReloType());

   const intptr_t j9MethodAddress = reinterpret_cast<intptr_t>(methodSymbol->getResolvedMethod()->resolvedMethodAddress());

   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, UPPER_4_BYTES(j9MethodAddress), cursor);
      cursor->setEncodingRelocation(encodingRelocation);

      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, LOWER_4_BYTES(j9MethodAddress), cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, j9MethodAddress, cursor);
      cursor->setEncodingRelocation(encodingRelocation);
      }

   if (comp->getOption(TR_EnableHCR))
      {
      comp->getStaticHCRPICSites()->push_front(cursor);
      }

   int32_t padSize = CalcCodeSize(vmCallHelperSnippetLabelInstruction, cursor) % TR::Compiler->om.sizeofReferenceAddress();

   if (padSize != 0)
      {
      padSize = TR::Compiler->om.sizeofReferenceAddress() - padSize;
      }

   // Align to the size of the reference field to ensure alignment of subsequent sections for atomic patching
   cursor = self()->insertPad(node, cursor, padSize, false);

   return cursor;
   }

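// Sketch of the snippet laid down above (mnemonics shown for orientation only;
// offA and offB are the displacements patched in via setOffset):
//
//   vmCallHelperSnippetLabel:
//     <store argument registers back to the stack>
//     BASR  EP,GPR0            // EP <- address of the next instruction
//     L/LG  GPR1,offB(,EP)     // address of the J9Method for this compilation
//     L/LG  EP,offA(,EP)       // address of the VM call helper
//     BCR   15,EP              // branch to the helper
//     DC    <helper address>   // 4 or 8 bytes, AOT relocated
//     DC    <J9Method address> // 4 or 8 bytes, AOT relocated
//     <pad to a multiple of the reference size>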
bool J9::Z::CodeGenerator::canUseRelativeLongInstructions(int64_t value)
   {
   if (self()->comp()->isOutOfProcessCompilation())
      {
      return false;
      }
   return OMR::CodeGeneratorConnector::canUseRelativeLongInstructions(value);
   }

TR::Instruction* J9::Z::CodeGenerator::generateVMCallHelperPrePrologue(TR::Instruction* cursor)
   {
   TR::Compilation* comp = self()->comp();

   // Associate all generated instructions with the first node
   TR::Node* node = comp->getStartTree()->getNode();

   TR::LabelSymbol* vmCallHelperSnippetLabel = generateLabelSymbol(self());

   cursor = self()->generateVMCallHelperSnippet(cursor, vmCallHelperSnippetLabel);

   cursor = generateS390BranchInstruction(self(), TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, vmCallHelperSnippetLabel, cursor);

   // The following 4 bytes are used for various patching sequences that overwrite the JIT entry point with a 4 byte
   // branch (BRC) to some location. Before patching in the branch we must save the 4 bytes at the JIT entry point
   // to this location so that we can later reverse the patching at the JIT entry point if needed.
   cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0xdeafbeef, cursor);

   // Generate a pad for the body info address to keep offsets in PreprologueConst.hpp constant for simplicity
   if (comp->target().is64Bit())
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      }
   else
      {
      cursor = generateDataConstantInstruction(self(), TR::InstOpCode::dd, node, 0x00000000, cursor);
      }

   return cursor;
   }

bool
J9::Z::CodeGenerator::suppressInliningOfRecognizedMethod(TR::RecognizedMethod method)
   {
   TR::Compilation *comp = self()->comp();

   if (self()->isMethodInAtomicLongGroup(method))
      return true;

   if (self()->getSupportsVectorRegisters())
      {
      if (method == TR::java_lang_Math_fma_D ||
          method == TR::java_lang_StrictMath_fma_D)
         {
         return true;
         }
      if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_1) &&
          (method == TR::java_lang_Math_fma_F ||
           method == TR::java_lang_StrictMath_fma_F))
         {
         return true;
         }
      }

   if (method == TR::java_lang_Integer_highestOneBit ||
       method == TR::java_lang_Integer_numberOfLeadingZeros ||
       method == TR::java_lang_Integer_numberOfTrailingZeros ||
       method == TR::java_lang_Long_highestOneBit ||
       method == TR::java_lang_Long_numberOfLeadingZeros ||
       method == TR::java_lang_Long_numberOfTrailingZeros)
      {
      return true;
      }

   if (method == TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_getAndSet ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_addAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_incrementAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_decrementAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_addAndGet ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndIncrement ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndDecrement ||
       method == TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndAdd)
      {
      return true;
      }

   // Transactional Memory
   if (self()->getSupportsInlineConcurrentLinkedQueue())
      {
      if (method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer ||
          method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmPoll ||
          method == TR::java_util_concurrent_ConcurrentLinkedQueue_tmEnabled)
         {
         return true;
         }
      }

   return false;
   }

#define IS_OBJ true
#define IS_NOT_OBJ false

bool isKnownMethod(TR::MethodSymbol * methodSymbol)
   {
   return methodSymbol &&
          (methodSymbol->getRecognizedMethod() == TR::java_lang_Math_sqrt ||
           methodSymbol->getRecognizedMethod() == TR::java_lang_StrictMath_sqrt ||
           methodSymbol->getRecognizedMethod() == TR::java_lang_Class_isAssignableFrom);
   }

bool
3746
J9::Z::CodeGenerator::inlineDirectCall(
3747
TR::Node *node,
3748
TR::Register *&resultReg)
3749
{
3750
TR::CodeGenerator *cg = self();
3751
TR::Compilation *comp = cg->comp();
3752
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
3753
3754
TR::MethodSymbol * methodSymbol = node->getSymbol()->getMethodSymbol();
3755
3756
// If the method to be called is marked as an inline method, see if it can
3757
// actually be generated inline.
3758
//
3759
3760
if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::encodeASCIISymbol))
3761
{
3762
TR::TreeEvaluator::inlineEncodeASCII(node, cg);
3763
return true;
3764
}
3765
else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::currentTimeMaxPrecisionSymbol))
3766
{
3767
resultReg = TR::TreeEvaluator::inlineCurrentTimeMaxPrecision(cg, node);
3768
return true;
3769
}
3770
else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::singlePrecisionSQRTSymbol))
3771
{
3772
resultReg = TR::TreeEvaluator::inlineSinglePrecisionSQRT(node, cg);
3773
return true;
3774
}
3775
else if (comp->getSymRefTab()->isNonHelper(node->getSymbolReference(), TR::SymbolReferenceTable::synchronizedFieldLoadSymbol))
3776
{
3777
ReduceSynchronizedFieldLoad::inlineSynchronizedFieldLoad(node, cg);
3778
return true;
3779
}
3780
3781
static const char * enableTRTRE = feGetEnv("TR_enableTRTRE");
3782
switch (methodSymbol->getRecognizedMethod())
3783
{
3784
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
3785
// In Java9 this can be either the jdk.internal JNI method or the sun.misc Java wrapper.
3786
// In Java8 it will be sun.misc which will contain the JNI directly.
3787
// We only want to inline the JNI methods, so add an explicit test for isNative().
3788
if (!methodSymbol->isNative())
3789
break;
3790
3791
if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
3792
{
3793
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CS, IS_NOT_OBJ);
3794
return true;
3795
}
3796
3797
case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
3798
// As above, we only want to inline the JNI methods, so add an explicit test for isNative()
3799
if (!methodSymbol->isNative())
3800
break;
3801
3802
if (comp->target().is64Bit() && (!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
3803
{
3804
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, TR::InstOpCode::CSG, IS_NOT_OBJ);
3805
return true;
3806
}
3807
// Too risky to do Long-31bit version now.
3808
break;
3809
3810
case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
3811
// As above, we only want to inline the JNI methods, so add an explicit test for isNative()
3812
if (!methodSymbol->isNative())
3813
break;
3814
3815
if ((!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray()) && node->isSafeForCGToFastPathUnsafeCall())
3816
{
3817
resultReg = TR::TreeEvaluator::VMinlineCompareAndSwap(node, cg, (comp->useCompressedPointers() ? TR::InstOpCode::CS : TR::InstOpCode::getCmpAndSwapOpCode()), IS_OBJ);
3818
return true;
3819
}
3820
break;
3821
3822
case TR::java_util_concurrent_atomic_Fences_reachabilityFence:
3823
case TR::java_util_concurrent_atomic_Fences_orderAccesses:
3824
case TR::java_util_concurrent_atomic_Fences_orderReads:
3825
case TR::java_util_concurrent_atomic_Fences_orderWrites:
3826
cg->decReferenceCount(node->getChild(0));
3827
break;
3828
3829
case TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet:
3830
case TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd:
3831
case TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement:
3832
case TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement:
3833
case TR::java_util_concurrent_atomic_AtomicInteger_getAndSet:
3834
case TR::java_util_concurrent_atomic_AtomicInteger_addAndGet:
3835
case TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet:
3836
case TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet:
3837
resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 4, methodSymbol);
3838
return true;
3839
break;
3840
3841
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndDecrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndSet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerArray_decrementAndGet:
         resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 4, methodSymbol, true);
         return true;

      case TR::java_util_concurrent_atomic_AtomicLong_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicLong_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicLong_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLong_getAndDecrement:
         if (cg->checkFieldAlignmentForAtomicLong() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
            {
            // TODO: I'm not sure we need the z196 restriction here given that the function already checks for z196 and
            // has a compare and swap fallback path
            resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 8, methodSymbol);
            return true;
            }
         break;

      case TR::java_util_concurrent_atomic_AtomicLongArray_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndAdd:
      case TR::java_util_concurrent_atomic_AtomicLongArray_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicLongArray_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicLongArray_getAndDecrement:
         if (cg->checkFieldAlignmentForAtomicLong() && comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
            {
            // TODO: I'm not sure we need the z196 restriction here given that the function already checks for z196 and
            // has a compare and swap fallback path
            resultReg = TR::TreeEvaluator::inlineAtomicOps(node, cg, 8, methodSymbol);
            return true;
            }
         break;

      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_incrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_decrementAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_addAndGet:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndIncrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndDecrement:
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndAdd:
         if (cg->getSupportsAtomicLoadAndAdd())
            {
            resultReg = TR::TreeEvaluator::inlineAtomicFieldUpdater(node, cg, methodSymbol);
            return true;
            }
         break;

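      // keepAlive/reachabilityFence generate no real code; they only keep the referenced
      // object live at this program point so the GC cannot reclaim it prematurely.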
      case TR::java_nio_Bits_keepAlive:
      case TR::java_lang_ref_Reference_reachabilityFence:
         resultReg = TR::TreeEvaluator::inlineKeepAlive(node, cg);
         return true;

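      // tmOffer/tmPoll are ConcurrentLinkedQueue fast paths built on hardware
      // transactional execution (TBEGIN/TEND); they are gated on
      // getSupportsInlineConcurrentLinkedQueue().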
      case TR::java_util_concurrent_ConcurrentLinkedQueue_tmOffer:
         if (cg->getSupportsInlineConcurrentLinkedQueue())
            {
            resultReg = TR::TreeEvaluator::inlineConcurrentLinkedQueueTMOffer(node, cg);
            return true;
            }
         break;

      case TR::java_util_concurrent_ConcurrentLinkedQueue_tmPoll:
         if (cg->getSupportsInlineConcurrentLinkedQueue())
            {
            resultReg = TR::TreeEvaluator::inlineConcurrentLinkedQueueTMPoll(node, cg);
            return true;
            }
         break;

      // The hash-code routines for compressed and decompressed strings share most of
      // their code, so both are handled by inlineStringHashCode.
      case TR::java_lang_String_hashCodeImplDecompressed:
         if (cg->getSupportsInlineStringHashCode())
            {
            resultReg = TR::TreeEvaluator::inlineStringHashCode(node, cg, false);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_String_hashCodeImplCompressed:
         if (cg->getSupportsInlineStringHashCode())
            {
            resultReg = TR::TreeEvaluator::inlineStringHashCode(node, cg, true);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_StringLatin1_inflate:
         if (cg->getSupportsInlineStringLatin1Inflate())
            {
            resultReg = TR::TreeEvaluator::inlineStringLatin1Inflate(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:
         resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg)
                                                               : TR::TreeEvaluator::inlineUTF16BEEncode(node, cg);
         return resultReg != NULL;

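      // The Integer/Long -> String helpers below (digit counting and digit stores for
      // Latin-1 and UTF-16 backing arrays) are vector-accelerated; each inliner may
      // decline by returning NULL, in which case the regular call is emitted.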
      case TR::java_lang_Integer_stringSize:
      case TR::java_lang_Long_stringSize:
         if (cg->getSupportsIntegerStringSize())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerStringSize(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_Integer_getChars:
      case TR::java_lang_Long_getChars:
         if (cg->getSupportsIntegerToChars())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerToCharsForLatin1Strings(node, cg);
            return resultReg != NULL;
            }
         break;

      case TR::java_lang_StringUTF16_getChars_Integer:
      case TR::java_lang_StringUTF16_getChars_Long:
      case TR::java_lang_Integer_getChars_charBuffer:
      case TR::java_lang_Long_getChars_charBuffer:
         if (cg->getSupportsIntegerToChars())
            {
            resultReg = TR::TreeEvaluator::inlineIntegerToCharsForUTF16Strings(node, cg);
            return resultReg != NULL;
            }
         break;

      default:
         break;
      }

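   // Bit-manipulation helpers have no preconditions on z, so they are always inlined
   // (the sequences are built around instructions such as FLOGR, find-leftmost-one).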
   switch (methodSymbol->getRecognizedMethod())
      {
      case TR::java_lang_Integer_highestOneBit:
         resultReg = TR::TreeEvaluator::inlineHighestOneBit(node, cg, false);
         return true;
      case TR::java_lang_Integer_numberOfLeadingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfLeadingZeros(node, cg, false);
         return true;
      case TR::java_lang_Integer_numberOfTrailingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfTrailingZeros(node, cg, 32);
         return true;
      case TR::java_lang_Long_highestOneBit:
         resultReg = TR::TreeEvaluator::inlineHighestOneBit(node, cg, true);
         return true;
      case TR::java_lang_Long_numberOfLeadingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfLeadingZeros(node, cg, true);
         return true;
      case TR::java_lang_Long_numberOfTrailingZeros:
         resultReg = TR::TreeEvaluator::inlineNumberOfTrailingZeros(node, cg, 64);
         return true;
      default:
         break;
      }

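   // Hardware-accelerated crypto is only attempted when the crypto acceleration build
   // flag is set; inlineCryptoMethod receives resultReg and fills it in on success.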
#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
   if (self()->inlineCryptoMethod(node, resultReg))
      {
      return true;
      }
#endif

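   // Vectorized String case conversion; the boolean argument distinguishes the
   // Latin-1 (true) from the UTF-16 (false) encoding.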
   if (cg->getSupportsInlineStringCaseConversion())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicUTF16:
            resultReg = TR::TreeEvaluator::toUpperIntrinsic(node, cg, false);
            return true;
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicLatin1:
            resultReg = TR::TreeEvaluator::toUpperIntrinsic(node, cg, true);
            return true;
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicUTF16:
            resultReg = TR::TreeEvaluator::toLowerIntrinsic(node, cg, false);
            return true;
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicLatin1:
            resultReg = TR::TreeEvaluator::toLowerIntrinsic(node, cg, true);
            return true;
         default:
            break;
         }
      }

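   // Two indexOf flavours: the intrinsicIndexOf* entries search for a single character
   // and always succeed, while the vectorized string-search entries may decline by
   // returning NULL.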
   if (cg->getSupportsInlineStringIndexOf())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
            resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, true);
            return true;
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
            resultReg = TR::TreeEvaluator::inlineIntrinsicIndexOf(node, cg, false);
            return true;
         case TR::java_lang_StringLatin1_indexOf:
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringLatin1:
            resultReg = TR::TreeEvaluator::inlineVectorizedStringIndexOf(node, cg, false);
            return resultReg != NULL;
         case TR::java_lang_StringUTF16_indexOf:
         case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfStringUTF16:
            resultReg = TR::TreeEvaluator::inlineVectorizedStringIndexOf(node, cg, true);
            return resultReg != NULL;
         default:
            break;
         }
      }

   if (!comp->getOption(TR_DisableSIMDDoubleMaxMin) && cg->getSupportsVectorRegisters())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::java_lang_Math_max_D:
            resultReg = TR::TreeEvaluator::inlineDoubleMax(node, cg);
            return true;
         case TR::java_lang_Math_min_D:
            resultReg = TR::TreeEvaluator::inlineDoubleMin(node, cg);
            return true;
         default:
            break;
         }
      }

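   // Math.fma maps to the z fused multiply-add; the single-precision form additionally
   // requires the vector-enhancements facility 1 (z14).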
   if (cg->getSupportsVectorRegisters())
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::java_lang_Math_fma_D:
         case TR::java_lang_StrictMath_fma_D:
            resultReg = TR::TreeEvaluator::inlineMathFma(node, cg);
            return true;

         case TR::java_lang_Math_fma_F:
         case TR::java_lang_StrictMath_fma_F:
            if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_1))
               {
               resultReg = TR::TreeEvaluator::inlineMathFma(node, cg);
               return true;
               }
            break;
         default:
            break;
         }
      }

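   // Last chance: VM/JIT internal natives and other known methods go through the
   // generic VMinlineCallEvaluator, which evaluates the node itself on success.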
   TR::MethodSymbol * symbol = node->getSymbol()->castToMethodSymbol();
   if (symbol->isVMInternalNative() || symbol->isJITInternalNative() || isKnownMethod(methodSymbol))
      {
      if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))
         {
         resultReg = node->getRegister();
         return true;
         }
      }

   // No method specialization was done.
   //
   resultReg = NULL;
   return false;
   }

/**
 * Check whether an arithmetic operation with a constant requires an entry in the
 * literal pool, i.e. whether the constant lies outside the immediate-encodable range.
 */
bool
J9::Z::CodeGenerator::arithmeticNeedsLiteralFromPool(TR::Node *node)
   {
   int64_t value = getIntegralValue(node);
   return value > GE_MAX_IMMEDIATE_VAL || value < GE_MIN_IMMEDIATE_VAL;
   }

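// A usage sketch (hypothetical caller, not code from this file): a constant that fits
// the immediate range can be encoded directly in extended-immediate instructions such
// as AFI/AGFI, while anything wider must be materialized from the literal pool:
//
//    if (cg->arithmeticNeedsLiteralFromPool(constNode))
//       {
//       // allocate a literal pool entry and load the constant from it
//       }
//    else
//       {
//       // encode the constant in the instruction's immediate field
//       }
//
// (Assumption: GE_MIN/GE_MAX_IMMEDIATE_VAL bound the 32-bit signed range of the z9
// extended-immediate facility.)
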
bool
J9::Z::CodeGenerator::supportsTrapsInTMRegion()
   {
   return self()->comp()->target().isZOS();
   }