Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/z/codegen/J9BCDTreeEvaluator.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2018, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
//On zOS XLC linker can't handle files with same name at link time
24
//This workaround with pragma is needed. What this does is essentially
25
//give a different name to the codesection (csect) for this file. So it
26
//doesn't conflict with another file with same name.
27
#pragma csect(CODE,"TRJ9ZBCDTreeEvalBase#C")
28
#pragma csect(STATIC,"TRJ9ZBCDTreeEvalBase#S")
29
#pragma csect(TEST,"TRJ9ZBCDTreeEvalBase#T")
30
31
#include <algorithm>
32
#include <limits.h>
33
#include <math.h>
34
#include <stdint.h>
35
#include "j9.h"
36
#include "j9cfg.h"
37
#include "j9consts.h"
38
#include "j9modron.h"
39
#include "thrdsup.h"
40
#include "thrtypes.h"
41
#include "codegen/CodeGenerator.hpp"
42
#include "codegen/CodeGenerator_inlines.hpp"
43
#include "codegen/Machine.hpp"
44
#include "compile/ResolvedMethod.hpp"
45
#include "env/CompilerEnv.hpp"
46
#include "env/jittypes.h"
47
#include "env/VMJ9.h"
48
#include "il/DataTypes.hpp"
49
#include "il/LabelSymbol.hpp"
50
#include "il/MethodSymbol.hpp"
51
#include "il/Node.hpp"
52
#include "il/Node_inlines.hpp"
53
#include "il/RegisterMappedSymbol.hpp"
54
#include "il/ResolvedMethodSymbol.hpp"
55
#include "il/Symbol.hpp"
56
#include "il/TreeTop.hpp"
57
#include "il/TreeTop_inlines.hpp"
58
#include "ras/DebugCounter.hpp"
59
#include "env/VMJ9.h"
60
#include "z/codegen/J9S390Snippet.hpp"
61
#include "z/codegen/S390J9CallSnippet.hpp"
62
#include "z/codegen/S390Evaluator.hpp"
63
#include "z/codegen/S390GenerateInstructions.hpp"
64
#include "z/codegen/S390HelperCallSnippet.hpp"
65
#include "z/codegen/S390Instruction.hpp"
66
#include "z/codegen/S390Register.hpp"
67
#include "z/codegen/SystemLinkage.hpp"
68
69
/**
 * \brief Shared helper that emits the PKA (zoned source) or PKU (Unicode source) instruction which
 *        converts the first child of \p node into packed decimal storage owned by \p targetReg.
 *
 * \param node       The conversion node; its first child supplies the zoned/Unicode source type.
 * \param targetReg  Pseudo register for \p node. Its storage reference, precision, sign state and
 *                   initialized flag are all set by this routine.
 * \param sourceMR   Memory reference for the source operand; it is re-based as a left aligned
 *                   reference before being used in the PKA/PKU.
 * \param childReg   Pseudo register of the already evaluated first child.
 * \param cg         The code generator.
 *
 * \return The left aligned destination memory reference used as the first operand of the PKA/PKU.
 */
TR::MemoryReference *
J9::Z::TreeEvaluator::asciiAndUnicodeToPackedHelper(TR::Node *node,
                                                    TR_PseudoRegister *targetReg,
                                                    TR::MemoryReference *sourceMR,
                                                    TR_PseudoRegister *childReg,
                                                    TR::CodeGenerator * cg)
   {
   TR::Node *srcNode = node->getFirstChild();
   const bool srcIsUnicode = srcNode->getType().isAnyUnicode();
   const bool srcIsZoned = srcNode->getType().isAnyZoned();

   // Classify the source encoding; only Unicode and zoned children are expected here.
   TR::DataType sourceType = TR::NoType;
   TR::Compilation *comp = cg->comp();
   if (srcIsUnicode)
      {
      sourceType = TR::UnicodeDecimal;
      }
   else if (srcIsZoned)
      {
      sourceType = TR::ZonedDecimal;
      }
   else
      {
      TR_ASSERT(false,"unexpected type on node %s (%p)\n",srcNode->getOpCode().getName(),srcNode);
      }

   TR_StorageReference *hint = node->getStorageReferenceHint();
   TR_StorageReference *destStorageRef = NULL;

   // The result size is fixed by the instruction and only depends on the source flavour.
   int32_t destSize = srcIsUnicode ? cg->getUnicodeToPackedFixedResultSize() : cg->getAsciiToPackedFixedResultSize();
   TR_ASSERT(TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize) >= childReg->getDecimalPrecision(),
             "%s source precision of %d should not exceed the fixed precision of %d\n",
             node->getOpCode().getName(), childReg->getDecimalPrecision(), TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize));

   // Use the hint as the destination only when it does not alias the initialized source storage.
   if (hint != NULL)
      {
      const bool hintIsInitializedSource = childReg->isInitialized() && hint == childReg->getStorageReference();
      if (hintIsInitializedSource)
         {
         TR_ASSERT( false,"ad2pd/ud2pd operands will overlap because child storageReference of ud2pd is initialized hint\n");
         }
      else
         {
         TR_ASSERT(hint->getSymbolSize() >= destSize, "ad2pd/ud2pd hint size of %d should be >= the fixed size of %d\n",hint->getSymbolSize(),destSize);
         destStorageRef = hint;
         }
      }

   // No usable hint: fall back to a fresh temporary of the fixed result size.
   if (destStorageRef == NULL)
      {
      destStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);
      }

   targetReg->setStorageReference(destStorageRef, node);

   // When the child carries more digits than the node wants, only the node's precision is packed.
   const int32_t sourcePrecision = childReg->getDecimalPrecision();
   const bool isTruncation = sourcePrecision > node->getDecimalPrecision();
   int32_t pkxSourcePrecision = sourcePrecision;
   if (isTruncation)
      {
      pkxSourcePrecision = node->getDecimalPrecision();
      }
   const int32_t pkxSourceSize = TR::DataType::getSizeFromBCDPrecision(sourceType, pkxSourcePrecision);
   const int32_t resultPrecision = pkxSourcePrecision;
   const int32_t sourceEndByte = TR::DataType::getLeftMostByte(srcNode->getDataType(), pkxSourceSize);

   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tasciiAndUnicodeToPackedHelper %p : op %s, isTruncation=%s, fixedDestSize %d, targetRegPrec %d, sourcePrecision %d, sourceEndByte %d, sourceSize %d, pkuSourceSize %d\n",
               node,node->getOpCode().getName(),isTruncation?"yes":"no",destSize,resultPrecision,sourcePrecision,sourceEndByte,childReg->getSize(),pkxSourceSize);
      }

   // PKA/PKU have a fixed size first operand (target) and a variable size second operand (source).
   // Left aligned memory references are used so that each operand gets its own correct alignment;
   // right aligned references with an SS1 instruction would apply the same bump to both operands.
   TR::MemoryReference *destMR = generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, destSize);
   sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, childReg->getStorageReference(), cg, sourceEndByte);

   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tgen %s with fixed dest size of %d and source size %d. Set targetRegPrec to sourcePrec (%d)\n",srcIsUnicode?"PKU":"PKA",destSize,pkxSourceSize,sourcePrecision);
      }

   generateSS1Instruction(cg, srcIsUnicode ? TR::InstOpCode::PKU : TR::InstOpCode::PKA, node, pkxSourceSize-1, destMR, sourceMR);

   // Record the digits above the packed source precision, up to the full destination, as zero.
   const int32_t destSizeAsCeilingPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(destSize);
   if (destSizeAsCeilingPrecision > pkxSourcePrecision)
      {
      targetReg->addRangeOfZeroDigits(pkxSourcePrecision, destSizeAsCeilingPrecision);
      }

   if (!node->getOpCode().isSetSign())
      {
      // PKA/PKU always sets the preferred positive code and therefore a known clean sign is generated.
      targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());
      }
   else
      {
      TR::Node *setSignNode = node->getSetSignValueNode();
      TR_ASSERT(setSignNode->getOpCode().isLoadConst() && setSignNode->getOpCode().getSize() <= 4,"expecting a <= 4 size integral constant set sign amount on node %p\n",setSignNode);
      const int32_t sign = setSignNode->get32bitIntegralValue();
      if (sign != TR::DataType::getPreferredPlusCode())
         {
         // The requested sign differs from what the pack instruction produced, so overwrite it.
         cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), destMR, sign, targetReg, 0, false); // numericNibbleIsZero=false
         }
      else
         {
         targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());
         }
      cg->decReferenceCount(setSignNode);
      }

   targetReg->setDecimalPrecision(resultPrecision);
   targetReg->transferDataState(childReg);
   targetReg->setIsInitialized();
   node->setRegister(targetReg);
   return destMR;
   }
163
164
/**
 * \brief Vector flavour of ud2pd: packs the Unicode child into storage with the shared PKU helper
 *        and then loads that storage into a newly allocated vector register.
 *
 * \param node The ud2pd node.
 * \param cg   The code generator.
 *
 * \return The vector register holding the packed decimal; it is also set as the register of \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::ud2pdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
   {
   // Step 1: run the ud2pd helper so the packed value lands in a storage reference.
   TR_PseudoRegister *packedStorageReg = cg->allocatePseudoRegister(node->getDataType());
   TR::Node *srcNode = node->getFirstChild();
   TR_PseudoRegister *srcReg = cg->privatizeBCDRegisterIfNeeded(node, srcNode, cg->evaluateBCDNode(srcNode));
   TR::MemoryReference *srcMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
   asciiAndUnicodeToPackedHelper(node, packedStorageReg, srcMR, srcReg, cg);

   // Step 2: bring the packed decimal from that storage reference into a vector register.
   TR::Register *vectorResultReg = cg->allocateRegister(TR_VRF);
   TR::MemoryReference *packedMR = generateS390RightAlignedMemoryReference(node, packedStorageReg->getStorageReference(), cg);

   // PKU always puts the result into 16 bytes space
   const uint8_t lengthToLoad = TR_VECTOR_REGISTER_SIZE - 1;
   generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vectorResultReg, packedMR, lengthToLoad);

   cg->decReferenceCount(srcNode);
   node->setRegister(vectorResultReg);
   return vectorResultReg;
   }
189
190
/**
191
* Handles TR::ud2pd
192
*/
193
TR::Register *
194
J9::Z::TreeEvaluator::ud2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
195
{
196
TR::Compilation *comp = cg->comp();
197
cg->traceBCDEntry("ud2pd",node);
198
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "PD-Op/%s", node->getOpCode().getName()),
199
1, TR::DebugCounter::Cheap);
200
TR::Register* targetReg = NULL;
201
202
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
203
if(comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
204
!comp->getOption(TR_DisableVectorBCD) ||
205
isVectorBCDEnv)
206
{
207
targetReg = ud2pdVectorEvaluatorHelper(node, cg);
208
}
209
else
210
{
211
targetReg = cg->allocatePseudoRegister(node->getDataType());
212
TR::Node *child = node->getFirstChild();
213
TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);
214
childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);
215
TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);
216
asciiAndUnicodeToPackedHelper(node, static_cast<TR_PseudoRegister*>(targetReg), sourceMR, childReg, cg);
217
cg->decReferenceCount(child);
218
node->setRegister(targetReg);
219
}
220
221
cg->traceBCDExit("ud2pd",node);
222
return targetReg;
223
}
224
225
/**
226
* Handles TR::udsl2pd, TR::udst2pd
227
*/
228
TR::Register *
229
J9::Z::TreeEvaluator::udsl2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
230
{
231
TR::Compilation *comp = cg->comp();
232
cg->traceBCDEntry("udsl2pd",node);
233
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "PD-Op/%s", node->getOpCode().getName()),
234
1, TR::DebugCounter::Cheap);
235
TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());
236
TR::Node *child = node->getFirstChild();
237
TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);
238
childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);
239
240
bool isSrcTrailingSign = (child->getDataType() == TR::UnicodeDecimalSignTrailing);
241
int32_t sourceSignEndByte = isSrcTrailingSign ? TR::DataType::getUnicodeSignSize() : childReg->getSize();
242
TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, childReg->getStorageReference(), cg, sourceSignEndByte);
243
TR::MemoryReference *destMR = asciiAndUnicodeToPackedHelper(node, targetReg, sourceMR, childReg, cg);
244
245
if (!node->getOpCode().isSetSign())
246
{
247
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
248
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
249
250
bool isImplicitValue = node->getNumChildren() < 2;
251
252
TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, isImplicitValue ? 4 : 2, cg);
253
254
if (destMR->getIndexRegister())
255
deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);
256
if (destMR->getBaseRegister())
257
deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);
258
259
bool isTruncation = childReg->getDecimalPrecision() > node->getDecimalPrecision();
260
261
if (cg->traceBCDCodeGen())
262
traceMsg(comp,"\tudsl2pdEvaluator %p : op %s, isTruncation=%s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",
263
node,node->getOpCode().getName(),isTruncation?"yes":"no",targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),childReg->getSize(),childReg->getDecimalPrecision(),sourceSignEndByte);
264
265
if (isImplicitValue)
266
{
267
if (sourceMR->getIndexRegister())
268
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);
269
if (sourceMR->getBaseRegister())
270
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);
271
272
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);
273
cFlowRegionStart->setStartInternalControlFlow();
274
275
// The primary (and currently the only) consumer of BCD evaluators in Java is the DAA intrinsics
276
// library. The DAA library assumes all BCD types are positive, unless an explicit negative sign
277
// code is present. Because of this deviation from the COBOL treatment of sign codes we must
278
// take a specialized control path when generating instructions for Java.
279
280
generateSILInstruction(cg, TR::InstOpCode::CLHHSI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), 0x002D);
281
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, cFlowRegionEnd);
282
}
283
else
284
{
285
TR::Node *minusSign = node->getSecondChild();
286
287
TR::MemoryReference *minusSignMR = generateS390ConstantAreaMemoryReference(cg, minusSign, true); // forSS=true
288
289
generateSS1Instruction(cg, TR::InstOpCode::CLC, node,
290
TR::DataType::getUnicodeSignSize()-1,
291
generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte),
292
minusSignMR);
293
294
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);
295
cFlowRegionStart->setStartInternalControlFlow();
296
297
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cFlowRegionEnd);
298
}
299
300
cg->genSignCodeSetting(node, NULL, targetReg->getSize(),
301
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
302
TR::DataType::getPreferredMinusCode(), targetReg, 0, false); // numericNibbleIsZero=false
303
304
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, deps);
305
cFlowRegionEnd->setEndInternalControlFlow();
306
307
targetReg->resetSignState();
308
targetReg->setHasKnownPreferredSign();
309
310
if (!isTruncation)
311
targetReg->transferCleanSign(childReg);
312
else
313
traceMsg(comp,"\tudsx2p is a truncation (srcRegPrec %d > nodePrec %d) so do not transfer any clean sign flags\n",childReg->getDecimalPrecision(),node->getDecimalPrecision());
314
}
315
316
//at this point targetReg is PseudoRegister that has converted Packed decimal value.
317
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
318
if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
319
!comp->getOption(TR_DisableVectorBCD) ||
320
isVectorBCDEnv)
321
{
322
TR::Register * pdVectorTargetReg = cg->allocateRegister(TR_VRF);
323
TR::MemoryReference * pdSourceMR = generateS390RightAlignedMemoryReference(node,
324
targetReg->getStorageReference(),
325
cg);
326
//PKU always puts the result into 16 bytes space
327
uint8_t lengthToLoad = TR_VECTOR_REGISTER_SIZE - 1;
328
generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, pdVectorTargetReg, pdSourceMR, lengthToLoad);
329
330
cg->decReferenceCount(child);
331
node->setRegister(pdVectorTargetReg);
332
cg->traceBCDExit("udsl2pd",node);
333
return pdVectorTargetReg;
334
}
335
else
336
{
337
cg->decReferenceCount(child);
338
node->setRegister(targetReg);
339
cg->traceBCDExit("udsl2pd",node);
340
return targetReg;
341
}
342
}
343
344
/**
345
* Handles pd2udsl,pd2udst, where the Unicode decimal signs are separate.
346
*/
347
TR::Register *
348
J9::Z::TreeEvaluator::pd2udslEvaluator(TR::Node *node, TR::CodeGenerator *cg)
349
{
350
cg->traceBCDEntry("pd2udsl",node);
351
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
352
1, TR::DebugCounter::Cheap);
353
354
TR::Node* childNode = node->getFirstChild();
355
TR::Compilation *comp = cg->comp();
356
TR_PseudoRegister *childReg = NULL;
357
TR::MemoryReference *sourceMR = NULL;
358
TR_StorageReference* pdStorageRef = NULL;
359
360
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
361
if(comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !comp->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)
362
{
363
// Perform an intermediate vector store. See pd2udVectorEvaluateHelper().
364
TR::Register* pdValueReg = cg->evaluate(childNode);
365
pdStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(TR_VECTOR_REGISTER_SIZE, comp);
366
pdStorageRef->setIsSingleUseTemporary();
367
368
TR::MemoryReference* pdMR = generateS390RightAlignedMemoryReference(node, pdStorageRef, cg);
369
sourceMR = pdMR;
370
371
childReg = cg->allocatePseudoRegister(childNode->getDataType());
372
childReg->setIsInitialized();
373
childReg->setSize(childNode->getSize());
374
childReg->setHasKnownValidData();
375
childReg->setDecimalPrecision(childNode->getDecimalPrecision());
376
377
generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, pdValueReg, pdMR, TR_VECTOR_REGISTER_SIZE - 1);
378
379
}
380
else
381
{
382
int32_t byteLength = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision());
383
childReg = cg->evaluateBCDNode(childNode);
384
childReg = cg->privatizeBCDRegisterIfNeeded(node, childNode, childReg);
385
sourceMR = cg->materializeFullBCDValue(childNode, childReg,
386
cg->getPackedToUnicodeFixedSourceSize(),
387
byteLength);
388
}
389
390
// One of two sequences generated by the reset of this evaluator:
391
// for non-setSign ops when the knownSign=negative (known positive signs are more common so '+' is the initial/default setting)
392
//
393
// MVC [destSign],[minusSign] // [sign] <- 002B '+'
394
// UNPKU [destData],[src]
395
// MVI [destSign+1],0x2D // '-'
396
//
397
// for non-setSign ops (pd2udsl/pd2udst)
398
//
399
// MVC [destSign],[minusSign] // [sign] <- 002B '+'
400
// UNPKU [destData],[src]
401
// BRC 0x8,done // if src sign is + (cc=0) we are done, otherwise in '-' (cc=1) and invalid (cc=3) case fall through and set '-' sign
402
// MVI [destSign+1],0x2D // '-'
403
// done:
404
//
405
// The MVC/UNPKU are generated by the shared routine packedToUnicodeHelper and the BRC/MVI by this routine
406
407
408
TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());
409
TR::MemoryReference *destMR = packedToUnicodeHelper(node, targetReg, sourceMR, childReg, true, cg, pdStorageRef); // isSeparateSign=true
410
411
int32_t destSignEndByte = (node->getDataType() == TR::UnicodeDecimalSignTrailing) ? TR::DataType::getUnicodeSignSize() : targetReg->getSize();
412
413
if (childReg->hasKnownSignCode())
414
{
415
int32_t convertedSign = TR::DataType::convertSignEncoding(childNode->getDataType(), node->getDataType(), childReg->getKnownSignCode());
416
if (convertedSign == TR::DataType::getNationalSeparateMinus())
417
{
418
if (cg->traceBCDCodeGen())
419
traceMsg(comp,"\tchildReg has negative knownSignCode 0x%x so generate an MVI of the converted sign 0x%x\n",childReg->getKnownSignCode(),convertedSign);
420
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), convertedSign);
421
}
422
else
423
{
424
if (cg->traceBCDCodeGen())
425
traceMsg(comp,"\tchildReg has positive knownSignCode 0x%x so no more codegen is needed (an MVC of 002B was already done)\n", childReg->getKnownSignCode());
426
TR_ASSERT(convertedSign == TR::DataType::getNationalSeparatePlus(), "converted sign should be nationalSeparatePlusSign of 0x%x and not 0x%x\n", TR::DataType::getNationalSeparatePlus(), convertedSign);
427
}
428
targetReg->setKnownSignCode(convertedSign);
429
}
430
else
431
{
432
TR_ASSERT(cg->getAppendInstruction()->getOpCodeValue() == TR::InstOpCode::UNPKU,
433
"the previous instruction should be an UNPKU\n");
434
435
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
436
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
437
438
TR::RegisterDependencyConditions * targetMRDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
439
440
if (destMR->getIndexRegister())
441
targetMRDeps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);
442
if (destMR->getBaseRegister())
443
targetMRDeps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);
444
445
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, targetMRDeps);
446
cFlowRegionStart->setStartInternalControlFlow();
447
448
// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present
449
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK9, node, cFlowRegionEnd);
450
451
TR_ASSERT(TR::DataType::getNationalSeparateMinus() <= 0xFF, "expecting nationalSeparateMinusSign to be <= 0xFF and not 0x%x\n", TR::DataType::getNationalSeparateMinus());
452
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), TR::DataType::getNationalSeparateMinus());
453
454
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, targetMRDeps);
455
cFlowRegionEnd->setEndInternalControlFlow();
456
457
targetReg->setHasKnownPreferredSign();
458
}
459
460
cg->decReferenceCount(childNode);
461
node->setRegister(targetReg);
462
cg->traceBCDExit("pd2udsl",node);
463
return targetReg;
464
}
465
466
/**
 * \brief Storage based (non-vector) implementation of pd2ud: evaluates the packed child, makes the
 *        full packed value available in memory and unpacks it to Unicode with packedToUnicodeHelper.
 *
 * \param node The pd2ud node.
 * \param cg   The code generator.
 *
 * \return The pseudo register allocated for \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2udEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *srcNode = node->getFirstChild();
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
                            1, TR::DebugCounter::Cheap);

   TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
   TR_PseudoRegister *resultReg = cg->allocatePseudoRegister(node->getDataType());
   srcReg = cg->privatizeBCDRegisterIfNeeded(node, srcNode, srcReg);

   // Materialize the source at the fixed size that the unpack expects.
   const int32_t packedByteLength = TR::DataType::packedDecimalPrecisionToByteLength(node->getDecimalPrecision());
   TR::MemoryReference *srcMR = cg->materializeFullBCDValue(srcNode,
                                                            srcReg,
                                                            cg->getPackedToUnicodeFixedSourceSize(),
                                                            packedByteLength);

   const bool isSeparateSign = false;
   packedToUnicodeHelper(node, resultReg, srcMR, srcReg, isSeparateSign, cg, NULL);

   cg->decReferenceCount(srcNode);
   return resultReg;
   }
486
487
/**
 * \brief Vector implementation of pd2ud: the packed child lives in a vector register, so it is
 *        stored to a temporary before being unpacked to Unicode with packedToUnicodeHelper.
 *
 * \param node The pd2ud node.
 * \param cg   The code generator.
 *
 * \return The pseudo register allocated for \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2udVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Step 1: evaluate the child, which yields the packed decimal in a vector register.
   TR::Node *srcNode = node->getFirstChild();
   TR::Register *packedValueReg = cg->evaluate(srcNode);

   // Step 2: spill the whole vector register to a 16 byte temporary so that UNPKU can read it.
   // The intermediate store is required because vectorized pdloadi leaves the packed decimal in a
   // register, whereas UNPKU is an SS instruction that takes its inputs from memory.
   TR_StorageReference *spillStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(TR_VECTOR_REGISTER_SIZE, cg->comp());
   spillStorageRef->setIsSingleUseTemporary();

   TR::MemoryReference *spillMR = generateS390RightAlignedMemoryReference(node, spillStorageRef, cg, true, true);
   generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, packedValueReg, spillMR, TR_VECTOR_REGISTER_SIZE - 1);

   // Step 3: describe the spilled value with a pseudo register for the child.
   TR_PseudoRegister *srcReg = cg->allocatePseudoRegister(srcNode->getDataType());
   srcReg->setIsInitialized();
   srcReg->setSize(srcNode->getSize());
   srcReg->setDecimalPrecision(srcNode->getDecimalPrecision());
   srcReg->setHasKnownValidData();

   // Step 4: emit the UNPKU that unpacks the temporary into the node's pseudo register.
   TR_PseudoRegister *resultReg = cg->allocatePseudoRegister(node->getDataType());
   const bool isSeparateSign = false;
   packedToUnicodeHelper(node, resultReg, spillMR, srcReg, isSeparateSign, cg, spillStorageRef);

   cg->decReferenceCount(srcNode);
   return resultReg;
   }
517
518
/**
 * Handles pd2ud by dispatching to the vector or the storage based helper.
 *
 * The vector helper is used when the CPU reports the vector packed decimal feature and
 * TR_DisableVectorBCD is not set, or whenever the TR_enableVectorBCD environment variable is present.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2udEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pd2ud",node);

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorBCDAvailable = cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
                                   !cg->comp()->getOption(TR_DisableVectorBCD);
   const bool useVectorBCD = vectorBCDAvailable || isVectorBCDEnv;

   TR::Register *resultReg = useVectorBCD ? pd2udVectorEvaluatorHelper(node, cg)
                                          : pd2udEvaluatorHelper(node, cg);

   node->setRegister(resultReg);
   cg->traceBCDExit("pd2ud",node);
   return resultReg;
   }
537
538
/**
539
* \brief This evaluator helper is invoked by pd2ud Evaluator and pd2udsl Evaluator to generate unpack unicode
540
* instruction (UNPKU).
541
*
542
* \param node Parent node object.
543
* \param targetReg PseudoRegister object for the parent node (the node)
544
* \param sourceMR MemoryRefernece object pointer
545
* \param childReg PseudoRegister object for the child node (e.g. pdloadi node)
546
* \param isSeparateSign True if the operation is pd2udsl or pd2udst, which all have separate sign code. False
547
* if it's pd2ud.
548
* \param cg The codegen object
549
* \param srcStorageReference If not null, this replaces the childReg's StorageReference for unpack to unicode
550
*/
551
TR::MemoryReference *
552
J9::Z::TreeEvaluator::packedToUnicodeHelper(TR::Node *node,
553
TR_PseudoRegister *targetReg,
554
TR::MemoryReference *sourceMR,
555
TR_PseudoRegister *childReg,
556
bool isSeparateSign,
557
TR::CodeGenerator * cg,
558
TR_StorageReference* srcStorageReference)
559
{
560
TR::Node *child = node->getFirstChild();
561
TR_StorageReference *hint = node->getStorageReferenceHint();
562
TR_StorageReference *targetStorageReference = NULL;
563
TR::Compilation *comp = cg->comp();
564
565
int32_t destSize = node->getStorageReferenceSize();
566
567
if (hint)
568
{
569
if (childReg->isInitialized() && hint == childReg->getStorageReference())
570
{
571
TR_ASSERT( false,"pd2ud operands will overlap because child storageReference of pd2ud is initialized hint\n");
572
}
573
else
574
{
575
if (destSize <= hint->getSymbolSize())
576
targetStorageReference = hint;
577
else
578
TR_ASSERT(false,"pd2ud destSize (%d) should be <= hint size (%d)\n",destSize,hint->getSymbolSize());
579
}
580
}
581
582
if (targetStorageReference == NULL)
583
targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);
584
585
targetReg->setStorageReference(targetStorageReference, node);
586
587
int32_t unpkuDestPrecision = node->getDecimalPrecision();
588
targetReg->setDecimalPrecision(unpkuDestPrecision);
589
int32_t unpkuDestSize = TR::DataType::getSizeFromBCDPrecision(TR::UnicodeDecimal, unpkuDestPrecision);
590
int32_t unpkuDestEndByte = TR::DataType::getLeftMostByte(node->getDataType(), unpkuDestSize);
591
592
if (cg->traceBCDCodeGen())
593
traceMsg(comp,"\tpackedToUnicodeHelper %p : op %s, targetRegSize %d, targetRegPrec %d, srcRegSize %d, srcRegPrec %d\n",
594
node,node->getOpCode().getName(),targetReg->getSize(),targetReg->getDecimalPrecision(),childReg->getSize(),childReg->getDecimalPrecision());
595
596
// For UNPKU the 1st operand (target-unicode) size is variable and the 2nd operand (source-packed) is fixed at 16 bytes.
597
// For this reason use left, instead of right, aligned memory references so the correct alignment is done for both operands
598
// (using right aligned references with SS1 would apply the same bump to both operands)
599
TR::MemoryReference *destMR = generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, unpkuDestEndByte);
600
// The sourceMR should have been created by calling materializeFullBCDValue to ensure it is large enough to be used in the UNPKU
601
int32_t fixedSourceSize = cg->getPackedToUnicodeFixedSourceSize();
602
603
TR_ASSERT(sourceMR->getStorageReference()->getSymbolSize() >= fixedSourceSize,
604
"source memRef %d is not large enough to be used in the UNPKU (%d)\n",sourceMR->getStorageReference()->getSymbolSize(),fixedSourceSize);
605
606
sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, child,
607
(srcStorageReference == NULL) ? childReg->getStorageReference() : srcStorageReference,
608
cg, fixedSourceSize);
609
610
if (isSeparateSign)
611
{
612
//TR_ASSERT((node->getOpCode().isSetSign() && node->getNumChildren() == 3) || (node->getNumChildren() == 2),
613
// "expected two (or three if setSign) children on %s and not %d child(ren)\n",node->getOpCode().getName(),node->getNumChildren());
614
int32_t destSignEndByte = (node->getDataType() == TR::UnicodeDecimalSignTrailing) ? TR::DataType::getUnicodeSignSize() : unpkuDestEndByte + TR::DataType::getUnicodeSignSize();
615
616
bool isImplicitValue = node->getNumChildren() < 2;
617
618
if (isImplicitValue)
619
{
620
if (cg->traceBCDCodeGen())
621
traceMsg(comp, "\tgen 2 MVIs of unicode sign with size of %d and destSignEndByte of %d\n", TR::DataType::getUnicodeSignSize(),destSignEndByte);
622
generateSIInstruction(cg, TR::InstOpCode::MVI, node,
623
generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, destSignEndByte), 0x00);
624
generateSIInstruction(cg, TR::InstOpCode::MVI, node,
625
generateS390LeftAlignedMemoryReference(*destMR, node, 1, cg, destSignEndByte), 0x2B);
626
}
627
else
628
{
629
TR::Node *signNode = node->getSecondChild();
630
TR::MemoryReference *signMR = generateS390ConstantAreaMemoryReference(cg, signNode, true); // forSS=true
631
if (cg->traceBCDCodeGen())
632
traceMsg(comp, "\tgen MVC of unicode sign with size of %d and destSignEndByte of %d\n", TR::DataType::getUnicodeSignSize(),destSignEndByte);
633
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
634
TR::DataType::getUnicodeSignSize()-1,
635
generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, destSignEndByte),
636
signMR);
637
}
638
if (node->getOpCode().isSetSign())
639
{
640
TR::Node *setSignValue = node->getSetSignValueNode();
641
if (setSignValue->getOpCode().isLoadConst() && setSignValue->getOpCode().getSize() <= 4)
642
{
643
targetReg->setKnownSignCode(setSignValue->get32bitIntegralValue());
644
}
645
}
646
}
647
648
if (cg->traceBCDCodeGen())
649
traceMsg(comp,"\tgen UNPKU: unpkuDestSize %d, destEndByte %d and fixed source size %d\n",unpkuDestSize,unpkuDestEndByte,fixedSourceSize);
650
651
generateSS1Instruction(cg, TR::InstOpCode::UNPKU, node,
652
unpkuDestSize-1,
653
destMR,
654
sourceMR);
655
656
targetReg->transferDataState(childReg);
657
targetReg->setIsInitialized();
658
node->setRegister(targetReg);
659
return destMR;
660
}
661
662
void
663
J9::Z::TreeEvaluator::zonedToZonedSeparateSignHelper(TR::Node *node, TR_PseudoRegister *srcReg, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR::MemoryReference *destMR, TR::CodeGenerator * cg)
664
{
665
TR_ASSERT(targetReg->isInitialized(),"targetRegister must be initialized before calling zonedToZonedSeparateSignHelper\n");
666
targetReg->resetSignState(); // reset any incoming sign state now as sign is being moved from embedded to separate by this routine (so embedded setting is no longer valid)
667
bool isSetSign = node->getOpCode().isSetSign();
668
int32_t sign = 0;
669
TR::Node *signCodeNode = NULL;
670
TR::Compilation *comp = cg->comp();
671
672
if (isSetSign)
673
{
674
signCodeNode = node->getSecondChild();
675
TR_ASSERT(signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");
676
sign = signCodeNode->get32bitIntegralValue();
677
}
678
bool isDestTrailingSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
679
bool isTruncation = false;
680
int32_t digitsToClear = 0;
681
if (node->getDecimalPrecision() < targetReg->getDecimalPrecision())
682
isTruncation = true;
683
else if (node->getDecimalPrecision() > targetReg->getDecimalPrecision())
684
digitsToClear = node->getDecimalPrecision()-targetReg->getDecimalPrecision();
685
686
if (cg->traceBCDCodeGen())
687
traceMsg(comp,"\tzonedToZonedSeparateSignHelper %p : op %s, isTruncation=%s, targetReg->knownSign=0x%x, trgSignIsZone=%s, targetReg->size=%d, targetRegPrec=%d, , digitsToClear=%d, (isSetSign=%s, sign 0x%x)\n",
688
node,node->getOpCode().getName(),isTruncation?"yes":"no",targetReg->hasKnownOrAssumedSignCode() ? targetReg->getKnownOrAssumedSignCode() : 0,targetReg->knownOrAssumedSignIsZone()?"yes":"no",
689
targetReg->getSize(),targetReg->getDecimalPrecision(),digitsToClear,isSetSign?"yes":"no",sign);
690
691
TR_ASSERT(!isTruncation,"a zd2zdsxs operation should not truncate\n");
692
if (digitsToClear > 0)
693
{
694
if (cg->traceBCDCodeGen())
695
traceMsg(comp,"\tdigitsToClear > 0 (%d) so set upper bytes to 0x%x and set targetRegPrec to nodePrec %d\n",digitsToClear,TR::DataType::getZonedZeroCode(),node->getDecimalPrecision());
696
int32_t endByte = isDestTrailingSign ? node->getSize() : node->getSize() - TR::DataType::getZonedSignSize();
697
cg->genZeroLeftMostZonedBytes(node, targetReg, endByte, digitsToClear, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByte));
698
targetReg->setDecimalPrecision(node->getDecimalPrecision());
699
}
700
701
int32_t endByteForDestSign = isDestTrailingSign ? TR::DataType::getZonedSignSize() : targetReg->getSize();
702
TR::MemoryReference *destSignCodeMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByteForDestSign);
703
704
int32_t endByteForSourceSign = isDestTrailingSign ? (TR::DataType::getZonedSignSize() + TR::DataType::getZonedSignSize()) : TR::DataType::getZonedSignSize();
705
TR::MemoryReference *srcSignCodeMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, endByteForSourceSign);
706
707
// no 'invalid sign' message is ever required for a setSign operation or when a known (but *not* assumed) sign is 0xc,0xd or 0xf
708
intptr_t litPoolOffset = 0;
709
if (isSetSign || (srcReg->hasKnownSignCode() && srcReg->knownSignIsEmbeddedPreferredOrUnsigned()))
710
{
711
int32_t signToSet = isSetSign ? sign :
712
TR::DataType::convertSignEncoding(TR::ZonedDecimal, node->getDataType(), srcReg->getKnownSignCode());
713
bool srcSignAlreadyZone = srcReg->knownOrAssumedSignIsZone(); // || targetReg->temporaryKnownSignCodeIs(TR::DataType::getZonedValue());
714
if (cg->traceBCDCodeGen())
715
traceMsg(comp,"\t%s case so gen MVI to set target sign to 0x%x (from source sign 0x%x) and do %sgen OI because srcReg->knownOrAssumedSignIsZone() = %s\n",
716
isSetSign?"isSetSign=true":"srcReg->hasKnownSignCode",
717
signToSet,
718
isSetSign?sign:srcReg->getKnownSignCode(),
719
srcSignAlreadyZone?"not ":"",
720
srcSignAlreadyZone?"true":"false");
721
722
TR_ASSERT(signToSet == TR::DataType::getZonedSeparatePlus() || signToSet == TR::DataType::getZonedSeparateMinus(),
723
"signToSet value should be 0x%x ('+') or 0x%x ('-') and not 0x%x\n", TR::DataType::getZonedSeparatePlus(), TR::DataType::getZonedSeparateMinus(), sign);
724
if (!srcSignAlreadyZone)
725
{
726
generateSIInstruction(cg, TR::InstOpCode::OI, node, srcSignCodeMR, TR::DataType::getZonedCode());
727
}
728
generateSIInstruction(cg, TR::InstOpCode::MVI, node, destSignCodeMR, (signToSet & 0xFF));
729
targetReg->setKnownSignCode(signToSet);
730
}
731
else if (srcReg->hasKnownCleanSign())
732
{
733
TR_ASSERT(TR::DataType::getZonedSeparatePlus() == 0x4E && TR::DataType::getZonedSeparateMinus() == 0x60, "zd2zdsxs sequence only works when plus sign is 0x4E and minus sign is 0x60\n");
734
TR::Register *tempReg1 = cg->allocateRegister(TR_GPR);
735
TR::Register *tempReg2 = cg->allocateRegister(TR_GPR);
736
737
generateRXInstruction(cg, TR::InstOpCode::IC, node, tempReg1, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()));
738
739
generateRIInstruction(cg, TR::InstOpCode::NILL, node, tempReg1, 0x10);
740
generateRSInstruction(cg, TR::InstOpCode::RLL, node, tempReg2, tempReg1, 29); // rotate right by 3 (32-3=29)
741
if (!targetReg->knownSignIsZone())
742
{
743
generateSIInstruction(cg, TR::InstOpCode::OI, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), TR::DataType::getZonedCode());
744
}
745
generateRRInstruction(cg, TR::InstOpCode::OR, node, tempReg2, tempReg1);
746
generateRIInstruction(cg, TR::InstOpCode::AHI, node, tempReg2, 0x4E);
747
generateRXInstruction(cg, TR::InstOpCode::STC, node, tempReg2, destSignCodeMR);
748
cg->stopUsingRegister(tempReg1);
749
cg->stopUsingRegister(tempReg2);
750
targetReg->setHasKnownPreferredSign();
751
if (!isTruncation)
752
targetReg->setHasKnownCleanSign();
753
}
754
else
755
{
756
// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present
757
TR::LabelSymbol * processSign = generateLabelSymbol(cg);
758
TR::LabelSymbol * processPositive = generateLabelSymbol(cg);
759
TR::LabelSymbol * processNegative = generateLabelSymbol(cg);
760
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
761
762
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);
763
processSign->setStartInternalControlFlow();
764
765
// A negative sign code is represented by 0xB and 0xD (1011 and 1101 in binary). Due to the
766
// symmetry in the binary encoding of the negative sign codes we can get away with two bit
767
// mask tests to check if a sign code is negative:
768
//
769
// Step 1 : Test if bit 0 and bit 3 are set
770
// Step 2 : Test if there is exactly one bit set from bit 1 and bit 2
771
772
// Step 1
773
generateSIInstruction(cg, TR::InstOpCode::TM, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), 0x90);
774
775
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK12, node, processPositive);
776
777
// Step 2
778
generateSIInstruction(cg, TR::InstOpCode::TM, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), 0x60);
779
780
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK9, node, processPositive);
781
782
// ----------------- Incoming branch -----------------
783
784
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);
785
786
// Patch in the preferred negative sign code
787
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destSignCodeMR, node, 0, cg, destSignCodeMR->getLeftMostByte()), TR::DataType::getZonedSeparateMinus());
788
789
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, cFlowRegionEnd);
790
791
// ----------------- Incoming branch -----------------
792
793
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processPositive);
794
795
// Patch in the preferred positive sign code
796
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390LeftAlignedMemoryReference(*destSignCodeMR, node, 0, cg, destSignCodeMR->getLeftMostByte()), TR::DataType::getZonedSeparatePlus());
797
798
// ----------------- Incoming branch -----------------
799
800
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd);
801
cFlowRegionEnd->setEndInternalControlFlow();
802
803
// Clear the embedded sign code of the source
804
TR::Instruction* cursor = generateSIInstruction(cg, TR::InstOpCode::OI, node, generateS390LeftAlignedMemoryReference(*srcSignCodeMR, node, 0, cg, srcSignCodeMR->getLeftMostByte()), TR::DataType::getZonedCode());
805
806
// Set up the proper register dependencies
807
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
808
809
if (srcSignCodeMR->getIndexRegister())
810
dependencies->addPostCondition(srcSignCodeMR->getIndexRegister(), TR::RealRegister::AssignAny);
811
812
if (srcSignCodeMR->getBaseRegister())
813
dependencies->addPostCondition(srcSignCodeMR->getBaseRegister(), TR::RealRegister::AssignAny);
814
815
if (destSignCodeMR->getIndexRegister())
816
dependencies->addPostConditionIfNotAlreadyInserted(destSignCodeMR->getIndexRegister(), TR::RealRegister::AssignAny);
817
818
if (destSignCodeMR->getBaseRegister())
819
dependencies->addPostConditionIfNotAlreadyInserted(destSignCodeMR->getBaseRegister(), TR::RealRegister::AssignAny);
820
821
cursor->setDependencyConditions(dependencies);
822
823
targetReg->setHasKnownPreferredSign();
824
}
825
}
826
827
/**
828
* Handles pd2zdsls,pd2zdsts,pd2zdslsSetSign,pd2zdstsSetSign
829
*/
830
TR::Register *
831
J9::Z::TreeEvaluator::pd2zdslsEvaluator(TR::Node * node, TR::CodeGenerator * cg)
832
{
833
cg->traceBCDEntry("pd2zdsls",node);
834
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
835
1, TR::DebugCounter::Cheap);
836
TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());
837
TR::Node *child = node->getFirstChild();
838
TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);
839
childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);
840
TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);
841
TR::MemoryReference *destMR = packedToZonedHelper(node, targetReg, sourceMR, childReg, cg);
842
zonedToZonedSeparateSignHelper(node, childReg, targetReg, sourceMR, destMR, cg);
843
cg->decReferenceCount(child);
844
if (node->getOpCode().isSetSign())
845
cg->decReferenceCount(node->getSecondChild());
846
node->setRegister(targetReg);
847
cg->traceBCDExit("pd2zdsls",node);
848
return targetReg;
849
}
850
851
void
852
J9::Z::TreeEvaluator::zonedSeparateSignToPackedOrZonedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR::MemoryReference *destMR, TR::CodeGenerator * cg)
853
{
854
TR_ASSERT( targetReg->isInitialized(),"targetRegister must be initialized before calling zonedSeparateSignToPackedOrZonedHelper\n");
855
TR::Node *srcNode = node->getFirstChild();
856
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
857
bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();
858
bool isSrcTrailingSign = (srcNode->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
859
int32_t sourceSignEndByte = isSrcTrailingSign ? TR::DataType::getZonedSignSize() : srcReg->getSize();
860
TR::Compilation *comp = cg->comp();
861
if (node->getOpCode().isSetSign())
862
{
863
TR::Node *signCodeNode = node->getSetSignValueNode();
864
TR_ASSERT( signCodeNode->getOpCode().isLoadConst(),"excepting zonedSeparateSignToPackedOrZonedHelper sign code to be a const\n");
865
int32_t sign = signCodeNode->get32bitIntegralValue();
866
if (sign == TR::DataType::getIgnoredSignCode())
867
{
868
// just check for an invalid sign but do not set anything in this case
869
if (cg->traceBCDCodeGen())
870
traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, ignoredSetSign=true case, sign 0x%x\n",node,node->getOpCode().getName(),sign);
871
872
TR::LabelSymbol * returnLabel = generateLabelSymbol(cg);
873
TR::LabelSymbol * callLabel = generateLabelSymbol(cg);
874
875
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
876
TR::LabelSymbol * cflowRegionEnd = generateLabelSymbol(cg);
877
878
TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
879
880
if (sourceMR->getIndexRegister())
881
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);
882
if (sourceMR->getBaseRegister())
883
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);
884
885
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);
886
cFlowRegionStart->setStartInternalControlFlow();
887
888
if (cg->traceBCDCodeGen())
889
traceMsg(comp,"\t\ttargetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",
890
targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),sourceSignEndByte);
891
892
generateSIInstruction(cg, TR::InstOpCode::CLI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), TR::DataType::getZonedSeparatePlus());
893
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cflowRegionEnd);
894
895
896
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);
897
cflowRegionEnd->setEndInternalControlFlow();
898
899
targetReg->transferSignState(srcReg, isTruncation);
900
}
901
else
902
{
903
if (cg->traceBCDCodeGen())
904
traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, setSign=true case, sign 0x%x\n",node,node->getOpCode().getName(),sign);
905
cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, targetReg, 0, false /* !numericNibbleIsZero */);
906
}
907
}
908
else
909
{
910
TR::LabelSymbol * checkMinusLabel = generateLabelSymbol(cg);
911
TR::LabelSymbol * returnLabel = generateLabelSymbol(cg);
912
TR::LabelSymbol * callLabel = generateLabelSymbol(cg);
913
914
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
915
TR::LabelSymbol * cflowRegionEnd = generateLabelSymbol(cg);
916
917
TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);
918
919
if (sourceMR->getIndexRegister())
920
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getIndexRegister(), TR::RealRegister::AssignAny);
921
if (sourceMR->getBaseRegister())
922
deps->addPostConditionIfNotAlreadyInserted(sourceMR->getBaseRegister(), TR::RealRegister::AssignAny);
923
924
if (destMR->getIndexRegister())
925
deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);
926
if (destMR->getBaseRegister())
927
deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);
928
929
930
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);
931
cFlowRegionStart->setStartInternalControlFlow();
932
933
if (cg->traceBCDCodeGen())
934
traceMsg(comp,"\tzonedSeparateSignToPackedOrZonedHelper %p : op %s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, sourceSignEndByte=%d\n",
935
node,node->getOpCode().getName(),targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),sourceSignEndByte);
936
937
// DAA library assumes all BCD types are positive, unless an explicit negative sign code is present
938
generateSIInstruction(cg, TR::InstOpCode::CLI, node, generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSignEndByte), TR::DataType::getZonedSeparateMinus());
939
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, checkMinusLabel);
940
941
cg->genSignCodeSetting(node, NULL, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), TR::DataType::getPreferredPlusCode(), targetReg, 0, false /* !numericNibbleIsZero */);
942
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cflowRegionEnd);
943
944
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, checkMinusLabel);
945
946
947
cg->genSignCodeSetting(node, NULL, targetReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), TR::DataType::getPreferredMinusCode(), targetReg, 0, false /* !numericNibbleIsZero */);
948
949
950
951
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);
952
cflowRegionEnd->setEndInternalControlFlow();
953
954
targetReg->setHasKnownPreferredSign();
955
}
956
}
957
958
/**
959
* Handles zdsls2pd,zdsts2pd
960
*/
961
TR::Register *
962
J9::Z::TreeEvaluator::zdsls2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
963
{
964
cg->traceBCDEntry("zdsls2pd",node);
965
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
966
1, TR::DebugCounter::Cheap);
967
TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());
968
TR::Node *child = node->getFirstChild();
969
TR_PseudoRegister *childReg = cg->evaluateBCDNode(child);
970
childReg = cg->privatizeBCDRegisterIfNeeded(node, child, childReg);
971
TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(child, childReg->getStorageReference(), cg);
972
TR::MemoryReference *destMR = zonedToPackedHelper(node, targetReg, sourceMR, childReg, cg);
973
targetReg->resetSignState(); // the conversion operation is not complete yet so reset any sign state transferred in the zonedToPackedHelper
974
// zonedToPackedHelper with a separate sign source will pack a zone code into the packed sign code position so set the zone value on the
975
// targetReg to improve the zonedSeparateSignToPackedOrZonedHelper code generation
976
targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());
977
zonedSeparateSignToPackedOrZonedHelper(node, targetReg, sourceMR, destMR, cg);
978
cg->decReferenceCount(child);
979
if (node->getOpCode().isSetSign())
980
cg->decReferenceCount(node->getSecondChild());
981
node->setRegister(targetReg);
982
cg->traceBCDExit("zdsls2pd",node);
983
return targetReg;
984
}
985
986
/**
987
* Handles zdsls2zd,zdsts2zd
988
*/
989
TR::Register *
990
J9::Z::TreeEvaluator::zdsls2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
991
{
992
cg->traceBCDEntry("zdsls2zd",node);
993
TR::Node *srcNode = node->getFirstChild();
994
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
995
996
bool isSetSign = node->getOpCode().isSetSign();
997
int32_t sign = 0;
998
TR::Node *signCodeNode = NULL;
999
TR::Compilation *comp = cg->comp();
1000
if (isSetSign)
1001
{
1002
signCodeNode = node->getSecondChild();
1003
TR_ASSERT( signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");
1004
sign = signCodeNode->get32bitIntegralValue();
1005
}
1006
1007
bool isSrcTrailingSign = (srcNode->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
1008
int32_t sourceOffset = 0;
1009
bool isTruncation = false;
1010
int32_t targetPrecision = srcReg->getDecimalPrecision();
1011
if (srcReg->getDecimalPrecision() > node->getDecimalPrecision()) // a truncation
1012
{
1013
isTruncation = true;
1014
sourceOffset = srcReg->getDecimalPrecision() - node->getDecimalPrecision(); // reach into the source by sourceOffset bytes to get the correct digits
1015
targetPrecision = node->getDecimalPrecision();
1016
}
1017
1018
bool isEffectiveNop = isZonedOperationAnEffectiveNop(node, 0, isTruncation, srcReg, isSetSign, sign, cg);
1019
TR_PseudoRegister *targetReg = NULL;
1020
TR::MemoryReference *sourceMR = NULL;
1021
TR::MemoryReference *destMR = NULL;
1022
if (isEffectiveNop)
1023
{
1024
targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, false, false, sourceMR, cg); // isNondestructiveNop=false,initTarget=false
1025
}
1026
else
1027
{
1028
targetReg = evaluateBCDValueModifyingOperand(node, false, sourceMR, cg); // initTarget=false
1029
sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
1030
destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);
1031
}
1032
1033
targetReg->setDecimalPrecision(targetPrecision);
1034
bool isInitialized = targetReg->isInitialized();
1035
if (cg->traceBCDCodeGen())
1036
traceMsg(comp,"\tzdsls2zdEvaluator %p : op %s, isInitialized=%s, targetRegSize=%d, targetRegPrec=%d, srcRegSize=%d, srcRegPrec=%d, isEffectiveNop=%s (isSetSign %s, sign 0x%x)\n",
1037
node,node->getOpCode().getName(),isInitialized?"yes":"no",
1038
targetReg->getSize(),targetReg->getDecimalPrecision(),srcReg->getSize(),srcReg->getDecimalPrecision(),isEffectiveNop?"yes":"no",isSetSign?"yes":"no",sign);
1039
1040
if (!isEffectiveNop)
1041
{
1042
if (!isInitialized)
1043
{
1044
int32_t mvcSize = targetReg->getDecimalPrecision();
1045
int32_t srcEndByte = isSrcTrailingSign ? srcReg->getSize() : srcReg->getSize() - TR::DataType::getZonedSignSize();
1046
if (cg->traceBCDCodeGen())
1047
traceMsg(comp,"\tisInit=false so gen MVC to init with size=%d and sourceOffset=%d, srcEndByte=%d\n",mvcSize,sourceOffset,srcEndByte);
1048
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
1049
mvcSize-1,
1050
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
1051
generateS390LeftAlignedMemoryReference(*sourceMR, node, sourceOffset, cg, srcEndByte));
1052
targetReg->transferDataState(srcReg);
1053
targetReg->setIsInitialized();
1054
}
1055
targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());
1056
if (isInitialized && isSrcTrailingSign)
1057
{
1058
destMR->addToTemporaryNegativeOffset(node, -TR::DataType::getZonedSignSize(), cg);
1059
}
1060
zonedSeparateSignToPackedOrZonedHelper(node, targetReg, sourceMR, destMR, cg);
1061
}
1062
1063
if (isSrcTrailingSign)
1064
{
1065
if (isEffectiveNop)
1066
{
1067
targetReg->addToRightAlignedIgnoredBytes(TR::DataType::getZonedSignSize());
1068
if (cg->traceBCDCodeGen())
1069
traceMsg(comp,"\tisSrcTrailingSign=true and isEffectiveNop=true (zdsls2zd) : increment targetReg %s ignoredBytes %d -> %d (by the TR::DataType::getZonedSignSize())\n",
1070
cg->getDebug()->getName(targetReg),targetReg->getRightAlignedIgnoredBytes() - TR::DataType::getZonedSignSize(),targetReg->getRightAlignedIgnoredBytes());
1071
}
1072
else if (isInitialized)
1073
{
1074
targetReg->addToRightAlignedDeadBytes(TR::DataType::getZonedSignSize());
1075
if (cg->traceBCDCodeGen())
1076
traceMsg(comp,"\tisSrcTrailingSign=true and isInitialized=true (zdsls2zd) : increment targetReg %s deadBytes %d -> %d (by the TR::DataType::getZonedSignSize())\n",
1077
cg->getDebug()->getName(targetReg),targetReg->getRightAlignedDeadBytes() - TR::DataType::getZonedSignSize(),targetReg->getRightAlignedDeadBytes());
1078
}
1079
}
1080
1081
cg->decReferenceCount(srcNode);
1082
if (node->getOpCode().isSetSign())
1083
cg->decReferenceCount(node->getSecondChild());
1084
node->setRegister(targetReg);
1085
cg->traceBCDExit("zdsls2zd",node);
1086
return targetReg;
1087
}
1088
1089
/**
1090
* Handles zd2zdsls,zd2zdsts
1091
*/
1092
TR::Register *
1093
J9::Z::TreeEvaluator::zd2zdslsEvaluator(TR::Node * node, TR::CodeGenerator * cg)
1094
{
1095
cg->traceBCDEntry("zd2zdsls",node);
1096
TR::Compilation *comp = cg->comp();
1097
TR::Node *srcNode = node->getFirstChild();
1098
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
1099
1100
TR_StorageReference *srcStorageReference = srcReg->getStorageReference();
1101
TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcStorageReference, cg);
1102
1103
TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, false, sourceMR, cg); // initTarget=false
1104
TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);
1105
1106
bool isTrailingSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
1107
1108
if (cg->traceBCDCodeGen())
1109
traceMsg(comp,"\tzd2zdslsEvaluator %p : op %s, targetReg->isInit=%s, targetRegSize=%d, targetRegPrec=%d\n",
1110
node,node->getOpCode().getName(),targetReg->isInitialized()?"yes":"no",targetReg->getSize(),targetReg->getDecimalPrecision());
1111
1112
bool isTruncation = node->getDecimalPrecision() < srcReg->getDecimalPrecision();
1113
TR_ASSERT( !isTruncation,"a zd2zdsxs operation should not truncate\n");
1114
1115
if (cg->traceBCDCodeGen())
1116
traceMsg(comp,"\tset targetReg->prec to srcReg->prec %d\n",srcReg->getDecimalPrecision());
1117
targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());
1118
1119
// the (targetReg->isInitialized() && isTrailingSign) case below is needed to move the initialized data left by 1 byte to make room for the trailing separate sign code
1120
if (!targetReg->isInitialized() || (targetReg->isInitialized() && isTrailingSign))
1121
{
1122
int32_t mvcSize = srcReg->getSize();
1123
if (cg->traceBCDCodeGen())
1124
traceMsg(comp,"\t%s so gen MVC to init with size %d\n",!targetReg->isInitialized()?"isInit=false":"isInit=true and isTrailingSign=true", mvcSize);
1125
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
1126
mvcSize-1,
1127
generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, isTrailingSign ? srcReg->getSize() + TR::DataType::getZonedSignSize() : srcReg->getSize()),
1128
generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
1129
targetReg->setIsInitialized();
1130
}
1131
1132
zonedToZonedSeparateSignHelper(node, srcReg, targetReg, sourceMR, destMR, cg);
1133
1134
cg->decReferenceCount(srcNode);
1135
if (node->getOpCode().isSetSign())
1136
cg->decReferenceCount(node->getSecondChild());
1137
node->setRegister(targetReg);
1138
cg->traceBCDExit("zd2zdsls",node);
1139
return targetReg;
1140
}
1141
1142
/**
1143
* Handles zdsle2zd,zd2zdsle
1144
*/
1145
TR::Register *
1146
J9::Z::TreeEvaluator::zdsle2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
1147
{
1148
cg->traceBCDEntry("zdsle2zd",node);
1149
TR::Node *srcNode = node->getFirstChild();
1150
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
1151
1152
bool isSetSign = node->getOpCode().isSetSign();
1153
int32_t sign = 0;
1154
TR::Node *signCodeNode = NULL;
1155
TR::Compilation *comp = cg->comp();
1156
if (isSetSign)
1157
{
1158
signCodeNode = node->getSecondChild();
1159
TR_ASSERT(signCodeNode->getOpCode().isLoadConst(),"excepting zdsle2zdSetSign sign code to be a const\n");
1160
sign = signCodeNode->get32bitIntegralValue();
1161
}
1162
bool isTrailingDst = node->getDataType() == TR::ZonedDecimal;
1163
bool isLeadingDst = !isTrailingDst;
1164
bool isTrailingSrc = srcNode->getDataType() == TR::ZonedDecimal;
1165
bool isLeadingSrc = !isTrailingSrc;
1166
1167
bool isTruncation = false;
1168
int32_t digitsToClear = 0;
1169
if (node->getDecimalPrecision() < srcReg->getDecimalPrecision())
1170
isTruncation = true;
1171
else if (node->getDecimalPrecision() > srcReg->getDecimalPrecision())
1172
digitsToClear = node->getDecimalPrecision()-srcReg->getDecimalPrecision();
1173
1174
bool isEffectiveNop = isZonedOperationAnEffectiveNop(node, 0, isTruncation, srcReg, isSetSign, sign, cg);
1175
bool isNondestructiveNop = isEffectiveNop && !isTruncation;
1176
bool doWidening = true;
1177
1178
if (cg->traceBCDCodeGen())
1179
traceMsg(comp,"\tzdsle2zdEvaluator %p : op %s, isEffectiveNop=%s, isTruncation=%s, srcSignIsZone=%s, srcReg->getSize()=%d, (isSetSign=%s, sign 0x%x)\n",
1180
node,node->getOpCode().getName(),isEffectiveNop?"yes":"no",isTruncation?"yes":"no",srcReg->knownOrAssumedSignIsZone()?"yes":"no",srcReg->getSize(),isSetSign?"yes":"no",sign);
1181
1182
TR::MemoryReference *sourceMR = NULL;
1183
TR_PseudoRegister *targetReg = NULL;
1184
if (!isEffectiveNop &&
1185
isLeadingDst && // only do for leading sign so the sign code doesn't have to be moved again later
1186
doWidening &&
1187
digitsToClear > 0)
1188
{
1189
sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
1190
targetReg = evaluateBCDValueModifyingOperand(node, true, sourceMR, cg); // initTarget=true
1191
if (cg->traceBCDCodeGen())
1192
traceMsg(comp,"\tperform an explicit widening (digitsToClear=%d, doWidening=yes, isEffectiveNop=no) set targetReg->prec to node->prec %d\n",digitsToClear,node->getDecimalPrecision());
1193
targetReg->setDecimalPrecision(node->getDecimalPrecision());
1194
}
1195
else
1196
{
1197
if (!isEffectiveNop)
1198
sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
1199
targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, isNondestructiveNop, true /*initTarget*/, sourceMR, cg);
1200
int32_t targetPrecision = isTruncation ? node->getDecimalPrecision() : srcReg->getDecimalPrecision();
1201
if (cg->traceBCDCodeGen())
1202
traceMsg(comp,"\tdo not perform an explicit widening (set digitsToClear=%d->0, doWidening=%s, isEffectiveNop=%s) set targetReg->prec to %d\n",
1203
digitsToClear,doWidening?"yes":"no",isEffectiveNop ?"yes":"no",targetPrecision);
1204
digitsToClear = 0;
1205
targetReg->setDecimalPrecision(targetPrecision);
1206
}
1207
1208
if (!isEffectiveNop)
1209
{
1210
TR::MemoryReference *destMR = isTrailingDst ? generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg) :
1211
generateS390LeftAlignedMemoryReference(node, targetReg->getStorageReference(), cg, targetReg->getSize());
1212
int32_t clearLeftMostByte = targetReg->getSize();
1213
if (isSetSign)
1214
{
1215
if (sign == TR::DataType::getIgnoredSignCode())
1216
{
1217
if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=true with ignored sign=0x%x\n",sign);
1218
if (isTrailingDst) // zdsle2zd
1219
{
1220
if (srcReg->getSize() == 1)
1221
targetReg->transferSignState(srcReg, isTruncation);
1222
else
1223
targetReg->setKnownSignCode(TR::DataType::getZonedValue());
1224
}
1225
else // zd2zdsle
1226
{
1227
if (targetReg->getSize() == 1)
1228
targetReg->transferSignState(srcReg, isTruncation);
1229
else if (targetReg->getSize() > srcReg->getSize()) // a widening in the leadingDst and ignored case leaves a bad sign code
1230
targetReg->setHasKnownBadSignCode();
1231
else
1232
targetReg->setKnownSignCode(TR::DataType::getZonedValue());
1233
}
1234
}
1235
else
1236
{
1237
if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=true : call genSignCodeSetting with sign=0x%x\n",sign);
1238
bool numericNibbleIsZero = false;
1239
if (isTrailingDst) // zdsle2zd
1240
{
1241
// bytes above the leftmost one have a top nibble of 0xf so use this knowledge to improve the sign code setting
1242
if (srcReg->getSize() == 1)
1243
targetReg->transferSignState(srcReg, isTruncation);
1244
else
1245
targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());
1246
}
1247
else // zd2zdsle
1248
{
1249
// when not performing an explicit widening then the bytes above the first one have a top nibble of 0xf so use this knowledge to improve the sign code setting
1250
if (targetReg->getSize() == 1)
1251
targetReg->transferSignState(srcReg, isTruncation);
1252
else if (targetReg->getSize() <= srcReg->getSize())
1253
targetReg->setTemporaryKnownSignCode(TR::DataType::getZonedValue());
1254
1255
if (digitsToClear > 0)
1256
{
1257
numericNibbleIsZero = true;
1258
digitsToClear--;
1259
clearLeftMostByte--;
1260
}
1261
}
1262
int32_t digitsCleared = cg->genSignCodeSetting(node, targetReg, targetReg->getSize(), destMR, sign, targetReg, 0, numericNibbleIsZero);
1263
TR_ASSERT(!numericNibbleIsZero || digitsCleared == 1,"the sign code setting should have also cleared 1 digit (digitsCleared = %d)\n",digitsCleared);
1264
}
1265
}
1266
1267
if (digitsToClear > 0)
1268
{
1269
cg->genZeroLeftMostZonedBytes(node, targetReg, clearLeftMostByte, digitsToClear, destMR);
1270
}
1271
1272
if (!isSetSign)
1273
{
1274
if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisSetSign=false : generate MVZ of size 1 to transfer left aligned zdsle sign to right aligned zd sign position\n");
1275
1276
sourceMR = isTrailingSrc ? reuseS390RightAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg) :
1277
reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcReg->getStorageReference(), cg, srcReg->getSize());
1278
destMR = isTrailingDst ? reuseS390RightAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg) :
1279
reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, targetReg->getSize());
1280
int32_t mvzSize = 1;
1281
generateSS1Instruction(cg, TR::InstOpCode::MVZ, node,
1282
mvzSize-1,
1283
destMR,
1284
sourceMR);
1285
targetReg->transferSignState(srcReg, isTruncation);
1286
}
1287
1288
bool srcSignWillBeIgnored = false;
1289
bool srcSignResetRedundant = srcReg->knownOrAssumedSignIsZone() || (isLeadingSrc && isTruncation);
1290
bool srcSignResetIllegal = targetReg->getSize() == 1;
1291
1292
if (cg->traceBCDCodeGen())
1293
traceMsg(comp,"\tcheck before resetting srcSignCode: srcSignWillBeIgnored %s, srcSignResetRedundant %s, srcSignResetIllegal %s\n",
1294
srcSignWillBeIgnored?"yes":"no",srcSignResetRedundant?"yes":"no",srcSignResetIllegal?"yes":"no");
1295
if (!(srcSignWillBeIgnored || srcSignResetRedundant || srcSignResetIllegal))
1296
{
1297
{
1298
if (cg->traceBCDCodeGen()) traceMsg(comp,"\tgenerate OI 0xF0 to force %s-aligned high nibble to 0xF\n",isTrailingSrc?"right":"left");
1299
generateSIInstruction(cg, TR::InstOpCode::OI, node, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, isTrailingSrc ? 1 : targetReg->getSize()), TR::DataType::getZonedCode());
1300
}
1301
}
1302
targetReg->setIsInitialized();
1303
}
1304
1305
cg->decReferenceCount(srcNode);
1306
if (isSetSign)
1307
cg->decReferenceCount(signCodeNode);
1308
node->setRegister(targetReg);
1309
cg->traceBCDExit("zdsle2zd",node);
1310
return targetReg;
1311
}
1312
1313
/**
 * \brief Emits the PACK instruction(s) that convert the zoned decimal value described by \p childReg / \p sourceMR
 *        into packed decimal storage for \p node, and then rewrites the sign nibble of the packed result.
 *
 * The destination storage is the node's storage reference hint when one exists (its symbol size may be larger
 * than the node size, in which case the PACK also widens), otherwise a new temporary sized to the node.
 * Depending on the (possibly truncated) source precision one, two or three PACK instructions are generated.
 * After the PACKs, the sign byte is loaded and its low nibble inspected: if it already is the preferred minus
 * code nothing is stored; if it is the alternate minus code the preferred minus code is patched in; any other
 * value is replaced by the preferred plus code.
 *
 * \param node      The conversion node being evaluated; its first child is the zoned source.
 * \param targetReg Pseudo register that receives the storage reference, precision and sign/data state of the result.
 * \param sourceMR  Memory reference for the zoned source value.
 * \param childReg  Pseudo register of the evaluated zoned child.
 * \param cg        The code generator.
 *
 * \return The memory reference used for the packed destination.
 */
TR::MemoryReference *
J9::Z::TreeEvaluator::zonedToPackedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR_PseudoRegister *childReg, TR::CodeGenerator * cg)
   {
   TR::Node *child = node->getFirstChild();
   TR_StorageReference *hint = node->getStorageReferenceHint();
   TR_StorageReference *targetStorageReference = NULL;
   int32_t destPrecision = 0;
   int32_t destSize = 0;
   TR::Compilation *comp = cg->comp();
   if (hint)
      {
      TR_ASSERT( !childReg->isInitialized() || hint != childReg->getStorageReference(),"bcd conversion operands will overlap\n");
      destSize = hint->getSymbolSize(); // may be larger than the node->getSize() so take this opportunity to widen as part of the PACK
      destPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize); // may be larger than the node->getSize() so take this opportunity to widen as part of the PACK
      targetStorageReference = hint;
      }
   else
      {
      destSize = node->getSize();
      destPrecision = node->getDecimalPrecision();
      targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);
      }

   targetReg->setStorageReference(targetStorageReference, node);

   int32_t sourcePrecision = childReg->getDecimalPrecision();
   bool isTruncation = false;
   int32_t sourceOffsetForLeftAlignment = 0;

   if (cg->traceBCDCodeGen())
      traceMsg(comp,"\tzonedToPackedHelper %p : op %s, destPrecision %d, destSize %d, sourcePrecision %d, sourceSize %d\n",
         node,node->getOpCode().getName(),destPrecision,destSize,sourcePrecision,childReg->getSize());

   // The node asks for fewer digits than the source provides: only the node's precision worth of digits is packed
   if (node->getDecimalPrecision() < sourcePrecision)
      {
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tnodePrec <= sourcePrecision (%d <= %d) so set sourcePrecision=nodePrec=%d,isTruncation=true,sourceOffsetForLeftAlignment=%d\n",
            node->getDecimalPrecision(),sourcePrecision,node->getDecimalPrecision(),sourcePrecision - node->getDecimalPrecision());
      sourceOffsetForLeftAlignment = sourcePrecision - node->getDecimalPrecision();
      sourcePrecision = node->getDecimalPrecision();
      isTruncation = true;
      }

   TR::MemoryReference *destMR = NULL;
   if (destSize > 16)
      {
      // Clear the bytes above the low 16 explicitly, then continue as a 16 byte / 31 digit destination
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tdestSize %d > 16 so reduce destSize to 16 and destPrecision to 31 for PACK encoding and clear top %d byte(s)\n",destSize,(destSize-16));
      destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
      cg->genZeroLeftMostPackedDigits(node, targetReg, destSize, (destSize-16)*2, destMR);
      destSize = 16;
      destPrecision = 31;
      }

   if (cg->traceBCDCodeGen())
      traceMsg(comp,"\tsetting targetReg->prec to sourcePrecision %d\n",sourcePrecision);
   targetReg->setDecimalPrecision(sourcePrecision);

   // skip over trailing sign for the unpack
   bool isSrcTrailingSign = (child->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
   int32_t sourceEndByte = isSrcTrailingSign ? sourcePrecision + TR::DataType::getZonedSignSize() :
                                               sourcePrecision;

   if (sourcePrecision <= 16)
      {
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tsourcePrecision %d <= 16 so generate a single PACK destSize %d, sourcePrecision %d, sourceEndByte %d\n",sourcePrecision,destSize,sourcePrecision,sourceEndByte);
      destMR = reuseS390RightAlignedMemoryReference(destMR, node, targetStorageReference, cg);
      generateSS2Instruction(cg, TR::InstOpCode::PACK, node,
                             destSize-1,
                             destMR,
                             sourcePrecision-1,
                             generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceEndByte));
      int32_t destSizeAsCeilingPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(destSize);
      if (destSizeAsCeilingPrecision > sourcePrecision)
         targetReg->addRangeOfZeroDigits(sourcePrecision, destSizeAsCeilingPrecision);
      }
   else if (sourcePrecision >= 17 && sourcePrecision <= 31)
      {
      if (cg->traceBCDCodeGen())
         {
         if (sourcePrecision >= 17 && sourcePrecision <= 30)
            traceMsg(comp,"\tsourcePrecision 17 <= %d <= 30 so generate two PACKs with sourceEndByte %d\n",sourcePrecision,sourceEndByte);
         else
            traceMsg(comp,"\tsourcePrecision == 31 so generate three PACKs with sourceEndByte %d\n",sourceEndByte);
         }
      bool needsThirdPack = false;
      if (sourcePrecision == 31)
         {
         sourcePrecision = 29; // The first two PACKs for the sourcePrecision=31 case are the same as for the sourcePrecision=29 case
         destPrecision = 29;
         needsThirdPack = true;
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsourcePrecision == 31 so reduce sourcePrecision and destPrecision to 29 and update sourceEndByte to %d\n",sourceEndByte);
         }

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"x^x : found large packed/zoned conv -- node %s (%p) prec %d, child %s (%p) prec %d (three=%s)\n",
            node->getOpCode().getName(),node,destPrecision,
            child->getOpCode().getName(),child,sourcePrecision,needsThirdPack?"yes":"no");

      destMR = reuseS390LeftAlignedMemoryReference(destMR, node, targetStorageReference, cg, destSize);
      sourceMR = generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceEndByte);
      int32_t pack1SourceSize = sourcePrecision-14;
      int32_t pack1DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), destPrecision-14);
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\t\t1st PACK destSize=%d,srcSize=%d\n",pack1DestSize,pack1SourceSize);
      generateSS2Instruction(cg, TR::InstOpCode::PACK, node,
                             pack1DestSize-1,
                             destMR,
                             pack1SourceSize-1,
                             sourceMR);
      int32_t pack1DestSizeAsPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(pack1DestSize);
      if (pack1DestSizeAsPrecision > pack1SourceSize)
         {
         int32_t rightMostDigits = (destSize-pack1DestSize)*2;
         targetReg->addRangeOfZeroDigits(pack1SourceSize+rightMostDigits, pack1DestSizeAsPrecision+rightMostDigits);
         }
      // Each following PACK starts one byte before the end of the previous one on both the source and destination side
      int32_t pack2SourceSize = 15;
      int32_t pack2SourceOffset = pack1SourceSize-1;
      int32_t pack2DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), pack2SourceSize);
      int32_t pack2DestOffset = pack1DestSize-1;
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\t\t2nd PACK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",pack2DestSize,pack2DestOffset,pack2SourceSize,pack2SourceOffset);
      generateSS2Instruction(cg, TR::InstOpCode::PACK, node,
                             pack2DestSize-1,
                             generateS390LeftAlignedMemoryReference(*destMR, node, pack2DestOffset, cg, destMR->getLeftMostByte()),
                             pack2SourceSize-1,
                             generateS390LeftAlignedMemoryReference(*sourceMR, node, pack2SourceOffset, cg, sourceMR->getLeftMostByte()));
      if (needsThirdPack)
         {
         int32_t pack3SourceSize = 3;
         int32_t pack3SourceOffset = pack2SourceOffset+(pack2SourceSize-1);
         int32_t pack3DestSize = TR::DataType::getSizeFromBCDPrecision(node->getDataType(), pack3SourceSize);
         int32_t pack3DestOffset = pack2DestOffset+(pack2DestSize-1);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\t\t3rd PACK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",pack3DestSize,pack3DestOffset,pack3SourceSize,pack3SourceOffset);
         generateSS2Instruction(cg, TR::InstOpCode::PACK, node,
                                pack3DestSize-1,
                                generateS390LeftAlignedMemoryReference(*destMR, node, pack3DestOffset, cg, destMR->getLeftMostByte()),
                                pack3SourceSize-1,
                                generateS390LeftAlignedMemoryReference(*sourceMR, node, pack3SourceOffset, cg, sourceMR->getLeftMostByte()));
         }
      }
   else
      {
      TR_ASSERT(false,"zd2pd unexpected sourcePrecision %d\n",sourcePrecision);
      }

   TR::Register* signCode     = cg->allocateRegister();
   TR::Register* signCode4Bit = cg->allocateRegister();

   TR::LabelSymbol * processSign     = generateLabelSymbol(cg);
   TR::LabelSymbol * processSignEnd  = generateLabelSymbol(cg);
   TR::LabelSymbol * processNegative = generateLabelSymbol(cg);
   TR::LabelSymbol * cFlowRegionEnd  = generateLabelSymbol(cg);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);
   processSign->setStartInternalControlFlow();

   // Load the sign byte of the Packed Decimal from memory
   generateRXInstruction(cg, TR::InstOpCode::LLC, node, signCode, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1));

   generateRRInstruction(cg, TR::InstOpCode::LR, node, signCode4Bit, signCode);

   // Clear most significant 4 bits
   generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode4Bit, 0x000F);

   // Compare the sign byte against the preferred negative sign code
   generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getPreferredMinusCode());

   // Branch if equal
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cFlowRegionEnd);

   // Clear least significant 4 bits
   generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode, 0x00F0);

   // Compare the sign byte against the alternative negative sign code
   generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getAlternateMinusCode());

   // Branch if equal
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, processNegative);

   // Patch in the preferred positive sign code
   generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredPlusCode());

   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processSignEnd);

   // ----------------- Incoming branch -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);

   // Patch in the preferred negative sign code
   generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredMinusCode());

   // ----------------- Incoming branch -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSignEnd);

   generateRXInstruction(cg, TR::InstOpCode::STC, node, signCode, generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1));

   // Set up the proper register dependencies. Up to four post conditions can be added below (the two scratch
   // registers plus the optional index and base registers of destMR), so reserve room for all four.
   TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);

   dependencies->addPostCondition(signCode,     TR::RealRegister::AssignAny);
   dependencies->addPostCondition(signCode4Bit, TR::RealRegister::AssignAny);

   if (destMR->getIndexRegister())
      dependencies->addPostCondition(destMR->getIndexRegister(), TR::RealRegister::AssignAny);

   if (destMR->getBaseRegister())
      dependencies->addPostCondition(destMR->getBaseRegister(), TR::RealRegister::AssignAny);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
   cFlowRegionEnd->setEndInternalControlFlow();

   // Cleanup registers before returning
   cg->stopUsingRegister(signCode);
   cg->stopUsingRegister(signCode4Bit);

   targetReg->transferSignState(childReg, isTruncation);
   targetReg->transferDataState(childReg);
   targetReg->setIsInitialized();
   node->setRegister(targetReg);
   return destMR;
   }
1539
1540
/**
 * \brief Evaluates a zd2pd (zoned decimal to packed decimal) node.
 *
 * The vector helper is used when the target CPU reports the vector packed decimal feature and vector BCD is not
 * disabled by option, or when the TR_enableVectorBCD environment variable is set. Otherwise the child is
 * evaluated to a pseudo register and converted in storage via zonedToPackedHelper.
 *
 * \return The register holding the packed decimal result (also attached to \p node).
 */
TR::Register *
J9::Z::TreeEvaluator::zd2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("zd2pd",node);
   TR::Register *resultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool useVectorPath =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   if (!useVectorPath)
      {
      // Memory-to-memory path: evaluate the zoned child and PACK it into the target's storage
      TR_PseudoRegister *packedReg = cg->allocatePseudoRegister(node->getDataType());
      resultReg = packedReg;
      TR::Node *zonedChild = node->getFirstChild();
      TR_PseudoRegister *zonedReg = cg->evaluateBCDNode(zonedChild);
      zonedReg = cg->privatizeBCDRegisterIfNeeded(node, zonedChild, zonedReg);
      TR::MemoryReference *zonedMR = generateS390RightAlignedMemoryReference(zonedChild, zonedReg->getStorageReference(), cg);
      zonedToPackedHelper(node, packedReg, zonedMR, zonedReg, cg);
      cg->decReferenceCount(zonedChild);
      node->setRegister(resultReg);
      }
   else
      {
      resultReg = zd2pdVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("zd2pd",node);
   return resultReg;
   }
1566
1567
/**
1568
* 1. Get zd value by evaluating child node. It's in zdNode's PseudoRegister
1569
* 2. Get the memory reference from the pseudo register.
1570
* 3. Allocate Vector register to return
1571
* 4. get size of the node( node->getsize)
1572
* 5. generateVSI instruction using the information above.
1573
* 6. attach Vector register to the node.
1574
* 7. decReference BCD node for the child/
1575
* 8. return targetRegister.
1576
*/
1577
TR::Register *
1578
J9::Z::TreeEvaluator::zd2pdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
1579
{
1580
TR::Register *targetReg = NULL;
1581
1582
TR::Node *child = node->getFirstChild();
1583
TR_PseudoRegister *sourceReg = cg->evaluateBCDNode(child);
1584
sourceReg = cg->privatizeBCDRegisterIfNeeded(node, child, sourceReg);
1585
TR::MemoryReference *sourceMR = generateS390LeftAlignedMemoryReference(child, sourceReg->getStorageReference(), cg, child->getDecimalPrecision());
1586
targetReg = cg->allocateRegister(TR_VRF);
1587
int32_t destPrecision = std::min(node->getDecimalPrecision(), child->getDecimalPrecision());
1588
generateVSIInstruction(cg, TR::InstOpCode::VPKZ, node, targetReg, sourceMR, destPrecision - 1);
1589
1590
node->setRegister(targetReg);
1591
cg->decReferenceCount(child);
1592
return targetReg;
1593
}
1594
1595
/**
1596
* \brief Check the sign of zd after pd2zd conversion.
1597
*
1598
* The UNPK instruction does not validate the digits nor the sign of the packed decimal.
1599
* We need to check the sign of PD and set ZD signs properly: use 0xc for positive, and 0xd for negative numbers.
1600
*
1601
*/
1602
void
1603
J9::Z::TreeEvaluator::pd2zdSignFixup(TR::Node *node,
1604
TR::MemoryReference *destMR,
1605
TR::CodeGenerator * cg,
1606
bool useLeftAlignedMR)
1607
{
1608
TR::Register* signCode = cg->allocateRegister();
1609
TR::Register* signCode4Bit = cg->allocateRegister();
1610
1611
TR::LabelSymbol * processSign = generateLabelSymbol(cg);
1612
TR::LabelSymbol * processSignEnd = generateLabelSymbol(cg);
1613
TR::LabelSymbol * processNegative = generateLabelSymbol(cg);
1614
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
1615
1616
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSign);
1617
processSign->setStartInternalControlFlow();
1618
1619
TR::MemoryReference* signByteMR = NULL;
1620
if (useLeftAlignedMR)
1621
signByteMR = generateS390LeftAlignedMemoryReference(*destMR, node, 0, cg, 1);
1622
else
1623
signByteMR = generateS390MemoryReference(*destMR, (node->getSecondChild())->getDecimalPrecision() - 1, cg);
1624
1625
// Load the sign byte of the Zoned Decimal from memory
1626
generateRXInstruction(cg, TR::InstOpCode::LLC, node, signCode, signByteMR);
1627
1628
generateRRInstruction(cg, TR::InstOpCode::LR, node, signCode4Bit, signCode);
1629
1630
// Clear least significant 4 bits
1631
generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode4Bit, 0x00F0);
1632
1633
// Compare the sign byte against the preferred negative sign code
1634
generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getPreferredMinusCode() << 4);
1635
1636
// Branch if equal
1637
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, cFlowRegionEnd);
1638
1639
// Clear most significant 4 bits
1640
generateRIInstruction(cg, TR::InstOpCode::NILL, node, signCode, 0x000F);
1641
1642
// Compare the sign byte against the alternative negative sign code
1643
generateRIInstruction(cg, TR::InstOpCode::CHI, node, signCode4Bit, TR::DataType::getAlternateMinusCode() << 4);
1644
1645
// Branch if equal
1646
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, processNegative);
1647
1648
// Patch in the preferred positive sign code
1649
generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredPlusCode() << 4);
1650
1651
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processSignEnd);
1652
1653
// ----------------- Incoming branch -----------------
1654
1655
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processNegative);
1656
1657
// Patch in the preferred negative sign code
1658
generateRIInstruction(cg, TR::InstOpCode::OILL, node, signCode, TR::DataType::getPreferredMinusCode() << 4);
1659
1660
// ----------------- Incoming branch -----------------
1661
1662
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSignEnd);
1663
1664
generateRXInstruction(cg, TR::InstOpCode::STC, node, signCode, generateS390MemoryReference(*signByteMR, 0, cg));
1665
1666
// Set up the proper register dependencies
1667
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
1668
1669
dependencies->addPostCondition(signCode, TR::RealRegister::AssignAny);
1670
dependencies->addPostCondition(signCode4Bit, TR::RealRegister::AssignAny);
1671
1672
if (destMR->getIndexRegister())
1673
dependencies->addPostCondition(destMR->getIndexRegister(), TR::RealRegister::AssignAny);
1674
1675
if (destMR->getBaseRegister())
1676
dependencies->addPostCondition(destMR->getBaseRegister(), TR::RealRegister::AssignAny);
1677
1678
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
1679
cFlowRegionEnd->setEndInternalControlFlow();
1680
1681
// Cleanup registers before returning
1682
cg->stopUsingRegister(signCode);
1683
cg->stopUsingRegister(signCode4Bit);
1684
}
1685
1686
/**
 * \brief Emits the UNPK instruction(s) that convert the packed decimal value described by \p childReg / \p sourceMR
 *        into zoned decimal storage for \p node, followed by the zoned sign fix-up.
 *
 * The destination storage is the node's storage reference hint when present (its symbol size may exceed the
 * node size, in which case the UNPK also widens), otherwise a new temporary sized to the node. A single UNPK is
 * used when either the destination or the (possibly truncated) source precision is at most 16; otherwise two
 * UNPKs are generated, or three when the source precision is 31.
 *
 * \param node      The conversion node being evaluated; its first child is the packed source.
 * \param targetReg Pseudo register that receives the storage reference, precision and sign/data state of the result.
 * \param sourceMR  Memory reference for the packed source value.
 * \param childReg  Pseudo register of the evaluated packed child.
 * \param cg        The code generator.
 *
 * \return The memory reference used for the zoned destination.
 */
TR::MemoryReference *
J9::Z::TreeEvaluator::packedToZonedHelper(TR::Node *node, TR_PseudoRegister *targetReg, TR::MemoryReference *sourceMR, TR_PseudoRegister *childReg, TR::CodeGenerator * cg)
   {
   TR::Node *packedChild = node->getFirstChild();
   TR::Compilation *comp = cg->comp();

   // Choose the destination storage: the hint if there is one, else a fresh temporary
   TR_StorageReference *hint = node->getStorageReferenceHint();
   TR_StorageReference *zonedStorageRef = NULL;
   int32_t destSize = 0;
   if (hint)
      {
      TR_ASSERT( !childReg->isInitialized() || hint != childReg->getStorageReference(),"bcd conversion operands will overlap\n");
      // the hint may be larger than node->getSize(), which lets the UNPK widen as well
      destSize = hint->getSymbolSize();
      zonedStorageRef = hint;
      }
   else
      {
      destSize = node->getSize();
      zonedStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(destSize, comp);
      }

   targetReg->setStorageReference(zonedStorageRef, node);

   int32_t destPrecision = TR::DataType::getBCDPrecisionFromSize(node->getDataType(), destSize);
   targetReg->setDecimalPrecision(destPrecision);
   int32_t sourcePrecision = childReg->getDecimalPrecision();
   int32_t sourceSize = childReg->getSize();

   // a trailing separate sign occupies extra byte(s) past the digits that the UNPK must not touch
   const bool hasTrailingSeparateSign = (node->getDataType() == TR::ZonedDecimalSignTrailingSeparate);
   int32_t zonedEndByte = hasTrailingSeparateSign ? destPrecision + TR::DataType::getZonedSignSize() :
                                                    destPrecision;

   if (cg->traceBCDCodeGen())
      traceMsg(comp,"\tpackedToZonedHelper %p : op %s, destPrecision %d, destSize %d, destEndByte %d, sourcePrecision %d, sourceSize %d\n",
         node,node->getOpCode().getName(),destPrecision,destSize,zonedEndByte,sourcePrecision,childReg->getSize());

   const bool isTruncation = destPrecision < childReg->getDecimalPrecision();
   if (isTruncation)
      {
      // only the low destPrecision digits of the source take part in the conversion
      sourcePrecision = destPrecision;
      sourceSize = TR::DataType::getSizeFromBCDPrecision(packedChild->getDataType(), sourcePrecision);

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tisTruncation=true (dstPrec %d < srcPrec %d) reduce srcPrec %d->%d, srcSize %d->%d\n",
            destPrecision,childReg->getDecimalPrecision(),childReg->getDecimalPrecision(),sourcePrecision,childReg->getSize(),sourceSize);
      }

   TR::Node *paddingAnchor = NULL;
   bool evaluatedPaddingAnchor = false;
   TR::MemoryReference *destMR = NULL;
   if (destPrecision <= 16 || sourcePrecision <= 16)
      {
      int32_t singleDestOffset = 0;
      int32_t singleDestLen = destPrecision;
      destMR = generateS390LeftAlignedMemoryReference(node, zonedStorageRef, cg, zonedEndByte);

      if (destPrecision > 16)
         {
         // the destination is too wide for one UNPK: zero the leftmost bytes separately and unpack into the rest
         int32_t bytesToSet = destPrecision-sourcePrecision;
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tdestPrecision %d > 16, sourcePrecision %d <= 16 gen %d leftmost bytes of 0xF0\n",destPrecision,sourcePrecision,bytesToSet);
         TR_ASSERT(bytesToSet > 0,"destPrecision (%d) should be > sourcePrecision (%d)\n",destPrecision,sourcePrecision);
         cg->genZeroLeftMostZonedBytes(node, targetReg, zonedEndByte, bytesToSet, destMR);
         evaluatedPaddingAnchor = true;
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\treduce unpkDestOffset %d->%d and unpkDestSize %d->%d\n",singleDestOffset,bytesToSet,singleDestLen,sourcePrecision);
         singleDestOffset = bytesToSet;
         singleDestLen = sourcePrecision;
         }

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tdestPrecision %d <= 16 or sourcePrecision %d <= 16 so generate a single UNPK destPrecision %d, destOffset %d, unpkSourceSize %d\n",
            destPrecision,sourcePrecision,singleDestLen,singleDestOffset,sourceSize);
      generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,
                             singleDestLen-1,
                             generateS390LeftAlignedMemoryReference(*destMR, node, singleDestOffset, cg, destMR->getLeftMostByte()),
                             sourceSize-1,
                             generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
      if (singleDestLen > sourcePrecision)
         {
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tunpkDestSize %d > sourcePrecision %d adding range of zero digits for pd2zd op\n",singleDestLen,sourcePrecision);
         targetReg->addRangeOfZeroDigits(sourcePrecision, singleDestLen);
         }
      }
   else
      {
      TR_ASSERT(destPrecision <= 31,"pd2zd destPrecision should be <= 31 and not %d\n",destPrecision);
      TR_ASSERT(sourcePrecision <= 31,"pd2zd sourcePrecision should be <= 31 and not %d\n",sourcePrecision);
      if (cg->traceBCDCodeGen())
         {
         if (sourcePrecision >= 17 && sourcePrecision <= 30)
            traceMsg(comp,"\tsourcePrecision 17 <= %d <= 30 so generate two UNPKs\n",sourcePrecision);
         else
            traceMsg(comp,"\tsourcePrecision == 31 so generate three UNPKs\n");
         }

      const bool wantsThirdUnpk = (sourcePrecision == 31);
      const int32_t firstUnpkAdjust = wantsThirdUnpk ? 16 : 14;
      if (!wantsThirdUnpk)
         {
         // in this case can do the conversion in 2 UNPKs instead of 3. Keep the target precision up to 30 bytes to widen extra bytes.
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsourcePrecision < 31 (%d) so reduce destPrecision to min(destPrecision,30) = min(%d,30) = %d ",
               sourcePrecision,destPrecision,std::min(destPrecision,30));
         destPrecision = std::min(destPrecision, 30);
         zonedEndByte = hasTrailingSeparateSign ? destPrecision + TR::DataType::getZonedSignSize() :
                                                  destPrecision;
         targetReg->setDecimalPrecision(destPrecision);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"and update targetReg->prec to new destPrecision %d and update destEndByte to %d\n",destPrecision,zonedEndByte);
         }

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"x^x : found large packed/zoned conv -- node %s (%p) prec %d, child %s (%p) prec %d (three=%s)\n",
            node->getOpCode().getName(),node,destPrecision,
            packedChild->getOpCode().getName(),packedChild,sourcePrecision,wantsThirdUnpk?"yes":"no");

      destMR = generateS390LeftAlignedMemoryReference(node, zonedStorageRef, cg, zonedEndByte);
      sourceMR = generateS390LeftAlignedMemoryReference(*sourceMR, node, 0, cg, sourceSize);

      // 1st UNPK: the leading digits
      int32_t firstDestLen = destPrecision-firstUnpkAdjust;
      int32_t firstSrcLen = TR::DataType::getSizeFromBCDPrecision(packedChild->getDataType(), sourcePrecision-firstUnpkAdjust);
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\t\t1st UNPK destSize=%d,srcSize=%d\n",firstDestLen,firstSrcLen);
      generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,
                             firstDestLen-1,
                             destMR,
                             firstSrcLen-1,
                             sourceMR);

      // 2nd UNPK: starts on the last byte written/read by the first one
      int32_t secondDestLen = 15;
      int32_t secondDestOffset = firstDestLen-1;
      int32_t secondSrcLen = TR::DataType::getSizeFromBCDPrecision(packedChild->getDataType(), 15);
      int32_t secondSrcOffset = firstSrcLen-1;
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\t\t2nd UNPK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",secondDestLen,secondDestOffset,secondSrcLen,secondSrcOffset);
      generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,
                             secondDestLen-1,
                             generateS390LeftAlignedMemoryReference(*destMR, node, secondDestOffset, cg, destMR->getLeftMostByte()),
                             secondSrcLen-1,
                             generateS390LeftAlignedMemoryReference(*sourceMR, node, secondSrcOffset, cg, sourceMR->getLeftMostByte()));

      if (wantsThirdUnpk)
         {
         // 3rd UNPK: starts on the last byte written/read by the second one
         int32_t thirdDestLen = 3;
         int32_t thirdDestOffset = secondDestOffset+(secondDestLen-1);
         int32_t thirdSrcLen = TR::DataType::getSizeFromBCDPrecision(packedChild->getDataType(), 3);
         int32_t thirdSrcOffset = secondSrcOffset+(secondSrcLen-1);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\t\t3rd UNPK destSize=%d,destOffset=%d, srcSize=%d,srcOffset=%d\n",thirdDestLen,thirdDestOffset,thirdSrcLen,thirdSrcOffset);
         generateSS2Instruction(cg, TR::InstOpCode::UNPK, node,
                                thirdDestLen-1,
                                generateS390LeftAlignedMemoryReference(*destMR, node, thirdDestOffset, cg, destMR->getLeftMostByte()),
                                thirdSrcLen-1,
                                generateS390LeftAlignedMemoryReference(*sourceMR, node, thirdSrcOffset, cg, sourceMR->getLeftMostByte()));
         }
      }

   if (!evaluatedPaddingAnchor)
      cg->processUnusedNodeDuringEvaluation(paddingAnchor);

   // UNPK copies the sign unchecked, so normalize it afterwards
   pd2zdSignFixup(node, destMR, cg, true);

   targetReg->transferSignState(childReg, isTruncation);
   targetReg->transferDataState(childReg);
   targetReg->setIsInitialized();
   node->setRegister(targetReg);
   return destMR;
   }
1861
1862
/**
 * \brief Evaluates a pd2zd node whose packed child lives in a vector register by storing it as zoned decimal
 *        with VUPKZ and then fixing up the zoned sign.
 *
 * The result is a pseudo register backed by the node's storage reference hint when present, otherwise by a new
 * temporary of the node's size.
 *
 * \return The pseudo register describing the zoned result (also attached to \p node).
 */
TR::Register *
J9::Z::TreeEvaluator::pd2zdVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
   {
   TR::Compilation* comp = cg->comp();
   traceMsg(comp, "DAA: Enter pd2zdVectorEvaluatorHelper\n");
   TR_PseudoRegister *zonedReg = cg->allocatePseudoRegister(node->getDataType());

   // The zoned result has to live in memory, so:
   //   1. pick (or create) the storage reference for the result,
   //   2. build a memory reference from that storage reference,
   //   3. emit VUPKZ from the child's vector register into that memory reference,
   //   4. hand back the pseudo register that is associated with the storage reference.
   TR_StorageReference *hint = node->getStorageReferenceHint();
   int32_t sizeOfZonedValue = node->getSize(); // for a zoned node the precision and the size must be the same
   int32_t precision = node->getDecimalPrecision();
   TR_StorageReference *zonedStorageRef = hint;
   if (!zonedStorageRef)
      zonedStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(sizeOfZonedValue, comp);

   zonedReg->setStorageReference(zonedStorageRef, node);

   // the child is expected to evaluate to a vector register
   TR::Node *packedChild = node->getFirstChild();
   TR::Register *valueRegister = cg->evaluate(packedChild);
   TR_ASSERT((valueRegister->getKind() == TR_VRF || valueRegister->getKind() == TR_FPR),
             "valueChild should evaluate to Vector register.");

   TR::MemoryReference *zonedMR = generateS390LeftAlignedMemoryReference(node, zonedStorageRef, cg, sizeOfZonedValue, false);

   if (!zonedStorageRef->isTemporaryBased())
      {
      if (TR::SymbolReference *listingSymRef = zonedStorageRef->getNode()->getSymbolReference())
         zonedMR->setListingSymbolReference(listingSymRef);
      }

   if (cg->traceBCDCodeGen())
      traceMsg(comp, "gen VUKPZ, sizeOfZonedValue=%d, precision=%d\n", sizeOfZonedValue, precision);

   generateVSIInstruction(cg, TR::InstOpCode::VUPKZ, node, valueRegister, zonedMR, sizeOfZonedValue - 1);

   // Fix pd2zd signs. VUPKZ and its non-vector counterpart don't validate digits nor signs.
   pd2zdSignFixup(node, zonedMR, cg, true);

   node->setRegister(zonedReg);
   cg->decReferenceCount(packedChild);
   zonedReg->setIsInitialized();
   traceMsg(comp, "DAA: Leave pd2zdVectorEvaluatorHelper\n");
   return zonedReg;
   }
1918
1919
/**
 * \brief Evaluates a pd2zd (packed decimal to zoned decimal) node.
 *
 * The vector helper is used when the target CPU reports the vector packed decimal feature and vector BCD is not
 * disabled by option, or when the TR_enableVectorBCD environment variable is set. Otherwise the child is
 * evaluated to a pseudo register and converted in storage via packedToZonedHelper.
 *
 * \return The register describing the zoned decimal result (also attached to \p node).
 */
TR::Register *
J9::Z::TreeEvaluator::pd2zdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pd2zd",node);
   TR::Register *resultReg = NULL;
   cg->generateDebugCounter("PD-Op/pd2zd", 1, TR::DebugCounter::Cheap);

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool useVectorPath =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   if (!useVectorPath)
      {
      // Memory-to-memory path: evaluate the packed child and UNPK it into the target's storage
      TR_PseudoRegister *zonedReg = cg->allocatePseudoRegister(node->getDataType());
      resultReg = zonedReg;
      TR::Node *packedChild = node->getFirstChild();
      TR_PseudoRegister *packedReg = cg->evaluateBCDNode(packedChild);
      packedReg = cg->privatizeBCDRegisterIfNeeded(node, packedChild, packedReg);
      TR::MemoryReference *packedMR = generateS390RightAlignedMemoryReference(packedChild, packedReg->getStorageReference(), cg);
      packedToZonedHelper(node, zonedReg, packedMR, packedReg, cg);
      cg->decReferenceCount(packedChild);
      node->setRegister(resultReg);
      }
   else
      {
      resultReg = pd2zdVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pd2zd",node);
   return resultReg;
   }
1948
1949
bool
1950
J9::Z::TreeEvaluator::isZonedOperationAnEffectiveNop(TR::Node * node, int32_t shiftAmount, bool isTruncation, TR_PseudoRegister *srcReg, bool isSetSign, int32_t signToSet, TR::CodeGenerator * cg)
1951
{
1952
bool isEffectiveNop = false;
1953
int32_t zone = TR::DataType::getZonedValue();
1954
// For skipLeadingSignReset to be correct the node refCount must be 1 otherwise a commoned reference may be exposed to an incorrect
1955
// zone nibble (it will be the source's sign code and not the correct zone value)
1956
bool skipLeadingSignReset = false;
1957
bool srcSignIsZone = srcReg->knownOrAssumedSignIsZone();
1958
bool signIsAlreadySet = srcReg->hasKnownOrAssumedSignCode() && (srcReg->getKnownOrAssumedSignCode()==signToSet);
1959
bool signToSetIsZone = signToSet == zone;
1960
bool signToSetIsIgnored = signToSet == TR::DataType::getIgnoredSignCode();
1961
bool signToSetIsZoneOrIgnored = signToSetIsZone || signToSetIsIgnored;
1962
1963
TR_ASSERT(!node->getOpCode().isRightShift() || shiftAmount > 0,"shiftAmount should be > 0 for zoned right shifts and not a %d\n",shiftAmount);
1964
switch (node->getOpCodeValue())
1965
{
1966
case TR::zd2zdsle:
1967
isEffectiveNop = srcSignIsZone || (node->getDecimalPrecision() == 1);
1968
break;
1969
case TR::zdsle2zd:
1970
isEffectiveNop = srcSignIsZone || (srcReg->getDecimalPrecision() == 1);
1971
break;
1972
case TR::zdsts2zd:
1973
case TR::zdsls2zd:
1974
break;
1975
default:
1976
TR_ASSERT(false,"unexpected zoned opcode %d\n",node->getOpCodeValue());
1977
break;
1978
}
1979
return isEffectiveNop;
1980
}
1981
1982
/**
1983
* \brief This evaluator helper function evaluates BCDCHK nodes by emitting mainline and out-of-line instructions for
1984
* the underlying packed decimal operations. The mainline instructions perform the actual operations, and the OOL
1985
* instructions are for hardware exception handling.
1986
*
1987
* The canonical BCDCHK IL structure is the following:
1988
*
1989
* BCDCHK
1990
* pdOpNode // the operation node
1991
* aladd // optional address node. Exists only if the result of the operation is packed decimal
1992
* callParam1 // call parameter nodes of the original DAA API call
1993
* callParam2
1994
* .
1995
* .
1996
* callParamN
1997
*
1998
* With the new DAA BCDCHK node tree structure, the first child of a BCDCHK node is
1999
* always the PD opNode. The first child and its sub-tree could throw packed decimal related hardware exceptions, which is
2000
* to be handled by the designated OOL instruction sequence.
2001
*
2002
* As for the second child of BCDCHK, it will be an address node if the result of the PD operation is a packed decimal. This address
2003
* node is to be used by the OOL for result copy back.
2004
*
2005
* The steps to evaluate the new BCDCHK node is the following:
2006
*
2007
* -# Create a callNode and attached BCDCHK's call parameter children to it. This callNode is to be evaluated
2008
* later in the OOL section
2009
*
2010
* -# If applicable, evaluate address node's children (e.g. this is applicable to i2pd but not to PD comparisons)
2011
*
2012
* -# Create a handlerLabel that points to the start of the OOL section
2013
*
2014
* -# Evaluate the pdopNode (first child) and decrement its refCount.
2015
*
2016
* -# Emit a NOP BRC bearing the handlerLabel right after evaluating the pdopNode. This is for SignalHandler.c
2017
*
2018
* -# Switch to OOL code generation and evaluate the callNode
2019
*
2020
* -# Evaluate the addressNode (second child of BCDCHK node) to yield a correct address into the byte[]
2021
*
2022
* -# Copy the results produced by the call from byte[] back to mainline storage reference
2023
*
2024
* -# Finish up by decRefCount on callNode and addressNode
2025
*
2026
* \param node the BCDCHK node
2027
* \param cg codegen object
2028
* \param numCallParam number of callNode children
2029
* \param callChildStartIndex the index of the first callChild under the BCDCHK node
2030
* \param isResultPD True if the result of the pdOpNode a PD; false if the result is a binary integer/long
2031
* This also implies that the second node of the BCDCHK node is an address node.
2032
* \param isUseVector If true, emit vector packed decimal instructions
2033
* \param isVariableParam true if the PD operation's precision is not a constant.
2034
*/
2035
TR::Register *
2036
J9::Z::TreeEvaluator::BCDCHKEvaluatorImpl(TR::Node * node,
2037
TR::CodeGenerator * cg,
2038
uint32_t numCallParam,
2039
uint32_t callChildStartIndex,
2040
bool isResultPD,
2041
bool isUseVector,
2042
bool isVariableParam)
2043
{
2044
TR::Compilation *comp = cg->comp();
2045
TR_Debug* debugObj = cg->getDebug();
2046
TR::Node* pdopNode = node->getFirstChild();
2047
TR::Node* secondChild = node->getSecondChild();
2048
2049
bool isResultLong = pdopNode->getOpCodeValue() == TR::pd2l ||
2050
pdopNode->getOpCodeValue() == TR::pd2lOverflow ||
2051
pdopNode->getOpCodeValue() == TR::lcall;
2052
2053
TR::LabelSymbol* handlerLabel = generateLabelSymbol(cg);
2054
TR::LabelSymbol* passThroughLabel = generateLabelSymbol(cg);
2055
cg->setCurrentBCDCHKHandlerLabel(handlerLabel);
2056
2057
// This is where the call children node come from and the node that has the call symRef
2058
TR::Node* childRootNode = isVariableParam ? pdopNode : node;
2059
2060
// Create a call
2061
TR::ILOpCodes callType = isResultPD ? TR::call : (isResultLong ? TR::lcall : TR::icall);
2062
2063
TR::Node * callNode = TR::Node::createWithSymRef(node, callType, numCallParam,
2064
childRootNode->getSymbolReference());
2065
cg->incReferenceCount(callNode);
2066
callNode->setNumChildren(numCallParam);
2067
2068
// Setup callNode children
2069
for (uint32_t i = 0; i < numCallParam; ++i)
2070
callNode->setAndIncChild(i, childRootNode->getChild(i + callChildStartIndex));
2071
2072
// Evaluate secondChild's children, if the secondChild is an address node into a byte[]
2073
if(isResultPD && secondChild->getNumChildren() == 2)
2074
{
2075
cg->evaluate(secondChild->getFirstChild());
2076
cg->evaluate(secondChild->getSecondChild());
2077
}
2078
2079
// Evaluate intrinsics node
2080
TR::Register* bcdOpResultReg = NULL;
2081
if(isVariableParam)
2082
{
2083
bcdOpResultReg = pd2lVariableEvaluator(node, cg, isUseVector);
2084
}
2085
else if(isResultPD && !isUseVector)
2086
{
2087
bcdOpResultReg = cg->evaluateBCDNode(pdopNode);
2088
}
2089
else
2090
{
2091
bcdOpResultReg = cg->evaluate(pdopNode);
2092
}
2093
2094
// start of OOL section
2095
traceMsg(comp, "starting OOL section generation.\n");
2096
TR_S390OutOfLineCodeSection* outlinedHelperCall = new (INSN_HEAP) TR_S390OutOfLineCodeSection(handlerLabel, passThroughLabel, cg);
2097
cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);
2098
outlinedHelperCall->swapInstructionListsWithCompilation();
2099
// snippetLabel : OOL Start label
2100
TR::Instruction* cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handlerLabel);
2101
2102
if(debugObj)
2103
{
2104
debugObj->addInstructionComment(cursor, "Start of BCDCHK OOL sequence");
2105
}
2106
2107
// Debug counter for tracking how often we fall back to the OOL path of the DAA intrinsic
2108
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "DAA/OOL/(%s)/%p", comp->signature(), node), 1, TR::DebugCounter::Undetermined);
2109
2110
// Evaluate the callNode, duplicate and evaluate the address node, and then copy the
2111
// correct results back to the mainline storage ref or register
2112
TR::Register* callResultReg = cg->evaluate(callNode);
2113
2114
if(isResultPD)
2115
{
2116
TR::Register* srcBaseReg = cg->evaluate(secondChild);
2117
TR::MemoryReference* srcMR = generateS390MemoryReference(srcBaseReg, 0, cg);
2118
int32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(pdopNode->getDecimalPrecision());
2119
2120
if(isUseVector)
2121
{
2122
TR_ASSERT(bcdOpResultReg && (bcdOpResultReg->getKind() == TR_VRF || bcdOpResultReg->getKind() == TR_FPR),
2123
"Vector register expected\n");
2124
2125
generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, bcdOpResultReg, srcMR, resultSize - 1);
2126
}
2127
else
2128
{
2129
TR::MemoryReference* targetMR = generateS390RightAlignedMemoryReference(pdopNode, static_cast<TR_PseudoRegister*>(bcdOpResultReg)->getStorageReference(), cg);
2130
generateSS1Instruction(cg, TR::InstOpCode::MVC, node, resultSize - 1, targetMR, srcMR);
2131
}
2132
2133
cg->decReferenceCount(secondChild);
2134
cg->stopUsingRegister(callResultReg);
2135
}
2136
else
2137
{
2138
if(isResultLong)
2139
{
2140
generateRREInstruction(cg, TR::InstOpCode::LGR, node, bcdOpResultReg, callResultReg);
2141
}
2142
else
2143
{
2144
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, bcdOpResultReg, callResultReg);
2145
}
2146
}
2147
2148
cg->stopUsingRegister(callResultReg);
2149
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, passThroughLabel);
2150
2151
// Decrement reference counts
2152
cg->recursivelyDecReferenceCount(callNode);
2153
if(isVariableParam)
2154
{
2155
// variable parameter l2pd is a call node
2156
cg->recursivelyDecReferenceCount(pdopNode);
2157
}
2158
else
2159
{
2160
cg->decReferenceCount(pdopNode);
2161
}
2162
2163
if(debugObj)
2164
{
2165
debugObj->addInstructionComment(cursor, "End of BCDCHK OOL sequence: return to mainline");
2166
}
2167
2168
traceMsg(comp, "Finished OOL section generation.\n");
2169
2170
// ***Done using OOL with manual code generation *** //
2171
outlinedHelperCall->swapInstructionListsWithCompilation();
2172
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, passThroughLabel, cg->getCurrentCheckNodeRegDeps());
2173
2174
cg->setCurrentBCDCHKHandlerLabel(NULL);
2175
return bcdOpResultReg;
2176
}
2177
2178
/**
2179
* BCDCHKEvaluator -
2180
*/
2181
TR::Register *
2182
J9::Z::TreeEvaluator::BCDCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
2183
{
2184
TR::Compilation *comp = cg->comp();
2185
TR::Node* pdopNode = node->getFirstChild();
2186
TR::Register* resultReg = pdopNode->getRegister();
2187
bool isResultPD = pdopNode->getDataType() == TR::PackedDecimal;
2188
bool isVariableParam = false;
2189
uint32_t firstCallParamIndex = 0;
2190
2191
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
2192
bool isEnableVectorBCD = comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
2193
&& !comp->getOption(TR_DisableVectorBCD)
2194
|| isVectorBCDEnv;
2195
2196
// Validate PD operations under BCDCHK node
2197
switch (pdopNode->getOpCodeValue())
2198
{
2199
case TR::pdcmpgt:
2200
case TR::pdcmplt:
2201
case TR::pdcmpge:
2202
case TR::pdcmple:
2203
case TR::pdcmpeq:
2204
case TR::pdcmpne:
2205
break;
2206
case TR::i2pd:
2207
case TR::l2pd:
2208
case TR::pd2l:
2209
case TR::pd2i:
2210
case TR::pd2iOverflow:
2211
case TR::pd2lOverflow:
2212
case TR::pdadd:
2213
case TR::pdsub:
2214
case TR::pdmul:
2215
case TR::pddiv:
2216
case TR::pdrem:
2217
case TR::pdshlOverflow:
2218
case TR::pdshr:
2219
{
2220
cg->setIgnoreDecimalOverflowException(node->getLastChild()->getInt() == 0);
2221
break;
2222
}
2223
case TR::lcall:
2224
case TR::icall:
2225
{
2226
switch (pdopNode->getSymbol()->getMethodSymbol()->getMethod()->getRecognizedMethod())
2227
{
2228
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_:
2229
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToInteger_ByteBuffer_:
2230
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_:
2231
case TR::com_ibm_dataaccess_DecimalData_convertPackedDecimalToLong_ByteBuffer_:
2232
{
2233
isVariableParam = true;
2234
2235
// Need a parameter check because variable PD2L and PD2I could have non-constant 'checkOverflow' (see IS_VARIABLE_PD2I macro).
2236
TR::Node* checkOverflowNode = pdopNode->getLastChild();
2237
cg->setIgnoreDecimalOverflowException(checkOverflowNode->getOpCode().isLoadConst() && (checkOverflowNode->getInt() == 0));
2238
break;
2239
}
2240
2241
default:
2242
{
2243
/**
2244
* BCDCHK can have a call node if the PD operation can be simplified to a No-Op.
2245
* For example, one can get an integer via a call
2246
* perform a i2pd followed by a pd2i. The pd2i (under BCDCHK) can be simplified to the icall.
2247
* If this is the case, the lcall/icall must have been evaluated.
2248
* We can skip the BCDCHK evaluation and return the call result.
2249
*/
2250
TR_ASSERT_FATAL(resultReg != NULL,
2251
"BCDCHKEvaluator: variable precision path encounters an unrecognized and unevaluated long/int call\n");
2252
}
2253
}
2254
break;
2255
}
2256
2257
default:
2258
{
2259
/**
2260
* Unrecognized opCodes under BCDCHK should come from optimizations such as local CSE and tree simplifications.
2261
* They should be commoned nodes that's evaluated previously. Skip these nodes.
2262
*/
2263
TR_ASSERT_FATAL(resultReg != NULL, "BCDCHKEvaluator: BCDCHK has an unevaluated non-PD node %p (non-PD op code %s) \n",
2264
pdopNode,
2265
pdopNode->getOpCode().getName());
2266
2267
traceMsg(comp, "BCDCHK node n%dn has non-PD operation %s\n",
2268
node->getGlobalIndex(), pdopNode->getOpCode().getName());
2269
}
2270
}
2271
2272
if (!isVariableParam)
2273
{
2274
firstCallParamIndex = isResultPD ? 2 : 1;
2275
}
2276
2277
// Evaluate call parameters
2278
TR::Node* callParamRoot = isVariableParam ? pdopNode : node;
2279
for (uint32_t i = firstCallParamIndex; i < callParamRoot->getNumChildren(); ++i)
2280
{
2281
TR::Node* callArg = callParamRoot->getChild(i);
2282
if (callArg->getReferenceCount() != 1 || callArg->getRegister() != NULL)
2283
cg->evaluate(callArg);
2284
}
2285
2286
/*
2287
* Avoid evaluating an evaluated pdOpNode (first child of BCDCHK) under a BCDCHK node if
2288
* it is already evaluated.
2289
*
2290
* This is to avoid generating OOL paths without mainline sequences. OOL without mainline can
2291
* cause RA to produce incorrect register use counts, and eventually produce incorrect GC maps that
2292
* make GC fail during runtime.
2293
*/
2294
if (resultReg != NULL)
2295
{
2296
if (isVariableParam)
2297
cg->recursivelyDecReferenceCount(pdopNode); // variable parameter l2pd is a call node
2298
else
2299
{
2300
// first child
2301
cg->decReferenceCount(pdopNode);
2302
2303
// second child
2304
if (isResultPD)
2305
cg->recursivelyDecReferenceCount(node->getSecondChild());
2306
2307
// call parameters: 2nd/3rd and above
2308
for (uint32_t i = firstCallParamIndex; i < node->getNumChildren(); ++i)
2309
cg->decReferenceCount(node->getChild(i));
2310
}
2311
2312
traceMsg(comp, "Skipped BCDCHK node n%dn\n", node->getGlobalIndex());
2313
}
2314
else
2315
{
2316
uint32_t numCallChildren = isVariableParam ? pdopNode->getNumChildren() : (node->getNumChildren() - firstCallParamIndex);
2317
2318
TR::RegisterDependencyConditions * daaDeps = new (INSN_HEAP) TR::RegisterDependencyConditions(0, 13, cg);
2319
2320
cg->setCurrentCheckNodeRegDeps(daaDeps);
2321
cg->setCurrentCheckNodeBeingEvaluated(node);
2322
2323
resultReg = BCDCHKEvaluatorImpl(node, cg, numCallChildren, firstCallParamIndex,
2324
isResultPD, isEnableVectorBCD, isVariableParam);
2325
2326
cg->setCurrentCheckNodeRegDeps(NULL);
2327
cg->setCurrentCheckNodeBeingEvaluated(NULL);
2328
}
2329
2330
cg->setIgnoreDecimalOverflowException(false);
2331
return resultReg;
2332
}
2333
2334
/**
 * \brief Evaluates a packed decimal comparison node with the vector compare instruction (VCP).
 *
 * The result register is preloaded with 1. After VCP, one or two conditional branches selected by the
 * node's opcode jump over a load of 0 when the comparison holds, so the register ends up 1 when the
 * comparison is true and 0 otherwise. As used below, CC0 stands for equal, CC1 for first operand lower
 * and CC2 for first operand higher.
 *
 * \param node one of pdcmpeq, pdcmpne, pdcmplt, pdcmple, pdcmpgt, pdcmpge
 * \param cg   the code generator
 * \return the GPR holding the 0/1 result (also set on the node)
 */
TR::Register*
J9::Z::TreeEvaluator::pdcmpVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register* boolReg = cg->allocateRegister(TR_GPR);
   generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, boolReg, boolReg);
   generateLoad32BitConstant(cg, node, 1, boolReg, true);

   TR::RegisterDependencyConditions* postDeps = new(cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
   postDeps->addPostConditionIfNotAlreadyInserted(boolReg, TR::RealRegister::AssignAny);

   TR::Node* lhsNode = node->getFirstChild();
   TR::Node* rhsNode = node->getSecondChild();

   TR::Register* lhsReg = cg->evaluate(lhsNode);
   TR::Register* rhsReg = cg->evaluate(rhsNode);

   // TODO: should we correct bad sign before comparing them
   TR::Instruction* cursor = generateVRRhInstruction(cg, TR::InstOpCode::VCP, node, lhsReg, rhsReg, 0);

   // Everything between the two labels below is an internal control flow region
   TR::LabelSymbol* regionStart = generateLabelSymbol(cg);
   cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, regionStart);
   regionStart->setStartInternalControlFlow();

   TR::LabelSymbol* regionEnd = generateLabelSymbol(cg);

   // Branch to the end of the region (keeping the preloaded 1) when the comparison holds
   switch(node->getOpCodeValue())
      {
      case TR::pdcmpeq:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, regionEnd);
         break;

      case TR::pdcmpne:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, regionEnd);
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, regionEnd);
         break;

      case TR::pdcmplt:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, regionEnd);
         break;

      case TR::pdcmple:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, regionEnd);
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, regionEnd);
         break;

      case TR::pdcmpgt:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, regionEnd);
         break;

      case TR::pdcmpge:
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, regionEnd);
         cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, regionEnd);
         break;

      default:
         TR_ASSERT(0, "Unrecognized op code in pd cmp vector evaluator helper.");
      }

   // TODO: `cursor` is only tracked because `deps` must be passed after it in the parameter list below;
   // rearranging the parameters would remove that need.
   // Fall-through path: the comparison does not hold, so the result becomes 0.
   cursor = generateLoad32BitConstant(cg, node, 0, boolReg, true, cursor, postDeps);

   cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, regionEnd, postDeps);
   regionEnd->setEndInternalControlFlow();

   node->setRegister(boolReg);

   cg->decReferenceCount(lhsNode);
   cg->decReferenceCount(rhsNode);

   return boolReg;
   }
2401
2402
/**
 * \brief Evaluator for pdcmpeq nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmpeq node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register*
J9::Z::TreeEvaluator::pdcmpeqEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmpeq",node);
   cg->generateDebugCounter("PD-Op/pdcmpeq", 1, TR::DebugCounter::Cheap);

   // Supporting castedToBCD would require generateS390CompareBool to emit only a logical comparison, never CP
   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, TR::InstOpCode::COND_BE, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmpeq",node);
   return cmpResultReg;
   }
2425
2426
/**
 * \brief Evaluator for pdcmpne nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmpne node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register *
J9::Z::TreeEvaluator::pdcmpneEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmpne",node);
   cg->generateDebugCounter("PD-Op/pdcmpne", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, TR::InstOpCode::COND_BNE, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmpne",node);
   return cmpResultReg;
   }
2448
2449
/**
 * \brief Evaluator for pdcmplt nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmplt node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register *
J9::Z::TreeEvaluator::pdcmpltEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmplt",node);
   cg->generateDebugCounter("PD-Op/pdcmplt", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, TR::InstOpCode::COND_BH, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmplt",node);
   return cmpResultReg;
   }
2471
2472
/**
 * \brief Evaluator for pdcmpge nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmpge node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register *J9::Z::TreeEvaluator::pdcmpgeEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmpge",node);
   cg->generateDebugCounter("PD-Op/pdcmpge", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNL, TR::InstOpCode::COND_BNH, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmpge",node);
   return cmpResultReg;
   }
2493
2494
/**
 * \brief Evaluator for pdcmpgt nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmpgt node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register *J9::Z::TreeEvaluator::pdcmpgtEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmpgt",node);
   cg->generateDebugCounter("PD-Op/pdcmpgt", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, TR::InstOpCode::COND_BL, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmpgt",node);
   return cmpResultReg;
   }
2515
2516
/**
 * \brief Evaluator for pdcmple nodes.
 *
 * Uses pdcmpVectorEvaluatorHelper when the vector packed decimal facility is available and not disabled,
 * or when the TR_enableVectorBCD environment variable is defined; otherwise falls back to
 * generateS390CompareBool.
 *
 * \param node the pdcmple node
 * \param cg   the code generator
 * \return the register holding the comparison result
 */
TR::Register *J9::Z::TreeEvaluator::pdcmpleEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   cg->traceBCDEntry("pdcmple",node);
   cg->generateDebugCounter("PD-Op/pdcmple", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->castedToBCD() == false,"castedToBCD=true not supported for %s (%p)\n",node->getOpCode().getName(),node);
   TR::Register *cmpResultReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      cmpResultReg = generateS390CompareBool(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, TR::InstOpCode::COND_BNL, false);
      }
   else
      {
      cmpResultReg = pdcmpVectorEvaluatorHelper(node, cg);
      }

   cg->traceBCDExit("pdcmple",node);
   return cmpResultReg;
   }
2538
2539
/**
 * \brief Evaluator for packed decimal to 32-bit integer conversion nodes.
 *
 * Emits VCVB through generateVectorPackedToBinaryConversion when the vector packed decimal facility is
 * available and not disabled, or when the TR_enableVectorBCD environment variable is defined; otherwise
 * emits CVB through generatePackedToBinaryConversion.
 *
 * \param node the conversion node (its opcode name is used for the debug counter)
 * \param cg   the code generator
 * \return the register holding the binary result
 */
TR::Register *
J9::Z::TreeEvaluator::pd2iEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pd2i",node);
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
                            1, TR::DebugCounter::Cheap);
   TR::Register * binaryReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      binaryReg = generatePackedToBinaryConversion(node, TR::InstOpCode::CVB, cg);
      }
   else
      {
      binaryReg = generateVectorPackedToBinaryConversion(node, TR::InstOpCode::VCVB, cg);
      }

   cg->traceBCDExit("pd2i",node);
   return binaryReg;
   }
2560
2561
/**
 * \brief Evaluator for packed decimal to 64-bit long conversion nodes.
 *
 * Emits VCVBG through generateVectorPackedToBinaryConversion when the vector packed decimal facility is
 * available and not disabled, or when the TR_enableVectorBCD environment variable is defined; otherwise
 * emits CVBG through generatePackedToBinaryConversion.
 *
 * \param node the conversion node (its opcode name is used for the debug counter)
 * \param cg   the code generator
 * \return the register holding the binary result
 */
TR::Register *
J9::Z::TreeEvaluator::pd2lEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pd2l",node);
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
                            1, TR::DebugCounter::Cheap);
   TR::Register * binaryReg = NULL;

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool vectorFacilityUsable =
         cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
         && !cg->comp()->getOption(TR_DisableVectorBCD);

   if (!vectorFacilityUsable && !isVectorBCDEnv)
      {
      binaryReg = generatePackedToBinaryConversion(node, TR::InstOpCode::CVBG, cg);
      }
   else
      {
      binaryReg = generateVectorPackedToBinaryConversion(node, TR::InstOpCode::VCVBG, cg);
      }

   cg->traceBCDExit("pd2l",node);
   return binaryReg;
   }
2582
2583
/**
 * \brief Converts a packed decimal of run-time (variable) precision to a binary int or long.
 *
 * \p node is expected to have the conversion call as child 0 (icall selects the 32-bit conversion, anything
 * else the 64-bit one) and the address of the packed decimal as child 1. Child 2 of the call supplies the
 * precision. The length code used by the emitted instructions is precision/2.
 *
 * With \p isUseVectorBCD the value is loaded with VLRLR and converted with VCVB/VCVBG. Otherwise it is
 * copied into a stack temporary with an EX-dispatched ZAP and converted with CVB/CVBG. Unless the
 * TR_DisableTPBeforePD2I environment variable is set, the input is first tested (VTP or TP) and a branch
 * to the current BCDCHK handler label is emitted for the failing condition codes.
 *
 * \param node           parent node holding the call (child 0) and the address node (child 1)
 * \param cg             the code generator
 * \param isUseVectorBCD true to use the vector packed decimal instructions
 * \return the register holding the binary result (also set on the call node)
 */
TR::Register*
J9::Z::TreeEvaluator::pd2lVariableEvaluator(TR::Node* node, TR::CodeGenerator* cg, bool isUseVectorBCD)
   {
   cg->traceBCDEntry("pd2lVariableEvaluator",node);
   cg->generateDebugCounter("PD-Op/pd2l-var", 1, TR::DebugCounter::Cheap);

   TR::Node* callNode = node->getChild(0);
   TR::Node* addressNode = node->getChild(1);

   TR::Compilation *comp = cg->comp();

   // One routine serves both PD2I and PD2L
   const bool PD2I = (callNode->getOpCode().getOpCodeValue() == TR::icall);

   TR::Register* binaryReg = cg->allocateRegister();

   TR::InstOpCode::Mnemonic vectorConvertOp = TR::InstOpCode::VCVBG;
   if (PD2I)
      vectorConvertOp = TR::InstOpCode::VCVB;

   TR::Register* addressReg = cg->evaluate(addressNode);
   TR::Register* precisionReg = cg->evaluate(callNode->getChild(2));
   TR::Register* lengthReg = cg->allocateRegister();
   TR_ASSERT(precisionReg && (precisionReg->getKind() == TR_GPR), "precision should be a 32bit GPR");

   // byteLength = precision/2 + 1, and instruction length codes are byteLength-1,
   // so the length code is simply precision/2.
   if (!comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
      {
      generateRRInstruction(cg, TR::InstOpCode::LR, callNode, lengthReg, precisionReg);
      generateRSInstruction(cg, TR::InstOpCode::SRA, callNode, lengthReg, 0x1);
      }
   else
      {
      generateRSInstruction(cg, TR::InstOpCode::SRAK, callNode, lengthReg, precisionReg, 0x1, NULL);
      }

   TR::MemoryReference* packedMR = generateS390MemoryReference(addressReg, 0, cg);
   static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;

   if (!isUseVectorBCD)
      {
      const uint32_t tempSize = PD2I ? cg->getPackedToIntegerFixedSize()
                                     : cg->getPackedToLongFixedSize();

      // Stack temporary that receives the copy of the packed decimal
      TR_StorageReference* tempStorage = TR_StorageReference::createTemporaryBasedStorageReference(tempSize, comp);

      tempStorage->setTemporaryReferenceCount(1);

      TR::MemoryReference* tempMR = generateS390MemRefFromStorageRef(node, tempStorage, cg, false, true);
      TR::Register* tempBaseReg = cg->allocateRegister();
      /*
       * Materialize the ZAP target address with an LA ahead of the ZAP+EX pair. Any instruction that is the
       * target of EX (or EXRL) and whose memory reference has unmaterialized base/index registers needs
       * this, so that large displacement adjustments cannot touch it.
       *
       * Instruction selection emits ZAP+EX here. A later peephole replaces the EX with an EXRL, giving
       *
       *    BRC  [to EXRL]
       *    ZAP
       *    EXRL [of ZAP]
       *
       * which only works while the three instructions stay adjacent. If the ZAP addressed a far-away stack
       * slot directly, the memory reference binary encoding phase would insert large displacement
       * instructions and produce the functionally incorrect sequence
       *
       *    BRC  [to EXRL]
       *    STG
       *    LGHI
       *    LA
       *    ZAP
       *    LG
       *    EXRL
       *
       * The LA below keeps the ZAP free of such adjustments and the BRC+ZAP+EXRL group intact.
       */
      generateRXInstruction(cg, TR::InstOpCode::LA, node, tempBaseReg, tempMR);

      if (!disableTPBeforePD2I)
         {
         TR::Register* tpLengthReg = cg->allocateRegister();

         // The length code is shifted left by 4 before being used to EX-dispatch the TP
         if (!comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
            {
            generateRRInstruction(cg, TR::InstOpCode::LR, node, tpLengthReg, lengthReg);
            generateRSInstruction(cg, TR::InstOpCode::SLA, node, tpLengthReg, 4);
            }
         else
            {
            generateRSInstruction(cg, TR::InstOpCode::SLAK, node, tpLengthReg, lengthReg, 4);
            }

         auto* tpInstr = generateRSLInstruction(cg, TR::InstOpCode::TP, node, 0, generateS390MemoryReference(*packedMR, 0, cg));

         generateEXDispatch(node, cg, tpLengthReg, tpInstr);

         // Take the OOL path if the packed decimal input fails the test
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cg->getCurrentBCDCHKHandlerLabel());

         cg->stopUsingRegister(tpLengthReg);
         }

      TR::Instruction* zapInstr = generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
                                                         tempSize - 1,
                                                         generateS390MemoryReference(tempBaseReg, 0, cg),
                                                         0, packedMR);

      generateEXDispatch(node, cg, lengthReg, zapInstr);

      const TR::InstOpCode::Mnemonic storageConvertOp = PD2I ? TR::InstOpCode::CVB : TR::InstOpCode::CVBG;
      generateRXInstruction(cg, storageConvertOp, node, binaryReg, generateS390MemoryReference(*tempMR, 0, cg));

      tempStorage->setTemporaryReferenceCount(0);
      cg->stopUsingRegister(tempBaseReg);
      }
   else
      {
      // Variable length load followed by a vector convert to binary
      TR::Register* packedVReg = cg->allocateRegister(TR_VRF);
      generateVRSdInstruction(cg, TR::InstOpCode::VLRLR, node, lengthReg, packedVReg, packedMR);

      if (!disableTPBeforePD2I)
         {
         generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, packedVReg);
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC,
                                       TR::InstOpCode::COND_MASK7,
                                       node, cg->getCurrentBCDCHKHandlerLabel());
         }

      uint8_t overflowMask = 0;

      if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
         {
         overflowMask = 0x8;
         }

      generateVRRiInstruction(cg, vectorConvertOp, node, binaryReg, packedVReg, 1, overflowMask);
      cg->stopUsingRegister(packedVReg);
      }

   cg->decReferenceCount(addressNode);
   cg->stopUsingRegister(lengthReg);
   callNode->setRegister(binaryReg);

   // Counts how often the inline path for variable operations is executed
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp,
                                                               "DAA/variable/inline/(%s)/%p",
                                                               comp->signature(), node),
                            1, TR::DebugCounter::Undetermined);

   cg->traceBCDExit("pd2lVariableEvaluator",node);

   return binaryReg;
   }
2746
2747
/**
 * \brief Evaluator helper that converts a packed decimal held in a vector register to a binary
 *        value using the VCVB or VCVBG instruction.
 *
 * \param node The conversion node; its first child is evaluated to obtain the packed decimal value.
 * \param op   The conversion mnemonic; must be VCVB or VCVBG.
 * \param cg   The code generator.
 *
 * \return The newly allocated register that receives the conversion result. It is also set as
 *         the register of \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::generateVectorPackedToBinaryConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)
   {
   TR_ASSERT( op == TR::InstOpCode::VCVB || op == TR::InstOpCode::VCVBG,"unexpected opcode in gen vector pd2i\n");

   // The same kind of register is allocated for both VCVB and VCVBG, so no opcode-based selection is needed
   TR::Register *rResultReg = cg->allocateRegister();

   // evaluate pdload
   TR::Node *pdValueNode = node->getFirstChild();
   TR::Register *vPdValueReg = cg->evaluate(pdValueNode);
   TR_ASSERT(vPdValueReg->getKind() == TR_VRF || vPdValueReg->getKind() == TR_FPR, "Vector register expected.");

   // Unless disabled through the environment, emit a VTP on the input followed by a conditional
   // branch (mask 7) to the current BCDCHK handler label before attempting the conversion
   static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;
   if (!disableTPBeforePD2I)
      {
      generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, vPdValueReg);
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC,
                                    TR::InstOpCode::COND_MASK7, node,
                                    cg->getCurrentBCDCHKHandlerLabel());
      }

   // The overflow mask is only set when the enhancement facility is available and the code
   // generator has been asked to ignore decimal overflow exceptions
   uint8_t ignoreOverflowMask = 0;

   if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
      {
      ignoreOverflowMask = 0x8;
      }

   // Convert to signed binary of either 32-bit or 64-bit long
   generateVRRiInstruction(cg, op, node, rResultReg, vPdValueReg, 0x1, ignoreOverflowMask);

   cg->decReferenceCount(pdValueNode);
   node->setRegister(rResultReg);
   return rResultReg;
   }
2783
2784
/**
 * \brief Evaluator helper that converts a packed decimal in storage to a binary value using the
 *        CVB or CVBG instruction.
 *
 * \param node The conversion node; its first child is evaluated as a BCD node.
 * \param op   The conversion mnemonic; must be CVB or CVBG.
 * \param cg   The code generator.
 *
 * \return The newly allocated register that receives the conversion result. It is also set as
 *         the register of \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::generatePackedToBinaryConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)
   {
   TR_ASSERT( op == TR::InstOpCode::CVB || op == TR::InstOpCode::CVBG,"unexpected opcode in generatePackedToBinaryConversion\n");
   TR::Register *targetReg = cg->allocateRegister();

   TR::Node *firstChild = node->getFirstChild();
   TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);

   // CVB and CVBG operate on a fixed size source operand so materialize the child to exactly that size
   int32_t requiredSourceSize = op == TR::InstOpCode::CVB ? cg->getPackedToIntegerFixedSize() : cg->getPackedToLongFixedSize();
   TR::MemoryReference *sourceMR = cg->materializeFullBCDValue(firstChild,
                                                               firstReg,
                                                               requiredSourceSize,
                                                               requiredSourceSize,
                                                               false, // updateStorageReference
                                                               false); // alwaysEnforceSSLimits -- to be used in CVB

   TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
   sourceMR = reuseS390LeftAlignedMemoryReference(sourceMR, firstChild, firstStorageReference, cg, requiredSourceSize, false); // enforceSSLimits=false for CVB

   static bool disableTPBeforePD2I = feGetEnv("TR_DisableTPBeforePD2I") != NULL;

   // Unless disabled through the environment, emit a TP on the input followed by a conditional
   // branch (mask 7) to the current BCDCHK handler label before attempting the conversion
   if (!disableTPBeforePD2I)
      {
      generateRSLInstruction(cg, TR::InstOpCode::TP, node, firstReg->getSize() - 1, generateS390RightAlignedMemoryReference(*sourceMR, firstChild, 0, cg, false));
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, cg->getCurrentBCDCHKHandlerLabel());
      }

   // The same RX form is used for both CVB and CVBG
   generateRXInstruction(cg, op, node, targetReg, sourceMR);

   // Only mark the child register when the conversion read directly from the child's own storage reference
   if (sourceMR->getStorageReference() == firstStorageReference)
      firstReg->setHasKnownValidSignAndData();

   // Create a debug counter to track how often we execute the inline path
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "DAA/inline/(%s)/%p", cg->comp()->signature(), node), 1, TR::DebugCounter::Undetermined);

   cg->decReferenceCount(firstChild);
   node->setRegister(targetReg);
   return targetReg;
   }
2827
2828
/**
 * \brief Evaluates an i2pd node by dispatching to either the vector (VCVD) or the
 *        non-vector (CVD) binary to packed decimal conversion helper.
 *
 * \param node The i2pd node to evaluate.
 * \param cg   The code generator.
 *
 * \return The register produced by the selected conversion helper.
 */
TR::Register *
J9::Z::TreeEvaluator::i2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("i2pd",node);
   cg->generateDebugCounter("PD-Op/i2pd", 1, TR::DebugCounter::Cheap);

   // The vector path is taken when the facility is present and not disabled by option, or
   // unconditionally when the TR_enableVectorBCD environment variable is set
   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool takeVectorPath =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   TR::Register *resultReg = takeVectorPath
      ? generateVectorBinaryToPackedConversion(node, TR::InstOpCode::VCVD, cg)
      : generateBinaryToPackedConversion(node, TR::InstOpCode::CVD, cg);

   cg->traceBCDExit("i2pd",node);
   return resultReg;
   }
2848
2849
/**
 * \brief Evaluates an l2pd node by dispatching to either the vector (VCVDG) or the
 *        non-vector (CVDG) binary to packed decimal conversion helper.
 *
 * \param node The l2pd node to evaluate.
 * \param cg   The code generator.
 *
 * \return The register produced by the selected conversion helper.
 */
TR::Register *
J9::Z::TreeEvaluator::l2pdEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("l2pd",node);
   cg->generateDebugCounter("PD-Op/l2pd", 1, TR::DebugCounter::Cheap);

   // The vector path is taken when the facility is present and not disabled by option, or
   // unconditionally when the TR_enableVectorBCD environment variable is set
   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool takeVectorPath =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   TR::Register *resultReg = takeVectorPath
      ? generateVectorBinaryToPackedConversion(node, TR::InstOpCode::VCVDG, cg)
      : generateBinaryToPackedConversion(node, TR::InstOpCode::CVDG, cg);

   cg->traceBCDExit("l2pd",node);
   return resultReg;
   }
2869
2870
/**
2871
* \brief This evaluator helper function evaluates i2pd and l2pd conversion nodes
2872
* using CVD or CVDG instructions.
2873
*
2874
*/
2875
TR::Register *
2876
J9::Z::TreeEvaluator::generateBinaryToPackedConversion(TR::Node * node,
2877
TR::InstOpCode::Mnemonic op,
2878
TR::CodeGenerator * cg)
2879
{
2880
TR_ASSERT( op == TR::InstOpCode::CVD || op == TR::InstOpCode::CVDG,
2881
"unexpected opcode in generateBinaryToPackedConversion\n");
2882
2883
TR_PseudoRegister *targetReg = cg->allocatePseudoRegister(node->getDataType());
2884
TR::Compilation *comp = cg->comp();
2885
bool isI2PD = op == TR::InstOpCode::CVD;
2886
TR_StorageReference *hint = node->getStorageReferenceHint();
2887
int32_t cvdSize = isI2PD ? cg->getIntegerToPackedFixedSize() : cg->getLongToPackedFixedSize();
2888
TR_StorageReference *targetStorageReference = hint ? hint : TR_StorageReference::createTemporaryBasedStorageReference(cvdSize, comp);
2889
targetReg->setStorageReference(targetStorageReference, node);
2890
2891
TR::Node *firstChild = node->getFirstChild();
2892
TR::Register *firstReg = cg->evaluate(firstChild);
2893
TR::MemoryReference *targetMR = generateS390LeftAlignedMemoryReference(node,
2894
targetStorageReference,
2895
cg,
2896
cvdSize,
2897
false); // enforceSSLimits=false for CVD
2898
2899
generateRXInstruction(cg, op, node, firstReg, targetMR);
2900
2901
targetReg->setIsInitialized();
2902
2903
cg->stopUsingRegister(firstReg);
2904
cg->decReferenceCount(firstChild);
2905
node->setRegister(targetReg);
2906
return targetReg;
2907
}
2908
2909
2910
/**
 * \brief Evaluates a pdneg node.
 *
 * Depending on what is known about the sign of the child, one of three sequences is generated:
 *   - the sign code is known or assumed to be 0xf: the preferred minus code is set directly,
 *   - the sign is a known/assumed preferred or clean sign: the sign byte is flipped with an XI 0x01,
 *   - the sign is only known to be valid: a register based sequence computes and stores the new sign byte.
 * If none of these apply the compilation is failed.
 *
 * \param node The pdneg node; it must have exactly one child.
 * \param cg   The code generator.
 *
 * \return The pseudo register holding the result. It is also set as the register of \p node.
 */
TR::Register *
J9::Z::TreeEvaluator::pdnegEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pdneg",node);
   cg->generateDebugCounter("PD-Op/pdneg", 1, TR::DebugCounter::Cheap);

   TR_ASSERT(node->getNumChildren() == 1, "pdneg should only have 1 child");

   TR::Node *operandNode = node->getFirstChild();
   TR_PseudoRegister *operandReg = cg->evaluateBCDNode(operandNode);
   TR::Compilation *comp = cg->comp();

   TR::MemoryReference *operandMR = generateS390RightAlignedMemoryReference(operandNode, operandReg->getStorageReference(), cg);

   // also do for assumed (PFD) preferred and clean signs?
   int32_t operandSign;
   if (operandReg->hasKnownOrAssumedSignCode())
      operandSign = operandReg->getKnownOrAssumedSignCode();
   else
      operandSign = TR::DataType::getInvalidSignCode();

   bool canUseRegSequence = operandReg->hasKnownValidSign();
   bool signIs0xF = (operandSign == 0xf);
   bool canFlipSignInPlace = operandSign == TR::DataType::getPreferredPlusCode() ||
                             operandSign == TR::DataType::getPreferredMinusCode() ||
                             operandReg->hasKnownOrAssumedPreferredSign() ||
                             operandReg->hasKnownOrAssumedCleanSign();
   bool signSetIsSimple = signIs0xF || canFlipSignInPlace;
   bool mustFullyInitialize = !canUseRegSequence || signSetIsSimple;
   bool resultTruncates = node->getDecimalPrecision() < operandReg->getDecimalPrecision();
   bool resultWidens = node->getDecimalPrecision() > operandReg->getDecimalPrecision();

   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tpdnegEvaluator: isTruncation=%s, isWiden=%s, srcSign = 0x%x, srcSignIsValid=%s, isSimpleSignSet=%s, useRegBasedSequence=%s, needsFullInitialization=%s (== !useRegBasedSequence || isSimpleSignSet)\n",
               resultTruncates ? "yes":"no",
               resultWidens ? "yes":"no",
               operandSign,
               operandReg->hasKnownValidSign() ? "yes":"no",
               signSetIsSimple ? "yes":"no",
               canUseRegSequence?"yes":"no",
               mustFullyInitialize? "yes":"no");
      }

   TR_PseudoRegister *resultReg = evaluateBCDSignModifyingOperand(node,
                                                                  false, // isEffectiveNop=false
                                                                  false, // isNondestructiveNop=false
                                                                  mustFullyInitialize,
                                                                  operandMR,
                                                                  cg);
   resultReg->setDecimalPrecision(std::min<int32_t>(node->getDecimalPrecision(), operandReg->getDecimalPrecision()));

   TR::MemoryReference *resultMR = generateS390LeftAlignedMemoryReference(node, resultReg->getStorageReference(), cg, resultReg->getSize());

   if (operandReg->hasKnownValidData())
      resultReg->setHasKnownValidData();

   // When the operand was not fully initialized above, copy every byte except the one holding the sign
   if (!mustFullyInitialize && !resultReg->isInitialized() && resultReg->getSize() > 1)
      {
      int32_t bytesToCopy = resultReg->getSize() - 1; // do not include the least significant byte as this is done as part of the sign setting below
      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\ttargetReg is not init and size %d > 1 so gen MVC with size targetRegSize-1 = %d and leftMostByte %d\n",
                  resultReg->getSize(),bytesToCopy,resultReg->getSize());
         }
      generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
                             bytesToCopy-1,
                             reuseS390LeftAlignedMemoryReference(resultMR, node, resultReg->getStorageReference(), cg, resultReg->getSize()),
                             reuseS390LeftAlignedMemoryReference(operandMR, operandNode, operandReg->getStorageReference(), cg, resultReg->getSize()));
      }

   bool signWasManipulated = false;
   if (signIs0xF)
      {
      cg->genSignCodeSetting(node, resultReg, resultReg->getSize(), resultMR, TR::DataType::getPreferredMinusCode(), operandReg, 0, false); // digitsToClear=0, numericNibbleIsZero=false
      if (resultReg->getDataType() == TR::PackedDecimal && resultReg->isEvenPrecision())
         cg->genZeroLeftMostDigitsIfNeeded(node, resultReg, resultReg->getSize(), 1, resultMR);
      }
   else if (canFlipSignInPlace)
      {
      signWasManipulated = true;
      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\tsrcReg has known preferred (%s) or known clean (%s) sign so gen XI 0x1 of sign byte to flip it\n",
                  operandReg->hasKnownPreferredSign()?"yes":"no",operandReg->hasKnownCleanSign()?"yes":"no");
         }
      generateSIInstruction(cg, TR::InstOpCode::XI, node, reuseS390LeftAlignedMemoryReference(resultMR, node, resultReg->getStorageReference(), cg, 1), 0x01);
      if (resultReg->getDataType() == TR::PackedDecimal && resultReg->isEvenPrecision())
         cg->genZeroLeftMostDigitsIfNeeded(node, resultReg, resultReg->getSize(), 1, resultMR);
      }
   else if (canUseRegSequence)
      {
      signWasManipulated = true;

      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\ttargetReg has unknown but valid sign so generate register based decode sequence\n");

      TR::Register *scratchSignReg = cg->allocateRegister();
      TR::Register *newSignReg = cg->allocateRegister();
      TR::Register *dataBitsReg = cg->allocateRegister();

      // Load the byte containing the sign and fan it out into the three working registers
      generateRXInstruction(cg, TR::InstOpCode::LB, node, scratchSignReg, reuseS390LeftAlignedMemoryReference(operandMR, operandNode, operandReg->getStorageReference(), cg, 1));

      generateRRInstruction(cg, TR::InstOpCode::LR, node, newSignReg, scratchSignReg);
      generateRRInstruction(cg, TR::InstOpCode::LR, node, dataBitsReg, scratchSignReg);

      generateRIInstruction(cg, TR::InstOpCode::AHI, node, scratchSignReg, 1);
      generateRIInstruction(cg, TR::InstOpCode::NILL, node, dataBitsReg, 0xF0);

      if (resultReg->getDataType() == TR::PackedDecimal && resultReg->isEvenPrecision())
         cg->genZeroLeftMostDigitsIfNeeded(node, resultReg, resultReg->getSize(), 1, resultMR);

      // RISBGN is selected on zEC12 and later, RISBG otherwise; the operands are the same
      TR::InstOpCode::Mnemonic rotateOp = comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
         ? TR::InstOpCode::RISBGN
         : TR::InstOpCode::RISBG;
      generateRIEInstruction(cg, rotateOp, node, dataBitsReg, scratchSignReg, 63, 63, 64-3);

      generateRRInstruction(cg, comp->target().is64Bit() ? TR::InstOpCode::NGR : TR::InstOpCode::NR, node, newSignReg, dataBitsReg);
      generateRILInstruction(cg, TR::InstOpCode::XILF, node, newSignReg, 13);

      generateRXInstruction(cg, TR::InstOpCode::STC, node, newSignReg, reuseS390LeftAlignedMemoryReference(resultMR, node, resultReg->getStorageReference(), cg, 1));

      cg->stopUsingRegister(scratchSignReg);
      cg->stopUsingRegister(newSignReg);
      cg->stopUsingRegister(dataBitsReg);
      }
   else
      {
      // This path used to contain a call to an API which would have returned a garbage result. Rather than 100% of the
      // time generating an invalid sequence here which is guaranteed to crash if executed, we fail the compilation.
      cg->comp()->failCompilation<TR::CompilationException>("Existing code relied on an unimplemented API and is thus not safe. See eclipse/omr#5937.");
      }

   // Carry the sign related state of the operand over to the result for the flip and register based paths
   if (signWasManipulated)
      {
      if (operandReg->hasKnownPreferredSign())
         {
         resultReg->setHasKnownPreferredSign();
         }
      else if (operandReg->hasAssumedPreferredSign())
         {
         resultReg->setHasAssumedPreferredSign();
         }
      else
         {
         resultReg->setSignStateInitialized();
         }

      if (operandReg->hasKnownValidSign())
         resultReg->setHasKnownValidSign();
      }

   resultReg->transferDataState(operandReg);
   resultReg->setIsInitialized();

   node->setRegister(resultReg);
   cg->decReferenceCount(operandNode);
   cg->traceBCDExit("pdneg",node);
   return resultReg;
   }
3055
3056
/**
 * \brief BCD-only wrapper around evaluateValueModifyingOperand that returns the result as a
 *        TR_PseudoRegister.
 *
 * All arguments are forwarded unchanged to evaluateValueModifyingOperand.
 *
 * \param node The node being evaluated; its type must be BCD.
 *
 * \return The pseudo register obtained from the register returned by evaluateValueModifyingOperand.
 */
TR_PseudoRegister *
J9::Z::TreeEvaluator::evaluateBCDValueModifyingOperand(TR::Node * node,
                                                       bool initTarget,
                                                       TR::MemoryReference *sourceMR,
                                                       TR::CodeGenerator * cg,
                                                       bool trackSignState,
                                                       int32_t sourceSize,
                                                       bool alwaysLegalToCleanSign) // alwaysLegalToCleanSign=true then a ZAP can be used to init/widen if another signMod inst is coming (e.g. AP)
   {
   TR_ASSERT(node->getType().isBCD(),"node %p type %s must be BCD\n",node,node->getDataType().toString());

   TR_PseudoRegister *pseudoReg =
      evaluateValueModifyingOperand(node, initTarget, sourceMR, cg, trackSignState, sourceSize, alwaysLegalToCleanSign)->getPseudoRegister();

   TR_ASSERT(pseudoReg,"pseudoReg should be non-NULL for node %p\n",node);
   return pseudoReg;
   }
3071
3072
3073
/**
 * \brief Evaluates the first child of a BCD or aggregate \p node and sets up the target register
 *        and storage reference for an operation that will modify the value.
 *
 * Two main cases are handled:
 *   - The child register is already initialized and no new store hint is being used: the child's
 *     storage reference is reused for the target (after a possible clobber evaluate), growing the
 *     temporary or allocating a new temporary if the live symbol size is too small for the result.
 *   - Otherwise: the node's storage reference hint (or a newly created temporary) becomes the
 *     target storage reference and, when \p initTarget is set, it is initialized from the child.
 *
 * \param node                   The node being evaluated; its type must be BCD or aggregate.
 * \param initTarget             When true the target storage reference is initialized from the child.
 * \param sourceMR               Memory reference for the child; passed to the clobber evaluate and initialization routines.
 * \param cg                     The code generator.
 * \param trackSignState         Forwarded to the storage reference initialization routines.
 * \param sourceSize             Size of the source; a value of 0 means use the size of the child register.
 * \param alwaysLegalToCleanSign Forwarded to the storage reference initialization routines.
 *
 * \return The target register. It is also set as the register of \p node.
 */
TR_OpaquePseudoRegister *
J9::Z::TreeEvaluator::evaluateValueModifyingOperand(TR::Node * node,
                                                    bool initTarget,
                                                    TR::MemoryReference *sourceMR,
                                                    TR::CodeGenerator * cg,
                                                    bool trackSignState,
                                                    int32_t sourceSize,
                                                    bool alwaysLegalToCleanSign) // alwaysLegalToCleanSign=true then a ZAP can be used to init/widen if another signMod inst is coming (e.g. AP)
   {
   bool isBCD = node->getType().isBCD();
   bool isAggr = node->getType().isAggregate();
   TR_ASSERT(isBCD || isAggr,"node %p type %s must be BCD or aggregate\n",node,node->getDataType().toString());

   TR_OpaquePseudoRegister *targetReg = isBCD ? cg->allocatePseudoRegister(node->getDataType()) : cg->allocateOpaquePseudoRegister(node->getDataType());
   TR_PseudoRegister *targetBCDReg = targetReg->getPseudoRegister();

   TR::Node *firstChild = node->getFirstChild();
   TR_OpaquePseudoRegister *firstReg = cg->evaluateOPRNode(firstChild);
   TR_PseudoRegister *firstBCDReg = firstReg->getPseudoRegister();
   TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
   TR::Compilation *comp = cg->comp();

   bool isInitialized = firstReg->isInitialized();
   if (cg->traceBCDCodeGen())
      traceMsg(comp,"\tevaluateValueModifyingOperand for %s (%p) with targetReg %s and firstReg %s (#%d isInit %s), sourceSize=%d : initTarget=%s, alwaysLegalToCleanSign=%s\n",
               node->getOpCode().getName(),node,cg->getDebug()->getName(targetReg),cg->getDebug()->getName(firstReg),
               firstStorageReference->getReferenceNumber(),isInitialized ? "yes":"no",sourceSize,initTarget ? "yes":"no",alwaysLegalToCleanSign ? "yes":"no");

   // A sourceSize of zero means the caller did not specify one so default to the size of the child register
   if (sourceSize == 0)
      sourceSize = firstReg->getSize();

   // to avoid a clobber evaluate in the isInitialized case favour initializing to an available store hint and leave the isInitialized child untouched
   // also force to a new hint even if refCount==1 if there is ZAP widening to be done (and save a later clear)
   bool useNewStoreHint = !comp->getOption(TR_DisableNewStoreHint) &&
                          node->getOpCode().canHaveStorageReferenceHint() &&
                          initTarget && // have to also be initializing here otherwise in caller
                          node->getStorageReferenceHint() &&
                          node->getStorageReferenceHint()->isNodeBasedHint() &&
                          (firstChild->getReferenceCount() > 1 || node->getStorageReferenceSize() > sourceSize) &&
                          node->getStorageReferenceHint() != firstStorageReference;

   // In a production build conservatively do not use a hint that is too small for the node result
   if (useNewStoreHint && node->getStorageReferenceHint()->getSymbolSize() < node->getStorageReferenceSize())
      {
      useNewStoreHint = false;
      TR_ASSERT(false,"a storageRef hint should be big enough for the node result (%d is not >= %d)\n",
                node->getStorageReferenceHint()->getSymbolSize(),node->getStorageReferenceSize());
      }

   if (isInitialized && !useNewStoreHint)
      {
      // Save the storage reference dependent state leftAlignedZeroDigits, rightAlignedDeadBytes and the derived liveSymbolSize before
      // the possible call to ssrClobberEvaluate below.
      // If a clobber evaluate is done then the above mentioned state will be reset on firstReg (so subsequent commoned uses of firstReg that now
      // use the newly created temporary storage reference are correct). Cache the values here as this state *will* persist up this tree on the targetReg.
      int32_t savedLiveSymbolSize = firstReg->getLiveSymbolSize();
      int32_t savedLeftAlignedZeroDigits = firstReg->getLeftAlignedZeroDigits();
      int32_t savedRightAlignedDeadBytes = firstReg->getRightAlignedDeadBytes();
      int32_t savedRightAlignedIgnoredBytes = firstReg->getRightAlignedIgnoredBytes();
      bool skipClobberEvaluate = false;
      if (node->getOpCode().isBasicOrSpecialPackedArithmetic())
         {
         // The special case of mul/add/sub/div = op1*op1 does not need a clobber evaluate as there are no uses beyond the current node's operation
         if (node->getNumChildren() > 1 &&
             node->getFirstChild() == node->getSecondChild() &&
             node->getFirstChild()->getReferenceCount() == 2 &&
             firstStorageReference->getOwningRegisterCount() == 1)
            {
            skipClobberEvaluate = true;
            }
         }
      if (!skipClobberEvaluate)
         cg->ssrClobberEvaluate(firstChild, sourceMR);
      int32_t resultSize = node->getStorageReferenceSize();
      if (cg->traceBCDCodeGen())
         traceMsg(comp,"\tisInitialized==true: liveSymSize %d (symSize %d - firstReg->deadAndIgnoredBytes %d), resultSize = %d (nodeSize %d)\n",
                  savedLiveSymbolSize,firstStorageReference->getSymbolSize(),firstReg->getRightAlignedDeadAndIgnoredBytes(),resultSize,node->getSize());
      if (savedLiveSymbolSize < resultSize)
         {
         // In this case the source memory slot has been initialized but it is no longer large enough to contain the result for the current node.
         // Therefore either the size of the symbol must be increased (for autos) or a new larger, memory slot must be created and initialized (for non-autos)
         if (firstStorageReference->isTemporaryBased())
            {
            if (cg->traceBCDCodeGen())
               {
               traceMsg(comp,"\treg->getLiveSymbolSize() < resultSize (%d < %d) so call increaseTemporarySymbolSize\n",savedLiveSymbolSize,resultSize);
               traceMsg(comp,"\t\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (valueMod incSize)\n",
                        savedRightAlignedDeadBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
               traceMsg(comp,"\t\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (valueMod incSize)\n",
                        savedRightAlignedIgnoredBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
               }
            targetReg->setStorageReference(firstStorageReference, node);
            targetReg->increaseTemporarySymbolSize(resultSize - savedLiveSymbolSize);
            targetReg->setRightAlignedDeadBytes(savedRightAlignedDeadBytes);
            targetReg->setRightAlignedIgnoredBytes(savedRightAlignedIgnoredBytes);
            }
         else
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\t\tfirstStorageReference is not temporary based and liveSymSize < resultSize (%d < %d) so alloc and init a new temp slot and clear left most bytes\n",
                        savedLiveSymbolSize,resultSize);
            int32_t destLength = resultSize;
            int32_t srcLength = sourceSize;
            // If the firstStorageReference is not a temp or a hint then the recursive dec in setStorageReference() will be wrong.
            // This should always be true because this is the initialized case and it is not legal to initialize a non-temp or non-hint.
            TR_ASSERT( firstStorageReference->isNodeBasedHint(), "expecting the srcStorargeReference to be a node based hint\n");
            bool performExplicitWidening = false;
            cg->initializeNewTemporaryStorageReference(node, targetReg, destLength, firstChild, firstReg, srcLength, sourceMR, performExplicitWidening, alwaysLegalToCleanSign, trackSignState);
            if (targetBCDReg)
               {
               TR_ASSERT(firstBCDReg,"firstBCDReg should be non-NULL when targetBCDReg is non-NULL for node %p\n",firstChild);
               if (performExplicitWidening)
                  targetBCDReg->setDecimalPrecision(node->getDecimalPrecision());
               else
                  targetBCDReg->setDecimalPrecision(firstBCDReg->getDecimalPrecision());
               }
            else
               {
               if (performExplicitWidening)
                  targetReg->setSize(node->getSize());
               else
                  targetReg->setSize(firstReg->getSize());
               }
            }
         }
      else
         {
         if (cg->traceBCDCodeGen())
            {
            traceMsg(comp,"\tliveSymSize >= resultSize (%d >= %d) so can reuse the firstStorageReference #%d for the targetStorageReference\n",
                     savedLiveSymbolSize,resultSize,firstStorageReference->getReferenceNumber());
            traceMsg(comp,"\t\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (valueMod reuse)\n",
                     savedRightAlignedDeadBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
            traceMsg(comp,"\t\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (valueMod reuse)\n",
                     savedRightAlignedIgnoredBytes,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
            traceMsg(comp,"\t\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (valueMod reuse)\n",
                     savedLeftAlignedZeroDigits,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
            }
         targetReg->setStorageReference(firstStorageReference, node);
         targetReg->setLeftAlignedZeroDigits(savedLeftAlignedZeroDigits);
         targetReg->setRightAlignedDeadBytes(savedRightAlignedDeadBytes);
         targetReg->setRightAlignedIgnoredBytes(savedRightAlignedIgnoredBytes);
         }
      targetReg->setIsInitialized();
      cg->freeUnusedTemporaryBasedHint(node);
      }
   else
      {
      // when initializing the hint storage reference use the symbol size and not the current node size so the same storage reference may be used
      // without further zero initialization for larger node sizes
      TR_StorageReference *targetStorageReference = NULL;
      int32_t destLength = 0;
      if (node->getOpCode().canHaveStorageReferenceHint() && node->getStorageReferenceHint())
         {
         int32_t resultSize = node->getStorageReferenceSize();
         targetStorageReference = node->getStorageReferenceHint();
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tusing storageRefHint #%d on node %p (useNewStoreHintOnInit=%d)\n",targetStorageReference->getReferenceNumber(),node,useNewStoreHint && isInitialized);
         if (targetStorageReference->isTemporaryBased())
            {
            // Consider this scenario (common when a sub-expression is rooted in a load of a large value returned from a runtime routine)
            //
            //     store
            //       x      <- size < 10
            //         y    <- current node size=10
            //           z  <- size > 10 and a passThrough operation
            //             load <- size > 10
            //
            // The temporary hint is the size of z but if performExplicitWidening is also set to true below then code will be generated to initialize up
            // to the size of z even though this extra initialized space will be unused for the rest of the operation.
            // Nodes (x,y,z) that share the same hint are tracked and removed when the node is evaluated. At the current node's (y) initialization point
            // only x,y will be in this list and only up to size=10 will be initialized.
            destLength = targetStorageReference->getMaxSharedNodeSize();
            }
         }
      else
         {
         targetStorageReference = TR_StorageReference::createTemporaryBasedStorageReference(node->getStorageReferenceSize(), comp);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tcreated new targetStorageReference #%d on node %p\n",targetStorageReference->getReferenceNumber(),node);
         }

      if (destLength > 0)
         {
         // update the symSize so in the initTarget=false case a consumer will not do a needlessly large initialization
         targetStorageReference->getTemporarySymbol()->setActiveSize(destLength);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsetting destLength and activeSize for initialization based on the smallest remaining node left on the temp based hint #%d : %d\n",
                     targetStorageReference->getReferenceNumber(),destLength);
         }
      else if (destLength == 0)
         {
         destLength = targetStorageReference->getSymbolSize();
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsetting destLength for initialization based on the current storageRef #%d size : %d\n",targetStorageReference->getReferenceNumber(),destLength);
         }
      else
         {
         TR_ASSERT(false,"unexpected negative destLength of %d for node %p\n",destLength,node);
         }

      targetReg->setStorageReference(targetStorageReference, node);
      if (initTarget)
         {
         int32_t srcLength = sourceSize;
         TR::MemoryReference *destMR = isBCD ?
            generateS390RightAlignedMemoryReference(node, targetStorageReference, cg) :
            generateS390MemRefFromStorageRef(node, targetStorageReference, cg);
         // for packed to packed operations this is likely the start of some (possibly large) computation so *do* perform the explicit widening all at once at
         // the start so later operations do not have to clear.
         bool performExplicitWidening = targetReg->getDataType() == TR::PackedDecimal && firstReg->getDataType() == TR::PackedDecimal;

         // When the target and the child refer to matching storage of the same live size, the target
         // inherits the larger count of known left aligned zero digits from the child
         int32_t zeroDigits = firstReg->getLeftAlignedZeroDigits();
         if (isBCD &&
             zeroDigits > 0 &&
             zeroDigits > targetReg->getLeftAlignedZeroDigits() &&
             firstReg->getLiveSymbolSize() == targetReg->getLiveSymbolSize() &&
             cg->storageReferencesMatch(targetStorageReference, firstStorageReference))
            {
            if (cg->traceBCDCodeGen())
               traceMsg(comp,"\ty^y : transfer leftAlignedZeroDigits %d from firstReg %s to targetReg %s (node %s %p)\n",
                        zeroDigits,cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg),node->getOpCode().getName(),node);
            targetReg->setLeftAlignedZeroDigits(zeroDigits);
            }

         cg->initializeStorageReference(node, targetReg, destMR, destLength, firstChild, firstReg, sourceMR, srcLength, performExplicitWidening, alwaysLegalToCleanSign, trackSignState);
         if (targetBCDReg)
            {
            TR_ASSERT(firstBCDReg,"firstBCDReg should be non-NULL when targetBCDReg is non-NULL for node %p\n",firstChild);
            if (performExplicitWidening)
               targetBCDReg->setDecimalPrecision(node->getDecimalPrecision());
            else
               targetBCDReg->setDecimalPrecision(firstBCDReg->getDecimalPrecision());
            targetBCDReg->transferDataState(firstBCDReg);
            }
         else
            {
            if (performExplicitWidening)
               targetReg->setSize(node->getSize());
            else
               targetReg->setSize(firstReg->getSize());
            }
         targetReg->setIsInitialized();
         }
      }

   // The target is about to be modified so its storage reference can no longer be treated as a read only temporary
   if (cg->traceBCDCodeGen() && targetReg->getStorageReference()->isReadOnlyTemporary())
      traceMsg(comp,"reset readOnlyTemp flag on storageRef #%d (%s) (valueMod case)\n",
               targetReg->getStorageReference()->getReferenceNumber(),cg->getDebug()->getName(targetReg->getStorageReference()->getSymbol()));
   targetReg->getStorageReference()->setIsReadOnlyTemporary(false, NULL);
   node->setRegister(targetReg);
   return targetReg;
   }
3326
3327
/**
3328
* Handles all BCD and aggregate load and const types direct and indirect
3329
*
3330
* pdload
3331
* pdloadi
3332
*
3333
* zdload
3334
* zdloadi
3335
*
3336
* zdsleLoad
3337
* zdsleLoadi
3338
*
3339
* zdslsLoad
3340
* zdslsLoadi
3341
*
3342
* zdstsLoad
3343
* zdstsLoadi
3344
*
3345
* udLoad
3346
* udLoadi
3347
*
3348
* udstLoad
3349
* udstLoadi
3350
*
3351
* udslLoad
3352
* udslLoadi
3353
*/
3354
TR::Register *J9::Z::TreeEvaluator::pdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
3355
{
3356
cg->traceBCDEntry("pdload",node);
3357
TR::Register* reg = NULL;
3358
3359
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
3360
1, TR::DebugCounter::Cheap);
3361
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
3362
if((cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv) &&
3363
(node->getOpCodeValue() == TR::pdload || node->getOpCodeValue() == TR::pdloadi))
3364
{
3365
reg = pdloadVectorEvaluatorHelper(node, cg);
3366
}
3367
else
3368
{
3369
reg = pdloadEvaluatorHelper(node, cg);
3370
}
3371
3372
cg->traceBCDExit("pdload",node);
3373
return reg;
3374
}
3375
3376
3377
/**
 * \brief Non-vector evaluation of a BCD (or other in-memory typed) load.
 *
 * Creates a node based storage reference for \p node, allocates a pseudo register (BCD types)
 * or an opaque pseudo register (non-BCD types) that points at it, copies the sign state and
 * precision information recorded on the node onto the register, sets the register on the node
 * and finally calls privatizeStorageReference().
 *
 * \param node the load (or load constant) node being evaluated; must not use a
 *             tempVariableSizeSymRef
 * \param cg   the code generator
 * \return     the (opaque) pseudo register set on \p node
 */
TR::Register *J9::Z::TreeEvaluator::pdloadEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();

   bool isBCD = node->getType().isBCD();

   TR_ASSERT(node->getOpCode().isLoadConst() ||
             (node->getOpCode().hasSymbolReference() && node->getSymbolReference() && !node->getSymbolReference()->isTempVariableSizeSymRef()),
             "load node %p must not be of a tempVariableSizeSymRef\n",node);

   TR_StorageReference *storageRef = TR_StorageReference::createNodeBasedStorageReference(node, node->getReferenceCount(), comp);

   TR_ASSERT(!node->getOpCode().isLoadConst() || node->getNumChildren() == 1,"BCD constant type (%s) should have 1 child and not %d children\n",
             node->getDataType().toString(),node->getNumChildren());
   bool isConstant = node->getOpCode().isLoadConst();
   // never set to true in this function, so the privatizeStorageReference call at the end is always made
   bool isReadOnlyConstant = false;

   TR_OpaquePseudoRegister *targetReg = NULL;
   if (isBCD)
      {
      targetReg = cg->allocatePseudoRegister(node->getDataType());
      TR_PseudoRegister *targetPseudoReg = targetReg->getPseudoRegister();
      TR_ASSERT(targetPseudoReg,"targetPseudoReg should be non-NULL for node %p\n",node);
      targetPseudoReg->setStorageReference(storageRef, node);
      if (isConstant)
         {
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\t%s (%p) is a constant load so set hasKnownValidSignAndData = true%s\n",
               node->getOpCode().getName(),node,isReadOnlyConstant?" and skip privatizeStorageReference":"");
         targetPseudoReg->setHasKnownValidSignAndData();
         }

      // transfer a known/assumed raw sign code from the node to the register
      if (node->hasKnownOrAssumedSignCode())
         {
         switch (node->getKnownOrAssumedSignCode())
            {
            case raw_bcd_sign_0xc:
               node->hasKnownSignCode() ? targetPseudoReg->setKnownSignCode(0xc) : targetPseudoReg->setAssumedSignCode(0xc);
               break;
            case raw_bcd_sign_0xd:
               node->hasKnownSignCode() ? targetPseudoReg->setKnownSignCode(0xd) : targetPseudoReg->setAssumedSignCode(0xd);
               break;
            case raw_bcd_sign_0xf:
               if (node->hasKnownOrAssumedCleanSign())
                  {
                  // Something has gone wrong and we've ended up with conflicting sign code properties on the node
                  // This is a bug and should be fixed but in a prod build conservatively reset the clean sign flag
                  // on the node. Note that no sign code is set on the targetPseudoReg on this path.
                  TR_ASSERT(false,"conflicting sign code: sign code 0xf is not clean\n");
                  node->setHasKnownAndAssumedCleanSign(false);
                  }
               else
                  {
                  node->hasKnownSignCode() ? targetPseudoReg->setKnownSignCode(0xf) : targetPseudoReg->setAssumedSignCode(0xf);
                  }
               break;
            case raw_bcd_sign_unknown:
               break;
            default: TR_ASSERT(false,"unexpected node->getKnownOrAssumedSignCode() of %d\n",node->getKnownOrAssumedSignCode());
            }
         }

      if (!node->getOpCode().isSignlessBCDType() && node->hasKnownOrAssumedCleanSign())
         {
         uint32_t preferredPlusSign = TR::DataType::getPreferredPlusSignCode(node->getDataType());
         uint32_t preferredMinusSign = TR::DataType::getPreferredMinusSignCode(node->getDataType());
         if (node->isNonNegative()) // >= 0
            node->hasKnownCleanSign() ? targetPseudoReg->setKnownSignCode(preferredPlusSign) : targetPseudoReg->setAssumedSignCode(preferredPlusSign);
         else if (node->isNonZero() && node->isNonPositive()) // < 0
            node->hasKnownCleanSign() ? targetPseudoReg->setKnownSignCode(preferredMinusSign) : targetPseudoReg->setAssumedSignCode(preferredMinusSign);
         if (cg->traceBCDCodeGen() && targetPseudoReg->hasKnownOrAssumedSignCode())
            traceMsg(comp,"\ttargetPseudoReg has%sSignCode = true and it is 0x%x\n",targetPseudoReg->hasAssumedSignCode()?"Assumed":"Known",targetPseudoReg->getKnownOrAssumedSignCode());
         // call setHasCleanSign() after the set*SignCode() calls so the TR::DataType::getPreferredMinusCode() does not unset
         // the clean flag (as it must conservatively do to account for the unclean case of -0)
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsetting Has%sCleanSign (due to node flag) on targetPseudoReg %s on %s (%p)\n",
               node->hasKnownCleanSign()?"Known":"Assumed",cg->getDebug()->getName(targetPseudoReg),node->getOpCode().getName(),node);
         node->hasKnownCleanSign() ? targetPseudoReg->setHasKnownCleanSign() : targetPseudoReg->setHasAssumedCleanSign();
         }

      // set decimal precision here so any copy made in privatizeStorageReference is marked with the correct precision
      targetPseudoReg->setDecimalPrecision(node->getDecimalPrecision());

      if (comp->fej9()->assumeLeftMostNibbleIsZero() && targetPseudoReg->isEvenPrecision() && TR::DataType::getDigitSize(node->getDataType()) == HalfByteDigit)
         targetPseudoReg->setLeftMostNibbleClear();

      if (storageRef->isTemporaryBased())
         {
         // not expected for a load: assert in debug builds, but keep the register state consistent in prod builds
         TR_ASSERT(false,"storageRef for load node %p should not be temp based\n",node);
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tstorageRef is tempBased so set targetReg %s to isInitialized=true\n",cg->getDebug()->getName(targetPseudoReg));
         targetPseudoReg->setIsInitialized();
         }

      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\tsignState on targetReg %s for %s (%p) :\n",cg->getDebug()->getName(targetPseudoReg),node->getOpCode().getName(),node);
         traceMsg(comp,"\t\tknownCleanSign=%d, knownPrefSign=%d, knownSign=0x%x, assumedCleanSign=%d, assumedPrefSign=%d, assumedSign=0x%x (signStateKnown %d, signStateAssumed %d)\n",
            targetPseudoReg->hasKnownCleanSign(),targetPseudoReg->hasKnownPreferredSign(),targetPseudoReg->hasKnownSignCode()?targetPseudoReg->getKnownSignCode():0,
            targetPseudoReg->hasAssumedCleanSign(),targetPseudoReg->hasAssumedPreferredSign(),targetPseudoReg->hasAssumedSignCode()?targetPseudoReg->getAssumedSignCode():0,
            targetPseudoReg->signStateKnown(),
            targetPseudoReg->signStateAssumed());
         traceMsg(comp,"\t%s (%p) has hasSignStateOnLoad=%d\n",node->getOpCode().getName(),node,node->hasSignStateOnLoad());
         }

      if (!node->hasSignStateOnLoad())
         {
         // even if a particular sign state is not known (i.e. clean,preferred, a particular value) knowing that a load does not have
         // any incoming sign state can help in generating better code (e.g. a ZAP can be used for widening as the side effect of cleaning
         // the sign will not matter vs using a ZAP to widen and illegally modifying a loaded value with an unsigned sign code 0xf->0xc)
         targetPseudoReg->setSignStateInitialized();
         if (cg->traceBCDCodeGen())
            traceMsg(comp,"\tsetting SignStateInitialized due to hasSignStateOnLoad=false flag on %s (%p)\n",node->getOpCode().getName(),node);
         }
      }
   else
      {
      // non-BCD types carry no sign/precision state: only the storage reference is attached
      targetReg = cg->allocateOpaquePseudoRegister(node->getDataType());
      targetReg->setStorageReference(storageRef, node);
      }
   node->setRegister(targetReg);
   if (comp->getOption(TR_ForceBCDInit) || !isReadOnlyConstant)
      cg->privatizeStorageReference(node, targetReg, NULL);
   return targetReg;
   }
3502
3503
/**
3504
* \brief This helper uses vector instructions to evaluate pdload and pdloadi.
3505
*
3506
* Other types of load (zd, ud, etc) can't use vector registers/instructions.
3507
*/
3508
TR::Register*
3509
J9::Z::TreeEvaluator::pdloadVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
3510
{
3511
TR_ASSERT(node->getOpCodeValue() == TR::pdload || node->getOpCodeValue() == TR::pdloadi, "vector instructions only support PD load.");
3512
traceMsg(cg->comp(), "pdload Vector Evaluator, node=%p %d\n", node, __LINE__);
3513
3514
TR::Register* vTargetReg = vTargetReg = cg->allocateRegister(TR_VRF);
3515
TR::Node* addressNode = node->getFirstChild();
3516
3517
// No need to evaluate the address node of the pdloadi.
3518
// generateVSIInstruction() API will call separateIndexRegister() to separate the index
3519
// register by emitting an LA instruction. If there's a need for large displacement adjustment,
3520
// LAY will be emitted instead.
3521
TR::MemoryReference* sourceMR = TR::MemoryReference::create(cg, node);
3522
3523
// Index of the first byte to load, counting from the right ranging from 0-15.
3524
uint8_t indexFromTheRight = TR_VECTOR_REGISTER_SIZE - 1;
3525
if (node->getDecimalPrecision() > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION)
3526
{
3527
// we are loading as many digits as we can starting from the right most digit of the PD in memory
3528
// Need to calculate offset in order to load this way
3529
sourceMR->addToOffset(node->getSize() - TR_VECTOR_REGISTER_SIZE);
3530
}
3531
else
3532
{
3533
indexFromTheRight = node->getSize() - 1;
3534
}
3535
3536
TR_ASSERT(indexFromTheRight >= 0 && indexFromTheRight <= 15, "Load length too large for VLRL instruction");
3537
if(cg->traceBCDCodeGen())
3538
{
3539
traceMsg(cg->comp(),"\tGen VLRL for %s node->size=%d\n",
3540
node->getOpCode().getName(),
3541
node->getSize());
3542
}
3543
generateVSIInstruction(cg, TR::InstOpCode::VLRL, node, vTargetReg, sourceMR, indexFromTheRight);
3544
3545
node->setRegister(vTargetReg);
3546
cg->decReferenceCount(addressNode);
3547
return vTargetReg;
3548
}
3549
3550
/**
3551
* A ZAP with an overlapping dest (1st operand) and source (2nd operand) are allowed if the rightmost byte
3552
* of the 1st operand is coincident with or to the right of the rightmost byte of the second operand
3553
* Check for this special case here to allow it.
3554
*
3555
* pdstorei <mustClean> s=8 bytes
3556
* aiadd
3557
* aload
3558
* iconst 386
3559
* pdloadi s=5 bytes
3560
* aiadd
3561
* aload
3562
* iconst 388
3563
*
3564
* In this example the store is from 386->394 and the load from 388->393 so the rightmost byte (393->394) of the 1st operand (store) of the ZAP
3565
* is to the right of the rightmost byte of the 2nd operand (load) at 392->393
3566
*/
3567
bool
3568
isLegalOverlappingZAP(TR::Node *store, TR::CodeGenerator *cg)
3569
{
3570
TR::Compilation *comp = cg->comp();
3571
3572
if (cg->traceBCDCodeGen())
3573
traceMsg(comp,"\tisLegalOverlappingZAP check : store %s (%p), valueChild %s (%p)\n",
3574
store->getOpCode().getName(),store,store->getValueChild()->getOpCode().getName(),store->getValueChild());
3575
3576
if (!store->getOpCode().isStoreIndirect())
3577
return false;
3578
3579
TR::Node *load = store->getValueChild();
3580
if (!load->getOpCode().isLoadIndirect())
3581
return false;
3582
3583
if (load->getRegister())
3584
return false;
3585
3586
if (load->hasKnownOrAssumedCleanSign()) // won't need a ZAP anyway so don't bother going further
3587
return false;
3588
3589
TR::Node *storeAddr = store->getFirstChild();
3590
TR::Node *loadVarAddr = load->getFirstChild();
3591
3592
if (!cg->isSupportedAdd(storeAddr))
3593
return false;
3594
3595
if (!cg->isSupportedAdd(loadVarAddr))
3596
return false;
3597
3598
if (!cg->nodeMatches(storeAddr->getFirstChild(), loadVarAddr->getFirstChild()))
3599
return false;
3600
3601
if (!storeAddr->getSecondChild()->getOpCode().isIntegralConst())
3602
return false;
3603
3604
if (!loadVarAddr->getSecondChild()->getOpCode().isIntegralConst())
3605
return false;
3606
3607
int64_t storeSize = store->getSize();
3608
int64_t loadSize = load->getSize();
3609
3610
int64_t storeAddrOffset = storeAddr->getSecondChild()->get64bitIntegralValue() + store->getSymbolReference()->getOffset();
3611
int64_t loadAddrOffset = loadVarAddr->getSecondChild()->get64bitIntegralValue() + load->getSymbolReference()->getOffset();
3612
3613
int64_t storeStart = storeAddrOffset;
3614
int64_t storeEnd = storeStart + storeSize;
3615
3616
int64_t loadStart = loadAddrOffset;
3617
int64_t loadEnd = loadStart + loadSize;
3618
3619
if (cg->traceBCDCodeGen())
3620
{
3621
int64_t overlapStart = std::max(storeStart, loadStart);
3622
int64_t overlapEnd = std::min(storeEnd, loadEnd);
3623
traceMsg(comp,"\tstoreRange %lld->%lld vs loadRange %lld->%lld --> overlap range %lld -> %lld\n",
3624
storeStart,storeEnd,loadStart,loadEnd,overlapStart,overlapEnd);
3625
}
3626
3627
if (storeEnd >= loadEnd)
3628
{
3629
if (cg->traceBCDCodeGen())
3630
traceMsg(comp,"\t\tstoreEnd %lld >= loadEnd %lld : overlap ZAP is legal\n",storeEnd, loadEnd);
3631
return true;
3632
}
3633
else
3634
{
3635
if (cg->traceBCDCodeGen())
3636
traceMsg(comp,"\t\tstoreEnd %lld < loadEnd %lld : overlap ZAP is NOT legal\n",storeEnd, loadEnd);
3637
return false;
3638
}
3639
}
3640
3641
/**
3642
* This evaluator handles the following packed (pd) and unpacked (zd, ud)
3643
* direct/indirect store operations
3644
*
3645
* pdstore
3646
* pdstorei
3647
*
3648
* zdstore
3649
* zdstorei
3650
*
3651
* zdsleStore
3652
* zdsleStorei
3653
*
3654
* zdslsStore
3655
* zdslsStorei
3656
*
3657
* zdstsStore
3658
* zdstsStorei
3659
*
3660
* udStore
3661
* udStorei
3662
*
3663
* udstStore
3664
* udstStorei
3665
*
3666
* udslStore
3667
* udslStorei
3668
*/
3669
TR::Register*
3670
J9::Z::TreeEvaluator::pdstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
3671
{
3672
cg->traceBCDEntry("pdstore",node);
3673
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
3674
1, TR::DebugCounter::Cheap);
3675
3676
static bool disablePdstoreVectorEvaluator = (feGetEnv("TR_DisablePdstoreVectorEvaluator") != NULL);
3677
static bool disableZdstoreVectorEvaluator = (feGetEnv("TR_DisableZdstoreVectorEvaluator") != NULL);
3678
3679
if (!cg->comp()->getOption(TR_DisableVectorBCD) && !disablePdstoreVectorEvaluator
3680
&& cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL)
3681
&& (node->getOpCodeValue() == TR::pdstore || node->getOpCodeValue() == TR::pdstorei))
3682
{
3683
pdstoreVectorEvaluatorHelper(node, cg);
3684
}
3685
else if (!cg->comp()->getOption(TR_DisableVectorBCD) && !disableZdstoreVectorEvaluator
3686
&& cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY_2)
3687
&& node->getOpCodeValue() == TR::zdstorei
3688
&& node->getSecondChild()->getReferenceCount() == 1
3689
&& node->getSecondChild()->getRegister() == NULL
3690
&& (node->getSecondChild())->getOpCodeValue() == TR::pd2zd
3691
&& ((node->getSecondChild())->getFirstChild())->getOpCodeValue() == TR::pdloadi)
3692
{
3693
zdstoreiVectorEvaluatorHelper(node, cg);
3694
}
3695
else
3696
{
3697
pdstoreEvaluatorHelper(node, cg);
3698
}
3699
3700
cg->traceBCDExit("pdstore",node);
3701
return NULL;
3702
}
3703
3704
TR::Register* J9::Z::TreeEvaluator::pdstoreEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
3705
{
3706
bool isBCD = node->getType().isBCD();
3707
bool isAggr = node->getType().isAggregate();
3708
3709
TR::Node * valueChild = node->getValueChild();
3710
bool isPacked = node->getType().isAnyPacked();
3711
bool isIndirect = node->getOpCode().isIndirect();
3712
TR::Compilation *comp = cg->comp();
3713
3714
bool evaluatedPaddingAnchor = false; // store nodes may contain an extra node giving an address of padding bytes (e.g. 0xF0F0..F0 for zoned)
3715
bool useZAP = isPacked && node->mustCleanSignInPDStoreEvaluator();
3716
3717
TR_ASSERT(isBCD || (node->getSize() == valueChild->getSize()),"nodeSize %d != srcSize %d for node %p\n",node->getSize(),valueChild->getSize(),node);
3718
3719
// If a temp copy may be needed for a child load or passthrough operations (such as a redundant pdclean) but the pdstore location
3720
// will live on (skipCopyOnStore=true) then force the use of the pdstore result location for the child value (and do not generate a temp copy)
3721
// Note: that size check below isn't quite the same as the isByteTruncation one below (when setting isLegalToChangeCommonedChildAddress)
3722
// as this first one uses valueChild nodeSize instead of the valueChild regSize.
3723
// However in cases where the flag will be checked then the valueReg will be uninitialized so the valueChild->getSize() will equal the valueReg->getSize().
3724
//
3725
// useStoreAsAnAccumulator check is needed below as it indicates no overlap between the store and any ancestor. If there is possible overlap then setting the skipCopyOnLoad
3726
// flag is incorrect as commoned references will use the updated value (updated by this store) instead of the correct value from the first reference point
3727
// pdstore "a1" // a1 and a2 overlap in some way
3728
// pdload "a2"
3729
//...
3730
// =>pdload "a2" // this commoned node needs the value at first reference and not the updated value after the pdstore to "a1"
3731
// // if skipCopyOnLoad is set then "a2" will be loaded again at the commoned point and get the wrong value.
3732
bool uninitializedSourceLocationMayBeKilled = false;
3733
bool mustUseZAP = false;
3734
bool overlapZAPIsAllowed = false;
3735
if (valueChild->getSize() <= node->getSize() &&
3736
!valueChild->skipCopyOnLoad() &&
3737
valueChild->getReferenceCount() > 1 &&
3738
node->skipCopyOnStore())
3739
{
3740
bool canForceSkipCopyOnLoad = false;
3741
if (node->useStoreAsAnAccumulator()) // see comment above
3742
{
3743
canForceSkipCopyOnLoad = true;
3744
if (cg->traceBCDCodeGen())
3745
traceMsg(comp,"\tsetting valueChild (%s) %p setSkipCopyOnLoad=true due to store with skipCopyOnStore=true (storeAccumCase)\n",valueChild->getOpCode().getName(),valueChild);
3746
}
3747
else if (useZAP && isLegalOverlappingZAP(node, cg))
3748
{
3749
canForceSkipCopyOnLoad = true;
3750
mustUseZAP = true; // the overlap check and forcing of skipCopyOnLoad is only valid if we do actually end up generating a ZAP (vs an MVC for example) so make sure this happens
3751
overlapZAPIsAllowed = true;
3752
if (cg->traceBCDCodeGen())
3753
traceMsg(comp,"\tsetting valueChild %s (%p) setSkipCopyOnLoad=true due to store with skipCopyOnStore=true (legalOverlappingZAPCase)\n",valueChild->getOpCode().getName(),valueChild);
3754
}
3755
if (canForceSkipCopyOnLoad)
3756
{
3757
valueChild->setSkipCopyOnLoad(true);
3758
uninitializedSourceLocationMayBeKilled = true;
3759
}
3760
}
3761
3762
if (useZAP && valueChild->getOpCode().isPackedLeftShift())
3763
{
3764
if (cg->traceBCDCodeGen())
3765
traceMsg(comp,"\tsetting valueChild %p cleanSignDuringPackedLeftShift=true due to store that needs a ZAP\n",valueChild);
3766
valueChild->setCleanSignDuringPackedLeftShift(true);
3767
}
3768
3769
TR_OpaquePseudoRegister *valueReg = cg->evaluateOPRNode(valueChild);
3770
3771
if (cg->traceBCDCodeGen())
3772
traceMsg(comp,"\t%s (%p) : isInMemoryCopyProp=%s\n",node->getOpCode().getName(),node,node->isInMemoryCopyProp()?"yes":"no");
3773
// NOTE: if a temp copy is generated below then valueStorageReference and valueReg are reset to point to the temp copies
3774
TR_StorageReference *valueStorageReference = valueReg->getStorageReference();
3775
TR::MemoryReference *sourceMR = NULL;
3776
TR_StorageReference *tempStorageReference = NULL;
3777
bool nodeAndValueRegSizeMatch = node->getSize() == valueReg->getSize();
3778
bool allSizesMatch = false;
3779
if (valueStorageReference->isNonConstantNodeBased())
3780
{
3781
allSizesMatch = nodeAndValueRegSizeMatch &&
3782
valueReg->getSize() == valueStorageReference->getNode()->getSize();
3783
}
3784
else
3785
{
3786
allSizesMatch = nodeAndValueRegSizeMatch;
3787
}
3788
3789
if (valueStorageReference->isNonConstantNodeBased() &&
3790
comp->getOption(TR_PrivatizeOverlaps) &&
3791
!overlapZAPIsAllowed &&
3792
!(node->useStoreAsAnAccumulator() || valueReg->isInitialized()))
3793
{
3794
// In addition to when the isInMemoryCopyProp flag is set on the store there are two other cases when an temp copy is needed for overlap
3795
// 1) isUsingStorageRefFromAnotherStore : even with CSE commoning (so not subject to isInMemoryCopyProp flag as the IL itself is safe)
3796
// can result in an overlap if 'b' is updated to point to 'c' storageRef and 'd' overlap
3797
// This is a lazy fixup -- could also pro-actively not set skipCopyOnStore for 'c' in the first place if the stores for any of the commoned 'b' nodes
3798
// are in memory types (BCD/Aggr) that also overlap with 'c' (e.g. 'd' in this case)
3799
//
3800
// c
3801
// b
3802
//
3803
// d
3804
// =>b (was just 'b' before CSE) but could be 'c' after 'c' is evaluated
3805
//
3806
TR::Node *storageRefNode = valueStorageReference->getNode();
3807
bool isUsingStorageRefFromAnotherStore = storageRefNode->getOpCode().isStore() && storageRefNode != node;
3808
3809
// 2) The valueRegHasDeadOrIgnoredBytes check is for when a ZAP could be generated for an overlapping copy where the rightmost
3810
// bytes are not coincident (due to the deadOrIgnoredBytes) so go through a temp in this case too
3811
//
3812
// This also handles the case like the below (so do not bother checking useZAP along with valueRegHasDeadOrIgnoredBytes)
3813
// The copy is not redundant when the valueReg has some dead or ignored bytes as the right most bytes of the source
3814
// and target will not be coincident in this case even if the addresses exactly match
3815
// izdstore p = 6 "A"
3816
// addr1
3817
// zdshrSetSign p = 1 --> valueReg has 5 ignored bytes
3818
// izdload "A" p = 6
3819
// =>addr1
3820
// iconst 5 // shift
3821
/// iconst 15 // setSign
3822
//
3823
// In this case have to move from offset +0 to offset +5 and then clear the top 5 bytes (starting at offset +0)
3824
// If copyIsRedundant is incorrectly set to true then only the clear of the top 5 bytes happens and the one surviving
3825
// digit from the zdshrSetSign is clobbered
3826
// MVC +0(base,L=1),+5(base) move surviving digit first
3827
// MVC +0(base,L-5),(constant) complete widening by setting top 5 bytes to 0xF0
3828
bool valueRegHasDeadOrIgnoredBytes = valueReg->getRightAlignedIgnoredBytes() > 0;
3829
3830
// 3) if there is any size mismatch between the sizes of node, valueReg and storageRefNode
3831
//
3832
// if nodeSize != storageRefNodeSize then this could be a truncating copy where the data needs to be moved back a number of bytes
3833
// "a" and "a_alias" start at the same address (so loadOrStoreAddressesMatch will return true) but "a" is 10 bytes and "a_alias" is 13 bytes
3834
// The meaning of the IL below is to move the low (addr+3) 10 bytes of "a_alias" back (to the left) 3 bytes.
3835
// This is actual needed data movement so a copy must be done (TODO : going through a temp here but this particular size mismatch case could
3836
// be done with an MVC as this direction of copy is non-destructive.
3837
// ipdstore "a" s=10
3838
// addr
3839
// ipdload "a_alias" s=13 // valueChild may not be a simple load but some commoned pdX operation that has the ipdload as its storageRefNode
3840
// =>addr
3841
3842
if (cg->traceBCDCodeGen())
3843
traceMsg(comp,"\tisInMemoryCopyProp=%s, isUsingStorageRefFromAnotherStore=%s, valueRegHasDeadOrIgnoredBytes=%s : node %s (%p), valueReg %s, storageRefNode %s (%p)\n",
3844
node->isInMemoryCopyProp() ? "yes":"no",
3845
isUsingStorageRefFromAnotherStore ? "yes":"no",
3846
valueRegHasDeadOrIgnoredBytes ? "yes":"no",
3847
node->getOpCode().getName(),node,
3848
cg->getDebug()->getName(valueReg),
3849
storageRefNode->getOpCode().getName(),storageRefNode);
3850
3851
if (cg->traceBCDCodeGen())
3852
traceMsg(comp,"\tallSizesMatch=%s (nodeSize=%d, valueRegSize=%d, storageRefNodeSize=%d)\n",
3853
allSizesMatch ? "yes":"no",node->getSize(),valueReg->getSize(),storageRefNode->getSize());
3854
3855
if (node->isInMemoryCopyProp() || isUsingStorageRefFromAnotherStore || valueRegHasDeadOrIgnoredBytes || !allSizesMatch)
3856
{
3857
// a redundant copy is an MVC with exact matching target and source. This is a nop but a very expensive nop as the hardware treats it
3858
// as any other overlap copy (i.e. very slowly)
3859
if (cg->traceBCDCodeGen())
3860
traceMsg(comp,"\tnode %s (%p) and source %s (%p) may overlap but first check if copy would be redundant\n",
3861
node->getOpCode().getName(),node,valueChild->getOpCode().getName(),valueChild);
3862
3863
bool copyIsRedundant = !valueRegHasDeadOrIgnoredBytes && allSizesMatch && cg->loadOrStoreAddressesMatch(node, valueStorageReference->getNode());
3864
3865
if (cg->traceBCDCodeGen())
3866
traceMsg(comp,"\tgot copyIsRedundant=%s from first test\n",copyIsRedundant?"yes":"no");
3867
3868
//Further check if there is potential destructive overlap based on storage info
3869
if (isAggr && !copyIsRedundant && !valueRegHasDeadOrIgnoredBytes && allSizesMatch)
3870
{
3871
if (cg->traceBCDCodeGen())
3872
traceMsg(comp,"\tperform test for definitelyNoDestructive overlap\n");
3873
3874
if (cg->getStorageDestructiveOverlapInfo(valueStorageReference->getNode(), valueReg->getSize(), node, node->getSize()) == TR_DefinitelyNoDestructiveOverlap)
3875
{
3876
copyIsRedundant = true;
3877
if (cg->traceBCDCodeGen())
3878
traceMsg(comp,"\t\tset copyIsRedundant=true : overlap check between node %s (%p) size=%d and valueStorageRefNode %s (%p) valueRegSize %d returns TR_DefinitelyNoDestructiveOverlap\n",
3879
node->getOpCode().getName(),node,node->getSize(),
3880
valueStorageReference->getNode()->getOpCode().getName(),valueStorageReference->getNode(),valueReg->getSize());
3881
}
3882
}
3883
3884
if (cg->traceBCDCodeGen())
3885
traceMsg(comp,"\t\tcopyIsRedundant=%s\n",copyIsRedundant?"yes":"no");
3886
3887
if (!copyIsRedundant)
3888
{
3889
// i.e. a simple load/store BUT load and store memory may overlap so must use a temp so MVC doesn't destructively overlap and lose some source bytes
3890
if (cg->traceBCDCodeGen())
3891
traceMsg(comp,"\tnode %s (%p) and source %s (%p) (uninitialized valueReg %s) may overlap -- must privatize valueReg\n",
3892
node->getOpCode().getName(),node,valueChild->getOpCode().getName(),valueChild,cg->getDebug()->getName(valueReg));
3893
3894
int32_t privatizedSize = valueReg->getSize();
3895
int32_t storageRefNodeSize = storageRefNode->getSize();
3896
if (!valueReg->isInitialized() &&
3897
storageRefNodeSize != privatizedSize)
3898
{
3899
// may need to increase the size of the memcpy so it captures all of the source value -- this is important for the example above of moving 10 bytes starting at addr_1+3
3900
// back 3 bytes to addr_1
3901
// This 13 byte copy will copy the entire original field and then the store generated by the usual pdstoreEvaluator will be MVC addr_1(10,br),addr_1+3(10,br)
3902
privatizedSize = storageRefNodeSize;
3903
if (cg->traceBCDCodeGen())
3904
traceMsg(comp,"\tset privatizedSize to storageRefNodeSize %d for uninit valueReg %s with mismatched storageRefNodeSize %d and valueRegSize %d\n",
3905
privatizedSize,cg->getDebug()->getName(valueReg),storageRefNodeSize,valueReg->getSize());
3906
3907
if (valueRegHasDeadOrIgnoredBytes)
3908
{
3909
// below IL comes from statements like : DIVIDE powerOfTenLit into var where var is an unsigned zoned type
3910
// zdstore s=15
3911
// addr
3912
// zdshrSetSign s=12 <- passThrough with 3 rightAligned deadBytes
3913
// izdload s=15
3914
// =>addr
3915
// iconst 3 // shift
3916
// iconst 0xf // sign
3917
//
3918
// in this case using an overridden size of 15 from the zdload is incorrect as there are only 12 valid bytes after the passThru zdshrSetSign
3919
// If the offset on the addr is less then the shift then the final offset will be < 0 and the binary encoding time assume will be hit
3920
// For larger offsets no compile time problem is hit but the temp copy reaches back to read bytes from before it's field (but the these bytes
3921
// are not actually examined so everything ends up 'working' (delta any access exceptions if this were the first field in storage)
3922
if (cg->traceBCDCodeGen())
3923
traceMsg(comp,"\t\tgetRightAlignedIgnoredBytes %d > 0 so reduce privatizedSize %d -> %d\n",
3924
valueReg->getRightAlignedIgnoredBytes(), privatizedSize, privatizedSize - valueReg->getRightAlignedIgnoredBytes());
3925
privatizedSize = privatizedSize - valueReg->getRightAlignedIgnoredBytes();
3926
}
3927
}
3928
TR_OpaquePseudoRegister *tempRegister = cg->privatizePseudoRegister(valueChild, valueReg, valueStorageReference, privatizedSize);
3929
tempStorageReference = tempRegister->getStorageReference();
3930
3931
if (cg->traceBCDCodeGen())
3932
{
3933
if (node->isInMemoryCopyProp())
3934
traceMsg(comp,"\ta^a : privatize needed due to isInMemoryCopyProp node %s (%p) on line_no=%d (storeCase)\n",
3935
node->getOpCode().getName(),node,comp->getLineNumber(node));
3936
if (isUsingStorageRefFromAnotherStore)
3937
traceMsg(comp,"\ta^a : privatize needed due to isUsingStorageRefFromAnotherStore storageRefNode %s (%p) on line_no=%d (storeCase)\n",
3938
storageRefNode->getOpCode().getName(),storageRefNode,comp->getLineNumber(node));
3939
if (valueRegHasDeadOrIgnoredBytes)
3940
traceMsg(comp,"\ta^a : privatize needed due to valueRegHasDeadOrIgnoredBytes valueReg %s valueChild %s (%p) on line_no=%d (storeCase)\n",
3941
cg->getDebug()->getName(valueReg),valueChild->getOpCode().getName(),valueChild,comp->getLineNumber(node));
3942
}
3943
3944
TR_ASSERT(!comp->getOption(TR_EnablePerfAsserts),"gen overlap copy on node %s (%p) on line_no=%d (storeCase)\n",
3945
node->getOpCode().getName(),node,comp->getLineNumber(node));
3946
3947
if (isBCD)
3948
sourceMR = generateS390RightAlignedMemoryReference(valueChild, tempStorageReference, cg);
3949
else
3950
sourceMR = generateS390MemRefFromStorageRef(valueChild, tempStorageReference, cg);
3951
3952
valueReg = tempRegister;
3953
valueStorageReference = tempStorageReference;
3954
3955
TR_ASSERT(!isBCD || valueReg->getPseudoRegister(),"valueReg must be a pseudoRegister on node %s (%p)\n",valueChild->getOpCode().getName(),valueChild);
3956
}
3957
}
3958
else
3959
{
3960
if (cg->traceBCDCodeGen())
3961
traceMsg(comp,"y^y : temp copy saved isInMemoryCopyProp = false on %s (%p) (storeCase)\n",node->getOpCode().getName(),node);
3962
}
3963
}
3964
3965
TR_PseudoRegister *bcdValueReg = NULL;
3966
if (valueReg->getPseudoRegister())
3967
{
3968
bcdValueReg = valueReg->getPseudoRegister();
3969
}
3970
3971
int32_t destSize = node->getSize();
3972
int32_t sourceSize = valueReg->getSize();
3973
3974
TR_ASSERT(isBCD || (destSize == sourceSize),"destSize %d != sourceSize %d for node %p\n",destSize,sourceSize,node);
3975
3976
bool isByteTruncation = sourceSize > destSize;
3977
bool isByteWidening = destSize > sourceSize;
3978
3979
bool isLeadingSignByteWidening = isByteWidening && node->getType().isLeadingSign();
3980
3981
useZAP = useZAP && bcdValueReg && (!bcdValueReg->hasKnownOrAssumedCleanSign() || mustUseZAP);
3982
//useZAP = useZAP || (isPacked && isByteTruncation); // truncating packed stores that need overflow exception should be using pdshlOverflow
3983
3984
bool preserveSrcSign = bcdValueReg && !bcdValueReg->isLegalToCleanSign();
3985
3986
bool savePreZappedValue = false;
3987
if (useZAP &&
3988
valueChild->getReferenceCount() > 1 &&
3989
preserveSrcSign)
3990
{
3991
savePreZappedValue = true;
3992
if (cg->traceBCDCodeGen())
3993
{
3994
traceMsg(comp,"\tsetting savePreZappedValue=true because valueReg (from valueChild %p with refCount %d > 1) ",valueChild,valueChild->getReferenceCount());
3995
if (!bcdValueReg->signStateInitialized())
3996
traceMsg(comp,"has an uninitialized sign state and a ZAP is to be used for the store\n");
3997
else
3998
traceMsg(comp,"has signCode 0x%x and a ZAP is to be used for the store\n", bcdValueReg->getKnownOrAssumedSignCode());
3999
}
4000
}
4001
4002
bool childContainsAccumulatedResult = valueStorageReference->isNodeBased() &&
4003
valueStorageReference->isNodeBasedHint() &&
4004
(valueStorageReference->getNode() == node);
4005
4006
if (cg->traceBCDCodeGen())
4007
traceMsg(comp,"\tisPacked=%s, useZAP=%s, valueReg->signStateInit()=%s, valueReg->hasKnownOrAssumedCleanSign()=%s, isByteTruncation=%s, isByteWidening=%s, destSize=%d, sourceSize=%d\n",
4008
isPacked?"true":"false",
4009
useZAP?"true":"false",
4010
bcdValueReg && bcdValueReg->signStateInitialized()?"true":"false",
4011
bcdValueReg && bcdValueReg->hasKnownOrAssumedCleanSign()?"true":"false",
4012
isByteTruncation?"true":"false",
4013
isByteWidening?"true":"false",
4014
destSize,
4015
sourceSize);
4016
4017
TR::Node *sourceNode = NULL;
4018
bool changeCommonedChildAddress = false;
4019
bool isLegalToChangeCommonedChildAddress = false;
4020
4021
TR_ASSERT( !childContainsAccumulatedResult || valueReg->isInitialized(),"an accumulated result should also be initialized\n");
4022
4023
if (!isByteTruncation &&
4024
!isLeadingSignByteWidening &&
4025
!savePreZappedValue &&
4026
tempStorageReference == NULL && // valueReg->setStorageReference() will not work in this case as the valueReg is pointing to the copy (tempRef count underflow)
4027
valueChild->getReferenceCount() > 1 &&
4028
node->skipCopyOnStore())
4029
{
4030
isLegalToChangeCommonedChildAddress = true;
4031
if (cg->traceBCDCodeGen())
4032
traceMsg(comp,"\tsetting isLegalToChangeCommonedChildAddress=true for valueChild %s (%p) because isByteTruncation=false, isLeadingSignByteWidening=false, refCount %d > 1, skipCopyOnStore=true and savePreZappedValue=false\n",
4033
valueChild->getOpCode().getName(),
4034
valueChild,
4035
valueChild->getReferenceCount());
4036
}
4037
4038
if (!valueStorageReference->isTemporaryBased() &&
4039
valueStorageReference->getNode() != node)
4040
{
4041
TR_ASSERT(!valueReg->isInitialized(),"expecting valueReg to not be initialized for valueChild %p\n",valueChild);
4042
TR_ASSERT(valueReg->getStorageReference()->isNodeBased(),"expecting valueReg storageRef to be nodeBased on valueChild %p\n",valueChild);
4043
if (valueStorageReference->getNode()->getOpCode().isStore())
4044
{
4045
if (cg->traceBCDCodeGen())
4046
traceMsg(comp,"found uninit storageRef node based STORE case valueChild %s (%p) and storageRefNode %s (%p)\n",
4047
valueChild->getOpCode().getName(),
4048
valueChild,
4049
valueStorageReference->getNode()->getOpCode().getName(),
4050
valueStorageReference->getNode());
4051
}
4052
else if (valueStorageReference->getNode()->getOpCode().isLoad())
4053
{
4054
if (cg->traceBCDCodeGen())
4055
traceMsg(comp,"found uninit storageRef node based LOAD case valueChild %s (%p) and storageRefNode %s (%p), skipCopyOnLoad storageRefNode is %s\n",
4056
valueChild->getOpCode().getName(),
4057
valueChild,
4058
valueStorageReference->getNode()->getOpCode().getName(),
4059
valueStorageReference->getNode(),
4060
valueStorageReference->getNode()->skipCopyOnLoad()?"yes":"no");
4061
}
4062
else
4063
{
4064
TR_ASSERT(false,"storageRefNode %p should be a load or a store node %p (%s)\n",valueStorageReference->getNode(),cg->getDebug()->getName(valueStorageReference->getNode()));
4065
}
4066
}
4067
4068
if (valueStorageReference->isTemporaryBased() || (valueStorageReference->getNode() != node))
4069
{
4070
if (cg->traceBCDCodeGen() && valueStorageReference->isTemporaryBased())
4071
traceMsg(comp,"\tvalueStorageReference->isTemporaryBased() case so see if changeCommonedChildAddress should be set to true\n");
4072
else if (cg->traceBCDCodeGen())
4073
traceMsg(comp,"\tvalueStorageReference->getNode() != node (%p != %p) case so see if changeCommonedChildAddress should be set to true\n",
4074
valueStorageReference->getNode(),node);
4075
4076
sourceNode = valueChild;
4077
if (isLegalToChangeCommonedChildAddress)
4078
{
4079
if (useZAP)
4080
{
4081
changeCommonedChildAddress = true;
4082
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to ZAP\n");
4083
}
4084
else if (isByteWidening)
4085
{
4086
changeCommonedChildAddress = true;
4087
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to byteWidening\n");
4088
}
4089
/* // disable this case, not a good enough reason for potential operand store compare
4090
else if (!isIndirect && valueChild->getOpCode().isIndirect()) // addressability is cheaper
4091
{
4092
changeCommonedChildAddress = true;
4093
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to cheaper addressability\n");
4094
}
4095
*/
4096
else if (uninitializedSourceLocationMayBeKilled &&
4097
!valueStorageReference->isTemporaryBased() && // last two conditions are true when source location is uninitialized (passThrough operations or just a load child)
4098
(valueStorageReference->getNode()->getOpCode().isLoadVar() || valueStorageReference->getNode()->getOpCode().isStore()))
4099
{
4100
changeCommonedChildAddress = true;
4101
if (cg->traceBCDCodeGen())
4102
traceMsg(comp,"\t\tset changeCommonedChildAddress = true due to uninitialized storageRefNode %p with skipCopyOnLoad that was forced to true\n",valueStorageReference->getNode());
4103
}
4104
else
4105
{
4106
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tleave changeCommonedChildAddress = false\n");
4107
}
4108
}
4109
else
4110
{
4111
if (cg->traceBCDCodeGen())
4112
traceMsg(comp,"\t\tisLegalToChangeCommonedChildAddress = false so do not attempt to look for cases to set changeCommonedChildAddress to true\n");
4113
}
4114
}
4115
else
4116
{
4117
TR_ASSERT( childContainsAccumulatedResult, "expecting the pdstore child node to contain the accumulated result\n");
4118
// If there is any byte truncation and we are in the accumulator case then this means some leftmost child of the store
4119
// may have written data outside the bounds of the current store and this would be (horribly) incorrect.
4120
// This case should never occur as hints should only be assigned when the pdstore memory location is large enough
4121
// to contain any leftmost result value.
4122
TR_ASSERT( !isByteTruncation,"byte truncation should not occur when using the pdstore as an accumulator\n");
4123
changeCommonedChildAddress = true;
4124
if (cg->traceBCDCodeGen()) traceMsg(comp,"\taccumulated hint case so unconditionally set changeCommonedChildAddress = true\n");
4125
}
4126
4127
if (cg->traceBCDCodeGen())
4128
traceMsg(comp,"\tbef legality check: changeCommonedChildAddress = %s and isLegalToChangeCommonedChildAddress=%s so final changeCommonedChildAddress=%s\n",
4129
changeCommonedChildAddress?"true":"false",
4130
isLegalToChangeCommonedChildAddress?"true":"false",
4131
(changeCommonedChildAddress && isLegalToChangeCommonedChildAddress)?"true":"false");
4132
4133
changeCommonedChildAddress = changeCommonedChildAddress && isLegalToChangeCommonedChildAddress;
4134
4135
// well this is unfortunate -- the valueChild has skipCopyOnLoad set on it but for some reason (likely some corner case savePreZappedValue)
4136
// isLegalToChangeCommonedChildAddress is false.
4137
// This means that it is not safe to keep using the storageRef on the valueChild past this store point so must force it to a temp
4138
bool mustPrivatizeValueChild = tempStorageReference == NULL && !valueReg->isInitialized() && uninitializedSourceLocationMayBeKilled && !changeCommonedChildAddress;
4139
if (cg->traceBCDCodeGen())
4140
traceMsg(comp,"\tmustPrivatizeValueChild=%s\n",mustPrivatizeValueChild?"yes":"no");
4141
4142
TR_StorageReference *targetStorageReference =
4143
TR_StorageReference::createNodeBasedStorageReference(node,
4144
changeCommonedChildAddress ? valueChild->getReferenceCount() : 1,
4145
comp);
4146
4147
rcount_t origValueChildRefCount = valueChild->getReferenceCount();
4148
4149
if (cg->traceBCDCodeGen())
4150
traceMsg(comp,"\tcreate node based targetStorageReference #%d from %s (%p) and nodeRefCount %d (%s)\n",
4151
targetStorageReference->getReferenceNumber(),
4152
node->getOpCode().getName(),
4153
node,
4154
targetStorageReference->getNodeReferenceCount(),
4155
changeCommonedChildAddress?"from valueChild":"fixed at 1");
4156
4157
TR::MemoryReference *targetMR = NULL;
4158
if (useZAP)
4159
{
4160
if (cg->traceBCDCodeGen())
4161
traceMsg(comp,"\tuseZAP=true so gen ZAP but first determine the zapDestSize, initial size is destSize=%d\n",destSize);
4162
int32_t zapDestSize = destSize;
4163
targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
4164
TR::Node *sourceNodeForZAP = sourceNode;
4165
if (sourceNode)
4166
{
4167
if (sourceMR == NULL)
4168
sourceMR = generateS390RightAlignedMemoryReference(sourceNode, valueStorageReference, cg);
4169
cg->correctBadSign(sourceNode, bcdValueReg, sourceSize, sourceMR);
4170
}
4171
else
4172
{
4173
// when zapping a field against itself then we may be able to reduce the destSize if some of the upper bytes are already clear
4174
if (isByteWidening)
4175
{
4176
if (cg->traceBCDCodeGen())
4177
traceMsg(comp,"\t\tdestSize > sourceSize (%d > %d) so check valueReg->getLiveSymbolSize() %d against destSize %d before checking if the upper bytes are clear\n",
4178
destSize,sourceSize,valueReg->getLiveSymbolSize(),destSize);
4179
if (valueReg->getBytesToClear(sourceSize, destSize) == 0)
4180
{
4181
zapDestSize=sourceSize;
4182
if (cg->traceBCDCodeGen())
4183
traceMsg(comp,"\t\tvalueReg bytes sourceSize->destSize (%d->%d) are already clear so set zapDestSize=sourceSize=%d\n",sourceSize,destSize,sourceSize);
4184
}
4185
}
4186
cg->correctBadSign(node, bcdValueReg, zapDestSize, targetMR);
4187
// save the dead/ignored bytes here as it will be reset to 0 if savePreZappedValue is true as part of the setStorageReference call below
4188
int32_t savedRightAlignedDeadAndIgnoredBytes = valueReg->getRightAlignedDeadAndIgnoredBytes();
4189
if (savePreZappedValue)
4190
{
4191
TR_StorageReference *valueStorageReferenceCopy = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);
4192
// when tempStorageReference != NULL then the valueReg->setStorageReference call below will not work as the temp ref count will underflow
4193
// valueReg in this case is actually pointing to the tempRegister created when copyMR was initialized
4194
// shouldn't reach here in this case as tempStorageReference is only used for the uninit and non-hint cases and this is an init path
4195
TR_ASSERT(tempStorageReference == NULL,"tempStorageReference == NULL should be null for node %p\n",node);
4196
valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);
4197
valueReg->setIsInitialized();
4198
valueStorageReference = valueStorageReferenceCopy;
4199
if (cg->traceBCDCodeGen())
4200
traceMsg(comp,"\tsavePreZappedValue=true so gen MVC with sourceSize %d to copy #%d on pdstore for valueChild %p with refCnt %d\n",
4201
sourceSize,valueStorageReferenceCopy->getReferenceNumber(),valueChild,valueChild->getReferenceCount());
4202
TR::MemoryReference *targetCopyMR = generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg);
4203
if (savedRightAlignedDeadAndIgnoredBytes > 0)
4204
{
4205
if (cg->traceBCDCodeGen())
4206
traceMsg(comp,"\tadd -savedRightAlignedDeadAndIgnoredBytes = -%d to sourceMR for savePreZappedValue copy\n",savedRightAlignedDeadAndIgnoredBytes);
4207
targetCopyMR->addToTemporaryNegativeOffset(node, -savedRightAlignedDeadAndIgnoredBytes, cg);
4208
}
4209
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
4210
sourceSize-1,
4211
generateS390RightAlignedMemoryReference(valueChild, valueStorageReferenceCopy, cg),
4212
targetCopyMR);
4213
4214
}
4215
sourceMR = generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg); // ensure sourceMR and targetMR are the same when used for the ZAP below
4216
4217
if (savedRightAlignedDeadAndIgnoredBytes > 0)
4218
{
4219
if (cg->traceBCDCodeGen())
4220
traceMsg(comp,"\tadd -savedRightAlignedDeadAndIgnoredBytes = -%d to sourceMR for final ZAP\n",savedRightAlignedDeadAndIgnoredBytes);
4221
sourceMR->addToTemporaryNegativeOffset(node, -savedRightAlignedDeadAndIgnoredBytes, cg);
4222
}
4223
4224
sourceNodeForZAP = node; // so a NULL sourceNode is not passed in for the ZAP sourceMR reuse below
4225
}
4226
4227
if (isByteTruncation)
4228
{
4229
if (cg->traceBCDCodeGen())
4230
traceMsg(comp,"\tisByteTruncating ZAP so reduce sourceSize %d->%d\n",sourceSize,zapDestSize);
4231
sourceSize = zapDestSize;
4232
}
4233
4234
if (cg->traceBCDCodeGen())
4235
traceMsg(comp,"\tgen ZAP with zapDestSize=%d,sourceSize=%d\n",zapDestSize,sourceSize);
4236
generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
4237
zapDestSize-1,
4238
reuseS390RightAlignedMemoryReference(targetMR, node, targetStorageReference, cg),
4239
sourceSize-1,
4240
reuseS390RightAlignedMemoryReference(sourceMR, sourceNodeForZAP, valueStorageReference, cg));
4241
}
4242
else
4243
{
4244
if (sourceNode)
4245
{
4246
if (cg->traceBCDCodeGen())
4247
traceMsg(comp,"\tuseZAP=false and sourceNode %s (%p) is non-NULL so gen MVC but first determine the mvcSize\n",
4248
sourceNode->getOpCode().getName(),sourceNode);
4249
int32_t mvcSize = sourceSize;
4250
if (isByteTruncation)
4251
{
4252
mvcSize = destSize;
4253
}
4254
bool needsClear = false;
4255
if (isByteWidening)
4256
{
4257
needsClear = true;
4258
if (cg->traceBCDCodeGen())
4259
traceMsg(comp,"\t\tdestSize > sourceSize (%d > %d) so try to reduce mvcSize by checking if the upper bytes are clear\n",
4260
destSize,sourceSize,valueReg->getLiveSymbolSize(),destSize);
4261
if (valueReg->getBytesToClear(sourceSize, destSize) == 0)
4262
{
4263
needsClear=false;
4264
mvcSize=destSize;
4265
if (cg->traceBCDCodeGen())
4266
traceMsg(comp,"\t\tvalueReg bytes sourceSize->destSize (%d->%d) are already clear so set mvcSize=destSize=%d\n",sourceSize,destSize,mvcSize);
4267
}
4268
}
4269
4270
if (cg->traceBCDCodeGen())
4271
traceMsg(comp,"\tsourceNode %s (%p) is non-NULL so gen MVC/memcpy with size %d to store (isByteTruncation=%s)\n",
4272
sourceNode->getOpCode().getName(),sourceNode,mvcSize,isByteTruncation?"yes":"no");
4273
4274
if (isBCD)
4275
{
4276
targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
4277
if (sourceMR == NULL)
4278
sourceMR = generateS390RightAlignedMemoryReference(sourceNode, valueStorageReference, cg);
4279
}
4280
else
4281
{
4282
targetMR = generateS390MemRefFromStorageRef(node, targetStorageReference, cg);
4283
if (sourceMR == NULL)
4284
sourceMR = generateS390MemRefFromStorageRef(sourceNode, valueStorageReference, cg);
4285
}
4286
4287
// if getRightAlignedIgnoredBytes > 0 then the rightmost bytes will not be coincident so the addressesMatch check is not sufficient
4288
// to detect if the copyIsRedundant
4289
//
4290
// Similarly if the node and storageRefNode sizes do not match (!allSizesMatch) then different offset bumps will be applied even if their starting addresses
4291
// are coincident (i.e. loadOrStoreAddressesMatch would return true)
4292
bool copyIsRedundant = valueReg->getRightAlignedIgnoredBytes() == 0 &&
4293
allSizesMatch &&
4294
valueStorageReference->isNonConstantNodeBased() &&
4295
cg->loadOrStoreAddressesMatch(node, valueStorageReference->getNode());
4296
if (cg->traceBCDCodeGen() && copyIsRedundant)
4297
traceMsg(comp,"\t\tcopyIsRedundant=yes so skip memcpy\n");
4298
if (!copyIsRedundant)
4299
cg->genMemCpy(targetMR, node, sourceMR, sourceNode, mvcSize);
4300
4301
if (needsClear)
4302
{
4303
cg->widenBCDValue(node, NULL, valueReg->getSize(), node->getSize(), targetMR);
4304
evaluatedPaddingAnchor = true;
4305
}
4306
}
4307
else if (isByteWidening)
4308
{
4309
if (cg->traceBCDCodeGen())
4310
traceMsg(comp,"\tuseZAP=false and sourceNode is NULL so just check if upper bytes need to be cleared\n");
4311
targetMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
4312
cg->widenBCDValueIfNeeded(node, bcdValueReg, sourceSize, node->getSize(), targetMR);
4313
evaluatedPaddingAnchor = true;
4314
}
4315
}
4316
4317
if (valueChild->getReferenceCount() > 1)
4318
{
4319
if (changeCommonedChildAddress)
4320
{
4321
int32_t savedLeftAlignedZeroDigits = valueReg->getLeftAlignedZeroDigits();
4322
if (cg->traceBCDCodeGen())
4323
traceMsg(comp,"\tchangeCommonedChildAddress=true so update storage reference on valueReg %s (leftAlignedZeroDigits=%d) and reset isInit to false\n",
4324
cg->getDebug()->getName(valueReg),savedLeftAlignedZeroDigits);
4325
4326
valueReg->setStorageReference(targetStorageReference, valueChild); // also resets leftAlignedZeroDigits
4327
4328
// Reset isInit to false for correctness so the commoned reference does not clobber a user variable location
4329
// This reset is also done during addStorageReferenceHints but there is no guarantee this pass will be done for every
4330
// IL pattern
4331
if (!targetStorageReference->isTemporaryBased())
4332
valueReg->setIsInitialized(false);
4333
4334
if (isByteWidening)
4335
{
4336
bcdValueReg->addRangeOfZeroBytes(sourceSize, destSize);
4337
}
4338
else if (savedLeftAlignedZeroDigits > 0)
4339
{
4340
// TODO: is the size check below needed? -- isByteWidening is checked in the if above and isByteTruncation would never happen for an accum case
4341
if (childContainsAccumulatedResult &&
4342
valueReg->getSize() == node->getSize())
4343
{
4344
if (cg->traceBCDCodeGen())
4345
traceMsg(comp,"\tset leftAlignedZeroDigits to %d on %s after setStorageReference\n",savedLeftAlignedZeroDigits,cg->getDebug()->getName(valueReg));
4346
valueReg->setLeftAlignedZeroDigits(savedLeftAlignedZeroDigits);
4347
}
4348
else
4349
{
4350
// could also probably transfer savedLeftAlignedZeroDigits in some non-accum cases too but need to see a motivating case first
4351
if (cg->traceBCDCodeGen())
4352
traceMsg(comp,"z^z : missed transferring zeroDigits %d to valueChild %s (%p) (accum=%s, valueRegSize %d, nodeSize %d\n",
4353
savedLeftAlignedZeroDigits,valueChild->getOpCode().getName(),valueChild,childContainsAccumulatedResult?"yes":"no",valueReg->getSize(),node->getSize());
4354
}
4355
}
4356
4357
if (useZAP)
4358
{
4359
bcdValueReg->setHasKnownValidSignAndData();
4360
bcdValueReg->setHasKnownCleanSign();
4361
TR_ASSERT(!bcdValueReg->hasKnownOrAssumedSignCode() || bcdValueReg->getKnownOrAssumedSignCode() != 0xf,"inconsistent sign code of 0xf found for node %p\n",valueChild);
4362
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tsetting HasKnownCleanSign (due to ZAP) on valueReg %s on valueChild %p\n",cg->getDebug()->getName(bcdValueReg),valueChild);
4363
}
4364
}
4365
else if (mustPrivatizeValueChild ||
4366
(!valueStorageReference->isTemporaryBased() && // comment1 below
4367
childContainsAccumulatedResult && // comment2 below
4368
(!node->skipCopyOnStore() || isLeadingSignByteWidening))) // comments 2 and 3 below
4369
{
4370
// comment1 (explains the first case where a temp copy is *not* needed)
4371
// do not generate another temp copy if storing a temp that is already attached to a commoned load or pass thru node
4372
// pdstore
4373
// =>ipdload (in temp1), skipSSCopy=false <- temp1 will have the correct ref count for all its commoned uses
4374
//
4375
// comment2 (explains the second case where a temp copy is *not* needed)
4376
// pdstore
4377
// =>pdshr
4378
// here the pdshr storageReference is store based as the result of the initial (an earlier) store of the same pdshr node being marked with skipCopyOnStore.
4379
// In this case all commoned references of pdshr can use the store based storageReference as this flag guarantees the store symbol is
4380
// not killed before the last reference to the pdshr is seen.
4381
// comment3
4382
// skipCopyOnStore does not consider kills of the value that happen during the store itself. When storing a value
4383
// with a leading sign, if we have to widen that value, we move the sign code. This causes later uses of the value
4384
// child to see the wrong result unless we make a copy, so we ignore skipCopyOnStore if isLeadingSignByteWidening.
4385
4386
TR_StorageReference *valueStorageReferenceCopy = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);
4387
// when tempStorageReference != NULL then the valueReg->setStorageReference call below will not work as the temp ref count will underflow
4388
// valueReg in this case is actually pointing to the tempRegister created when copyMR was initialized
4389
// shouldn't reach here in this case as tempStorageReference is only used for the uninit and non-hint cases and this is hint path
4390
TR_ASSERT(tempStorageReference == NULL,"tempStorageReference == NULL should be null for node %p\n",node);
4391
valueReg->setIsInitialized();
4392
4393
// do not clean sign for the BCD copy as the commoned use may not be a final use (so the sign cleaning may be premature)
4394
if (cg->traceBCDCodeGen())
4395
traceMsg(comp,"\tlate pdstore privatization of valueChild : so gen MVC/memcpy with sourceSize %d to copy #%d (%s) on %s for child %s (%p) with refCnt %d (mustPrivatizeValueChild %s)\n",
4396
sourceSize,valueStorageReferenceCopy->getReferenceNumber(),cg->getDebug()->getName(valueStorageReferenceCopy->getSymbol()),
4397
node->getOpCode().getName(),valueChild->getOpCode().getName(),valueChild,valueChild->getReferenceCount(),
4398
mustPrivatizeValueChild?"yes":"no");
4399
4400
bool useSourceMR = sourceMR && !overlapZAPIsAllowed;
4401
4402
TR::Node *copySourceNode = useSourceMR ? valueChild : node;
4403
TR::MemoryReference *copySourceMR = useSourceMR ? sourceMR : targetMR;
4404
TR_StorageReference *copySourceStorageRef = useSourceMR ? valueStorageReference : targetStorageReference;
4405
4406
TR::MemoryReference *copyTargetMR = NULL;
4407
if (isBCD)
4408
{
4409
copySourceMR = reuseS390RightAlignedMemoryReference(copySourceMR, copySourceNode, copySourceStorageRef, cg);
4410
valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);
4411
copyTargetMR = generateS390RightAlignedMemoryReference(valueChild, valueStorageReferenceCopy, cg);
4412
}
4413
else
4414
{
4415
copySourceMR = reuseS390MemRefFromStorageRef(copySourceMR, 0, copySourceNode, copySourceStorageRef, cg);
4416
valueReg->setStorageReference(valueStorageReferenceCopy, valueChild);
4417
copyTargetMR = generateS390MemRefFromStorageRef(valueChild, valueStorageReferenceCopy, cg);
4418
}
4419
4420
cg->genMemCpy(copyTargetMR, node, copySourceMR, copySourceNode, sourceSize);
4421
4422
if (useSourceMR)
4423
sourceMR = copySourceMR;
4424
else
4425
targetMR = copySourceMR;
4426
4427
// If we are accumulating a leading sign type, then the above copy will include the
4428
// byte widening that we did before storing. The long-term fix is to rewrite this evaluator
4429
// to make the copy before we do any modification of the stored value.
4430
// The short term fix is to copy the widened sign back into this copy.
4431
if (childContainsAccumulatedResult && isLeadingSignByteWidening)
4432
{
4433
uint16_t signSize = 0;
4434
TR::InstOpCode::Mnemonic signCopyOp = TR::InstOpCode::bad;
4435
4436
switch (node->getType().getDataType())
4437
{
4438
case TR::ZonedDecimalSignLeadingEmbedded:
4439
signSize = 1;
4440
signCopyOp = TR::InstOpCode::MVZ;
4441
break;
4442
case TR::ZonedDecimalSignLeadingSeparate:
4443
signSize = 1;
4444
signCopyOp = TR::InstOpCode::MVC;
4445
break;
4446
case TR::UnicodeDecimalSignLeading:
4447
signSize = 2;
4448
signCopyOp = TR::InstOpCode::MVC;
4449
break;
4450
default:
4451
TR_ASSERT(0, "unknown leading sign type in pdStoreEvaluator");
4452
}
4453
4454
TR::MemoryReference *originalSignCodeMR =
4455
reuseS390LeftAlignedMemoryReference(targetMR, node, targetStorageReference, cg, node->getSize());
4456
4457
TR::MemoryReference *copyMR =
4458
reuseS390LeftAlignedMemoryReference(copyTargetMR, valueChild, valueStorageReferenceCopy, cg, sourceSize);
4459
4460
if (cg->traceBCDCodeGen())
4461
traceMsg(comp,"\tAccumulating a leading sign type: have to restore the sign code for the copy: signSize %d\n",
4462
signSize);
4463
4464
4465
generateSS1Instruction(cg, signCopyOp, node,
4466
signSize-1,
4467
copyMR,
4468
originalSignCodeMR);
4469
4470
}
4471
}
4472
}
4473
4474
rcount_t finalValueChildRefCount = valueChild->getReferenceCount();
4475
if (changeCommonedChildAddress &&
4476
finalValueChildRefCount != origValueChildRefCount)
4477
{
4478
// In this case the addressChild and the valueChild share a commoned node.
4479
// This will cause the addressChild evaluation (done as part of getting targetMR) to be an impliedMemoryReference and
4480
// the aiadd will be incremented by one (in anticipation of the valueChild using the targetStorageRef going forward)
4481
// In the trivial case where this future use is only under the current store ( == 1 check below) then have to take care to do the final
4482
// recDec of the addressChild to remove the extra increment done when forming the targetMR.
4483
//
4484
// izdstore
4485
// aiadd
4486
// ...
4487
// zdload
4488
// =>zdload
4489
//
4490
TR_ASSERT(finalValueChildRefCount > 0 && finalValueChildRefCount < origValueChildRefCount,
4491
"finalValueChildRefCount %d must be > 0 and less than origValueChildRefCount %d on store %p\n",finalValueChildRefCount,origValueChildRefCount,node);
4492
// the only way the refCounts can be not equal is if we evaluated a targetMR
4493
TR_ASSERT(targetMR,"finalValueChildRefCount %d must be equal to origValueChildRefCount %d if targetMR is non-NULL on store %p\n",finalValueChildRefCount,origValueChildRefCount,node);
4494
if (isIndirect && finalValueChildRefCount == 1)
4495
{
4496
// only remaining use is as the valueChild of this very store so must do the final recDec of the addressChild
4497
// a recDec is safe here as the targetMR would have already privatized any loads in the address child to registers
4498
if (cg->traceBCDCodeGen())
4499
traceMsg(comp,"\tfinalValueChildRefCount < origValueChildRefCount (%d < %d) and is 1 so recursively dec addrChild %s (%p) %d->%d\n",
4500
finalValueChildRefCount,origValueChildRefCount,
4501
node->getFirstChild()->getOpCode().getName(),
4502
node->getFirstChild(),
4503
node->getFirstChild()->getReferenceCount(),node->getFirstChild()->getReferenceCount()-1);
4504
cg->recursivelyDecReferenceCount(node->getFirstChild());
4505
}
4506
if (cg->traceBCDCodeGen())
4507
traceMsg(comp,"\tfinalValueChildRefCount < origValueChildRefCount (%d < %d) decrement the targetStorageReference nodeRefCount by the difference %d->%d\n",
4508
finalValueChildRefCount,origValueChildRefCount,
4509
targetStorageReference->getNodeReferenceCount(),targetStorageReference->getNodeReferenceCount()-(origValueChildRefCount-finalValueChildRefCount));
4510
// the valueChild may be commoned more than once under the addressChild of the store so dec by the difference of the before and after refCounts
4511
targetStorageReference->decrementNodeReferenceCount(origValueChildRefCount-finalValueChildRefCount);
4512
}
4513
4514
if (targetMR == NULL)
4515
{
4516
if (isIndirect)
4517
{
4518
// if changeCommonedChildAddress=true then we must not decrement the addressChild as it will be needed for future commoned references
4519
// to the valueChild
4520
// a recDec is safe here as the only way no store can be done (targetMR==NULL case) is when valueChildren have already privatized
4521
// any loads in the address child to registers when accumulating to the final store location
4522
if (!changeCommonedChildAddress)
4523
{
4524
if (cg->traceBCDCodeGen())
4525
traceMsg(comp,"\tno explicit store inst and changeCommonedChildAddress=false so recursively dec addrChild %p %d->%d\n",
4526
node->getFirstChild(),node->getFirstChild()->getReferenceCount(),node->getFirstChild()->getReferenceCount()-1);
4527
cg->recursivelyDecReferenceCount(node->getFirstChild());
4528
}
4529
else
4530
{
4531
if (cg->traceBCDCodeGen())
4532
traceMsg(comp,"\tno explicit store inst and changeCommonedChildAddress=true so do NOT recursively dec addrChild %p (refCount stays at %d)\n",
4533
node->getFirstChild(),node->getFirstChild()->getReferenceCount());
4534
}
4535
}
4536
if (cg->traceBCDCodeGen())
4537
traceMsg(comp,"\tno explicit store inst so decrement the targetStorageReference nodeRefCount %d->%d\n",
4538
targetStorageReference->getNodeReferenceCount(),targetStorageReference->getNodeReferenceCount()-1);
4539
targetStorageReference->decrementNodeReferenceCount();
4540
}
4541
4542
if (!evaluatedPaddingAnchor)
4543
cg->processUnusedNodeDuringEvaluation(NULL);
4544
4545
cg->decReferenceCount(valueChild);
4546
return NULL;
4547
}
4548
4549
/**
4550
* This only handles pdstore and pdstorei.
4551
* Other types of stores (zd, ud) can't use vector instructions.
4552
*/
4553
TR::Register*
4554
J9::Z::TreeEvaluator::pdstoreVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
4555
{
4556
traceMsg(cg->comp(), "DAA: Entering pdstoreVectorEvaluator %d\n", __LINE__);
4557
TR::Compilation *comp = cg->comp();
4558
TR::Node * valueChild = node->getValueChild();
4559
TR::Node* addressNode = node->getChild(0);
4560
// evaluate valueChild (which is assumed by the OMR layer to be the second child) to Vector register.
4561
// for this "pdStore" we assume if we evaluate value node we get Vector Register
4562
TR::Register* pdValueReg = cg->evaluate(valueChild);
4563
4564
TR_ASSERT((pdValueReg->getKind() == TR_FPR || pdValueReg->getKind() == TR_VRF),
4565
"vectorized pdstore is expecting its value in a vector register.");
4566
4567
if (cg->traceBCDCodeGen())
4568
{
4569
traceMsg(comp,"generating VSTRL for pdstore node->size = %d.\n", node->getSize());
4570
}
4571
4572
// No need to evaluate the address node of the pdstorei.
4573
// generateVSIInstruction() API will call separateIndexRegister() to separate the index
4574
// register by emitting an LA instruction. If there's a need for large displacement adjustment,
4575
// LAY will be emitted instead.
4576
TR::MemoryReference * targetMR = TR::MemoryReference::create(cg, node);;
4577
4578
// 0 we store 1 byte, 15 we store 16 bytes
4579
uint8_t lengthToStore = TR_VECTOR_REGISTER_SIZE - 1;
4580
if (node->getDecimalPrecision() > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION )
4581
{
4582
targetMR->addToOffset(node->getSize() - TR_VECTOR_REGISTER_SIZE);
4583
}
4584
else
4585
{
4586
lengthToStore = node->getSize() - 1;
4587
}
4588
4589
generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, pdValueReg, targetMR, lengthToStore);
4590
cg->decReferenceCount(valueChild);
4591
cg->decReferenceCount(addressNode);
4592
4593
traceMsg(comp, "DAA: Exiting pdstoreVectorEvaluator %d\n", __LINE__);
4594
return NULL;
4595
}
4596
4597
/**
 * BCD-typed front end for evaluateSignModifyingOperand().
 *
 * Asserts that the node has a BCD type, forwards every argument unchanged to
 * evaluateSignModifyingOperand(), and hands back the pseudo register view of
 * the opaque register that call produced (asserting that it is non-NULL).
 *
 * @param node                the BCD node to evaluate
 * @param isEffectiveNop      forwarded to evaluateSignModifyingOperand()
 * @param isNondestructiveNop forwarded to evaluateSignModifyingOperand()
 * @param initTarget          forwarded to evaluateSignModifyingOperand()
 * @param sourceMR            forwarded to evaluateSignModifyingOperand()
 * @param cg                  the code generator
 * @return the pseudo register obtained from the evaluated result
 */
TR_PseudoRegister * J9::Z::TreeEvaluator::evaluateBCDSignModifyingOperand(TR::Node *node,
                                                                           bool isEffectiveNop,
                                                                           bool isNondestructiveNop,
                                                                           bool initTarget,
                                                                           TR::MemoryReference *sourceMR,
                                                                           TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getType().isBCD(),"node %p type %s must be BCD\n",node,node->getDataType().toString());

   TR_PseudoRegister *bcdResult =
      evaluateSignModifyingOperand(node, isEffectiveNop, isNondestructiveNop, initTarget, sourceMR, cg)->getPseudoRegister();

   TR_ASSERT(bcdResult,"pseudoReg should be non-NULL for node %p\n",node);
   return bcdResult;
   }
4610
4611
4612
/**
 * \brief Evaluates the first child of a sign modifying \p node and sets up the register that will hold the node's result.
 *
 * One of three paths is taken:
 *  - \p isEffectiveNop is set: the result register is allocated from the child register and shares the child's storage reference.
 *  - the child register is already initialized: a new pseudo register is allocated for the node, the child's data state is
 *    transferred to it and it shares the child's storage reference.
 *  - the child register is not initialized: a new pseudo register is given the node's storage reference hint (or a new temporary)
 *    and, when \p initTarget is set, an MVC copies the child value from \p sourceMR into it.
 *
 * The result register is set on \p node before returning. The child's reference count is not decremented by this function.
 *
 * \param node                 the sign modifying node being evaluated
 * \param isEffectiveNop       true when the operation does not need to produce a new value
 * \param isNondestructiveNop  true when the clobber evaluate of a temporary based, initialized child may be skipped on the nop path
 * \param initTarget           true to initialize the new target storage from \p sourceMR when the child is not initialized
 * \param sourceMR             memory reference for the child value; passed to ssrClobberEvaluate and used as the MVC source
 * \param cg                   the code generator
 * \return the register set on \p node
 */
TR_OpaquePseudoRegister *
J9::Z::TreeEvaluator::evaluateSignModifyingOperand(TR::Node *node,
                                                   bool isEffectiveNop,
                                                   bool isNondestructiveNop,
                                                   bool initTarget,
                                                   TR::MemoryReference *sourceMR,
                                                   TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   const bool isBCD = node->getType().isBCD();
   TR::Node *child = node->getFirstChild();
   TR_OpaquePseudoRegister *firstReg = cg->evaluateOPRNode(child);
   const bool traceBCD = cg->traceBCDCodeGen();

   if (isBCD)
      {
      TR_ASSERT(firstReg->getPseudoRegister(),"firstReg->getPseudoRegister() is null in evaluateSignModifyingOperand for BCD node %p\n",child);
      }

   if (traceBCD)
      {
      const char *isInitText = firstReg->isInitialized() ? "yes":"no";
      const char *isNopText = isEffectiveNop ? "yes":"no";
      const char *initTargetText = initTarget ? "yes":"no";
      if (!isBCD)
         {
         traceMsg(comp,"\tevaluateSignModOperand for aggr type %s (%p) : firstReg %s (isInit %s, isEffectiveNop %s, initTarget %s)\n",
            node->getOpCode().getName(),node,cg->getDebug()->getName(firstReg),
            isInitText,isNopText,initTargetText);
         }
      else
         {
         traceMsg(comp,"\tevaluateSignModOperand %s (%p) : firstReg %s firstReg->getPseudoRegister()->prec %d (isInit %s, isLegalToCleanSign %s, isEffectiveNop %s, initTarget %s)\n",
            node->getOpCode().getName(),node,cg->getDebug()->getName(firstReg),firstReg->getPseudoRegister()->getDecimalPrecision(),
            isInitText,firstReg->getPseudoRegister()->isLegalToCleanSign()? "yes":"no",isNopText,initTargetText);
         }
      }

   TR_OpaquePseudoRegister *targetReg = NULL;

   // An initialized firstReg normally has to be protected from later clobbers -- even when this operation is an effective nop:
   //
   //    2 pdclean          <- (isEffectiveNop=true) (temp1)
   //    1   pdremSelect    <- node (isEffectiveNop=true) (temp1)
   //    2     pddivrem     <- child (temp1)
   //    ...
   //    pdshr              (clobbers temp1)
   //      =>pddivrem       (temp1)
   //    ...
   //    =>pdclean          (uses invalid clobbered temp1 - wrong)
   //
   // If temp1 were simply shared by the pdremSelect and the pdclean, the parent of the second pddivrem reference would clobber
   // temp1 and every later reference to pdclean (and pdremSelect, if any) would read the clobbered value.
   // A clobber evaluate avoids this by copying the pddivrem result from temp1 to temp2 so the commoned pdclean sees an intact temp1.
   // The cheaper alternative -- skipping the MVC copy and marking the shared temporary as read-only so that a later consumer
   // (the pdshr) makes the copy lazily -- is what the setIsReadOnlyTemporary calls below implement.
   //
   // The read-only flag is cleared at the end unless one of the paths below decides to keep it.
   bool resetReadOnly = !isEffectiveNop;

   if (isEffectiveNop)
      {
      if (isBCD)
         {
         TR_PseudoRegister *firstPseudoReg = firstReg->getPseudoRegister();
         targetReg = cg->allocatePseudoRegister(firstPseudoReg);

         if (node->getDecimalPrecision() < firstPseudoReg->getDecimalPrecision())
            {
            // Truncating a value whose sign is unknown or not the preferred plus code may produce a negative zero (unclean),
            // so conservatively drop the clean sign property.
            const bool signIsKnownPreferredPlus =
               firstPseudoReg->hasKnownOrAssumedSignCode() &&
               (firstPseudoReg->getKnownOrAssumedSignCode() == TR::DataType::getPreferredPlusCode());
            if (!signIsKnownPreferredPlus)
               {
               targetReg->getPseudoRegister()->resetCleanSign();
               }
            }
         }
      else
         {
         targetReg = cg->allocateOpaquePseudoRegister(firstReg);
         }

      // Capture the storage reference and copy the zeroDigits/deadBytes/ignoredBytes *before* any ssrClobberEvaluate:
      // a new storage reference set on firstReg could cause these values to be reset.
      TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
      targetReg->setLeftAlignedZeroDigits(firstReg->getLeftAlignedZeroDigits());
      targetReg->setRightAlignedDeadBytes(firstReg->getRightAlignedDeadBytes());
      targetReg->setRightAlignedIgnoredBytes(firstReg->getRightAlignedIgnoredBytes());
      if (traceBCD)
         {
         traceMsg(comp,"\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (signMod nop)\n",
            firstReg->getRightAlignedDeadBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (signMod nop)\n",
            firstReg->getRightAlignedIgnoredBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         if (isBCD)
            {
            traceMsg(comp,"\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (signMod nop)\n",
               firstReg->getLeftAlignedZeroDigits(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
            }
         }

      if (firstReg->isInitialized())
         {
         // Allowing the skip for non-temp based storage would require extending the skipCopyOnStore check to all nodes (i.e. not
         // restricting that flag to nodes directly under a store). That analysis would then guarantee the underlying non-temp
         // variable is not killed before its next use(s).
         const bool skipClobberEvaluate =
            !comp->getOption(TR_DisableRefinedBCDClobberEval) &&
            firstStorageReference->isTemporaryBased() &&
            isNondestructiveNop;

         if (!skipClobberEvaluate)
            {
            cg->ssrClobberEvaluate(child, sourceMR);
            }
         else
            {
            const bool childIsCommoned = child->getReferenceCount() > 1;
            const bool storageRefIsShared = firstStorageReference->getOwningRegisterCount() > 1;

            if (traceBCD)
               {
               traceMsg(comp,"%sskipping ssrClobberEvaluate for %s (%p) with child %s (%p) refCount %d %s 1 owningRegisterCount %d %s 1-- %s mark #%d (%s) as readOnlyTemp (nondestructive nop case)\n",
                  childIsCommoned ? "y^y : ":"",
                  node->getOpCode().getName(),node,child->getOpCode().getName(),child,
                  child->getReferenceCount(),childIsCommoned ? ">":"<=",
                  firstStorageReference->getOwningRegisterCount(), storageRefIsShared ? ">" : "<=",
                  childIsCommoned ? "do":"do not",firstStorageReference->getReferenceNumber(),
                  cg->getDebug()->getName(firstStorageReference->getSymbol()));
               }

            if (childIsCommoned || storageRefIsShared)
               {
               firstStorageReference->setIsReadOnlyTemporary(true, child);
               }
            resetReadOnly = false;
            }
         }

      // Attach the storage reference only *after* any ssrClobberEvaluate so the reference counts of the temporaries are correct.
      TR_StorageReference *targetStorageReference = firstStorageReference;
      targetReg->setStorageReference(targetStorageReference, node);
      if (!firstReg->isInitialized() && targetStorageReference->isNodeBased())
         {
         // NodeReferenceCounts are not used for node based hints, and a hint is only ever used once it has been initialized,
         // so a hint should never reach this point.
         TR_ASSERT( !targetStorageReference->isNodeBasedHint(),"a node based hint should have been initialized\n");
         // The first child is likely an ipdload (or a pdclean of an ipdload etc.)
         if (traceBCD)
            {
            traceMsg(comp,"\tisEffectiveNop=yes and firstReg->isInit=false case so increment the targetStorageReference nodeRefCount by (node->refCount() - 1) = %d : %d->%d\n",
               node->getReferenceCount()-1,
               targetStorageReference->getNodeReferenceCount(),
               targetStorageReference->getNodeReferenceCount()+(node->getReferenceCount()-1));
            }
         targetStorageReference->incrementNodeReferenceCount(node->getReferenceCount()-1);
         cg->privatizeStorageReference(node, targetReg, NULL);
         }
      }
   else if (firstReg->isInitialized())
      {
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      TR_StorageReference *firstStorageReference = firstReg->getStorageReference();
      // A non-hint node based storage reference would come from an ipdload node, and pdloads never initialize a register,
      // so an initialized register can only be temporary based or a node based hint.
      TR_ASSERT( firstStorageReference->isTemporaryBased() || firstStorageReference->isNodeBasedHint(),"expecting the initalized firstReg to be either a temp or a node based hint\n");
      targetReg = cg->allocatePseudoRegister(node->getDataType());

      // Copy the zeroDigits/deadBytes/ignoredBytes (the storage reference was captured above) *before* any ssrClobberEvaluate:
      // a new storage reference set on firstReg could cause these values to be reset.
      targetReg->setLeftAlignedZeroDigits(firstReg->getLeftAlignedZeroDigits());
      targetReg->setRightAlignedDeadBytes(firstReg->getRightAlignedDeadBytes());
      targetReg->setRightAlignedIgnoredBytes(firstReg->getRightAlignedIgnoredBytes());
      targetReg->getPseudoRegister()->transferDataState(firstReg->getPseudoRegister());
      if (traceBCD)
         {
         traceMsg(comp,"\t * setting rightAlignedDeadBytes %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getRightAlignedDeadBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting rightAlignedIgnoredBytes %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getRightAlignedIgnoredBytes(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         traceMsg(comp,"\t * setting savedLeftAlignedZeroDigits %d from firstReg %s to targetReg %s (signMod isInit)\n",
            firstReg->getLeftAlignedZeroDigits(),cg->getDebug()->getName(firstReg),cg->getDebug()->getName(targetReg));
         }

      const bool skipClobberEvaluate =
         !comp->getOption(TR_DisableRefinedBCDClobberEval) &&
         firstReg->canBeConservativelyClobberedBy(node);

      if (!skipClobberEvaluate)
         {
         cg->ssrClobberEvaluate(child, sourceMR);
         }
      else
         {
         // pdclean
         // 3  pdadd
         //
         // AP   t1,t2
         // ZAP  t1,t1   // a conservative clobber: it does not modify the pdadd value and there are no special sign codes to preserve
         //
         // The t1 storageReference is marked readOnly and pdadd is added to the nodeToUpdateOnClobber list so that, if/when t1 is
         // actually clobbered, the commoned register/node can have its storageRef updated to point to the saved value.
         const bool childIsCommoned = child->getReferenceCount() > 1;
         const bool storageRefIsShared = firstStorageReference->getOwningRegisterCount() > 1;

         if (traceBCD)
            {
            traceMsg(comp,"%sskipping ssrClobberEvaluate for %s (%p) with child %s (%p) refCount %d %s 1 owningRegisterCount %d %s 1-- %s mark #%d (%s) as readOnlyTemp (isInit case)\n",
               childIsCommoned ? "y^y : ":"",
               node->getOpCode().getName(),node,child->getOpCode().getName(),child,
               child->getReferenceCount(),childIsCommoned ? ">":"<=",
               firstStorageReference->getOwningRegisterCount(), storageRefIsShared ? ">" : "<=",
               childIsCommoned ? "do":"do not",firstStorageReference->getReferenceNumber(),
               cg->getDebug()->getName(firstStorageReference->getSymbol()));
            }

         if (childIsCommoned || storageRefIsShared)
            {
            firstStorageReference->setIsReadOnlyTemporary(true, child);
            }
         resetReadOnly = false;
         }

      // Attach the storage reference only *after* any ssrClobberEvaluate so the reference counts of the temporaries are correct.
      targetReg->setStorageReference(firstStorageReference, node);
      targetReg->setIsInitialized();
      }
   else
      {
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      targetReg = cg->allocatePseudoRegister(node->getDataType());

      // Prefer the node's storage reference hint; otherwise create a fresh temporary.
      const bool useHint = node->getOpCode().canHaveStorageReferenceHint() && node->getStorageReferenceHint();
      TR_StorageReference *targetStorageReference =
         useHint ? node->getStorageReferenceHint()
                 : TR_StorageReference::createTemporaryBasedStorageReference(node->getStorageReferenceSize(), comp);
      targetReg->setStorageReference(targetStorageReference, node);

      if (initTarget)
         {
         const int32_t srcLiveSymbolSize = firstReg->getLiveSymbolSize();
         const int32_t targetLiveSymbolSize = targetReg->getLiveSymbolSize();
         const bool isTruncation = node->getSize() < firstReg->getSize();
         int32_t mvcSize = node->getSize();

         // Widen the MVC when the source has left aligned zero digits that can be captured by the initializing copy.
         const bool copyKnownZeroBytes =
            firstReg->trackZeroDigits() &&
            (targetLiveSymbolSize == srcLiveSymbolSize) &&
            (srcLiveSymbolSize > mvcSize) &&
            (firstReg->getBytesToClear(mvcSize, srcLiveSymbolSize) == 0);

         if (copyKnownZeroBytes)
            {
            // Including already zero'd bytes is illegal if targetLiveSymbolSize < srcLiveSymbolSize, and legal but pointless if
            // targetLiveSymbolSize > srcLiveSymbolSize (the extra zero bytes would not be tracked on the targetReg), so this is
            // only done when the two sizes are equal.
            //
            // Here the source register has some zero bytes above its register size, so the MVC is extended to include them,
            // e.g. if targetReg->getSize()=6 but the childLiveSymbolSize=9 then the mvcSize grows by 3 to 9.
            if (traceBCD)
               {
               traceMsg(comp,"\tupper %d bytes on srcReg %s are already clear so set mvcSize=%d\n", srcLiveSymbolSize-mvcSize,cg->getDebug()->getName(firstReg),srcLiveSymbolSize);
               }
            targetReg->addRangeOfZeroBytes(mvcSize,srcLiveSymbolSize);
            mvcSize = srcLiveSymbolSize;
            }
         else if (!isTruncation)
            {
            // On a widening only initialize up to the source size.
            if (traceBCD)
               {
               traceMsg(comp,"\tfirstReg->getSize() <= node->getSize() (%d <= %d) so reduce mvcSize\n",firstReg->getSize(),node->getSize());
               }
            mvcSize = firstReg->getSize();
            }

         if (isTruncation && node->getType().isSeparateSign())
            {
            const int32_t sizeWithSign = mvcSize;
            mvcSize -= node->getDataType().separateSignSize();
            if (traceBCD)
               {
               traceMsg(comp,"\tnode %s is a truncating separateSign type so reduce mvcSize by sign size (%d->%d)\n",
                  node->getOpCode().getName(),sizeWithSign,mvcSize);
               }
            }

         if (traceBCD)
            {
            traceMsg(comp,"\tfirstReg->isInitialized()==false so gen MVC to init with mvcSize %d\n", mvcSize);
            }
         TR_ASSERT( sourceMR,"source memory reference should have been created by caller\n");
         generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
                                mvcSize-1,
                                generateS390RightAlignedMemoryReference(node, targetStorageReference, cg),
                                generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
         targetReg->getPseudoRegister()->transferDataState(firstReg->getPseudoRegister());
         targetReg->setIsInitialized();
         }
      }

   if (isEffectiveNop || firstReg->isInitialized())
      {
      cg->freeUnusedTemporaryBasedHint(node);
      }

   if (firstReg->getSize() < node->getSize())
      {
      // Widening: the result keeps the (smaller) precision of the source register.
      TR_ASSERT( isBCD, "this path should only be taken for BCD nodes (unless we extend support for aggr types)\n");
      if (traceBCD)
         {
         traceMsg(comp,"\twidening: firstRegSize < nodeSize (%d < %d) so set targetReg->getPseudoRegister()->prec to firstReg->prec (%d)\n",firstReg->getSize(), node->getSize(),firstReg->getPseudoRegister()->getDecimalPrecision());
         }
      targetReg->getPseudoRegister()->setDecimalPrecision(firstReg->getPseudoRegister()->getDecimalPrecision());
      }

   if (traceBCD && targetReg->getStorageReference()->isReadOnlyTemporary())
      {
      traceMsg(comp,"%sreset readOnlyTemp flag on storageRef #%d (%s) (signMod case)\n",
         resetReadOnly?"":"do not ",targetReg->getStorageReference()->getReferenceNumber(),cg->getDebug()->getName(targetReg->getStorageReference()->getSymbol()));
      }

   if (resetReadOnly)
      {
      targetReg->getStorageReference()->setIsReadOnlyTemporary(false, NULL);
      }

   node->setRegister(targetReg);
   return targetReg;
   }
4871
4872
/**
 * \brief Sets the sign code of the first child's value to \p sign using the non-vector (storage based) path.
 *
 * Packed types are handled by simpleWideningOrTruncation with setSign=true. Zoned decimal nodes go through
 * evaluateBCDSignModifyingOperand and, unless the operation is an effective nop, a sign code setting sequence
 * is generated on the target storage. Any other data type triggers an assert.
 *
 * \param node  the set sign node; its first child is the source value
 * \param sign  the sign code to set
 * \param cg    the code generator
 * \return the register set on \p node
 */
TR::Register *J9::Z::TreeEvaluator::pdSetSignHelper(TR::Node *node, int32_t sign, TR::CodeGenerator *cg)
   {
   TR::Node *srcNode = node->getFirstChild();
   TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
   TR_PseudoRegister *targetReg = NULL;

   if (node->getType().isAnyPacked())
      {
      targetReg = simpleWideningOrTruncation(node, srcReg, true, sign, cg); // setSign=true
      }
   else if (node->getDataType() == TR::ZonedDecimal)
      {
      // Nothing has to be generated when the sign is ignored or the source is already known/assumed to carry it.
      bool isEffectiveNop = (sign == TR::DataType::getIgnoredSignCode()) || srcReg->knownOrAssumedSignCodeIs(sign);

      // A source memory reference is only needed when the target has to be initialized from an uninitialized source.
      TR::MemoryReference *sourceMR = NULL;
      if (!srcReg->isInitialized() && !isEffectiveNop)
         sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);

      targetReg = evaluateBCDSignModifyingOperand(node, isEffectiveNop, isEffectiveNop, true, sourceMR, cg); // initTarget=true

      // The result precision is the smaller of the source and node precisions.
      bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();
      if (isTruncation)
         targetReg->setDecimalPrecision(node->getDecimalPrecision());
      else
         targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());

      if (!isEffectiveNop)
         {
         TR_StorageReference *targetStorageReference = targetReg->getStorageReference();
         TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
         int32_t destLength = targetReg->getSize();
         cg->genSignCodeSetting(node, targetReg, destLength, destMR, sign, srcReg, 0, false); // digitsToClear=0, numericNibbleIsZero=false
         }
      }
   else
      {
      TR_ASSERT(false,"unexpected datatype %s in pdSetSignHelper\n",node->getDataType().toString());
      }

   node->setRegister(targetReg);
   cg->decReferenceCount(srcNode);
   return targetReg;
   }
4912
4913
/**
4914
* \brief Evaluator function to evaluate pdSetSign opCode
4915
*/
4916
TR::Register*
4917
J9::Z::TreeEvaluator::pdSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
4918
{
4919
cg->traceBCDEntry("pdSetSign",node);
4920
cg->generateDebugCounter("PD-Op/pdsetsign", 1, TR::DebugCounter::Cheap);
4921
4922
TR::Register *targetReg = NULL;
4923
TR::Node *signNode = node->getSecondChild();
4924
4925
TR_ASSERT(signNode->getOpCode().isLoadConst() && signNode->getOpCode().getSize() <= 4,
4926
"expecting a <= 4 size integral constant set sign amount\n");
4927
TR_ASSERT(node->getFirstChild()->getType().isAnyPacked(), "expecting setSign's first child of PD data type");
4928
4929
int32_t sign = (int32_t)signNode->get64bitIntegralValue();
4930
cg->decReferenceCount(signNode);
4931
4932
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
4933
if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)
4934
{
4935
targetReg = vectorPerformSignOperationHelper(node, cg, false, 0, node->hasKnownOrAssumedCleanSign(), SignOperationType::setSign, false, true, sign);
4936
}
4937
else
4938
{
4939
targetReg = pdSetSignHelper(node, sign, cg);
4940
}
4941
4942
cg->traceBCDExit("pdSetSign",node);
4943
return targetReg;
4944
}
4945
4946
/**
4947
* TR::pdclear
4948
* TR::pdclearSetSign
4949
* current limitation for this is that leftMostDigit must equal digitsToClear (i.e. clearing right most digits)
4950
*/
4951
TR::Register *
4952
J9::Z::TreeEvaluator::pdclearEvaluator(TR::Node *node, TR::CodeGenerator *cg)
4953
{
4954
cg->traceBCDEntry("pdclear",node);
4955
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
4956
1, TR::DebugCounter::Cheap);
4957
TR_ASSERT(!node->getOpCode().isSetSign(),"isSetSign on child not supported for node %s (%p)\n",node->getOpCode().getName(),node);
4958
bool isSetSign = node->getOpCode().isSetSignOnNode();
4959
TR_RawBCDSignCode setSignValue = isSetSign ? node->getSetSign() : raw_bcd_sign_unknown;
4960
int32_t sign = TR::DataType::getValue(setSignValue);
4961
TR::Compilation *comp = cg->comp();
4962
4963
TR_ASSERT(!isSetSign || setSignValue != raw_bcd_sign_unknown,"setSignValue must be on the node for %p\n",node);
4964
4965
TR::Node *srcNode = node->getChild(0);
4966
TR::Node *leftMostDigitNode = node->getChild(1);
4967
TR::Node *digitsToClearNode = node->getChild(2);
4968
TR::Node *literalAddrNode = (isSetSign && node->getNumChildren() > 3) ? node->getChild(3) : NULL;
4969
4970
TR_ASSERT(leftMostDigitNode->getOpCode().isLoadConst() && leftMostDigitNode->getSize() <= 4,
4971
"leftMostDigitNode %p must be a <= 4 size const\n",leftMostDigitNode);
4972
TR_ASSERT(digitsToClearNode->getOpCode().isLoadConst() && digitsToClearNode->getSize() <= 4,
4973
"digitsToClearNode %p must be a <= 4 size const\n",digitsToClearNode);
4974
4975
int32_t leftMostDigit = leftMostDigitNode->get32bitIntegralValue();
4976
int32_t leftMostByte = TR::DataType::packedDecimalPrecisionToByteLength(leftMostDigit);
4977
int32_t digitsToClear = digitsToClearNode->get32bitIntegralValue();
4978
int32_t rightMostDigit = leftMostDigit - digitsToClear;
4979
4980
TR_ASSERT(leftMostDigit == digitsToClear,"leftMostDigit %d must equal digitsToClear for node %p\n",leftMostDigit,digitsToClear,node);
4981
4982
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
4983
bool isInitialized = srcReg->isInitialized();
4984
if (cg->traceBCDCodeGen())
4985
traceMsg(comp,"\t%s (%p) : srcNode %s (%p) isInit=%s, digitClearRange %d->%d (leftMostByte=%d), digitsToClear = %d (isSetSign %s, sign 0x%x)\n",
4986
node->getOpCode().getName(),node,
4987
srcNode->getOpCode().getName(),srcNode,
4988
isInitialized ? "yes":"no",
4989
leftMostDigit,rightMostDigit,leftMostByte,digitsToClear,isSetSign?"yes":"no",sign);
4990
TR_StorageReference *srcStorageReference = srcReg->getStorageReference();
4991
TR::MemoryReference *sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcStorageReference, cg);
4992
4993
TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, sourceMR, cg); // initTarget=true
4994
TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);
4995
4996
bool isTruncation = srcReg->getDecimalPrecision() > node->getDecimalPrecision();
4997
if (isTruncation)
4998
targetReg->setDecimalPrecision(node->getDecimalPrecision());
4999
else
5000
targetReg->setDecimalPrecision(srcReg->getDecimalPrecision());
5001
5002
int32_t targetRegPrec = targetReg->getDecimalPrecision();
5003
5004
if (cg->traceBCDCodeGen())
5005
traceMsg(comp,"\tset targetReg prec to %d (isTrucation %s)\n",targetRegPrec,isTruncation?"yes":"no");
5006
5007
bool truncatedIntoClearedDigits = false;
5008
if (targetRegPrec < leftMostDigit)
5009
{
5010
truncatedIntoClearedDigits = true;
5011
int32_t precDelta = leftMostDigit - targetRegPrec;
5012
leftMostDigit -= precDelta;
5013
leftMostByte = TR::DataType::packedDecimalPrecisionToByteLength(leftMostDigit);
5014
digitsToClear -= precDelta;
5015
rightMostDigit = leftMostDigit - digitsToClear;
5016
if (cg->traceBCDCodeGen())
5017
traceMsg(comp,"\ttargetRegPrec %d < leftMostDigit %d : update leftMostDigit %d->%d, leftMostByte = %d, digitsToClear %d->%d, rightMostDigit = %d\n",
5018
targetRegPrec,leftMostDigit+precDelta,leftMostDigit+precDelta,leftMostDigit,leftMostByte,digitsToClear+precDelta,digitsToClear,rightMostDigit);
5019
}
5020
5021
// do not bother checking !node->canSkipPadByteClearing() below because being able to clear the full byte generally results in better codegen
5022
// coincidentEvenDigitCorrection is true when leftMostNibble == targetRegPrec so instead of generating separate NI 0xF0 and then NI 0x0F on the same byte
5023
// just inc digitsToClear below so this full byte clearing can be done in one instruction
5024
// e.g. p4v0 = (p15v0 / 10000) * 10000
5025
int32_t leftMostByteForClear = leftMostByte;
5026
bool needsEvenDigitCorrection = !truncatedIntoClearedDigits && isTruncation && targetReg->isEvenPrecision();
5027
bool coincidentEvenDigitCorrection = needsEvenDigitCorrection && (leftMostByteForClear == targetReg->getSize());
5028
if (isEven(leftMostDigit))
5029
{
5030
if (cg->traceBCDCodeGen())
5031
traceMsg(comp,"\tleftMostDigit %d isEven : isInit=%s, truncatedIntoClearedDigits=%s, coincidentEvenDigitCorrection=%s -- adjust the leftMostNibble to preserve or clear the leftMostByte\n",
5032
leftMostDigit,isInitialized?"yes":"no",truncatedIntoClearedDigits?"yes":"no",needsEvenDigitCorrection?"yes":"no");
5033
5034
if (isInitialized && !truncatedIntoClearedDigits && !coincidentEvenDigitCorrection) // full byte will be cleared if truncatedIntoClearedDigits or coincidentEvenDigitCorrection are true
5035
{
5036
if (cg->traceBCDCodeGen())
5037
traceMsg(comp,"\t\tisInit=yes,truncatedIntoClearedDigits=no,coincidentEvenDigitCorrection=no so dec %d->%d to preserve initialized leftMostNibble\n",digitsToClear,digitsToClear-1);
5038
digitsToClear--; // must preserve the top byte and then clear just the top digit after the clearAndSetSign
5039
leftMostByteForClear--;
5040
}
5041
else
5042
{
5043
if (cg->traceBCDCodeGen())
5044
traceMsg(comp,"\t\tisInit=no or truncatedIntoClearedDigits=yes or coincidentEvenDigitCorrection=yes so inc %d->%d to clear initialized leftMostNibble\n",digitsToClear,digitsToClear+1);
5045
digitsToClear++; // clear a larger even # of digits and put back
5046
}
5047
}
5048
5049
if (!isTruncation && srcReg->isEvenPrecision() && srcReg->isLeftMostNibbleClear())
5050
{
5051
if (cg->traceBCDCodeGen())
5052
traceMsg(comp,"\twidening with even srcRegPrec %d update targetReg with zero range for leftMostNibble %d->%d\n",
5053
srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision()+1);
5054
targetReg->addRangeOfZeroDigits(srcReg->getDecimalPrecision(),srcReg->getDecimalPrecision()+1);
5055
}
5056
5057
// clearAndSetSign will be clearing full bytes so half byte values or signs will be put back afterwards
5058
clearAndSetSign(node, targetReg, leftMostByteForClear, digitsToClear, destMR, srcReg, sourceMR, isSetSign, sign, isInitialized, cg); // isSignInitialized=isInitialized
5059
5060
if (!(truncatedIntoClearedDigits || coincidentEvenDigitCorrection))
5061
{
5062
if (isEven(leftMostDigit))
5063
{
5064
if (isInitialized)
5065
{
5066
{
5067
if (cg->traceBCDCodeGen())
5068
traceMsg(comp,"\tisInit=yes : gen NI to clear right most nibble at byte %d\n",leftMostByte);
5069
generateSIInstruction(cg, TR::InstOpCode::NI, node,
5070
reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, leftMostByte),
5071
0xF0);
5072
}
5073
}
5074
else
5075
{
5076
if (cg->traceBCDCodeGen())
5077
traceMsg(comp,"\tisInit=no : gen MVZ to restore left most nibble at byte %d\n",leftMostByte);
5078
int32_t mvzSize = 1;
5079
generateSS1Instruction(cg, TR::InstOpCode::MVZ, node,
5080
mvzSize-1,
5081
reuseS390LeftAlignedMemoryReference(destMR, node, targetReg->getStorageReference(), cg, leftMostByte),
5082
reuseS390LeftAlignedMemoryReference(sourceMR, srcNode, srcStorageReference, cg, leftMostByte));
5083
}
5084
}
5085
5086
if (needsEvenDigitCorrection && !node->canSkipPadByteClearing())
5087
cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, destMR);
5088
}
5089
5090
cg->decReferenceCount(srcNode);
5091
cg->decReferenceCount(leftMostDigitNode);
5092
cg->decReferenceCount(digitsToClearNode);
5093
cg->processUnusedNodeDuringEvaluation(literalAddrNode);
5094
cg->traceBCDExit("pdclear",node);
5095
return targetReg;
5096
}
5097
5098
/**
 * \brief Evaluator for pdchk: tests the packed decimal child and returns the outcome in a general purpose register.
 *
 * A VTP (vector path) or TP (storage path) instruction is generated on the child value, then IPM followed by a
 * right shift of 28 is applied to the result register.
 * NOTE(review): the IPM + shift sequence presumably leaves the condition code of the test instruction in the low
 * bits of the result -- confirm against the z/Architecture Principles of Operation.
 *
 * \param node  the pdchk node; its first child is the packed decimal value to test
 * \param cg    the code generator
 * \return the general purpose register set on \p node
 */
TR::Register *
J9::Z::TreeEvaluator::pdchkEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   const bool is64Bit = comp->target().is64Bit();

   // Start from a zeroed result register.
   TR::Register *chkResultReg = cg->allocateRegister(TR_GPR);
   TR::InstOpCode::Mnemonic clearOp = is64Bit ? TR::InstOpCode::XGR : TR::InstOpCode::XR;
   generateRRInstruction(cg, clearOp, node, chkResultReg, chkResultReg);

   TR::Node *pdloadNode = node->getFirstChild();

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool useVectorBCD =
      (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !comp->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   if (useVectorBCD)
      {
      TR::Register *pdVectorReg = cg->evaluate(pdloadNode);
      generateVRRgInstruction(cg, TR::InstOpCode::VTP, node, pdVectorReg);
      }
   else
      {
      TR_PseudoRegister *pdPseudoReg = cg->evaluateBCDNode(pdloadNode);
      TR_StorageReference *pdStorageReference = pdPseudoReg->getStorageReference();
      TR::MemoryReference *tempMR = generateS390RightAlignedMemoryReference(pdloadNode, pdStorageReference, cg);
      generateRSLInstruction(cg, TR::InstOpCode::TP, pdloadNode, pdPseudoReg->getSize()-1, tempMR);
      }

   generateRRInstruction(cg, TR::InstOpCode::IPM, node, chkResultReg, chkResultReg);

   if (is64Bit)
      {
      generateRRInstruction(cg, TR::InstOpCode::LLGTR, node, chkResultReg, chkResultReg);
      generateRSInstruction(cg, TR::InstOpCode::SRLG, node, chkResultReg, chkResultReg, 28);
      }
   else
      {
      generateRSInstruction(cg, TR::InstOpCode::SRL, node, chkResultReg, 28);
      }

   node->setRegister(chkResultReg);
   cg->decReferenceCount(pdloadNode);
   return chkResultReg;
   }
5140
5141
/**
5142
* pd<op>Evaluator - various binary packed decimal evaluators
5143
*/
5144
void
5145
J9::Z::TreeEvaluator::correctPackedArithmeticPrecision(TR::Node *node, int32_t op1EncodingSize, TR_PseudoRegister *targetReg, int32_t computedResultPrecision, TR::CodeGenerator * cg)
5146
{
5147
int32_t computedResultSize = TR::DataType::packedDecimalPrecisionToByteLength(computedResultPrecision);
5148
if (op1EncodingSize >= computedResultSize)
5149
targetReg->removeRangeOfZeroDigits(0, computedResultPrecision);
5150
else
5151
targetReg->removeRangeOfZeroBytes(0, op1EncodingSize);
5152
5153
int32_t resultPrecision = std::min<int32_t>(computedResultPrecision, node->getDecimalPrecision());
5154
targetReg->setDecimalPrecision(resultPrecision);
5155
if (cg->traceBCDCodeGen())
5156
traceMsg(cg->comp(),"\tset targetRegPrec to min(computedResultPrecision, nodePrec) = min(%d, %d) = %d (targetRegSize = %d)\n",
5157
computedResultPrecision,node->getDecimalPrecision(),resultPrecision,targetReg->getSize());
5158
}
5159
5160
/**
 * \brief Evaluator for pdadd.
 *
 * Uses the vector helper with VAP when the vector packed decimal facility is available and vector BCD is not
 * disabled, or when the TR_enableVectorBCD environment variable is set; otherwise uses the storage based helper with AP.
 *
 * \param node  the pdadd node
 * \param cg    the code generator
 * \return the register holding the result
 */
TR::Register *
J9::Z::TreeEvaluator::pdaddEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pdadd",node);
   cg->generateDebugCounter("PD-Op/pdadd", 1, TR::DebugCounter::Cheap);

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool useVectorBCD =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   TR::Register *reg = useVectorBCD
      ? pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VAP, cg)
      : pdaddsubEvaluatorHelper(node, TR::InstOpCode::AP, cg);

   cg->traceBCDExit("pdadd",node);
   return reg;
   }
5181
5182
/**
 * \brief Evaluator for pdsub.
 *
 * Uses the vector helper with VSP when the vector packed decimal facility is available and vector BCD is not
 * disabled, or when the TR_enableVectorBCD environment variable is set; otherwise uses the storage based helper with SP.
 *
 * \param node  the pdsub node
 * \param cg    the code generator
 * \return the register holding the result
 */
TR::Register *
J9::Z::TreeEvaluator::pdsubEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pdsub",node);
   cg->generateDebugCounter("PD-Op/pdsub", 1, TR::DebugCounter::Cheap);

   static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
   const bool useVectorBCD =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD))
      || isVectorBCDEnv;

   TR::Register *reg = useVectorBCD
      ? pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VSP, cg)
      : pdaddsubEvaluatorHelper(node, TR::InstOpCode::SP, cg);

   cg->traceBCDExit("pdsub",node);
   return reg;
   }
5203
5204
/**
 * \brief Computes the precision a packed add/sub of the two children of \p node may produce.
 *
 * The result is the larger of the two operand register precisions, plus one digit unless either child is known
 * to be zero. A child that does not yet have a pseudo register is evaluated here.
 *
 * \param node  the add/sub node
 * \param cg    the code generator
 * \return the computed result precision
 */
int32_t getAddSubComputedResultPrecision(TR::Node *node, TR::CodeGenerator * cg)
   {
   TR::Node *lhsNode = node->getFirstChild();
   TR::Node *rhsNode = node->getSecondChild();

   // Reuse an existing register where there is one; otherwise evaluate (first child before second).
   TR_PseudoRegister *lhsReg = lhsNode->getPseudoRegister();
   if (!lhsReg)
      {
      lhsReg = cg->evaluateBCDNode(lhsNode);
      }

   TR_PseudoRegister *rhsReg = rhsNode->getPseudoRegister();
   if (!rhsReg)
      {
      rhsReg = cg->evaluateBCDNode(rhsNode);
      }

   // An extra digit is only needed when neither operand is zero.
   const bool bothNonZero = !lhsNode->isZero() && !rhsNode->isZero();
   const int32_t carryDigits = bothNonZero ? 1 : 0;

   return std::max(lhsReg->getDecimalPrecision(), rhsReg->getDecimalPrecision()) + carryDigits;
   }
5222
5223
/**
5224
* This evaluator helper function uses BCD vector instructions for PD arithmetic operations:
5225
*
5226
* -- pdadd
5227
* -- pdsub
5228
* -- pdmul
5229
* -- pddiv
5230
*
5231
* whose corresponding BCD vector instructions are of VRI-f format.
5232
*/
5233
TR::Register *
5234
J9::Z::TreeEvaluator::pdArithmeticVectorEvaluatorHelper(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)
5235
{
5236
int32_t immediateValue = node->getDecimalPrecision();
5237
TR_ASSERT_FATAL((immediateValue >> 8) == 0, "Decimal precision (%d) exceeds 1 byte", immediateValue);
5238
5239
if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
5240
{
5241
immediateValue |= 0x80;
5242
}
5243
TR::Node* firstChild = node->getFirstChild();
5244
TR::Node* secondChild = node->getSecondChild();
5245
5246
TR::Register* firstChildReg = cg->evaluate(firstChild);
5247
TR::Register* secondChildReg = cg->evaluate(secondChild);
5248
5249
// For simple PD Decimal Operations, let's set the mask to 0: no force positive nor set CC
5250
TR::Register* targetReg = cg->allocateRegister(TR_VRF);
5251
generateVRIfInstruction(cg, op, node, targetReg, firstChildReg, secondChildReg, immediateValue, 0x1);
5252
node->setRegister(targetReg);
5253
5254
cg->decReferenceCount(firstChild);
5255
cg->decReferenceCount(secondChild);
5256
5257
return targetReg;
5258
}
5259
5260
/**
5261
* Handles pdadd,pdsub
5262
*/
5263
TR::Register *
5264
J9::Z::TreeEvaluator::pdaddsubEvaluatorHelper(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)
5265
{
5266
bool produceOverflowMessage = node->getOpCode().isPackedArithmeticOverflowMessage();
5267
bool isAdd = (op == TR::InstOpCode::AP);
5268
TR::Node *firstChild = node->getFirstChild();
5269
TR::Node *secondChild = node->getSecondChild();
5270
TR::Compilation *comp = cg->comp();
5271
5272
TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);
5273
bool trackSignState=false;
5274
bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is an AP/SP coming
5275
TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=0
5276
cg->decReferenceCount(firstChild); // dec bef evaluating the second child to avoid an unneeded clobber evaluate
5277
TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);
5278
TR_StorageReference *targetStorageReference = targetReg->getStorageReference();
5279
TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
5280
TR::MemoryReference *secondMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);
5281
5282
5283
int32_t op1EncodingPrecision = cg->getPDAddSubEncodedPrecision(node, firstReg);
5284
int32_t op1EncodingSize = cg->getPDAddSubEncodedSize(node, firstReg);
5285
// The preparatory clearing operations need a length set so base it on the op1EncodingSize but the final returned precision will be set after the AP/SP instruction has been generated
5286
targetReg->setDecimalPrecision(op1EncodingPrecision);
5287
5288
if (cg->traceBCDCodeGen())
5289
traceMsg(comp,"\t%s: produceOverflowMessage=%s, node->getSize()=%d, firstReg->getSize()=%d, secondReg->getSize()=%d, op1EncodingPrec=%d, op1EncodingSize=%d\n",
5290
node->getOpCode().getName(),produceOverflowMessage?"yes":"no", node->getSize(), firstReg->getSize(), secondReg->getSize(),op1EncodingPrecision, targetReg->getSize());
5291
5292
if (op1EncodingSize > firstReg->getSize())
5293
cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), firstReg->getSize(), op1EncodingSize, true); // widenOnLeft=true
5294
5295
// endByte=firstReg->getSize but for types like packed where the sign is right aligned this endByte setting does not matter
5296
// as the leftMostByte for the sign is always known (== 1)
5297
cg->correctBadSign(firstChild, firstReg, firstReg->getSize(), destMR);
5298
cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), secondMR);
5299
5300
int32_t computedResultPrecision = getAddSubComputedResultPrecision(node, cg);
5301
bool mayOverflow = computedResultPrecision > node->getDecimalPrecision();
5302
correctPackedArithmeticPrecision(node, op1EncodingSize, targetReg, computedResultPrecision, cg);
5303
5304
if (cg->traceBCDCodeGen())
5305
traceMsg(comp,"\tcomputedResultPrecision %s nodePrec (%d %s %d) -- mayOverflow = %s\n",
5306
mayOverflow?">":"<=",computedResultPrecision,mayOverflow?">":"<=",node->getDecimalPrecision(),mayOverflow?"yes":"no");
5307
5308
TR::LabelSymbol * cFlowRegionStart = NULL;
5309
TR::LabelSymbol * cflowRegionEnd = NULL;
5310
TR::RegisterDependencyConditions * deps = NULL;
5311
if (mayOverflow && produceOverflowMessage)
5312
{
5313
cFlowRegionStart = generateLabelSymbol(cg);
5314
cflowRegionEnd = generateLabelSymbol(cg);
5315
deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);
5316
5317
if (destMR->getIndexRegister())
5318
deps->addPostConditionIfNotAlreadyInserted(destMR->getIndexRegister(), TR::RealRegister::AssignAny);
5319
if (destMR->getBaseRegister())
5320
deps->addPostConditionIfNotAlreadyInserted(destMR->getBaseRegister(), TR::RealRegister::AssignAny);
5321
if (secondMR->getIndexRegister())
5322
deps->addPostConditionIfNotAlreadyInserted(secondMR->getIndexRegister(), TR::RealRegister::AssignAny);
5323
if (secondMR->getBaseRegister())
5324
deps->addPostConditionIfNotAlreadyInserted(secondMR->getBaseRegister(), TR::RealRegister::AssignAny);
5325
5326
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, deps);
5327
cFlowRegionStart->setStartInternalControlFlow();
5328
}
5329
5330
generateSS2Instruction(cg, op, node,
5331
op1EncodingSize-1,
5332
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
5333
secondReg->getSize()-1,
5334
generateS390RightAlignedMemoryReference(*secondMR, node, 0, cg));
5335
5336
targetReg->setHasKnownValidSignAndData();
5337
5338
if (mayOverflow)
5339
{
5340
if (targetReg->isEvenPrecision() && !node->canSkipPadByteClearing())
5341
{
5342
cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));
5343
}
5344
targetReg->setHasKnownPreferredSign();
5345
if (cg->traceBCDCodeGen())
5346
traceMsg(comp,"\toverflow may occur so set HasKnownPreferredSign = true on reg %s\n",cg->getDebug()->getName(targetReg));
5347
if (produceOverflowMessage)
5348
{
5349
// The only overflow message handled is overflow into the next byte (i.e. not 'even' to 'odd' precision 'overflow').
5350
// This is also an important restriction as no NI for the top nibble is done here and if it were to be done then this
5351
// would also overwrite the condition code in the isFoldedIf=true case
5352
TR_ASSERT(targetReg->isOddPrecision(),"expecting targetPrecision to be odd and not %d for addsubOverflowMessage\n",targetReg->getDecimalPrecision());
5353
5354
TR::LabelSymbol *oolEntryPoint = generateLabelSymbol(cg);
5355
TR::LabelSymbol *oolReturnPoint = generateLabelSymbol(cg);
5356
5357
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BO, node, oolEntryPoint);
5358
5359
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cflowRegionEnd, deps);
5360
cflowRegionEnd->setEndInternalControlFlow();
5361
}
5362
}
5363
else
5364
{
5365
targetReg->setHasKnownCleanSign();
5366
if (cg->traceBCDCodeGen())
5367
{
5368
if (firstChild->isZero() || secondChild->isZero())
5369
traceMsg(comp,"\t%s firstChild %p isZero=%s or secondChild %p isZero=%s so nibble clearing is NOT required and set HasKnownCleanSign = true on reg %s\n",
5370
isAdd?"add":"sub",firstChild,firstChild->isZero()?"yes":"no",secondChild,secondChild->isZero()?"yes":"no",cg->getDebug()->getName(targetReg));
5371
else
5372
traceMsg(comp,"\t%s result prec %d is > both reg1 prec %d and reg2 prec %d so nibble clearing is NOT required and set HasKnownCleanSign = true on reg %s\n",
5373
isAdd?"add":"sub",node->getDecimalPrecision(),firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision(),cg->getDebug()->getName(targetReg));
5374
}
5375
// An NI to clear the top nibble is never required in this case:
5376
// If the largest source is even (eg prec 4) then biggest the result can be is odd (i.e. +1 largest source -- prec 5)
5377
// and on an odd result no clearing is needed
5378
// If the largest source is odd (eg prec 5) then the biggest the result can be is even (i.e. +1 largest source -- prec 6)
5379
// and the top nibble must already be clear as the whole byte must be clear before the operation
5380
}
5381
5382
5383
if (isAdd &&
5384
firstReg->hasKnownOrAssumedPositiveSignCode() &&
5385
secondReg->hasKnownOrAssumedPositiveSignCode())
5386
{
5387
if (cg->traceBCDCodeGen())
5388
traceMsg(comp, "\tfirstReg and secondReg have positive sign codes so set targetReg sign code to the preferred positive sign 0x%x\n", TR::DataType::getPreferredPlusCode());
5389
// positive+positive=positive and then AP will clean the positive sign to 0xc
5390
targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());
5391
}
5392
5393
node->setRegister(targetReg);
5394
cg->decReferenceCount(secondChild);
5395
return targetReg;
5396
}
5397
5398
/**
 * Handles pdmul by dispatching to either the vector helper (VMP) or the non-vector helper.
 *
 * The vector path is taken when the target CPU supports the vector packed decimal feature and
 * TR_DisableVectorBCD is not set, or whenever the TR_enableVectorBCD environment variable is present.
 */
TR::Register *
J9::Z::TreeEvaluator::pdmulEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pdmul",node);
   cg->generateDebugCounter("PD-Op/pdmul", 1, TR::DebugCounter::Cheap);

   static char *vectorBCDOverride = feGetEnv("TR_enableVectorBCD");
   const bool useVectorInstructions =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
       !cg->comp()->getOption(TR_DisableVectorBCD)) ||
      (vectorBCDOverride != NULL);

   TR::Register *resultReg = NULL;
   if (!useVectorInstructions)
      resultReg = pdmulEvaluatorHelper(node, cg);
   else
      resultReg = pdArithmeticVectorEvaluatorHelper(node, TR::InstOpCode::VMP, cg);

   cg->traceBCDExit("pdmul",node);
   return resultReg;
   }
5421
5422
/**
 * Non-vector evaluator helper for pdmul.
 *
 * The first child is evaluated and the target is initialized through evaluateBCDValueModifyingOperand,
 * the target is widened on the left to the encoded first operand size, and a single MP instruction is
 * emitted with the target storage as the first operand and the second child's storage as the second.
 * Afterwards the result precision is corrected and, when the result was truncated to an even
 * precision, the top nibble is cleared unless the node allows skipping pad byte clearing.
 *
 * \param node  the pdmul node
 * \param cg    the code generator
 * \return the pseudo register holding the product
 */
TR::Register *
J9::Z::TreeEvaluator::pdmulEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
   {
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();
   TR::Compilation *comp = cg->comp();

   TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);
   bool trackSignState=false;
   bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is an MP coming
   TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=0
   cg->decReferenceCount(firstChild);
   TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);
   TR_StorageReference *targetStorageReference = targetReg->getStorageReference();
   TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
   TR::MemoryReference *secondMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);

   int32_t op1EncodingPrecision = cg->getPDMulEncodedPrecision(node, firstReg, secondReg);
   int32_t op1EncodingSize = cg->getPDMulEncodedSize(node, firstReg, secondReg);
   // The preparatory clearing operations need a length set so base it on the op1EncodingSize but the final precision will be set after the MP instruction has been generated
   targetReg->setDecimalPrecision(op1EncodingPrecision);

   TR_ASSERT( targetReg->getSize() >= firstReg->getSize() + secondReg->getSize(),"MP may result in a data exception\n");
   TR_ASSERT( secondReg->getSize() <= 8, "MP will result in a spec exception\n");

   cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), firstReg->getSize(), op1EncodingSize, true); // widenOnLeft=true

   cg->correctBadSign(firstChild, firstReg, firstReg->getSize(), destMR);
   cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), secondMR);

   // The generated instruction is not needed afterwards, so its return value is not kept
   generateSS2Instruction(cg, TR::InstOpCode::MP, node,
                          op1EncodingSize-1,
                          generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
                          secondReg->getSize()-1,
                          generateS390RightAlignedMemoryReference(*secondMR, node, 0, cg));

   targetReg->setHasKnownValidSignAndData();

   int32_t computedResultPrecision = firstReg->getDecimalPrecision() + secondReg->getDecimalPrecision();
   correctPackedArithmeticPrecision(node, op1EncodingSize, targetReg, computedResultPrecision, cg);

   if (targetReg->getDecimalPrecision() < computedResultPrecision)
      {
      // The result was truncated: an even target precision leaves a stale top nibble that must be zeroed
      if (!node->canSkipPadByteClearing() && targetReg->isEvenPrecision())
         cg->genZeroLeftMostPackedDigits(node, targetReg, targetReg->getSize(), 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));
      }
   else if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"TR::InstOpCode::MP node %p targetRegPrec %d >= computedResultPrecision %d (firstRegPrec %d + secondRegPrec %d) so skip nibble clearing\n",
         node,targetReg->getDecimalPrecision(),computedResultPrecision,firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision());
      }

   // Even with no overflow MP can produce a negative zero as the sign of the result is determined from the rules
   // of algebra *even when one or both of the operands are zero*. So 0 * -1 = -0 (0x0c * 0x1d = 0x0d -- not clean result)
   // MP will always produce a result with a preferred sign however.
   if (firstReg->hasKnownOrAssumedPositiveSignCode() &&
       secondReg->hasKnownOrAssumedPositiveSignCode())
      {
      if (cg->traceBCDCodeGen())
         traceMsg(comp, "\tfirstReg and secondReg have positive sign codes so set targetReg sign code to the preferred positive sign 0x%x\n", TR::DataType::getPreferredPlusCode());
      // positive*positive=positive and then MP will clean the positive sign to 0xc
      targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());
      }
   else
      {
      targetReg->setHasKnownPreferredSign();
      }

   cg->decReferenceCount(secondChild);
   return targetReg;
   }
5494
5495
/**
5496
* Handles pddiv, and pdrem.
5497
*/
5498
TR::Register *
5499
J9::Z::TreeEvaluator::pddivremEvaluator(TR::Node * node, TR::CodeGenerator * cg)
5500
{
5501
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "PD-Op/%s", node->getOpCode().getName()),
5502
1, TR::DebugCounter::Cheap);
5503
TR::Register * reg = NULL;
5504
5505
static char* isVectorBCDEnv = feGetEnv("TR_enableVectorBCD");
5506
if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) && !cg->comp()->getOption(TR_DisableVectorBCD) || isVectorBCDEnv)
5507
{
5508
reg = pddivremVectorEvaluatorHelper(node, cg);
5509
}
5510
else
5511
{
5512
reg = pddivremEvaluatorHelper(node, cg);
5513
}
5514
5515
return reg;
5516
}
5517
5518
/**
5519
* Handles pddiv, and pdrem. This is the vector evaluator helper function.
5520
*/
5521
TR::Register *
5522
J9::Z::TreeEvaluator::pddivremVectorEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
5523
{
5524
TR::Register* vTargetReg = NULL;
5525
TR::InstOpCode::Mnemonic opCode;
5526
switch(node->getOpCodeValue())
5527
{
5528
case TR::pddiv:
5529
opCode = TR::InstOpCode::VDP;
5530
break;
5531
case TR::pdrem:
5532
opCode = TR::InstOpCode::VRP;
5533
break;
5534
default:
5535
TR_ASSERT(0, "Unexpected opcode in pddiv/remVectorEvaluatorHelper");
5536
break;
5537
}
5538
5539
vTargetReg = pdArithmeticVectorEvaluatorHelper(node, opCode, cg);
5540
return vTargetReg;
5541
}
5542
5543
/**
5544
* Handles pddiv, and pdrem. This is the non-vector evaluator helper function.
5545
*/
5546
TR::Register *
5547
J9::Z::TreeEvaluator::pddivremEvaluatorHelper(TR::Node * node, TR::CodeGenerator * cg)
5548
{
5549
TR_ASSERT( node->getOpCodeValue() == TR::pddiv || node->getOpCodeValue() == TR::pdrem,
5550
"pddivEvaluator only valid for pddiv/pdrem\n");
5551
5552
TR::Node *firstChild = node->getFirstChild();
5553
TR::Node *secondChild = node->getSecondChild();
5554
TR::Compilation *comp = cg->comp();
5555
5556
TR_PseudoRegister *firstReg = cg->evaluateBCDNode(firstChild);
5557
bool trackSignState=false;
5558
bool alwaysLegalToCleanSign=true; // ok to use ZAP (and clobber srcSign) to init as there is a DP coming
5559
TR_PseudoRegister *targetReg = evaluateBCDValueModifyingOperand(node, true, NULL, cg, trackSignState, 0, alwaysLegalToCleanSign); // initTarget=true, sourceMR=NULL, srcSize=0
5560
cg->decReferenceCount(firstChild);
5561
TR_PseudoRegister *secondReg = cg->evaluateBCDNode(secondChild);
5562
TR_StorageReference *targetStorageReference = targetReg->getStorageReference();
5563
TR::MemoryReference *destMR = generateS390RightAlignedMemoryReference(node, targetStorageReference, cg);
5564
5565
if (secondReg->getDecimalPrecision() > secondChild->getDecimalPrecision())
5566
{
5567
TR_ASSERT( false,"the secondRegPrec has grown so using an inline DP may not be legal\n"); // TODO: for now disallow this completely but the below fix is also correct.
5568
TR_ASSERT(secondReg->getSize() == secondChild->getSize(),
5569
"the secondRegSize (regSize %d != nodeSize %d) has grown so using an inline DP may not be legal\n",secondReg->getSize(),secondChild->getSize());
5570
// The register precision may have been conservatively adjusted from an even precision to the next odd precision so in these
5571
// cases set it back to the even precision so the inline divide will still be legal. This extra nibble of precision will be zero so this is safe.
5572
secondReg->setDecimalPrecision(secondReg->getDecimalPrecision()-1);
5573
}
5574
5575
int32_t dividendPrecision = 0;
5576
int32_t divisorSize = 0;
5577
int32_t dividendSizeBumpForClear = 0;
5578
TR::MemoryReference *divisorMR = NULL;
5579
5580
divisorMR = generateS390RightAlignedMemoryReference(secondChild, secondReg->getStorageReference(), cg);
5581
dividendPrecision = cg->getPDDivEncodedPrecision(node, firstReg, secondReg);
5582
divisorSize = secondReg->getSize();
5583
5584
targetReg->setDecimalPrecision(dividendPrecision);
5585
int32_t dividendSize = targetReg->getSize();
5586
TR_ASSERT( dividendSize <= node->getStorageReferenceSize(),"allocated symbol for pddiv/pdrem is too small\n");
5587
if (cg->traceBCDCodeGen())
5588
traceMsg(comp,"\t%s: gen DP dividendSize = %d, secondOpSize = secondRegSize = %d, targetRegSize = %d (firstRegPrec %d, secondRegPrec %d)\n",
5589
node->getOpCode().getName(),dividendSize,secondReg->getSize(),targetReg->getSize(),firstReg->getDecimalPrecision(),secondReg->getDecimalPrecision());
5590
5591
cg->clearByteRangeIfNeeded(node, targetReg, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), dividendSize-divisorSize-dividendSizeBumpForClear, dividendSize, true); // widenOnLeft=true
5592
5593
cg->correctBadSign(firstChild, firstReg, targetReg->getSize(), destMR);
5594
cg->correctBadSign(secondChild, secondReg, secondReg->getSize(), divisorMR);
5595
5596
generateSS2Instruction(cg, TR::InstOpCode::DP, node,
5597
dividendSize-1,
5598
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
5599
divisorSize-1,
5600
generateS390RightAlignedMemoryReference(*divisorMR, node, 0, cg));
5601
5602
targetReg->setHasKnownValidSignAndData();
5603
5604
bool isRem = node->getOpCodeValue() == TR::pdrem;
5605
int32_t deadBytes = 0;
5606
bool isTruncation = false;
5607
if (isRem)
5608
{
5609
targetReg->setDecimalPrecision(secondReg->getDecimalPrecision());
5610
isTruncation = node->getDecimalPrecision() < targetReg->getDecimalPrecision();
5611
if (cg->traceBCDCodeGen())
5612
traceMsg(comp,"\tpdrem: setting targetReg prec to divisor prec %d (node prec is %d), isTruncation=%s\n",
5613
secondReg->getDecimalPrecision(),node->getDecimalPrecision(),isTruncation?"yes":"no");
5614
targetReg->removeRangeOfZeroDigits(0, TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(dividendSize));
5615
}
5616
else
5617
{
5618
deadBytes = divisorSize;
5619
// computedQuotientPrecision is the size of the quotient as computed by the DP instruction.
5620
// The actual returned node precision may be less.
5621
int32_t computedQuotientPrecision = TR::DataType::byteLengthToPackedDecimalPrecisionCeiling(dividendSize - deadBytes);
5622
if (firstReg->isEvenPrecision())
5623
{
5624
if (cg->traceBCDCodeGen())
5625
traceMsg(comp,"\tfirstRegPrec (%d) isEven=true so reduce computedQuotientPrecision %d->%d\n",firstReg->getDecimalPrecision(),computedQuotientPrecision,computedQuotientPrecision-1);
5626
computedQuotientPrecision--;
5627
}
5628
isTruncation = node->getDecimalPrecision() < computedQuotientPrecision;
5629
int32_t resultQuotientPrecision = std::min<int32_t>(computedQuotientPrecision, node->getDecimalPrecision());
5630
targetReg->setDecimalPrecision(resultQuotientPrecision);
5631
targetReg->addToRightAlignedDeadBytes(deadBytes);
5632
if (cg->traceBCDCodeGen())
5633
{
5634
traceMsg(comp,"\tisDiv=true (pddivrem) : increment targetReg %s deadBytes %d -> %d (by the divisorSize)\n",
5635
cg->getDebug()->getName(targetReg),targetReg->getRightAlignedDeadBytes()-deadBytes,targetReg->getRightAlignedDeadBytes());
5636
traceMsg(comp,"\tsetting targetReg prec to min(computedQuotPrec, nodePrec) = min(%d, %d) = %d (size %d), isTruncation=%s\n",
5637
computedQuotientPrecision,node->getDecimalPrecision(),resultQuotientPrecision,targetReg->getSize(),isTruncation?"yes":"no");
5638
}
5639
targetReg->removeRangeOfZeroDigits(0, computedQuotientPrecision);
5640
}
5641
5642
if (!node->canSkipPadByteClearing() && targetReg->isEvenPrecision() && isTruncation)
5643
{
5644
TR_ASSERT( node->getStorageReferenceSize() >= dividendSize,"operand size should only shrink from original size\n");
5645
int32_t leftMostByte = targetReg->getSize();
5646
if (cg->traceBCDCodeGen())
5647
traceMsg(comp,"\t%s: generating NI to clear top nibble with leftMostByte = targetReg->getSize() = %d\n",isRem ? "pdrem":"pddiv",targetReg->getSize());
5648
cg->genZeroLeftMostPackedDigits(node, targetReg, leftMostByte, 1, generateS390RightAlignedMemoryReference(*destMR, node, -deadBytes, cg));
5649
}
5650
5651
targetReg->setHasKnownPreferredSign();
5652
if (isRem)
5653
{
5654
// sign of the remainder is the same as the sign of dividend (and then set to the preferred sign by the DP instruction)
5655
if (firstReg->hasKnownOrAssumedSignCode())
5656
{
5657
targetReg->setKnownSignCode(firstReg->hasKnownOrAssumedPositiveSignCode() ? TR::DataType::getPreferredPlusCode() : TR::DataType::getPreferredMinusCode());
5658
if (cg->traceBCDCodeGen())
5659
traceMsg(comp,"\tpdrem: firstReg has the knownSignCode 0x%x so set targetReg sign code to the preferred sign 0x%x\n",
5660
firstReg->getKnownOrAssumedSignCode(),targetReg->getKnownOrAssumedSignCode());
5661
}
5662
}
5663
else
5664
{
5665
// when the sign of the divisor and divident are different then the quotient sign is negative otherwise if the signs are the same then the
5666
// quotient sign is positive
5667
if (firstReg->hasKnownOrAssumedSignCode() && secondReg->hasKnownOrAssumedSignCode())
5668
{
5669
bool dividendSignIsPositive = firstReg->hasKnownOrAssumedPositiveSignCode();
5670
bool dividendSignIsNegative = !dividendSignIsPositive;
5671
bool divisorSignIsPositive = secondReg->hasKnownOrAssumedPositiveSignCode();
5672
bool divisorSignIsNegative = !divisorSignIsPositive;
5673
5674
if ((dividendSignIsPositive && divisorSignIsPositive) ||
5675
(dividendSignIsNegative && divisorSignIsNegative))
5676
{
5677
targetReg->setKnownSignCode(TR::DataType::getPreferredPlusCode());
5678
if (cg->traceBCDCodeGen())
5679
traceMsg(comp,"\tpddiv: dividendSign matches the divisorSign so set targetReg sign code to the preferred sign 0x%x\n", TR::DataType::getPreferredPlusCode());
5680
}
5681
else
5682
{
5683
targetReg->setKnownSignCode(TR::DataType::getPreferredMinusCode());
5684
if (cg->traceBCDCodeGen())
5685
traceMsg(comp,"\tpddiv: dividendSign does not match the divisorSign so set targetReg sign code to the preferred sign 0x%x\n", TR::DataType::getPreferredMinusCode());
5686
}
5687
}
5688
}
5689
5690
cg->decReferenceCount(secondChild);
5691
return targetReg;
5692
}
5693
5694
/**
5695
* Handles pdshr
5696
*/
5697
TR::Register *
5698
J9::Z::TreeEvaluator::pdshrEvaluator(TR::Node * node, TR::CodeGenerator * cg)
5699
{
5700
cg->traceBCDEntry("pdshr",node);
5701
cg->generateDebugCounter("PD-Op/pdshr", 1, TR::DebugCounter::Cheap);
5702
5703
TR::Register* targetReg = NULL;
5704
5705
static char* isEnableVectorBCD = feGetEnv("TR_enableVectorBCD");
5706
if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
5707
!cg->comp()->getOption(TR_DisableVectorBCD) ||
5708
isEnableVectorBCD)
5709
{
5710
targetReg = pdshrVectorEvaluatorHelper(node, cg);
5711
}
5712
else
5713
{
5714
targetReg = pdshiftEvaluatorHelper(node, cg, true);
5715
}
5716
5717
cg->traceBCDExit("pdshr",node);
5718
return targetReg;
5719
}
5720
5721
void
5722
J9::Z::TreeEvaluator::clearAndSetSign(TR::Node *node,
5723
TR_PseudoRegister *targetReg,
5724
int32_t leftMostByteForClear,
5725
int32_t digitsToClear,
5726
TR::MemoryReference *destMR,
5727
TR_PseudoRegister *srcReg,
5728
TR::MemoryReference *sourceMR,
5729
bool isSetSign,
5730
int32_t sign,
5731
bool signCodeIsInitialized,
5732
TR::CodeGenerator *cg)
5733
{
5734
TR::Compilation *comp = cg->comp();
5735
5736
if (cg->traceBCDCodeGen())
5737
traceMsg(comp,"\tclearAndSetSign: digitsToClear %d, leftMostByte %d (isSetSign=%s, sign 0x%x)\n",digitsToClear,leftMostByteForClear,isSetSign?"yes":"no",sign);
5738
bool clearingNeeded = digitsToClear > 0;
5739
if (isSetSign)
5740
{
5741
// a better sign code setting maybe possible if a current setting is known
5742
TR_PseudoRegister *signReg = signCodeIsInitialized ? targetReg : NULL;
5743
int32_t digitsCleared = cg->genSignCodeSetting(node, targetReg, node->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, signReg, digitsToClear, !clearingNeeded);
5744
if (clearingNeeded)
5745
{
5746
digitsToClear-=digitsCleared;
5747
if (digitsToClear > 0 && (digitsToClear&0x1) && sign == TR::DataType::getIgnoredSignCode())
5748
{
5749
digitsToClear++; // when digitsToClear is odd for the ignore sign code case then bump up to the next even amount (and clear the sign too) as this is easier to clear
5750
targetReg->setHasKnownBadSignCode();
5751
if (cg->traceBCDCodeGen())
5752
traceMsg(comp,"\tignored setSign case so inc digitsToClear %d->%d and setHasKnownBadSignCode=true on targetReg %s\n",
5753
digitsToClear-1,digitsToClear,cg->getDebug()->getName(targetReg));
5754
}
5755
}
5756
signCodeIsInitialized = true;
5757
if (cg->traceBCDCodeGen())
5758
{
5759
if (clearingNeeded)
5760
traceMsg(comp,"\t\tisSetSign case (clearingNeeded==true): sign setting cleared %d digits so adjust digitsToClear %d->%d\n",
5761
digitsCleared,digitsToClear+digitsCleared,digitsToClear);
5762
traceMsg(comp,"\t\tisSetSign case: set signCode of 0x%x on targetReg %s\n",sign,cg->getDebug()->getName(targetReg));
5763
}
5764
}
5765
else if (!signCodeIsInitialized)
5766
{
5767
/* if (digitsToClear == 1) // MVN done later is better then MVC/NI as the latter suffers from an OSC
5768
{
5769
int32_t mvcSize = 1;
5770
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
5771
mvcSize-1,
5772
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
5773
generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
5774
targetReg->transferSignState(srcReg, true); // digitsLost=true -- a clear always loses digits
5775
signCodeIsInitialized = true; // no longer clear the sign code in the code below for if (needLateClear)
5776
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tdigitsToClear==1 case: gen MVC to initialize sign code\n");
5777
}
5778
else */
5779
if (clearingNeeded)
5780
{
5781
digitsToClear++; // clear the sign code too and then MVN in the new sign code
5782
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\t init=false && isSetSign=false case : bump digitsToClear %d->%d to clear entire field\n",digitsToClear,digitsToClear+1);
5783
}
5784
}
5785
TR_ASSERT(digitsToClear >= 0,"digitsToClear %d should be >= 0\n",digitsToClear);
5786
if (digitsToClear > 0)
5787
{
5788
if (cg->traceBCDCodeGen()) traceMsg(comp,"\t\tdigitsToClear %d > 0 so call genClearLeftMostDigitsIfNeeded\n",digitsToClear);
5789
cg->genZeroLeftMostDigitsIfNeeded(node, targetReg, leftMostByteForClear, digitsToClear, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));
5790
}
5791
5792
if (!signCodeIsInitialized)
5793
{
5794
if (cg->traceBCDCodeGen())
5795
traceMsg(comp,"\t\tsignCodeIsInitialized=false after clearing of %d digits : init the sign now with an MVN of size 1\n",digitsToClear,isSetSign?"yes":"no");
5796
// Move the sign code over from the source location. The top nibble has already been cleared above.
5797
int32_t mvnSize = 1;
5798
generateSS1Instruction(cg, TR::InstOpCode::MVN, node,
5799
mvnSize-1,
5800
generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
5801
generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
5802
targetReg->transferSignState(srcReg, true); // digitsLost=true -- a clear always loses digits
5803
}
5804
}
5805
5806
/**
 * Handles a packed decimal widening or truncation that needs no actual digit shifting.
 *
 * The value of \p srcReg is moved into the target (with an MVC when the target is not already
 * initialized), the target precision is set to the node precision, the sign is either set to
 * \p sign (when \p isSetSign is true) or transferred from the source, and the top nibble is
 * cleared when the resulting even precision would otherwise expose a non-zero pad nibble.
 *
 * \param node       the node being evaluated; its first child supplies the source storage
 * \param srcReg     the already evaluated source pseudo register
 * \param isSetSign  true when an explicit sign code is to be set on the result
 * \param sign       the sign code to set when \p isSetSign is true
 * \param cg         the code generator
 * \return the target pseudo register, marked as initialized
 */
TR_PseudoRegister *
J9::Z::TreeEvaluator::simpleWideningOrTruncation(TR::Node *node,
                                                 TR_PseudoRegister *srcReg,
                                                 bool isSetSign,
                                                 int32_t sign,
                                                 TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tsimple widening or truncating shift: srcRegPrecision %d, isSetSign=%s, sign 0x%x\n",srcReg->getDecimalPrecision(),isSetSign?"yes":"no",sign);
      }

   bool truncatesDigits = false;
   bool mustClearTopNibble = false;
   int32_t srcPrecision = srcReg->getDecimalPrecision();
   if (srcPrecision > node->getDecimalPrecision())
      {
      // Only the node's precision worth of source digits survive
      srcPrecision = node->getDecimalPrecision();
      truncatesDigits = true;
      if (!node->canSkipPadByteClearing() && node->isEvenPrecision() && srcReg->getDigitsToClear(srcPrecision,srcPrecision+1) != 0)
         {
         mustClearTopNibble = true;
         }
      }

   int32_t targetPrecision = node->getDecimalPrecision();

   if (!truncatesDigits && srcReg->isEvenPrecision() && !srcReg->isLeftMostNibbleClear())
      {
      if (targetPrecision != srcPrecision) // in case this routine starts doing explicit widenings at some point then !canSkipPadByteClearing alone is not valid
         {
         mustClearTopNibble = true;
         }
      else if (!node->canSkipPadByteClearing())
         {
         mustClearTopNibble = true;
         if (cg->traceBCDCodeGen()) traceMsg(comp,"z^z : new clear : simpleWide %p\n",node);
         }
      }

   // Pass through is never used here; the flag stays fixed at false
   bool isPassThrough = false;
   bool initTargetAndSign = (isSetSign && !isPassThrough); // try to get a ZAP generated here for a widening as this can simplify the coming setSign operation
   bool isNondestructiveNop = isPassThrough && !truncatesDigits;
   TR_PseudoRegister *resultReg = NULL;
   TR::MemoryReference *sourceMR = NULL;
   if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tisDigitTruncation=%s, srcPrecision=%d, isPassThrough=%s, needsTopNibbleClearing=%s, initTargetAndSign=%s\n",
         truncatesDigits?"true":"false",srcPrecision,isPassThrough?"true":"false",mustClearTopNibble?"true":"false",initTargetAndSign?"yes":"no");
      }

   if (!isPassThrough)
      {
      sourceMR = generateS390RightAlignedMemoryReference(node->getFirstChild(), srcReg->getStorageReference(), cg);
      }

   if (initTargetAndSign || mustClearTopNibble)
      {
      resultReg = evaluateBCDValueModifyingOperand(node, initTargetAndSign, sourceMR, cg, initTargetAndSign);
      }
   else
      {
      resultReg = evaluateBCDSignModifyingOperand(node, isPassThrough, isNondestructiveNop, false, sourceMR, cg); // initTarget=false
      }

   bool alreadyInitialized = resultReg->isInitialized();
   TR::MemoryReference *destMR = NULL;
   if (!isPassThrough)
      {
      destMR = generateS390RightAlignedMemoryReference(node, resultReg->getStorageReference(), cg);
      }

   if (!alreadyInitialized && !isPassThrough)
      {
      int32_t srcSize = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);
      if (cg->traceBCDCodeGen())
         {
         traceMsg(comp,"\tisInit=false and isPassThru=false so gen initializing MVC with size %d. Do not clear after MVC just set targetReg->prec to srcPrecision %d\n",srcSize,srcPrecision);
         }
      generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
                             srcSize-1,
                             generateS390RightAlignedMemoryReference(*destMR, node, 0, cg),
                             generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
      }
   else if (cg->traceBCDCodeGen())
      {
      traceMsg(comp,"\tisInit=true (%s) or isPassThru=true (%s): no move needed just set targetReg->prec to srcPrecision %d\n",alreadyInitialized?"yes":"no",isPassThrough?"yes":"no",srcPrecision);
      }

   // a ZAP may have been generated when initializing the target so in this case do not transfer the srcReg sign
   if (!resultReg->signStateInitialized() || !initTargetAndSign)
      {
      resultReg->transferSignState(srcReg, truncatesDigits);
      }

   resultReg->setDecimalPrecision(targetPrecision);

   if (isSetSign && !isPassThrough)
      {
      cg->genSignCodeSetting(node, resultReg, resultReg->getSize(), generateS390RightAlignedMemoryReference(*destMR, node, 0, cg), sign, resultReg, 0, false /* !topNibbleIsZero */);
      }
   else
      {
      resultReg->transferSignState(srcReg, truncatesDigits);
      }

   resultReg->transferDataState(srcReg);

   if (mustClearTopNibble)
      {
      if (cg->traceBCDCodeGen()) traceMsg(comp,"\tisDigitTruncation=true and targetReg->isEvenPrecision() (%d) so clear top nibble\n",resultReg->isEvenPrecision());
      int32_t leftMostByteForClear = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);
      cg->genZeroLeftMostPackedDigits(node, resultReg, leftMostByteForClear, 1, generateS390RightAlignedMemoryReference(*destMR, node, 0, cg));
      }

   if (!isPassThrough)
      {
      resultReg->setIsInitialized();
      }

   return resultReg;
   }
5901
5902
/*
5903
* \brief
5904
* Generate non-exception throwing instructions for pdModifyPrecision node to narrow or widen packed decimals.
5905
* The generated instruction sequence does not validate the source packed decimals. Any invalid packed
5906
* decimals will be loaded as is and modified as if their digits and signs were valid.
5907
*/
5908
TR::Register *
5909
J9::Z::TreeEvaluator::pdModifyPrecisionEvaluator(TR::Node * node, TR::CodeGenerator * cg)
5910
{
5911
cg->traceBCDEntry("pdModifyPrecision",node);
5912
cg->generateDebugCounter("PD-Op/pdmodifyPrec", 1, TR::DebugCounter::Cheap);
5913
5914
TR::Register* targetReg = NULL;
5915
5916
static char* isEnableVectorBCD = feGetEnv("TR_enableVectorBCD");
5917
if(cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
5918
!cg->comp()->getOption(TR_DisableVectorBCD)
5919
|| isEnableVectorBCD)
5920
{
5921
int32_t targetPrec = node->getDecimalPrecision();
5922
targetReg = cg->allocateRegister(TR_VRF);
5923
5924
if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY))
5925
{
5926
// Overflow exceptions can be ignored for z15 vector packed decimal VRI-i,f,g and VRR-i instructions. Given
5927
// this, VPSOP now becomes suitable for data truncations without incurring exceptions which eventually lead to
5928
// performance degradations. This is usually used to truncate high nibble of an even precision PD.
5929
targetReg = vectorPerformSignOperationHelper(node, cg, true, targetPrec, true, SignOperationType::maintain, false, false, 0, false, true);
5930
}
5931
else
5932
{
5933
int32_t imm = 0x0FFFF >> (TR_VECTOR_REGISTER_SIZE - TR::DataType::packedDecimalPrecisionToByteLength(targetPrec));
5934
TR::Register* pdReg = cg->evaluate(node->getFirstChild());
5935
TR::Register* maskReg = cg->allocateRegister(TR_VRF);
5936
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, maskReg, imm, 0);
5937
5938
if (targetPrec % 2 == 0)
5939
{
5940
TR::Register* shiftAmountReg = cg->allocateRegister(TR_VRF);
5941
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, shiftAmountReg, 4, 0);
5942
generateVRRcInstruction(cg, TR::InstOpCode::VSRL, node, maskReg, maskReg, shiftAmountReg, 0, 0, 0);
5943
cg->stopUsingRegister(shiftAmountReg);
5944
}
5945
5946
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, targetReg, pdReg, maskReg, 0, 0, 0);
5947
5948
cg->stopUsingRegister(maskReg);
5949
cg->decReferenceCount(node->getFirstChild());
5950
}
5951
}
5952
else
5953
{
5954
TR::Node *srcNode = node->getChild(0);
5955
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
5956
targetReg = simpleWideningOrTruncation(node, srcReg, false, 0, cg);
5957
cg->decReferenceCount(srcNode);
5958
node->setRegister(targetReg);
5959
}
5960
5961
cg->traceBCDExit("pdModifyPrecision",node);
5962
return targetReg;
5963
}
5964
5965
/**
 * \brief Evaluate a pdshl node by dispatching to the vector or the non-vector shift helper.
 *
 * The vector helper is chosen when the vector packed decimal facility is supported and not disabled
 * by TR_DisableVectorBCD, or when the TR_enableVectorBCD environment variable is set.
 *
 * \param node  the pdshl node
 * \param cg    the code generator
 *
 * \return the register returned by the selected helper
 */
TR::Register *
J9::Z::TreeEvaluator::pdshlEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   cg->traceBCDEntry("pdshl",node);
   cg->generateDebugCounter("PD-Op/pdshl", 1, TR::DebugCounter::Cheap);

   static char* forceVectorBCD = feGetEnv("TR_enableVectorBCD");

   const bool useVectorPath =
      (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL) &&
       !cg->comp()->getOption(TR_DisableVectorBCD))
      || forceVectorBCD;

   TR::Register* resultReg = useVectorPath
      ? pdshlVectorEvaluatorHelper(node, cg)
      : pdshiftEvaluatorHelper(node, cg, false);

   cg->traceBCDExit("pdshl",node);
   return resultReg;
   }
5988
5989
/**
5990
* \brief This is a helper function that handles pdshl, pdshr, and pdshlOverflow nodes.
5991
*
5992
* pdshl is currently not used and replaced by pdshlOverflow.
5993
*/
5994
TR::Register *
5995
J9::Z::TreeEvaluator::pdshiftEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg, bool isRightShift)
5996
{
5997
TR::Node* srcNode = node->getChild(0);
5998
TR::Node* shiftAmountNode = node->getChild(1);
5999
TR::Compilation *comp = cg->comp();
6000
int32_t roundAmount = 0;
6001
int32_t shiftAmount = 0;
6002
6003
TR_ASSERT(shiftAmountNode, "expecting a shiftAmountNode\n");
6004
TR_ASSERT(shiftAmountNode->getOpCode().isLoadConst() &&
6005
shiftAmountNode->getOpCode().getSize() <= 4,
6006
"expecting a <= 4 size integral constant PD shift amount\n");
6007
shiftAmount = (int32_t)shiftAmountNode->get64bitIntegralValue();
6008
TR_ASSERT(shiftAmount >= 0, "unexpected PD shift amount of %d\n", shiftAmount);
6009
6010
if(isRightShift)
6011
{
6012
shiftAmount *= -1;
6013
TR::Node* roundAmountNode = node->getChild(2);
6014
TR_ASSERT(roundAmountNode, "round amount node should not be null\n");
6015
roundAmount = roundAmountNode->get32bitIntegralValue();
6016
TR_ASSERT(roundAmount == 0 || roundAmount == 5, "unexpected round amount of %d\n", roundAmount);
6017
cg->decReferenceCount(roundAmountNode);
6018
}
6019
6020
TR_PseudoRegister *srcReg = cg->evaluateBCDNode(srcNode);
6021
6022
uint32_t srcPrecision = srcNode->getDecimalPrecision();
6023
uint32_t resultPrecision = node->getDecimalPrecision();
6024
uint32_t resultSize = TR::DataType::packedDecimalPrecisionToByteLength(resultPrecision);
6025
uint32_t sourceSize = TR::DataType::packedDecimalPrecisionToByteLength(srcPrecision);
6026
6027
TR_StorageReference* targetStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(resultSize, comp);
6028
TR_PseudoRegister* targetReg = cg->allocatePseudoRegister(node->getDataType());
6029
targetReg->setIsInitialized(true);
6030
targetReg->setSize(resultSize);
6031
targetReg->setStorageReference(targetStorageRef, node);
6032
6033
TR::MemoryReference* targetMR = generateS390RightAlignedMemoryReference(node, targetReg->getStorageReference(), cg);
6034
TR::MemoryReference* sourceMR = generateS390RightAlignedMemoryReference(srcNode, srcReg->getStorageReference(), cg);
6035
TR_StorageReference* tmpStorageRef = NULL;
6036
TR::MemoryReference* tmpMR = NULL;
6037
6038
if (cg->traceBCDCodeGen())
6039
{
6040
traceMsg(comp,"\tGen packed decimal shift: %s %p : shift by %d, roundAmount=%d, result Size=%d, precision %d, sourceSize=%d, precision %d\n",
6041
node->getOpCode().getName(),
6042
node,
6043
shiftAmount,
6044
roundAmount,
6045
resultSize,
6046
resultPrecision,
6047
sourceSize,
6048
srcNode->getDecimalPrecision());
6049
}
6050
6051
if(shiftAmount == 0)
6052
{
6053
if (srcPrecision > resultPrecision)
6054
{
6055
/* Packed decimal narrowing with exception handling:
6056
*
6057
* If the narrowing operation truncates non-zero digits (e.g. shift "123C" by 0 digts and keep 2 digits yields "23C")
6058
* and the 'checkOverflow' parameter is true, the JIT'ed sequence should trigger HW exception and
6059
* yield control to the Java code (via OOL call) so that overflow exceptions can be thrown.
6060
* This is why PD arithmetic operations use 'pdshlOverflow' to perform data truncations
6061
* instead of 'modifyPrecision'.
6062
*/
6063
6064
tmpStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(sourceSize, comp);
6065
tmpStorageRef->setTemporaryReferenceCount(1);
6066
tmpMR = generateS390RightAlignedMemoryReference(node, tmpStorageRef, cg);
6067
6068
generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
6069
sourceSize - 1,
6070
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6071
sourceSize - 1,
6072
generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
6073
6074
shiftAmount = srcPrecision - resultPrecision;
6075
if ((srcPrecision % 2) == 0)
6076
{
6077
// Source being even precision means we need an extra left shift to get right of the source's highest nibble.
6078
shiftAmount++;
6079
}
6080
6081
generateSS3Instruction(cg, TR::InstOpCode::SRP, node,
6082
sourceSize - 1,
6083
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6084
shiftAmount, roundAmount);
6085
6086
generateSS3Instruction(cg, TR::InstOpCode::SRP, node,
6087
sourceSize - 1,
6088
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6089
-1*shiftAmount, roundAmount);
6090
6091
generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
6092
resultSize - 1,
6093
generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),
6094
resultSize - 1,
6095
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));
6096
}
6097
else // zero shift, copy or widen result
6098
{
6099
generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
6100
resultSize - 1,
6101
generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),
6102
sourceSize - 1,
6103
generateS390RightAlignedMemoryReference(*sourceMR, node, 0, cg));
6104
6105
// Top nibble cleaning if the PD widening or copying source precision is even
6106
if ((srcPrecision % 2) == 0)
6107
{
6108
cg->genZeroLeftMostPackedDigits(node, targetReg, sourceSize, 1,
6109
generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg));
6110
}
6111
}
6112
}
6113
else // shiftAmount != 0
6114
{
6115
int32_t tmpResultByteSize = sourceSize;
6116
bool needExtraShift = false;
6117
6118
if (!isRightShift)
6119
{
6120
if ((resultPrecision % 2) == 0)
6121
{
6122
/* An extra shift is needed when the left shift result's precision is even.
6123
* For example, let the input be 00 12 3C (precision=5), shiftAmount=2 and let the result precision be 4.
6124
* Shift this left by 2 should produce and expected result of 02 30 0C.
6125
*
6126
* To produce this expected result with HW exception, we need to
6127
*
6128
* 1. shift 00 12 3C by 3 (instead of 2) digits to produce an intermediate result 01 23 00 0C
6129
* 2. use ZAP to truncate this to 23 00 0C. The purpose of this ZAP is to truncate the leading digits,
6130
* which may or may not be zero, and trigger HW exception in case they are non-zero so that the
6131
* DAA Java implementation gets a chance to thrown Java exceptions. In our example, the leading
6132
* '1' should not be silently discarded (using the NI instruction) because the API 'checkOverflow' parameter
6133
* may be true.
6134
* 3. perform a right shift of 1 on the intermediate result to produce the expected result 02 30 0C.
6135
*
6136
*/
6137
shiftAmount++;
6138
needExtraShift = true;
6139
}
6140
6141
// Allocate enough temporary space to accommodate the amount of left shifts.
6142
tmpResultByteSize += (shiftAmount + 1)/2;
6143
}
6144
6145
tmpStorageRef = TR_StorageReference::createTemporaryBasedStorageReference(tmpResultByteSize, comp);
6146
tmpStorageRef->setTemporaryReferenceCount(1);
6147
tmpMR = generateS390RightAlignedMemoryReference(node, tmpStorageRef, cg);
6148
6149
// For this large tmp storage, we need to use XC+MVC to clear and move input into it.
6150
if (!isRightShift)
6151
{
6152
generateSS1Instruction(cg, TR::InstOpCode::XC, node,
6153
tmpResultByteSize - 1,
6154
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6155
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));
6156
}
6157
6158
generateSS1Instruction(cg, TR::InstOpCode::MVC, node,
6159
sourceSize - 1,
6160
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6161
sourceMR);
6162
6163
generateSS3Instruction(cg, TR::InstOpCode::SRP, node,
6164
tmpResultByteSize - 1,
6165
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg),
6166
shiftAmount, roundAmount);
6167
6168
generateSS2Instruction(cg, TR::InstOpCode::ZAP, node,
6169
resultSize - 1,
6170
generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),
6171
tmpResultByteSize - 1,
6172
generateS390RightAlignedMemoryReference(*tmpMR, node, 0, cg));
6173
6174
if (needExtraShift)
6175
{
6176
generateSS3Instruction(cg, TR::InstOpCode::SRP, node,
6177
resultSize - 1,
6178
generateS390RightAlignedMemoryReference(*targetMR, node, 0, cg),
6179
-1, 0);
6180
}
6181
}
6182
6183
cg->decReferenceCount(srcNode);
6184
cg->decReferenceCount(shiftAmountNode);
6185
node->setRegister(targetReg);
6186
return targetReg;
6187
}
6188
6189
/**
 * \brief Emit a single VPSOP instruction on the first child of \p node and bind the result to \p node.
 *
 * \param node                   node whose first child is the packed decimal operand
 * \param cg                     the code generator
 * \param setPrecision           when true \p precision is used as the digit count, otherwise
 *                               TR_MAX_INPUT_PACKED_DECIMAL_PRECISION is used
 * \param precision              requested digit count; clamped to TR_MAX_INPUT_PACKED_DECIMAL_PRECISION
 * \param signedStatus           true means signed, so 0xC is used instead of 0xF; false ORs 0x2 into the 4th immediate
 * \param signOpType             sign operation, placed in the 4th immediate shifted left by 2
 * \param signValidityCheck      true ORs 0x1 into the 4th immediate
 * \param digitValidityCheck     false ORs 0x80 into the 4th immediate
 * \param sign                   sign code, consulted only when \p signOpType is SignOperationType::setSign
 * \param setConditionCode       passed through as the last operand of the VPSOP instruction
 * \param ignoreDecimalOverflow  true ORs 0x80 into the 3rd immediate
 *
 * \return the newly allocated vector register holding the VPSOP result
 */
TR::Register*
J9::Z::TreeEvaluator::vectorPerformSignOperationHelper(TR::Node *node,
                                                       TR::CodeGenerator *cg,
                                                       bool setPrecision,
                                                       uint32_t precision,
                                                       bool signedStatus,
                                                       SignOperationType signOpType,
                                                       bool signValidityCheck,
                                                       bool digitValidityCheck,
                                                       int32_t sign,
                                                       bool setConditionCode,
                                                       bool ignoreDecimalOverflow)
   {
   TR::Register *resultReg = cg->allocateRegister(TR_VRF);
   TR::Node *operandNode = node->getFirstChild();

   TR::Register *operandReg = cg->evaluate(operandNode);

   // Digit count for the 3rd immediate, never above the maximum input precision.
   int32_t digitCount = TR_MAX_INPUT_PACKED_DECIMAL_PRECISION;
   if (setPrecision)
      {
      digitCount = precision;
      }
   if (digitCount > TR_MAX_INPUT_PACKED_DECIMAL_PRECISION)
      {
      digitCount = TR_MAX_INPUT_PACKED_DECIMAL_PRECISION;
      }

   uint8_t imm3 = digitCount;
   if (ignoreDecimalOverflow)
      {
      imm3 |= 0x80;
      }

   // 4th immediate layout: bits 4-5 sign operation, bit 6 positive sign code, bit 7 sign validation on V2.
   uint8_t imm4 = signOpType << 2;

   if (signOpType == SignOperationType::setSign)
      {
      // NOTE(review): the plus codes OR in 0x1, the same bit signValidityCheck sets below - confirm intended.
      switch (sign)
         {
         case TR_PREFERRED_PLUS_CODE:
         case TR_ALTERNATE_PLUS_CODE:
         case TR_ZONED_PLUS:
            imm4 |= 0x1;
            break;
         case TR_PREFERRED_MINUS_CODE:
         case TR_ALTERNATE_MINUS_CODE:
            break;
         default:
            TR_ASSERT_FATAL(false, "Packed Decimal sign code 0x%x is invalid", sign);
            break;
         }
      }

   // signedStatus == true means signed, so 0xC is used instead of 0xF.
   if (!signedStatus)
      {
      imm4 |= 0x2;
      }
   if (signValidityCheck)
      {
      imm4 |= 0x1;
      }
   if (!digitValidityCheck)
      {
      imm4 |= 0x80;
      }

   // Current use of TR::pdclean does not want to modify precision or set the condition code.
   // TODO: a more complex optimization could probably collapse modify precision and TR::setsign
   // or TR::pdclean into one instruction.
   generateVRIgInstruction(cg, TR::InstOpCode::VPSOP, node, resultReg, operandReg, imm3, imm4, setConditionCode);

   node->setRegister(resultReg);
   cg->decReferenceCount(operandNode);
   return resultReg;
   }
6255
6256
/**
 * \brief Emit a VCVD/VCVDG conversion of the first child of \p node into a vector register.
 *
 * \param node  conversion node; its first child is the binary source and its decimal precision
 *              supplies the precision immediate
 * \param op    TR::InstOpCode::VCVD or TR::InstOpCode::VCVDG
 * \param cg    the code generator
 *
 * \return the newly allocated vector register holding the conversion result
 */
TR::Register *
J9::Z::TreeEvaluator::generateVectorBinaryToPackedConversion(TR::Node * node, TR::InstOpCode::Mnemonic op, TR::CodeGenerator * cg)
   {
   TR_ASSERT(op == TR::InstOpCode::VCVD || op == TR::InstOpCode::VCVDG,
             "unexpected opcode in gen vector i2pd\n");

   TR::Register *resultReg = cg->allocateRegister(TR_VRF);
   TR::Node *binaryNode = node->getFirstChild();
   TR::Register *binaryReg = cg->evaluate(binaryNode);

   // A VCVDG source held in a register pair is first merged into one temporary register.
   const bool mergedFromPair = (op == TR::InstOpCode::VCVDG) && binaryReg->getRegisterPair();
   if (mergedFromPair)
      {
      TR::Register *mergedReg = cg->allocateRegister();
      generateRSInstruction(cg, TR::InstOpCode::SLLG, node, mergedReg, binaryReg->getRegisterPair()->getHighOrder(), 32);
      generateRRInstruction(cg, TR::InstOpCode::LR, node, mergedReg, binaryReg->getRegisterPair()->getLowOrder());
      binaryReg = mergedReg;
      }

   // Precision immediate; 0x80 is ORed in when the enhancement facility is present and
   // decimal overflow exceptions are to be ignored.
   uint8_t precisionImm = node->getDecimalPrecision();
   if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
      {
      precisionImm |= 0x80;
      }

   generateVRIiInstruction(cg, op, node, resultReg, binaryReg, precisionImm, 0x1);

   // Only the temporary created for the register pair is released here.
   if (mergedFromPair)
      {
      cg->stopUsingRegister(binaryReg);
      }

   cg->decReferenceCount(binaryNode);
   node->setRegister(resultReg);
   return resultReg;
   }
6293
6294
/**
 * \brief Vector evaluator helper for packed decimal left shift nodes, implemented with VSRP.
 *
 * \param node  shift node: child 0 is the packed decimal source, child 1 the constant shift amount
 * \param cg    the code generator
 *
 * \return the register holding the result; on the skip-shift path this is the source child's register
 */
TR::Register *
J9::Z::TreeEvaluator::pdshlVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator * cg)
   {
   TR::Node *pdChild = node->getChild(0);
   TR::Node *shiftChild = node->getNumChildren() > 1 ? node->getSecondChild() : NULL;
   TR_ASSERT(shiftChild, "shift amount node should not be null");
   TR_ASSERT(shiftChild->getOpCode().isLoadConst() && shiftChild->getOpCode().getSize() <= 4,
             "expecting a <= 4 size integral constant PD shift amount\n");

   // If this is a pdshlOverflow with i2pd or another pd-arithmetic operation under it, those vector
   // instructions already truncate the resulting PD by the amount specified by the precision set on
   // the child. The shift can then be skipped and the child's result returned directly. This is only
   // done when the child is not shared and has not been evaluated yet.
   bool skipShift = false;
   if (node->getOpCodeValue() == TR::pdshlOverflow &&
       pdChild->getReferenceCount() == 1 &&
       pdChild->getRegister() == NULL)
      {
      switch (pdChild->getOpCodeValue())
         {
         case TR::i2pd:
         case TR::l2pd:
         case TR::pdadd:
         case TR::pdsub:
         case TR::pdmul:
         case TR::pddiv:
         case TR::pdrem:
            skipShift = true;
            break;
         default:
            break;
         }
      }

   int32_t shiftAmount = (int32_t)shiftChild->get64bitIntegralValue();
   uint8_t precisionImm = node->getDecimalPrecision();

   if (skipShift)
      {
      // Push this node's precision down so the child produces the final precision itself.
      pdChild->setDecimalPrecision(precisionImm);
      }

   TR::Register *pdReg = cg->evaluate(pdChild);
   TR::Register *resultReg = NULL;

   if (!skipShift)
      {
      TR_ASSERT_FATAL((shiftAmount >= -32 && shiftAmount <= 31), "TR::pdshl/r shift amount (%d )not in range [-32, 31]", shiftAmount);

      if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
         {
         precisionImm |= 0x80;
         }

      resultReg = cg->allocateRegister(TR_VRF);
      generateVRIgInstruction(cg, TR::InstOpCode::VSRP, node, resultReg, pdReg, precisionImm, shiftAmount, 0x01);
      }
   else
      {
      // Passthrough. The register is assigned to the node below, before the child's reference
      // count is decremented, to avoid killing this live register.
      resultReg = pdReg;
      }

   node->setRegister(resultReg);
   cg->decReferenceCount(pdChild);
   cg->decReferenceCount(shiftChild);
   return resultReg;
   }
6352
6353
/**
 * \brief Vector evaluator helper for pdshr nodes, implemented with a single VSRP instruction.
 *
 * \param node  pdshr node: child 0 is the packed decimal source, child 1 the constant shift amount
 *              and child 2 the constant round amount (0 or 5)
 * \param cg    the code generator
 *
 * \return the newly allocated vector register holding the shifted result
 */
TR::Register *
J9::Z::TreeEvaluator::pdshrVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator * cg)
   {
   TR::Node *srcNode = node->getChild(0);
   TR::Node *shiftAmountNode = node->getNumChildren() > 1 ? node->getChild(1) : NULL;
   TR_ASSERT(shiftAmountNode != NULL, "pdshrVectorEvaluatorHelper is expecting a shiftAmountNode as child-1\n");
   TR_ASSERT(shiftAmountNode->getOpCode().isLoadConst() && shiftAmountNode->getOpCode().getSize() <= 4,
             "expecting a <= 4 size integral constant PD shift amount\n");

   int32_t shiftAmount = (int32_t)shiftAmountNode->get32bitIntegralValue();
   // Both bounds must hold; joining them with '||' made this check always true.
   TR_ASSERT((shiftAmount >= 0 && shiftAmount <= 31),"unexpected TR::pdshr shift amount of %d\n",shiftAmount);

   //set shift amount and round amount
   shiftAmount *= -1;          // right shift is negative
   shiftAmount &= 0x0000007F;  // clear off top bits

   TR::Node *roundAmountNode = node->getChild(2);
   TR_ASSERT(roundAmountNode != NULL, "round amount node should not be null\n");
   TR_ASSERT( roundAmountNode->getOpCode().isLoadConst(),"excepting pdshr round amount to be a const\n");
   int32_t roundAmount = roundAmountNode->get32bitIntegralValue();
   TR_ASSERT(roundAmount == 0 || roundAmount == 5, "round amount should be 0 or 5 and not %d\n",roundAmount);
   if (roundAmount)
      {
      // Set the round bit in the shift amount immediate, which is the immediate passed
      // right after the precision in the VRIg instruction below.
      shiftAmount |= 0x80;
      }

   // Get PD value
   TR::Register * pdValueReg = cg->evaluate(srcNode);
   TR::Register* targetReg = cg->allocateRegister(TR_VRF);
   uint8_t decimalPrecision = node->getDecimalPrecision();

   // 0x80 is ORed into the precision immediate when the enhancement facility is present and
   // decimal overflow exceptions are to be ignored.
   if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_PACKED_DECIMAL_ENHANCEMENT_FACILITY) && cg->getIgnoreDecimalOverflowException())
      {
      decimalPrecision |= 0x80;
      }

   // Perform shift and set condition code on overflows
   generateVRIgInstruction(cg, TR::InstOpCode::VSRP, node, targetReg, pdValueReg, decimalPrecision, shiftAmount, 0x1);

   node->setRegister(targetReg);

   cg->decReferenceCount(srcNode);
   cg->decReferenceCount(shiftAmountNode);
   cg->decReferenceCount(roundAmountNode);

   return targetReg;
   }
6399
6400
/**
 * \brief Evaluate a zdstorei whose second child is a pd2zd node by unpacking the packed decimal
 *        with VUPKZL/VUPKZH and storing the zoned digits with VSTRL.
 *
 * The pd2zd node itself is not evaluated; only its first child is.
 *
 * \param node  the zdstorei node; its second child is the pd2zd node
 * \param cg    the code generator
 *
 * \return always NULL
 */
TR::Register*
J9::Z::TreeEvaluator::zdstoreiVectorEvaluatorHelper(TR::Node *node, TR::CodeGenerator *cg)
   {
   if (cg->comp()->getOption(TR_TraceCG))
      traceMsg(cg->comp(), "DAA: Entering zdstoreiVectorEvaluator %d\n", __LINE__);

   TR::Node* zonedNode = node->getSecondChild();
   TR::Node* packedNode = zonedNode->getFirstChild();
   TR::Register* packedReg = cg->evaluate(packedNode);
   TR_ASSERT_FATAL_WITH_NODE(packedNode, (packedReg->getKind() == TR_FPR || packedReg->getKind() == TR_VRF),
                             "vectorized zdstore is expecting the packed decimal to be in a vector register.");

   // No need to evaluate the address node of the zdstorei.
   // generateVSIInstruction() API will call separateIndexRegister() to separate the index
   // register by emitting an LA instruction. If there's a need for large displacement adjustment,
   // LAY will be emitted instead.
   TR::MemoryReference * storeMR = TR::MemoryReference::create(cg, node);

   TR::Register *highDigitsReg = cg->allocateRegister(TR_VRF);
   TR::Register *lowDigitsReg = cg->allocateRegister(TR_VRF);

   // VSTRL length encoding: 0 stores 1 byte, 15 stores 16 bytes.
   // 15 - storeLength = index from which to start.
   uint8_t storeLength = zonedNode->getDecimalPrecision() - 1;
   uint8_t unpackMask = 0x8; // Disable sign validation.
   TR::MemoryReference * lowDigitsMR = storeMR;
   generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, lowDigitsReg, packedReg, unpackMask); // Also copies the sign bit.

   if (zonedNode->getDecimalPrecision() > TR_VECTOR_REGISTER_SIZE)
      {
      // More digits than one vector register holds: unpack and store the high part first,
      // then place the low part right after it.
      generateVRRkInstruction(cg, TR::InstOpCode::VUPKZH, node, highDigitsReg, packedReg, unpackMask);
      storeLength = zonedNode->getDecimalPrecision() - TR_VECTOR_REGISTER_SIZE;
      generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, highDigitsReg, lowDigitsMR, storeLength - 1);
      lowDigitsMR = generateS390MemoryReference(*storeMR, storeLength, cg);
      storeLength = TR_VECTOR_REGISTER_SIZE - 1;
      }

   generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, lowDigitsReg, lowDigitsMR, storeLength);

   pd2zdSignFixup(node, storeMR, cg, false);

   // This would have been decremented in pd2zdVectorEvaluatorHelper
   // but since we skip that evaluator we decrement it here.
   cg->decReferenceCount(packedNode);

   for (int32_t childIndex = 0; childIndex < node->getNumChildren(); ++childIndex)
      {
      cg->decReferenceCount(node->getChild(childIndex));
      }

   cg->stopUsingRegister(highDigitsReg);
   cg->stopUsingRegister(lowDigitsReg);

   return NULL;
   }
6455
6456