Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/z/codegen/J9TreeEvaluator.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
//On zOS XLC linker can't handle files with same name at link time
24
//This workaround with pragma is needed. What this does is essentially
25
//give a different name to the codesection (csect) for this file. So it
26
//doesn't conflict with another file with same name.
27
#pragma csect(CODE,"TRJ9ZTreeEvalBase#C")
28
#pragma csect(STATIC,"TRJ9ZTreeEvalBase#S")
29
#pragma csect(TEST,"TRJ9ZTreeEvalBase#T")
30
31
#include <algorithm>
32
#include <limits.h>
33
#include <math.h>
34
#include <stdint.h>
35
#include "j9.h"
36
#include "j9cfg.h"
37
#include "j9consts.h"
38
#include "omrmodroncore.h"
39
#include "thrdsup.h"
40
#include "thrtypes.h"
41
#include "codegen/AheadOfTimeCompile.hpp"
42
#include "codegen/CodeGenerator.hpp"
43
#include "codegen/CodeGenerator_inlines.hpp"
44
#include "codegen/J9WatchedStaticFieldSnippet.hpp"
45
#include "codegen/Linkage_inlines.hpp"
46
#include "codegen/Machine.hpp"
47
#include "codegen/S390CHelperLinkage.hpp"
48
#include "codegen/S390PrivateLinkage.hpp"
49
#include "codegen/TreeEvaluator.hpp"
50
#include "compile/ResolvedMethod.hpp"
51
#include "compile/VirtualGuard.hpp"
52
#include "env/CompilerEnv.hpp"
53
#include "env/IO.hpp"
54
#include "env/jittypes.h"
55
#include "env/VMJ9.h"
56
#include "il/DataTypes.hpp"
57
#include "il/LabelSymbol.hpp"
58
#include "il/Node.hpp"
59
#include "il/Node_inlines.hpp"
60
#include "il/ResolvedMethodSymbol.hpp"
61
#include "il/RegisterMappedSymbol.hpp"
62
#include "il/ParameterSymbol.hpp"
63
#include "il/StaticSymbol.hpp"
64
#include "il/Symbol.hpp"
65
#include "il/TreeTop.hpp"
66
#include "il/TreeTop_inlines.hpp"
67
#include "infra/Bit.hpp"
68
#include "OMR/Bytes.hpp"
69
#include "ras/Delimiter.hpp"
70
#include "ras/DebugCounter.hpp"
71
#include "env/VMJ9.h"
72
#include "z/codegen/J9S390Snippet.hpp"
73
#include "z/codegen/BinaryCommutativeAnalyser.hpp"
74
#include "z/codegen/S390J9CallSnippet.hpp"
75
#include "z/codegen/ForceRecompilationSnippet.hpp"
76
#include "z/codegen/ReduceSynchronizedFieldLoad.hpp"
77
#include "z/codegen/S390Evaluator.hpp"
78
#include "z/codegen/S390GenerateInstructions.hpp"
79
#include "z/codegen/S390HelperCallSnippet.hpp"
80
#include "z/codegen/S390Instruction.hpp"
81
#include "z/codegen/S390Recompilation.hpp"
82
#include "z/codegen/S390Register.hpp"
83
#include "z/codegen/SystemLinkage.hpp"
84
#include "runtime/J9Profiler.hpp"
85
86
/*
87
* List of functions that are needed by the J9-specific evaluators that were moved from codegen.
88
* Since other evaluators in codegen still call these, they are declared extern here so that they can be called.
89
*/
90
extern void updateReferenceNode(TR::Node * node, TR::Register * reg);
91
extern void killRegisterIfNotLocked(TR::CodeGenerator * cg, TR::RealRegister::RegNum reg, TR::Instruction * instr , TR::RegisterDependencyConditions * deps = NULL);
92
extern TR::Register * iDivRemGenericEvaluator(TR::Node * node, TR::CodeGenerator * cg, bool isDivision, TR::MemoryReference * divchkDivisorMR);
93
extern TR::Instruction * generateS390CompareOps(TR::Node * node, TR::CodeGenerator * cg, TR::InstOpCode::S390BranchCondition fBranchOpCond, TR::InstOpCode::S390BranchCondition rBranchOpCond, TR::LabelSymbol * targetLabel);
94
95
void
96
J9::Z::TreeEvaluator::inlineEncodeASCII(TR::Node *node, TR::CodeGenerator *cg)
97
{
98
// tree looks as follows:
99
// encodeASCIISymbol
100
// input ptr
101
// output ptr
102
// input length (in elements)
103
//
104
// The original Java loop that this IL is inlining is found in StringCoding.encodeASCII:
105
/* if (coder == LATIN1) {
106
byte[] dst = new byte[val.length];
107
for (int i = 0; i < val.length; i++) {
108
if (val[i] < 0) {
109
dst[i] = '?';
110
} else {
111
dst[i] = val[i];
112
}
113
}
114
return dst;
115
}
116
*/
117
// Get the children
118
TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0));
119
TR::Register *outputPtrReg = cg->gprClobberEvaluate(node->getChild(1));
120
TR::Register *inputLengthRegister = cg->evaluate(node->getChild(2));
121
122
TR::LabelSymbol *processMultiple16CharsStart = generateLabelSymbol(cg);
123
TR::LabelSymbol *processMultiple16CharsEnd = generateLabelSymbol(cg);
124
125
TR::LabelSymbol *processSaturatedInput1 = generateLabelSymbol(cg);
126
127
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
128
129
TR::Register *vInput1 = cg->allocateRegister(TR_VRF);
130
TR::Register *vRange = cg->allocateRegister(TR_VRF);
131
TR::Register *vRangeControl = cg->allocateRegister(TR_VRF);
132
TR::Register *numCharsLeftToProcess = cg->allocateRegister();
133
TR::Register *firstSaturatedCharacter = cg->allocateRegister(TR_VRF);
134
135
uint32_t saturatedRange = 127;
136
uint8_t saturatedRangeControl = 0x20; // > comparison
137
138
// Replicate the limit character and comparison controller into vector registers
139
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vRange, saturatedRange, 0);
140
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vRangeControl, saturatedRangeControl, 0);
141
142
// Copy length into numCharsLeftToProcess
143
generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, inputLengthRegister);
144
145
// Branch to the end of this section if there are less than 16 chars left to process
146
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BL, processMultiple16CharsEnd, false, false);
147
148
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart);
149
processMultiple16CharsStart->setStartInternalControlFlow();
150
151
generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput1, generateS390MemoryReference(inputPtrReg, 0, cg));
152
// Check for vector saturation and branch to copy the unsaturated bytes
153
// VSTRC here will do an unsigned comparison and set the CC if any byte in the input vector is above 127.
154
// If all numbers are below 128, then we can do a straight copy of the 16 bytes. If not, then we branch to
155
// processSaturatedInput1 label that corrects the first 'bad' character and stores all characters up to and including the 'bad' character
156
// in the output destination. Then we branch back to this mainline loop and continue processing the rest of the array.
157
// The penalty for encountering 1 or more bad characters in a row can be big, but we bet that such cases are not
158
// common.
159
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, firstSaturatedCharacter, vInput1, vRange, vRangeControl, 0x1, 0);
160
// If atleast one bad character was found, CC=1. So branch to handle this case.
161
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processSaturatedInput1);
162
163
// If we didn't take the branch above, then all 16 bytes can be copied directly over.
164
generateVRXInstruction(cg, TR::InstOpCode::VST, node, vInput1, generateS390MemoryReference(outputPtrReg, 0, cg));
165
166
// Update the counters
167
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, outputPtrReg, generateS390MemoryReference(outputPtrReg, 16, cg));
168
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, 16, cg));
169
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, numCharsLeftToProcess, 16);
170
171
// Branch back up if we still have more than 16 characters to process.
172
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 15, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false);
173
174
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsEnd);
175
176
// start of sequence to process under 16 characters
177
178
// numCharsLeftToProcess holds length of final load.
179
// Branch to the end if there is no residue
180
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
181
182
// Zero out the input register to avoid invalid VSTRC result
183
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput1, 0, 0 /*unused*/);
184
185
// VLL and VSTL work on indices so we must subtract 1
186
TR::Register *numCharsLeftToProcessMinus1 = cg->allocateRegister();
187
// Due to the check above, the value in numCharsLeftToProcessMinus1 is guaranteed to be 0 or higher.
188
generateRIEInstruction(cg, TR::InstOpCode::AHIK, node, numCharsLeftToProcessMinus1, numCharsLeftToProcess, -1);
189
// Load residue bytes and check for saturation
190
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput1, numCharsLeftToProcessMinus1, generateS390MemoryReference(inputPtrReg, 0, cg));
191
192
// Check for vector saturation and branch to copy the unsaturated bytes
193
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, firstSaturatedCharacter, vInput1, vRange, vRangeControl, 0x1, 0);
194
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processSaturatedInput1);
195
196
// If no bad characters found, the store with length.
197
generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, vInput1, numCharsLeftToProcessMinus1, generateS390MemoryReference(outputPtrReg, 0, cg), 0);
198
// Branch to end.
199
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
200
201
// Encountered an out of range character via the VSTRC instruction. Find it, replace it with '?', then jump back to mainline
202
// to continue processing. This sequence is not the most efficient and hitting it one or more times can be expensive,
203
// but we bet that this won't happen often for the targeted workload.
204
// Algorithm works as follows:
205
// First store upto and not including the bad character.
206
// Then store '?' in place for the bad character.
207
// Then, update the counters with the number of characters we have processed.
208
// Then go back to mainline code. Where we jump to depends on how many characters are left to process.
209
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processSaturatedInput1);
210
211
TR::Register *firstSaturatedCharacterGR = cg->allocateRegister();
212
// Extract the index of the first saturated char in the 2nd vector register
213
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, firstSaturatedCharacterGR, firstSaturatedCharacter, generateS390MemoryReference(7, cg), 0);
214
215
// Needed as VSTL operate on 0-based index.
216
TR::Register *firstSaturatedCharacterMinus1GR = cg->allocateRegister();
217
218
generateRIEInstruction(cg, TR::InstOpCode::AHIK, node, firstSaturatedCharacterMinus1GR, firstSaturatedCharacterGR, -1);
219
220
// If the result is less than 0, then it means the first character is saturated. So skip storing any good characters and jump to fixing the bad
221
// character.
222
TR::LabelSymbol *fixReplacementCharacter = generateLabelSymbol(cg);
223
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, fixReplacementCharacter);
224
225
// Copy only the unsaturated results using the index we calculated earlier
226
generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, vInput1, firstSaturatedCharacterMinus1GR, generateS390MemoryReference(outputPtrReg, 0, cg), 0);
227
generateRRInstruction(cg, cg->comp()->target().is64Bit() ? TR::InstOpCode::AGFR : TR::InstOpCode::AR, node, outputPtrReg, firstSaturatedCharacterGR);
228
229
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fixReplacementCharacter);
230
const uint32_t replacementCharacter = 63;
231
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390MemoryReference(outputPtrReg, 0, cg), replacementCharacter);
232
233
generateRILInstruction(cg, cg->comp()->target().is64Bit() ? TR::InstOpCode::AGFI : TR::InstOpCode::AFI, node, outputPtrReg, 1);
234
235
// Now update the counters
236
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, firstSaturatedCharacterGR, 1, cg));
237
generateRILInstruction(cg, TR::InstOpCode::AFI, node, numCharsLeftToProcess, -1);
238
generateRRInstruction(cg, TR::InstOpCode::SR, node, numCharsLeftToProcess, firstSaturatedCharacterGR);
239
240
// Counters have been updated. Now branch back to mainline. Where we branch depends on how many chars are left.
241
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 15, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false);
242
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, processMultiple16CharsEnd);
243
244
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 11, cg);
245
dependencies->addPostConditionIfNotAlreadyInserted(vInput1, TR::RealRegister::AssignAny);
246
dependencies->addPostConditionIfNotAlreadyInserted(firstSaturatedCharacter, TR::RealRegister::AssignAny);
247
dependencies->addPostConditionIfNotAlreadyInserted(vRange, TR::RealRegister::AssignAny);
248
dependencies->addPostConditionIfNotAlreadyInserted(vRangeControl, TR::RealRegister::AssignAny);
249
dependencies->addPostConditionIfNotAlreadyInserted(outputPtrReg, TR::RealRegister::AssignAny);
250
dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny);
251
dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny);
252
dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcessMinus1, TR::RealRegister::AssignAny);
253
dependencies->addPostConditionIfNotAlreadyInserted(inputLengthRegister, TR::RealRegister::AssignAny);
254
dependencies->addPostConditionIfNotAlreadyInserted(firstSaturatedCharacterGR, TR::RealRegister::AssignAny);
255
dependencies->addPostConditionIfNotAlreadyInserted(firstSaturatedCharacterMinus1GR, TR::RealRegister::AssignAny);
256
257
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
258
cFlowRegionEnd->setEndInternalControlFlow();
259
260
cg->decReferenceCount(node->getChild(0));
261
cg->decReferenceCount(node->getChild(1));
262
cg->decReferenceCount(node->getChild(2));
263
264
cg->stopUsingRegister(vInput1);
265
cg->stopUsingRegister(firstSaturatedCharacter);
266
cg->stopUsingRegister(vRange);
267
cg->stopUsingRegister(vRangeControl);
268
cg->stopUsingRegister(numCharsLeftToProcess);
269
cg->stopUsingRegister(numCharsLeftToProcessMinus1);
270
cg->stopUsingRegister(firstSaturatedCharacterGR);
271
cg->stopUsingRegister(firstSaturatedCharacterMinus1GR);
272
}
273
274
TR::Register*
275
J9::Z::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg)
276
{
277
static bool disableStringInflate = feGetEnv("TR_DisableStringInflate") != NULL;
278
if (disableStringInflate)
279
{
280
return NULL;
281
}
282
TR_ASSERT_FATAL(cg->getSupportsInlineStringLatin1Inflate(), "This evaluator should only be triggered when inlining StringLatin1.inflate([BI[CII)V is enabled on Java 11 onwards!\n");
283
TR::Node *sourceArrayReferenceNode = node->getChild(0);
284
TR::Node *srcOffNode = node->getChild(1);
285
TR::Node *charArrayReferenceNode = node->getChild(2);
286
TR::Node *dstOffNode = node->getChild(3);
287
TR::Node *lenNode = node->getChild(4);
288
289
TR::Register *lenRegister = cg->evaluate(lenNode);
290
TR::Register *sourceArrayReferenceRegister = cg->gprClobberEvaluate(sourceArrayReferenceNode);
291
TR::Register *srcOffRegister = cg->gprClobberEvaluate(srcOffNode);
292
TR::Register *charArrayReferenceRegister = cg->gprClobberEvaluate(charArrayReferenceNode);
293
TR::Register *dstOffRegister = cg->gprClobberEvaluate(dstOffNode);
294
295
// Adjust the array reference (source and destination) with offset in advance
296
if (srcOffNode->getOpCodeValue() == TR::iconst)
297
{
298
if (srcOffNode->getInt() != 0)
299
{
300
generateRILInstruction(cg, TR::InstOpCode::AFI, node, sourceArrayReferenceRegister, srcOffNode->getInt());
301
}
302
}
303
else
304
{
305
generateRRInstruction(cg, TR::InstOpCode::AGFR, node, sourceArrayReferenceRegister, srcOffRegister);
306
}
307
308
if (dstOffNode->getOpCodeValue() == TR::iconst)
309
{
310
if (dstOffNode->getInt() != 0)
311
{
312
generateRILInstruction(cg, TR::InstOpCode::AFI, node, charArrayReferenceRegister, dstOffNode->getInt() * 2);
313
}
314
}
315
else
316
{
317
generateRSInstruction(cg, TR::InstOpCode::SLAK, node, dstOffRegister, dstOffRegister, 1);
318
generateRRInstruction(cg, TR::InstOpCode::AGFR, node, charArrayReferenceRegister, dstOffRegister);
319
}
320
321
// The vector tight loop (starting at vectorLoopStart label) overwrites sourceArrayReferenceRegister as it processes characters. So we keep a backup of the
322
// copy here so that we can refer to it when handling the residual digits after the tight loop is finished executing.
323
TR::Register *sourceArrayReferenceRegister2 = cg->allocateRegister();
324
generateRRInstruction(cg, TR::InstOpCode::LGR, node, sourceArrayReferenceRegister2, sourceArrayReferenceRegister);
325
326
// charArrayReference is the destination array. Since the vector loop below processes 16 bytes into 16 chars per iteration, we will store 32 bytes per iteration.
327
// We use the `VST` instruction twice to store 16 bytes at a time. Hence, we need a "low" and "high" memref for the char array in order to store all 32 bytes per iteration
328
// of the vector loop.
329
TR::MemoryReference *charArrayReferenceMemRefLow = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
330
TR::MemoryReference *charArrayReferenceMemRefHigh = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
331
332
// numCharsMinusResidue is used as a scratch register to hold temporary values throughout the algorithm.
333
TR::Register *numCharsMinusResidue = cg->allocateRegister();
334
generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsMinusResidue, lenRegister);
335
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, numCharsMinusResidue, 16);
336
generateRRInstruction(cg, TR::InstOpCode::AR, node, numCharsMinusResidue, srcOffRegister);
337
338
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
339
cFlowRegionStart->setStartInternalControlFlow();
340
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
341
342
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
343
TR::LabelSymbol *gprSequenceLabel = generateLabelSymbol(cg);
344
cFlowRegionEnd->setEndInternalControlFlow();
345
// Before starting the tight loop, check if length is 0. If so, then jump to end as there's no work to be done.
346
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lenRegister, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
347
// Also check if length < 8. If yes, then jump to gprSequenceLabel to handle this case with regular GPRs for speed.
348
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lenRegister, 8, TR::InstOpCode::COND_BL, gprSequenceLabel, false, false);
349
350
TR::LabelSymbol *vectorLoopStart = generateLabelSymbol(cg);
351
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, vectorLoopStart);
352
TR::LabelSymbol *handleResidueLabel = generateLabelSymbol(cg);
353
354
// We keep executing the vector tight loop below until only the residual characters remain to process.
355
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, srcOffRegister, numCharsMinusResidue, TR::InstOpCode::COND_BH, handleResidueLabel, false, false);
356
TR::Register* registerV1 = cg->allocateRegister(TR_VRF);
357
TR::MemoryReference *sourceArrayMemRef = generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
358
// Do a vector load to batch process the characters.
359
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerV1, sourceArrayMemRef);
360
TR::Register* registerV2 = cg->allocateRegister(TR_VRF);
361
// Unpack the 1st and 2nd halves of the input vector. This will efectively inflate each character from 1 byte to 2 bytes.
362
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerV2, registerV1);
363
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, registerV1, registerV1);
364
365
// Store all characters.
366
generateVRXInstruction(cg, TR::InstOpCode::VST, node, registerV2, charArrayReferenceMemRefLow);
367
generateVRXInstruction(cg, TR::InstOpCode::VST, node, registerV1, charArrayReferenceMemRefHigh);
368
369
// Done storing 16 chars. Now do some bookkeeping and then branch back to start label.
370
generateRILInstruction(cg, TR::InstOpCode::AFI, node, srcOffRegister, 16);
371
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, sourceArrayReferenceRegister, generateS390MemoryReference(sourceArrayReferenceRegister, 16, cg));
372
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, charArrayReferenceRegister, generateS390MemoryReference(charArrayReferenceRegister, 32, cg));
373
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, vectorLoopStart);
374
375
// Once we reach this label, only the residual characters need to be processed.
376
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleResidueLabel);
377
378
TR::MemoryReference *sourceArrayMemRef2 = generateS390MemoryReference(sourceArrayReferenceRegister2, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
379
TR::MemoryReference *charArrayReferenceMemRefLow2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
380
TR::MemoryReference *charArrayReferenceMemRefHigh2 = generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 16, cg);
381
382
TR::Register *quoRegister = cg->allocateRegister();
383
// Do lenRegister / 16 to calculate remaining number of chars using the Divide Logical (DLR) instruction.
384
// The dividend in a DLR instruction is a 64-bit integer. The top half is in remRegister, and the bottom half is in quoRegister.
385
// In our case the dividend is always a 32-bit integer (i.e. the length of the array). So we must always zero out the top half (i.e. remRegister) in order to make sure the dividend is never corrupted.
386
// The bottom half doesn't need to be zeroed out because we move a 32-bit integer in it, and then never use that register again.
387
TR::Register *remRegister = cg->allocateRegister();
388
generateRRInstruction(cg, TR::InstOpCode::XGR, node, remRegister, remRegister);
389
TR::RegisterPair *divRegisterPair = cg->allocateConsecutiveRegisterPair(quoRegister, remRegister); // rem is legal even of the pair
390
generateRRInstruction(cg, TR::InstOpCode::LR, node, quoRegister, lenRegister);
391
TR::Register *tempReg = cg->allocateRegister();
392
generateRILInstruction(cg, TR::InstOpCode::LGFI, node, /*divisor*/ tempReg, 16);
393
generateRRInstruction(cg, TR::InstOpCode::DLR, node, divRegisterPair, tempReg/*divisor*/);
394
395
// Branch to end if length was a multiple of 16. (We would have processed this in the vectorloop already).
396
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, remRegister, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false);
397
// Now do srcOffRegister = lenRegister - remRegister to position index at the next character that we need to copy.
398
generateRRRInstruction(cg, TR::InstOpCode::SRK, node, srcOffRegister, lenRegister, remRegister);
399
// Now add that result to the base register
400
generateRRInstruction(cg, TR::InstOpCode::AGFR, node, sourceArrayReferenceRegister2, srcOffRegister);
401
402
// Now that you have the new index and the number of remaining characters, load those many chars into registerV1. We are guaranteed to have numChars < 16
403
//
404
// First load remainder (i.e. number of remaining chars) value into remRegister2 (Which is just a 0-index based version of remRegister).
405
// Then we subtract remRegister2 by 1 to get an indexed number. Then we use VLL to load the remaining bytes int registerV1.
406
TR::Register *remRegister2 = cg->allocateRegister();
407
generateRRInstruction(cg, TR::InstOpCode::LR, node, remRegister2, remRegister);
408
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, remRegister2, 1);
409
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, registerV1, remRegister2, sourceArrayMemRef2);
410
// Now unpack the low order. If we have less than 8 chars to process, there will be zeros in the register
411
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerV2, registerV1);
412
// Multiply numChars remaining by 2 to get the number of bytes we need to write
413
generateRSInstruction(cg, TR::InstOpCode::SLAK, node, tempReg, remRegister, 1);
414
// Subtract one from tempReg since the byte position is 0 based.
415
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, tempReg, 1);
416
generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, registerV2, tempReg, charArrayReferenceMemRefLow2, 0);
417
// Now subtract numChars by 8 and see if there's still more bytes left to write
418
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, remRegister, 8);
419
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, remRegister, 0, TR::InstOpCode::COND_BNH, cFlowRegionEnd, false, false);
420
// If we didn't branch, then there are still more characters to process, and the remaining amount is in remRegister.
421
// So unpack the characters and store back in memory
422
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, registerV1, registerV1);
423
generateRSInstruction(cg, TR::InstOpCode::SLAK, node, tempReg, remRegister, 1);
424
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, tempReg, 1);
425
generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, registerV1, tempReg, charArrayReferenceMemRefHigh2, 0);
426
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
427
428
// We should only end up here if we initially detect that the input string's length < 8.
429
// For the GPR sequence we simply load one byte at a time using LLC, then store it as a char.
430
// If we are here, then lenRegister is less than remaining chars is in numCharsMinusResidue.
431
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, gprSequenceLabel);
432
433
// Repurpose numCharsMinusResidue here as a temp/scratch reg.
434
generateRSInstruction(cg, TR::InstOpCode::SLAK, node, numCharsMinusResidue, lenRegister, 1);
435
generateRSInstruction(cg, TR::InstOpCode::SLAK, node, tempReg, lenRegister, 3);
436
generateRRInstruction(cg, TR::InstOpCode::AR, node, numCharsMinusResidue, tempReg);
437
438
// First we figure out exactly how many chars are left.
439
generateRILInstruction(cg, TR::InstOpCode::LARL, node, tempReg, cFlowRegionEnd);
440
441
generateRRInstruction(cg, TR::InstOpCode::SR, node, tempReg, numCharsMinusResidue);
442
TR::Instruction *cursor = generateS390RegInstruction(cg, TR::InstOpCode::BCR, node, tempReg);
443
((TR::S390RegInstruction *)cursor)->setBranchCondition(TR::InstOpCode::COND_BCR);
444
445
// 7 chars left
446
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
447
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 12, cg));
448
// 6 chars left
449
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 5, cg));
450
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 10, cg));
451
// 5 chars left
452
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
453
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 8, cg));
454
// 4 chars left
455
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 3, cg));
456
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 6, cg));
457
// 3 chars left
458
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
459
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 4, cg));
460
// 2 chars left
461
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 1, cg));
462
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 2, cg));
463
// 1 chars left
464
generateRXInstruction(cg, TR::InstOpCode::LLC, node, tempReg, generateS390MemoryReference(sourceArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
465
generateRXInstruction(cg, TR::InstOpCode::STH, node, tempReg, generateS390MemoryReference(charArrayReferenceRegister, TR::Compiler->om.contiguousArrayHeaderSizeInBytes() + 0, cg));
466
467
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 14, cg);
468
dependencies->addPostConditionIfNotAlreadyInserted(sourceArrayReferenceRegister, TR::RealRegister::AssignAny);
469
dependencies->addPostConditionIfNotAlreadyInserted(lenRegister, TR::RealRegister::AssignAny);
470
dependencies->addPostConditionIfNotAlreadyInserted(srcOffRegister, TR::RealRegister::AssignAny);
471
dependencies->addPostConditionIfNotAlreadyInserted(dstOffRegister, TR::RealRegister::AssignAny);
472
dependencies->addPostConditionIfNotAlreadyInserted(charArrayReferenceRegister, TR::RealRegister::AssignAny);
473
dependencies->addPostConditionIfNotAlreadyInserted(numCharsMinusResidue, TR::RealRegister::AssignAny);
474
dependencies->addPostConditionIfNotAlreadyInserted(registerV1, TR::RealRegister::AssignAny);
475
dependencies->addPostConditionIfNotAlreadyInserted(registerV2, TR::RealRegister::AssignAny);
476
dependencies->addPostConditionIfNotAlreadyInserted(divRegisterPair, TR::RealRegister::EvenOddPair);
477
dependencies->addPostConditionIfNotAlreadyInserted(remRegister, TR::RealRegister::LegalEvenOfPair);
478
dependencies->addPostConditionIfNotAlreadyInserted(quoRegister, TR::RealRegister::LegalOddOfPair);
479
dependencies->addPostConditionIfNotAlreadyInserted(tempReg, TR::RealRegister::AssignAny);
480
dependencies->addPostConditionIfNotAlreadyInserted(sourceArrayReferenceRegister2, TR::RealRegister::AssignAny);
481
dependencies->addPostConditionIfNotAlreadyInserted(remRegister2, TR::RealRegister::AssignAny);
482
483
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
484
485
cg->decReferenceCount(srcOffNode);
486
cg->decReferenceCount(dstOffNode);
487
cg->decReferenceCount(lenNode);
488
cg->decReferenceCount(sourceArrayReferenceNode);
489
cg->decReferenceCount(charArrayReferenceNode);
490
491
cg->stopUsingRegister(remRegister2);
492
cg->stopUsingRegister(numCharsMinusResidue);
493
cg->stopUsingRegister(registerV1);
494
cg->stopUsingRegister(registerV2);
495
cg->stopUsingRegister(divRegisterPair);
496
cg->stopUsingRegister(tempReg);
497
cg->stopUsingRegister(sourceArrayReferenceRegister2);
498
return charArrayReferenceRegister;
499
}
500
501
/**
 * \brief zdload evaluator entry point: hands \p node and \p cg unchanged to
 *        pdloadEvaluator and returns the register that call produces.
 */
TR::Register *
J9::Z::TreeEvaluator::zdloadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
506
507
/**
 * \brief zdloadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdloadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
512
513
/**
 * \brief zdstore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
518
519
/**
 * \brief zdstorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
524
525
/**
 * \brief zdsleLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsleLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
530
531
/**
 * \brief zdslsLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdslsLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
536
537
/**
 * \brief zdstsLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstsLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
542
543
/**
 * \brief zdsleLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsleLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
548
549
/**
 * \brief zdslsLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdslsLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
554
555
/**
 * \brief zdstsLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstsLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
560
561
/**
 * \brief zdsleStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsleStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
566
567
/**
 * \brief zdslsStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdslsStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
572
573
/**
 * \brief zdstsStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstsStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
578
579
/**
 * \brief zdsleStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsleStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
584
585
/**
 * \brief zdslsStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdslsStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
590
591
/**
 * \brief zdstsStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdstsStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
596
597
/**
 * \brief zd2zdsle evaluator entry point: forwards \p node and \p cg to
 *        zdsle2zdEvaluator (the evaluator named for the opposite direction)
 *        and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zd2zdsleEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::zdsle2zdEvaluator(node, cg);
   }
602
603
/**
 * \brief zd2zdsts evaluator entry point: forwards \p node and \p cg to
 *        zd2zdslsEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zd2zdstsEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::zd2zdslsEvaluator(node, cg);
   }
608
609
/**
 * \brief zdsle2pd evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsle2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
614
615
/**
 * \brief zdsts2pd evaluator entry point: forwards \p node and \p cg to
 *        zdsls2pdEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsts2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::zdsls2pdEvaluator(node, cg);
   }
620
621
/**
 * \brief zdsts2zd evaluator entry point: forwards \p node and \p cg to
 *        zdsls2zdEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::zdsts2zdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::zdsls2zdEvaluator(node, cg);
   }
626
627
/**
 * \brief pd2zdslsSetSign evaluator entry point: forwards \p node and \p cg to
 *        pd2zdslsEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2zdslsSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2zdslsEvaluator(node, cg);
   }
632
633
/**
 * \brief pd2zdsts evaluator entry point: forwards \p node and \p cg to
 *        pd2zdslsEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2zdstsEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2zdslsEvaluator(node, cg);
   }
638
639
/**
 * \brief pd2zdstsSetSign evaluator entry point: forwards \p node and \p cg to
 *        pd2zdslsEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2zdstsSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2zdslsEvaluator(node, cg);
   }
644
645
/**
 * \brief udLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
650
651
/**
 * \brief udslLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udslLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
656
657
/**
 * \brief udstLoad evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udstLoadEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
662
663
/**
 * \brief udLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
668
669
/**
 * \brief udslLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udslLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
674
675
/**
 * \brief udstLoadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udstLoadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
680
681
/**
 * \brief udStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
686
687
/**
 * \brief udslStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udslStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
692
693
/**
 * \brief udstStore evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udstStoreEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
698
699
/**
 * \brief udStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
704
705
/**
 * \brief udslStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udslStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
710
711
/**
 * \brief udstStorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udstStoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
716
717
/**
 * \brief pd2udst evaluator entry point: forwards \p node and \p cg to
 *        pd2udslEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2udstEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2udslEvaluator(node, cg);
   }
722
723
/**
 * \brief udsl2ud evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udsl2udEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
728
729
/**
 * \brief udst2ud evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udst2udEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
734
735
/**
 * \brief udst2pd evaluator entry point: forwards \p node and \p cg to
 *        udsl2pdEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::udst2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::udsl2pdEvaluator(node, cg);
   }
740
741
/**
 * \brief pdloadi evaluator entry point: forwards \p node and \p cg to
 *        pdloadEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdloadiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdloadEvaluator(node, cg);
   }
746
747
/**
 * \brief pdstorei evaluator entry point: forwards \p node and \p cg to
 *        pdstoreEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdstoreiEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdstoreEvaluator(node, cg);
   }
752
753
/**
 * \brief pddiv evaluator entry point: forwards \p node and \p cg to the shared
 *        pddivremEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pddivEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pddivremEvaluator(node, cg);
   }
758
759
/**
 * \brief pdrem evaluator entry point: forwards \p node and \p cg to the shared
 *        pddivremEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdremEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pddivremEvaluator(node, cg);
   }
764
765
/**
 * \brief pdabs evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdabsEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
770
771
/**
 * \brief pdshrSetSign evaluator entry point: has no implementation of its own
 *        and forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdshrSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
776
777
/**
 * \brief pdshlSetSign evaluator entry point: has no implementation of its own
 *        and forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdshlSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
782
783
/**
 * \brief pdshlOverflow evaluator entry point: forwards \p node and \p cg to
 *        pdshlEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdshlOverflowEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdshlEvaluator(node, cg);
   }
788
789
/**
 * \brief pd2iOverflow evaluator entry point: forwards \p node and \p cg to
 *        pd2iEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2iOverflowEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2iEvaluator(node, cg);
   }
794
795
/**
 * \brief pd2iu evaluator entry point: forwards \p node and \p cg to
 *        pd2iEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2iuEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2iEvaluator(node, cg);
   }
800
801
/**
 * \brief iu2pd evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::iu2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
806
807
/**
 * \brief pd2lOverflow evaluator entry point: forwards \p node and \p cg to
 *        pd2lEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2lOverflowEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2lEvaluator(node, cg);
   }
812
813
/**
 * \brief pd2lu evaluator entry point: forwards \p node and \p cg to
 *        pd2lEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2luEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pd2lEvaluator(node, cg);
   }
818
819
/**
 * \brief lu2pd evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::lu2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
824
825
/**
 * \brief pd2f evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2fEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
830
831
/**
 * \brief pd2d evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pd2dEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
836
837
/**
 * \brief f2pd evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::f2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
842
843
/**
 * \brief d2pd evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::d2pdEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
848
849
/**
 * \brief pdclean evaluator entry point: has no implementation of its own and
 *        forwards \p node and \p cg to unImpOpEvaluator, returning its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdcleanEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::unImpOpEvaluator(node, cg);
   }
854
855
/**
 * \brief pdclearSetSign evaluator entry point: forwards \p node and \p cg to
 *        pdclearEvaluator and returns its result.
 */
TR::Register *
J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   return TR::TreeEvaluator::pdclearEvaluator(node, cg);
   }
860
861
/* Moved from Codegen to FE */
862
///////////////////////////////////////////////////////////////////////////////////
863
// Generate code to perform a comparison and branch to a snippet.
864
// This routine is used mostly by bndchk evaluator.
865
//
866
// The comparison type is determined by the choice of CMP operators:
867
// - fBranchOp: Operator used for forward operation -> A fCmp B
868
// - rBranchOp: Operator user for reverse operation -> B rCmp A <=> A fCmp B
869
//
870
// TODO - avoid code duplication, this routine may be able to merge with the one
871
// above which has the similar logic.
872
///////////////////////////////////////////////////////////////////////////////////
873
TR::Instruction *
874
generateS390CompareBranchLabel(TR::Node * node, TR::CodeGenerator * cg, TR::InstOpCode::Mnemonic branchOp, TR::InstOpCode::S390BranchCondition fBranchOpCond, TR::InstOpCode::S390BranchCondition rBranchOpCond,
875
TR::LabelSymbol * label)
876
{
877
return generateS390CompareOps(node, cg, fBranchOpCond, rBranchOpCond, label);
878
}
879
880
/* Moved from Codegen to FE since only awrtbarEvaluator calls this function */
881
static TR::Register *
882
allocateWriteBarrierInternalPointerRegister(TR::CodeGenerator * cg, TR::Node * sourceChild)
883
{
884
TR::Register * sourceRegister;
885
886
if (sourceChild->getRegister() != NULL && !cg->canClobberNodesRegister(sourceChild))
887
{
888
if (!sourceChild->getRegister()->containsInternalPointer())
889
{
890
sourceRegister = cg->allocateCollectedReferenceRegister();
891
}
892
else
893
{
894
sourceRegister = cg->allocateRegister();
895
sourceRegister->setPinningArrayPointer(sourceChild->getRegister()->getPinningArrayPointer());
896
sourceRegister->setContainsInternalPointer();
897
}
898
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), sourceChild, sourceRegister, sourceChild->getRegister());
899
}
900
else
901
{
902
sourceRegister = cg->evaluate(sourceChild);
903
}
904
905
return sourceRegister;
906
}
907
908
909
/**
 * \brief Emits a vector-instruction sequence that computes the max or min of
 *        the two children of \p node.
 *
 * A select mask is built in a scratch vector register; it is set when the
 * first operand (a) has to be chosen over the second operand (b):
 *    - a compares high against b (operands are swapped for min), or
 *    - a is NaN, or
 *    - a is +0 (max) / -0 (min) while b is a zero of either sign.
 * VSEL then takes a where the mask is set and b elsewhere, writing the result
 * into the register obtained from fprClobberEvaluate on the first child.
 *
 * \param node    node with exactly two children; its register is set to the result
 * \param cg      code generator used to allocate registers and emit instructions
 * \param isMaxOp true to generate max, false to generate min
 * \return        the register holding the result (the same one set on \p node)
 */
extern TR::Register *
doubleMaxMinHelper(TR::Node *node, TR::CodeGenerator *cg, bool isMaxOp)
   {
   // Both the first and the second child are consumed below, so exactly two
   // children are required. The earlier condition (">= 1 || <= 2") held for
   // every possible child count and therefore could never fire.
   TR_ASSERT(node->getNumChildren() == 2, "node has incorrect number of children");

   /* ===================== Allocating Registers  ===================== */

   TR::Register * v16 = cg->allocateRegister(TR_VRF);
   TR::Register * v17 = cg->allocateRegister(TR_VRF);
   TR::Register * v18 = cg->allocateRegister(TR_VRF);

   /* ===================== Generating instructions  ===================== */

   /* ====== LD FPR0,16(GPR5)       Load a ====== */
   TR::Register * v0 = cg->fprClobberEvaluate(node->getFirstChild());

   /* ====== LD FPR2, 0(GPR5)       Load b ====== */
   TR::Register * v2 = cg->evaluate(node->getSecondChild());

   /* ====== WFTCIDB V16,V0,X'F'     a == NaN ====== */
   generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v16, v0, 0xF, 8, 3);

   // NOTE(review): the emitted opcode is VFCH (compare high); earlier comments
   // named WFCHE (compare high or equal) - confirm which one is intended.
   if (isMaxOp)
      {
      /* ====== For Max: VFCH V17,V0,V2     Compare a against b ====== */
      generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v0, v2, 0, 8, 3);
      }
   else
      {
      /* ====== For Min: VFCH V17,V2,V0     Compare b against a (operands swapped) ====== */
      generateVRRcInstruction(cg, TR::InstOpCode::VFCH, node, v17, v2, v0, 0, 8, 3);
      }

   /* ====== VO V16,V16,V17     (compare result) || (a == NaN) ====== */
   generateVRRcInstruction(cg, TR::InstOpCode::VO, node, v16, v16, v17, 0, 0, 0);

   if (isMaxOp)
      {
      /* ====== For Max: WFTCIDB V17,V0,X'800'     a == +0 ====== */
      generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x800, 8, 3);
      }
   else
      {
      /* ====== For Min: WFTCIDB V17,V0,X'400'     a == -0 ====== */
      generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v17, v0, 0x400, 8, 3);
      }

   /* ====== WFTCIDB V18,V2,X'C00'       b == 0 (either sign) ====== */
   generateVRIeInstruction(cg, TR::InstOpCode::VFTCI, node, v18, v2, 0xC00, 8, 3);

   /* ====== VN V17,V17,V18     (a == +0 for max / -0 for min) && (b == 0) ====== */
   generateVRRcInstruction(cg, TR::InstOpCode::VN, node, v17, v17, v18, 0, 0, 0);

   /* ====== VO V16,V16,V17     (compare result) || (a == NaN) || ((a == signed zero) && (b == 0)) ====== */
   generateVRRcInstruction(cg, TR::InstOpCode::VO, node, v16, v16, v17, 0, 0, 0);

   /* ====== VSEL V0,V0,V2,V16     result = mask ? a : b ====== */
   generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, v0, v0, v2, v16);

   /* ===================== Deallocating Registers  ===================== */
   cg->stopUsingRegister(v2);
   cg->stopUsingRegister(v16);
   cg->stopUsingRegister(v17);
   cg->stopUsingRegister(v18);

   node->setRegister(v0);

   cg->decReferenceCount(node->getFirstChild());
   cg->decReferenceCount(node->getSecondChild());

   return node->getRegister();
   }
980
981
TR::Register*
982
J9::Z::TreeEvaluator::inlineVectorizedStringIndexOf(TR::Node* node, TR::CodeGenerator* cg, bool isUTF16)
983
{
984
#define iComment(str) if (compDebug) compDebug->addInstructionComment(cursor, (const_cast<char*>(str)));
985
TR::Compilation *comp = cg->comp();
986
const uint32_t elementSizeMask = isUTF16 ? 1 : 0;
987
const int8_t vectorSize = cg->machine()->getVRFSize();
988
const uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
989
const bool supportsVSTRS = comp->target().cpu.supportsFeature(OMR_FEATURE_S390_VECTOR_FACILITY_ENHANCEMENT_2);
990
TR_Debug *compDebug = comp->getDebug();
991
TR::Instruction* cursor;
992
993
static bool disableIndexOfStringIntrinsic = feGetEnv("TR_DisableIndexOfStringIntrinsic") != NULL;
994
if (disableIndexOfStringIntrinsic)
995
return NULL;
996
997
if (comp->getOption(TR_TraceCG))
998
traceMsg(comp, "inlineVectorizedStringIndexOf. Is isUTF16 %d\n", isUTF16);
999
1000
// This evaluator function handles different indexOf() intrinsics, some of which are static calls without a
1001
// receiver. Hence, the need for static call check.
1002
const bool isStaticCall = node->getSymbolReference()->getSymbol()->castToMethodSymbol()->isStatic();
1003
const uint8_t firstCallArgIdx = isStaticCall ? 0 : 1;
1004
1005
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager(9);
1006
1007
// Get call parameters where stringValue and patternValue are byte arrays
1008
TR::Register* stringValueReg = cg->evaluate(node->getChild(firstCallArgIdx));
1009
TR::Register* stringLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+1));
1010
TR::Register* patternValueReg = cg->evaluate(node->getChild(firstCallArgIdx+2));
1011
TR::Register* patternLenReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+3));
1012
TR::Register* stringIndexReg = cg->gprClobberEvaluate(node->getChild(firstCallArgIdx+4));
1013
1014
// Registers
1015
TR::Register* matchIndexReg = cg->allocateRegister();
1016
TR::Register* maxIndexReg = srm->findOrCreateScratchRegister();
1017
TR::Register* patternIndexReg = srm->findOrCreateScratchRegister();
1018
TR::Register* loadLenReg = srm->findOrCreateScratchRegister();
1019
TR::Register* stringVReg = srm->findOrCreateScratchRegister(TR_VRF);
1020
TR::Register* patternVReg = srm->findOrCreateScratchRegister(TR_VRF);
1021
TR::Register* searchResultVReg = srm->findOrCreateScratchRegister(TR_VRF);
1022
1023
// Register dependencies
1024
TR::RegisterDependencyConditions* regDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, supportsVSTRS ? 16 : 13, cg);
1025
1026
regDeps->addPostCondition(stringValueReg, TR::RealRegister::AssignAny);
1027
regDeps->addPostCondition(stringLenReg, TR::RealRegister::AssignAny);
1028
regDeps->addPostCondition(patternValueReg, TR::RealRegister::AssignAny);
1029
regDeps->addPostCondition(patternLenReg, TR::RealRegister::AssignAny);
1030
regDeps->addPostCondition(stringIndexReg, TR::RealRegister::AssignAny);
1031
regDeps->addPostCondition(matchIndexReg, TR::RealRegister::AssignAny);
1032
1033
// Labels
1034
TR::LabelSymbol* labelStart = generateLabelSymbol(cg);
1035
TR::LabelSymbol* labelFindPatternHead = generateLabelSymbol(cg);
1036
TR::LabelSymbol* labelLoadString16Bytes = generateLabelSymbol(cg);
1037
TR::LabelSymbol* labelLoadStringLenDone = generateLabelSymbol(cg);
1038
TR::LabelSymbol* labelMatchPatternLoop = generateLabelSymbol(cg);
1039
TR::LabelSymbol* labelMatchPatternResidue = generateLabelSymbol(cg);
1040
TR::LabelSymbol* labelMatchPatternLoopSetup = generateLabelSymbol(cg);
1041
TR::LabelSymbol* labelPartialPatternMatch = generateLabelSymbol(cg);
1042
TR::LabelSymbol* labelLoadResult = generateLabelSymbol(cg);
1043
TR::LabelSymbol* labelResultDone = generateLabelSymbol(cg);
1044
TR::LabelSymbol* labelPatternNotFound = generateLabelSymbol(cg);
1045
TR::LabelSymbol* labelDone = generateLabelSymbol(cg);
1046
1047
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelStart);
1048
iComment("retrieve string len, pattern len and starting pos");
1049
labelStart->setStartInternalControlFlow();
1050
1051
// Decompressed strings have [byte_length = char_length * 2]
1052
if (isUTF16 && comp->target().is64Bit())
1053
{
1054
generateShiftThenKeepSelected64Bit(node, cg, stringLenReg, stringLenReg, 31, 62, 1);
1055
generateShiftThenKeepSelected64Bit(node, cg, patternLenReg, patternLenReg, 31, 62, 1);
1056
generateShiftThenKeepSelected64Bit(node, cg, stringIndexReg, stringIndexReg, 31, 62, 1);
1057
}
1058
else
1059
{
1060
generateRRInstruction(cg, TR::InstOpCode::LGFR, node, stringLenReg, stringLenReg);
1061
generateRRInstruction(cg, TR::InstOpCode::LGFR, node, patternLenReg, patternLenReg);
1062
generateRRInstruction(cg, TR::InstOpCode::LGFR, node, stringIndexReg, stringIndexReg);
1063
1064
if (isUTF16)
1065
{
1066
generateRSInstruction(cg, TR::InstOpCode::SLL, node, stringLenReg, 1);
1067
generateRSInstruction(cg, TR::InstOpCode::SLL, node, patternLenReg, 1);
1068
generateRSInstruction(cg, TR::InstOpCode::SLL, node, stringIndexReg, 1);
1069
}
1070
}
1071
1072
cursor = generateRRRInstruction(cg, TR::InstOpCode::getSubtractThreeRegOpCode(), node, maxIndexReg, stringLenReg, patternLenReg);
1073
iComment("maximum valid index for a potential match");
1074
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, maxIndexReg, stringIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BLR);
1075
1076
// patternLen debug counters
1077
static bool enableIndexOfDebugCounter = feGetEnv("TR_EnableIndexOfDebugCounter") != NULL;
1078
if (enableIndexOfDebugCounter)
1079
{
1080
TR::LabelSymbol* labelPatternLenGT10 = generateLabelSymbol(cg);
1081
TR::LabelSymbol* labelPatternLenGT30 = generateLabelSymbol(cg);
1082
TR::LabelSymbol* labelPatternLenGT60 = generateLabelSymbol(cg);
1083
TR::LabelSymbol* labelPatternLenGT100 = generateLabelSymbol(cg);
1084
TR::LabelSymbol* labelPatternLenCheckDone = generateLabelSymbol(cg);
1085
1086
uint8_t boundary10Char = isUTF16 ? 20 : 10;
1087
uint8_t boundary30Char = isUTF16 ? 60 : 30;
1088
uint8_t boundary60Char = isUTF16 ? 120 : 60;
1089
uint8_t boundary100Char = isUTF16 ? 200 : 100;
1090
1091
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary10Char, labelPatternLenGT10, TR::InstOpCode::COND_BH);
1092
cg->generateDebugCounter("indexOfString/PatternLen/below-10", 1, TR::DebugCounter::Cheap);
1093
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLenCheckDone);
1094
1095
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLenGT10);
1096
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary30Char, labelPatternLenGT30, TR::InstOpCode::COND_BH);
1097
cg->generateDebugCounter("indexOfString/PatternLen/10-30", 1, TR::DebugCounter::Cheap);
1098
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLenCheckDone);
1099
1100
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLenGT30);
1101
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary60Char, labelPatternLenGT60, TR::InstOpCode::COND_BH);
1102
cg->generateDebugCounter("indexOfString/PatternLen/30-60", 1, TR::DebugCounter::Cheap);
1103
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLenCheckDone);
1104
1105
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLenGT60);
1106
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary100Char, labelPatternLenGT100, TR::InstOpCode::COND_BH);
1107
cg->generateDebugCounter("indexOfString/PatternLen/60-100", 1, TR::DebugCounter::Cheap);
1108
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLenCheckDone);
1109
1110
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLenGT100);
1111
cg->generateDebugCounter("indexOfString/PatternLen/above-100", 1, TR::DebugCounter::Cheap);
1112
1113
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLenCheckDone);
1114
}
1115
1116
if (supportsVSTRS)
1117
{
1118
TR::Register* patternHeadVReg = srm->findOrCreateScratchRegister(TR_VRF); // used for first 16 bytes of the pattern
1119
TR::Register* patternLenVReg = srm->findOrCreateScratchRegister(TR_VRF); // length of the pattern being searched for through VSTRS instruction
1120
1121
// Load the first piece of patternValue (pattern header) which is either 16 bytes or patternLen
1122
TR::LabelSymbol* labelPatternLoad16Bytes = generateLabelSymbol(cg);
1123
TR::LabelSymbol* labelPatternLoadDone = generateLabelSymbol(cg);
1124
1125
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, (int8_t)vectorSize, labelPatternLoad16Bytes, TR::InstOpCode::COND_BNL);
1126
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, patternLenReg, -1);
1127
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternHeadVReg, loadLenReg, generateS390MemoryReference(patternValueReg, headerSize, cg));
1128
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, loadLenReg, patternLenReg);
1129
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, patternLenReg, generateS390MemoryReference(7, cg), 0);
1130
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelPatternLoadDone);
1131
1132
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoad16Bytes);
1133
iComment("load first 16 bytes of the pattern");
1134
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternHeadVReg, generateS390MemoryReference(patternValueReg, headerSize, cg));
1135
generateRIInstruction(cg, TR::InstOpCode::LHI, node, loadLenReg, vectorSize);
1136
generateVRSbInstruction(cg, TR::InstOpCode::VLVG, node, patternLenVReg, loadLenReg, generateS390MemoryReference(7, cg), 0);
1137
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternLoadDone);
1138
iComment("min(16,pattern length) bytes have been loaded");
1139
1140
// Loop to search for pattern header in string
1141
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelFindPatternHead);
1142
iComment("look for pattern head in the string");
1143
1144
// Determine string load length and load a piece of string
1145
generateRRRInstruction(cg, TR::InstOpCode::getSubtractThreeRegOpCode(), node, loadLenReg, stringLenReg, stringIndexReg);
1146
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, loadLenReg, (int8_t)vectorSize, labelLoadString16Bytes, TR::InstOpCode::COND_BNL);
1147
TR::Register* stringCharPtrReg = srm->findOrCreateScratchRegister();
1148
generateRRRInstruction(cg, TR::InstOpCode::getAddThreeRegOpCode(), node, stringCharPtrReg, stringValueReg, stringIndexReg);
1149
// Needs -1 because VLL's third operand is the highest index to load.
1150
// e.g. If the load length is 8 bytes, the highest index is 7. Hence, the need for -1.
1151
cursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1152
iComment("needs -1 because VLL's third operand is the highest index to load");
1153
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1154
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
1155
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelLoadStringLenDone);
1156
srm->reclaimScratchRegister(stringCharPtrReg);
1157
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadString16Bytes);
1158
iComment("load 16 bytes of the string");
1159
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize, cg));
1160
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, loadLenReg, vectorSize);
1161
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadStringLenDone);
1162
iComment("16 bytes of the string have been loaded");
1163
1164
// VSTRS sets CC with the following values:
1165
// CC = 0, no match or partial match, AND (zs = 0 OR no zero byte in source VRF)
1166
// CC = 1, no match AND (zs = 1) AND (zero byte in source VRF)
1167
// CC = 2, full match
1168
// CC = 3, partial match but no full match.
1169
TR::LabelSymbol* labelPatternHeadFullMatch = generateLabelSymbol(cg);
1170
TR::LabelSymbol* labelPatternHeadPartMatch = generateLabelSymbol(cg);
1171
1172
generateVRRdInstruction(cg, TR::InstOpCode::VSTRS, node, searchResultVReg, stringVReg, patternHeadVReg, patternLenVReg, 0, elementSizeMask);
1173
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC2, node, labelPatternHeadFullMatch);
1174
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC3, node, labelPatternHeadPartMatch);
1175
1176
// pattern header not found in first 16 bytes of the string
1177
// Load the next 16 bytes of the string and continue
1178
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, stringIndexReg, loadLenReg);
1179
cursor = generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, stringIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BH);
1180
iComment("Updated stringIndex for next iteration exceeds maxIndex of valid match. Full pattern cannot be matched.")
1181
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelFindPatternHead);
1182
iComment("neither full nor partial match was found for pattern head, load next 16 bytes of the string and try again");
1183
1184
// pattern header full match
1185
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternHeadFullMatch);
1186
iComment("full match found of pattern head");
1187
1188
// If patternLen <= 16 then we are done, otherwise we continue to check the rest of pattern. We first handle residue bytes
1189
// of pattern, then handle the rest 16-byte chunks.
1190
cursor = generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, matchIndexReg, searchResultVReg, generateS390MemoryReference(7, cg), 0);
1191
iComment("check 7th index of search result vec for byte index");
1192
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, matchIndexReg, stringIndexReg);
1193
cursor = generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, (int8_t)vectorSize, labelLoadResult, TR::InstOpCode::COND_BNH);
1194
iComment("if patternLen <= 16 then we are done, otherwise we continue to check the rest of pattern");
1195
cursor = generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, stringIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BH);
1196
iComment("Updated stringIndex for start of matched section exceeds maxIndex. Full pattern cannot be matched.");
1197
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, stringIndexReg, loadLenReg);
1198
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, patternIndexReg, vectorSize);
1199
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelMatchPatternResidue);
1200
iComment("find residual pattern");
1201
1202
// pattern header partial match
1203
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternHeadPartMatch);
1204
iComment("partial match of first 16 bytes of pattern was found");
1205
1206
// Starting from the beginning of the partial match, load the next 16 bytes from string and redo pattern header search.
1207
// This implies that the partial match will be re-matched by the next VSTRS. This can potentially benefit string
1208
// search cases where pattern is shorter than 16 bytes. For short strings, string search can potentially be done in
1209
// the next VSTRS and we can avoid residue matching which requires several index adjustments that do not provide
1210
// performance benefits.
1211
cursor = generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, matchIndexReg, searchResultVReg, generateS390MemoryReference(7, cg), 0);
1212
iComment("check 7th index of search result vec for byte index");
1213
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, stringIndexReg, matchIndexReg);
1214
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelFindPatternHead);
1215
1216
srm->reclaimScratchRegister(patternLenVReg);
1217
srm->reclaimScratchRegister(patternHeadVReg);
1218
}
1219
else
1220
{
1221
TR::Register* patternFirstCharVReg = srm->findOrCreateScratchRegister(TR_VRF);
1222
1223
/************************************** 1st char of pattern ******************************************/
1224
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelFindPatternHead);
1225
iComment("find first character of pattern");
1226
generateVRXInstruction(cg, TR::InstOpCode::VLREP, node, patternFirstCharVReg, generateS390MemoryReference(patternValueReg, headerSize, cg), elementSizeMask);
1227
1228
// Determine string load length. loadLenReg is either vectorSize-1 (15) or the 1st_char_matching residue length.
1229
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, loadLenReg, stringIndexReg, vectorSize);
1230
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, loadLenReg, stringLenReg, labelLoadString16Bytes, TR::InstOpCode::COND_BNHR);
1231
generateRRRInstruction(cg, TR::InstOpCode::getSubtractThreeRegOpCode(), node, loadLenReg, stringLenReg, stringIndexReg);
1232
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1233
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelLoadStringLenDone);
1234
1235
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadString16Bytes);
1236
iComment("update loadLenReg to load 16 characters from the string later on");
1237
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, loadLenReg, vectorSize-1);
1238
1239
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadStringLenDone);
1240
iComment("load 16 characters into string VRF register and search for first chracter of the pattern");
1241
1242
TR::Register* stringCharPtrReg = srm->findOrCreateScratchRegister();
1243
TR::LabelSymbol* labelExtractFirstCharPos = generateLabelSymbol(cg);
1244
generateRRRInstruction(cg, TR::InstOpCode::getAddThreeRegOpCode(), node, stringCharPtrReg, stringValueReg, stringIndexReg);
1245
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1246
generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, searchResultVReg, stringVReg, patternFirstCharVReg, 0x1, elementSizeMask);
1247
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, labelExtractFirstCharPos);
1248
srm->reclaimScratchRegister(stringCharPtrReg);
1249
1250
// 1st char not found. Loop back and retry from the next chunk
1251
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, stringIndexReg, loadLenReg);
1252
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, stringIndexReg, 1);
1253
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, stringIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BHR);
1254
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelFindPatternHead);
1255
iComment("1st char not found. Loop back and retry from the next chunk");
1256
1257
// Found 1st char. Check its byte index in searchResultVReg byte 7.
1258
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelExtractFirstCharPos);
1259
iComment("check 7th index of search result vec for byte index");
1260
1261
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, matchIndexReg, searchResultVReg, generateS390MemoryReference(7, cg), 0);
1262
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, matchIndexReg, loadLenReg, labelPatternNotFound, TR::InstOpCode::COND_BHR);
1263
1264
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, matchIndexReg, stringIndexReg); // convert relative index to absolute index
1265
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, matchIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BHR);
1266
1267
/************************************** s2 Residue matching ******************************************/
1268
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, stringIndexReg, matchIndexReg); // use the absolute match index as starting index when matching rest of the pattern
1269
srm->reclaimScratchRegister(patternFirstCharVReg);
1270
}
1271
1272
srm->addScratchRegistersToDependencyList(regDeps);
1273
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelMatchPatternResidue);
1274
iComment("match remainder of the pattern");
1275
1276
// pattern residue length = patternLenReg mod 16
1277
generateRRInstruction(cg, TR::InstOpCode::LLGHR, node, loadLenReg, patternLenReg);
1278
generateRIInstruction(cg, TR::InstOpCode::NILL, node, loadLenReg, 0x000F);
1279
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, loadLenReg, (int8_t)0, labelMatchPatternLoopSetup, TR::InstOpCode::COND_BE);
1280
1281
TR::Register* stringCharPtrReg = srm->findOrCreateScratchRegister();
1282
generateRRRInstruction(cg, TR::InstOpCode::getAddThreeRegOpCode(), node, stringCharPtrReg, stringValueReg, stringIndexReg);
1283
1284
// Vector loads use load index. And [load_index = load_len - 1]
1285
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, -1);
1286
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, stringVReg, loadLenReg, generateS390MemoryReference(stringCharPtrReg, headerSize, cg));
1287
srm->reclaimScratchRegister(stringCharPtrReg);
1288
// If VSTRS is supported, the first VSTRS already handled the 1st 16 bytes at this point (full match in the 1st 16
1289
// bytes). Hence, residue offset starts at 16.
1290
uint32_t patternResidueDisp = headerSize + (supportsVSTRS ? vectorSize : 0);
1291
1292
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, patternVReg, loadLenReg, generateS390MemoryReference(patternValueReg, patternResidueDisp, cg));
1293
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loadLenReg, 1);
1294
1295
if (supportsVSTRS)
1296
{
1297
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, patternIndexReg, vectorSize);
1298
}
1299
1300
generateVRRbInstruction(cg, TR::InstOpCode::VCEQ, node, searchResultVReg, stringVReg, patternVReg, 1, elementSizeMask);
1301
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, labelMatchPatternLoopSetup);
1302
1303
// The residue does not match. Continue to find the 1st char in string, starting from the next element.
1304
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, stringIndexReg, matchIndexReg, isUTF16 ? 2 : 1);
1305
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, stringIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BHR);
1306
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelFindPatternHead);
1307
1308
/************************************** pattern matching loop ENTRY ******************************************/
1309
1310
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelMatchPatternLoopSetup);
1311
iComment("loop setup to search for rest of the pattern");
1312
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, stringIndexReg, loadLenReg);
1313
1314
if (supportsVSTRS)
1315
{
1316
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, patternIndexReg, loadLenReg);
1317
}
1318
else
1319
{
1320
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, patternIndexReg, loadLenReg);
1321
}
1322
1323
srm->reclaimScratchRegister(loadLenReg);
1324
TR::Register* loopCountReg = srm->findOrCreateScratchRegister();
1325
generateRSInstruction(cg, TR::InstOpCode::SRLG, node, loopCountReg, patternLenReg, 4);
1326
1327
if (supportsVSTRS)
1328
{
1329
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loopCountReg, -1);
1330
}
1331
1332
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, loopCountReg, static_cast<int8_t>(0), labelLoadResult, TR::InstOpCode::COND_BE);
1333
1334
/************************************** pattern matching loop ******************************************/
1335
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelMatchPatternLoop);
1336
iComment("start search for reset of the pattern");
1337
// Start to match the rest of the pattern
1338
generateVRXInstruction(cg, TR::InstOpCode::VL, node, stringVReg, generateS390MemoryReference(stringValueReg, stringIndexReg, headerSize, cg));
1339
generateVRXInstruction(cg, TR::InstOpCode::VL, node, patternVReg, generateS390MemoryReference(patternValueReg, patternIndexReg, headerSize, cg));
1340
1341
generateVRRbInstruction(cg, TR::InstOpCode::VCEQ, node, searchResultVReg, stringVReg, patternVReg, 1, elementSizeMask);
1342
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, labelPartialPatternMatch);
1343
1344
// pattern chunk does not match. Go back and search again
1345
generateRIEInstruction(cg, TR::InstOpCode::getAddHalfWordImmDistinctOperandOpCode(), node, stringIndexReg, matchIndexReg, isUTF16 ? 2 : 1);
1346
generateRIEInstruction(cg, TR::InstOpCode::getCmpRegAndBranchRelOpCode(), node, stringIndexReg, maxIndexReg, labelPatternNotFound, TR::InstOpCode::COND_BHR);
1347
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelFindPatternHead);
1348
iComment("pattern chunk does not match. Go back and search again");
1349
1350
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPartialPatternMatch);
1351
iComment("there was a complete match for the characters currently loaded in pattern VRF register");
1352
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, stringIndexReg, vectorSize);
1353
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, patternIndexReg, vectorSize);
1354
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, loopCountReg, -1);
1355
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, loopCountReg, (int8_t)0, labelMatchPatternLoop, TR::InstOpCode::COND_BNE);
1356
srm->reclaimScratchRegister(loopCountReg);
1357
// Load -1 if pattern is not found in string or load the character index of the 1st character of pattern in string
1358
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelLoadResult);
1359
1360
if (isUTF16)
1361
{
1362
// Byte-index to char-index conversion
1363
cursor = generateRSInstruction(cg, TR::InstOpCode::SRA, node, matchIndexReg, 1);
1364
iComment("byte-index to char-index conversion");
1365
}
1366
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelResultDone);
1367
1368
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelPatternNotFound);
1369
iComment("pattern was not found in the string");
1370
generateRIInstruction(cg, TR::InstOpCode::LHI, node, matchIndexReg, -1);
1371
1372
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultDone);
1373
1374
// Result debug counters
1375
if (enableIndexOfDebugCounter)
1376
{
1377
TR::LabelSymbol* labelResultGT10 = generateLabelSymbol(cg);
1378
TR::LabelSymbol* labelResultGT30 = generateLabelSymbol(cg);
1379
TR::LabelSymbol* labelResultGT60 = generateLabelSymbol(cg);
1380
TR::LabelSymbol* labelResultGT100 = generateLabelSymbol(cg);
1381
TR::LabelSymbol* labelResultCheckDone = generateLabelSymbol(cg);
1382
1383
uint8_t boundary10Char = 10;
1384
uint8_t boundary30Char = 30;
1385
uint8_t boundary60Char = 60;
1386
uint8_t boundary100Char = 100;
1387
1388
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary10Char, labelResultGT10, TR::InstOpCode::COND_BH);
1389
cg->generateDebugCounter("indexOfString/result/below-10", 1, TR::DebugCounter::Cheap);
1390
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelResultCheckDone);
1391
1392
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultGT10);
1393
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary30Char, labelResultGT30, TR::InstOpCode::COND_BH);
1394
cg->generateDebugCounter("indexOfString/result/10-30", 1, TR::DebugCounter::Cheap);
1395
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelResultCheckDone);
1396
1397
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultGT30);
1398
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary60Char, labelResultGT60, TR::InstOpCode::COND_BH);
1399
cg->generateDebugCounter("indexOfString/result/30-60", 1, TR::DebugCounter::Cheap);
1400
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelResultCheckDone);
1401
1402
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultGT60);
1403
generateRIEInstruction(cg, TR::InstOpCode::getCmpImmBranchRelOpCode(), node, patternLenReg, boundary100Char, labelResultGT100, TR::InstOpCode::COND_BH);
1404
cg->generateDebugCounter("indexOfString/result/60-100", 1, TR::DebugCounter::Cheap);
1405
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelResultCheckDone);
1406
1407
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultGT100);
1408
cg->generateDebugCounter("indexOfString/result/above-100", 1, TR::DebugCounter::Cheap);
1409
1410
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelResultCheckDone);
1411
}
1412
1413
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelDone, regDeps);
1414
labelDone->setEndInternalControlFlow();
1415
1416
node->setRegister(matchIndexReg);
1417
1418
for (int32_t i = 0; i < node->getNumChildren(); ++i)
1419
{
1420
cg->decReferenceCount(node->getChild(i));
1421
}
1422
cg->stopUsingRegister(stringIndexReg);
1423
srm->stopUsingRegisters();
1424
1425
return matchIndexReg;
1426
}
1427
1428
1429
/** \brief
1430
* Attempts to use vector registers to perform SIMD conversion of characters from lowercase to uppercase.
1431
*
1432
* \detail
1433
* Uses vector registers to convert 16 bytes at a time.
1434
*
1435
* \param node
1436
* The node representing the HW optimized toUpper and toLower recognized calls.
1437
*
1438
* \param cg
1439
* The code generator used to generate the instructions.
1440
*
1441
* \param isToUpper
1442
* Boolean representing case conversion, either to upper or to lower.
1443
*
1444
* \param isCompressedString
1445
* Boolean representing the string's compression.
1446
*
1447
* \return
1448
* A register containing the return value of the Java call. The return value
1449
* will be 1 if the entire contents of the input array was translated and 0 if
1450
* we were unable to translate the entire contents of the array (up to the specified length).
1451
*/
1452
TR::Register * caseConversionHelper(TR::Node* node, TR::CodeGenerator* cg, bool isToUpper, bool isCompressedString)
1453
{
1454
TR::Register* sourceRegister = cg->evaluate(node->getChild(1));
1455
TR::Register* destRegister = cg->evaluate(node->getChild(2));
1456
TR::Register* lengthRegister = cg->gprClobberEvaluate(node->getChild(3));
1457
1458
TR::Register* addressOffset = cg->allocateRegister();
1459
TR::Register* loadLength = cg->allocateRegister();
1460
1461
// Loopcounter register for number of 16 byte conversions, when it is used, the length is not needed anymore
1462
TR::Register* loopCounter = lengthRegister;
1463
1464
TR::Register* charBufferVector = cg->allocateRegister(TR_VRF);
1465
TR::Register* selectionVector = cg->allocateRegister(TR_VRF);
1466
TR::Register* modifiedCaseVector = cg->allocateRegister(TR_VRF);
1467
TR::Register* charOffsetVector = cg->allocateRegister(TR_VRF);
1468
TR::Register* alphaRangeVector = cg->allocateRegister(TR_VRF);
1469
TR::Register* alphaCondVector = cg->allocateRegister(TR_VRF);
1470
TR::Register* invalidRangeVector = cg->allocateRegister(TR_VRF);
1471
TR::Register* invalidCondVector = cg->allocateRegister(TR_VRF);
1472
1473
TR::LabelSymbol* cFlowRegionStart = generateLabelSymbol( cg);
1474
TR::LabelSymbol* fullVectorConversion = generateLabelSymbol( cg);
1475
TR::LabelSymbol* cFlowRegionEnd = generateLabelSymbol( cg);
1476
TR::LabelSymbol* success = generateLabelSymbol( cg);
1477
TR::LabelSymbol* handleInvalidChars = generateLabelSymbol( cg);
1478
TR::LabelSymbol* loop = generateLabelSymbol( cg);
1479
1480
TR::Instruction* cursor;
1481
1482
const int elementSizeMask = (isCompressedString) ? 0x0 : 0x1; // byte or halfword mask
1483
const int32_t sizeOfVector = cg->machine()->getVRFSize();
1484
const bool is64 = cg->comp()->target().is64Bit();
1485
uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
1486
1487
TR::RegisterDependencyConditions * regDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 13, cg);
1488
regDeps->addPostCondition(sourceRegister, TR::RealRegister::AssignAny);
1489
regDeps->addPostCondition(destRegister, TR::RealRegister::AssignAny);
1490
regDeps->addPostCondition(lengthRegister, TR::RealRegister::AssignAny);
1491
regDeps->addPostCondition(addressOffset, TR::RealRegister::AssignAny);
1492
regDeps->addPostCondition(loadLength, TR::RealRegister::AssignAny);
1493
regDeps->addPostCondition(charBufferVector, TR::RealRegister::AssignAny);
1494
regDeps->addPostCondition(selectionVector, TR::RealRegister::AssignAny);
1495
regDeps->addPostCondition(modifiedCaseVector, TR::RealRegister::AssignAny);
1496
regDeps->addPostCondition(charOffsetVector, TR::RealRegister::AssignAny);
1497
regDeps->addPostCondition(alphaRangeVector, TR::RealRegister::AssignAny);
1498
regDeps->addPostCondition(alphaCondVector, TR::RealRegister::AssignAny);
1499
regDeps->addPostCondition(invalidRangeVector, TR::RealRegister::AssignAny);
1500
regDeps->addPostCondition(invalidCondVector, TR::RealRegister::AssignAny);
1501
1502
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, addressOffset, addressOffset);
1503
1504
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, alphaRangeVector, 0, 0);
1505
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, alphaCondVector, 0, 0);
1506
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, invalidRangeVector, 0, 0);
1507
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, invalidCondVector, 0, 0);
1508
1509
// Characters a-z (0x61-0x7A) when to upper and A-Z (0x41-0x5A) when to lower
1510
generateVRIaInstruction (cg, TR::InstOpCode::VLEIH, node, alphaRangeVector, isToUpper ? 0x617A : 0x415A, 0x0);
1511
// Characters (0xE0-0xF6) when to upper and (0xC0-0xD6) when to lower
1512
generateVRIaInstruction (cg, TR::InstOpCode::VLEIH, node, alphaRangeVector, isToUpper ? 0xE0F6 : 0xC0D6, 0x1);
1513
// Characters (0xF8-0xFE) when to upper and (0xD8-0xDE) when to lower
1514
generateVRIaInstruction (cg, TR::InstOpCode::VLEIH, node, alphaRangeVector, isToUpper ? 0xF8FE : 0xD8DE, 0X2);
1515
1516
if (!isCompressedString)
1517
{
1518
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, alphaRangeVector, alphaRangeVector, 0, 0, 0, 0);
1519
}
1520
1521
// Condition codes for >= (bits 0 and 2) and <= (bits 0 and 1)
1522
if (isCompressedString)
1523
{
1524
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA0C0, 0X0);
1525
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA0C0, 0X1);
1526
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA0C0, 0X2);
1527
}
1528
else
1529
{
1530
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA000, 0X0);
1531
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XC000, 0X1);
1532
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA000, 0X2);
1533
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XC000, 0X3);
1534
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XA000, 0X4);
1535
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, alphaCondVector, 0XC000, 0X5);
1536
}
1537
1538
if (isToUpper)
1539
{
1540
// Can't uppercase \u00DF (capital sharp s) nor \u00B5 (mu) with a simple addition of 0x20 so we do an equality
1541
// comparison (bit 0) and greater than or equal comparison (bits 0 and 2) for codes larger than or equal to 0xFF
1542
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidRangeVector, 0xDFDF, 0x0);
1543
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidRangeVector, 0xB5B5, 0x1);
1544
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidRangeVector, 0xFFFF, 0x2);
1545
1546
if (isCompressedString)
1547
{
1548
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8080, 0x0);
1549
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8080, 0x1);
1550
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0xA0A0, 0x2);
1551
}
1552
else
1553
{
1554
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, invalidRangeVector, invalidRangeVector, 0, 0, 0, 0);
1555
1556
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8000, 0x0);
1557
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8000, 0x1);
1558
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8000, 0x2);
1559
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x8000, 0x3);
1560
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0xA000, 0x4);
1561
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0xA000, 0x5);
1562
}
1563
}
1564
else if (!isCompressedString)
1565
{
1566
// Can't lowercase codes larger than 0xFF but we only need to check this if our input is not compressed since
1567
// all compressed values will be <= 0xFF
1568
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidRangeVector, 0x00FF, 0x0);
1569
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidRangeVector, 0x00FF, 0x1);
1570
1571
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x2000, 0x0);
1572
generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, invalidCondVector, 0x2000, 0x1);
1573
}
1574
1575
// Constant value of 0x20, used to convert between upper and lower
1576
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, charOffsetVector, 0x20, elementSizeMask);
1577
1578
generateRRInstruction(cg, TR::InstOpCode::LR, node, loadLength, lengthRegister);
1579
generateRILInstruction(cg, TR::InstOpCode::NILF, node, loadLength, 0xF);
1580
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
1581
cFlowRegionStart->setStartInternalControlFlow();
1582
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, fullVectorConversion);
1583
1584
// VLL and VSTL take an index, not a count, so subtract the input length by 1
1585
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, loadLength, 1);
1586
1587
generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, charBufferVector, loadLength, generateS390MemoryReference(sourceRegister, headerSize, cg));
1588
1589
// Check for invalid characters, go to fallback individual character conversion implementation
1590
if (isToUpper || !isCompressedString)
1591
{
1592
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, selectionVector, charBufferVector, invalidRangeVector, invalidCondVector, 0x1 , elementSizeMask);
1593
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, handleInvalidChars);
1594
}
1595
1596
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, selectionVector, charBufferVector, alphaRangeVector, alphaCondVector, 0x4, elementSizeMask);
1597
generateVRRcInstruction(cg, isToUpper ? TR::InstOpCode::VS : TR::InstOpCode::VA, node, modifiedCaseVector, charBufferVector, charOffsetVector, 0x0, 0x0, elementSizeMask);
1598
generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, modifiedCaseVector, modifiedCaseVector, charBufferVector, selectionVector);
1599
1600
generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, modifiedCaseVector, loadLength, generateS390MemoryReference(destRegister, headerSize, cg), 0);
1601
1602
// Increment index by the remainder then add 1, since the loadLength contains the highest index, we must go one past that
1603
generateRIEInstruction(cg, TR::InstOpCode::AHIK, node, addressOffset, loadLength, 1);
1604
1605
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fullVectorConversion);
1606
1607
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node,
1608
lengthRegister, sizeOfVector,
1609
TR::InstOpCode::COND_BL, success, false);
1610
1611
// Set the loopCounter to the amount of groups of 16 bytes left, ignoring already accounted for remainder
1612
generateRSInstruction(cg, TR::InstOpCode::SRL, node, loopCounter, loopCounter, 4);
1613
1614
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, loop);
1615
1616
generateVRXInstruction(cg, TR::InstOpCode::VL, node, charBufferVector, generateS390MemoryReference(sourceRegister, addressOffset, headerSize, cg));
1617
1618
if (isToUpper || !isCompressedString)
1619
{
1620
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, selectionVector, charBufferVector, invalidRangeVector, invalidCondVector, 0x1 , elementSizeMask);
1621
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, handleInvalidChars);
1622
}
1623
1624
generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, selectionVector, charBufferVector, alphaRangeVector, alphaCondVector, 0x4, elementSizeMask);
1625
generateVRRcInstruction(cg, isToUpper ? TR::InstOpCode::VS : TR::InstOpCode::VA, node, modifiedCaseVector, charBufferVector, charOffsetVector, 0x0, 0x0, elementSizeMask);
1626
generateVRReInstruction(cg, TR::InstOpCode::VSEL, node, modifiedCaseVector, modifiedCaseVector, charBufferVector, selectionVector);
1627
1628
generateVRXInstruction(cg, TR::InstOpCode::VST, node, modifiedCaseVector, generateS390MemoryReference(destRegister, addressOffset, headerSize, cg), 0);
1629
1630
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, addressOffset, generateS390MemoryReference(addressOffset, sizeOfVector, cg));
1631
generateS390BranchInstruction(cg, TR::InstOpCode::BRCT, node, loopCounter, loop);
1632
1633
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, success);
1634
1635
generateRIInstruction(cg, TR::InstOpCode::LHI, node, lengthRegister, 1);
1636
1637
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
1638
1639
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleInvalidChars);
1640
cg->generateDebugCounter(isToUpper? "z13/simd/toUpper/null" : "z13/simd/toLower/null", 1, TR::DebugCounter::Cheap);
1641
generateRRInstruction(cg, TR::InstOpCode::XR, node, lengthRegister, lengthRegister);
1642
1643
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, regDeps);
1644
cFlowRegionEnd->setEndInternalControlFlow();
1645
1646
cg->stopUsingRegister(addressOffset);
1647
cg->stopUsingRegister(loadLength);
1648
1649
cg->stopUsingRegister(charBufferVector);
1650
cg->stopUsingRegister(selectionVector);
1651
cg->stopUsingRegister(modifiedCaseVector);
1652
cg->stopUsingRegister(charOffsetVector);
1653
cg->stopUsingRegister(alphaRangeVector);
1654
cg->stopUsingRegister(alphaCondVector);
1655
cg->stopUsingRegister(invalidRangeVector);
1656
cg->stopUsingRegister(invalidCondVector);
1657
1658
node->setRegister(lengthRegister);
1659
1660
cg->decReferenceCount(node->getChild(0));
1661
cg->decReferenceCount(node->getChild(1));
1662
cg->decReferenceCount(node->getChild(2));
1663
cg->decReferenceCount(node->getChild(3));
1664
1665
return node->getRegister();
1666
}
1667
1668
/**
 * \brief
 *    Generates an inlined vector (VFEE based) search for a single character inside an array.
 *
 * \param node
 *    The call node. Children 1, 2, 3 and 4 are evaluated as the array, the character to search for, the
 *    start offset and the length respectively. Child 0 is never evaluated, only recursively dereferenced.
 *
 * \param cg
 *    The code generator used to emit the instructions.
 *
 * \param isLatin1
 *    True to search byte sized elements, false to search halfword sized elements.
 *
 * \return
 *    The register (also set on \p node) holding the element index of the first match. When nothing is
 *    found the register holds the value loaded by LHI 0xFFFF (presumably -1 after sign extension).
 */
TR::Register *
J9::Z::TreeEvaluator::inlineIntrinsicIndexOf(TR::Node * node, TR::CodeGenerator * cg, bool isLatin1)
   {
   cg->generateDebugCounter("z13/simd/indexOf", 1, TR::DebugCounter::Free);

   TR::Register* arrayReg = cg->evaluate(node->getChild(1));
   TR::Register* needle = cg->evaluate(node->getChild(2));
   TR::Register* startOffset = cg->evaluate(node->getChild(3));

   // Clobber evaluated because it is rewritten below: first into a byte count, later into a trip count
   TR::Register* remaining = cg->gprClobberEvaluate(node->getChild(4));

   const int32_t vectorBytes = cg->machine()->getVRFSize();

   // Once the residue has been handled the length register is reused as the vector loop counter
   TR::Register* tripCount = remaining;
   TR::Register* residueLen = cg->allocateRegister();
   TR::Register* resultIndex = cg->allocateRegister();
   TR::Register* residueAddr = cg->allocateRegister();

   // The residue address is not read again after the VLL, so its register also receives the match offset
   TR::Register* matchOffset = residueAddr;

   TR::Register* dataVec = cg->allocateRegister(TR_VRF);
   TR::Register* matchVec = cg->allocateRegister(TR_VRF);
   TR::Register* needleVec = cg->allocateRegister(TR_VRF);

   TR::LabelSymbol* regionBegin = generateLabelSymbol(cg);
   TR::LabelSymbol* vectorLoop = generateLabelSymbol(cg);
   TR::LabelSymbol* wholeVectors = generateLabelSymbol(cg);
   TR::LabelSymbol* residueMiss = generateLabelSymbol(cg);
   TR::LabelSymbol* hit = generateLabelSymbol(cg);
   TR::LabelSymbol* hitWithOffset = generateLabelSymbol(cg);
   TR::LabelSymbol* miss = generateLabelSymbol(cg);
   TR::LabelSymbol* regionEnd = generateLabelSymbol(cg);

   // Every register that is live inside the internal control flow region is pinned on the end label
   TR::RegisterDependencyConditions* postDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 8, cg);
   postDeps->addPostCondition(arrayReg, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(tripCount, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(resultIndex, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(residueLen, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(residueAddr, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(dataVec, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(matchVec, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(needleVec, TR::RealRegister::AssignAny);

   // Move the search character into a vector register (paired with the offset register by VLVGP)
   generateVRRfInstruction(cg, TR::InstOpCode::VLVGP, node, needleVec, startOffset, needle);

   // Element size code handed to VREP and VFEE: 0 selects bytes, 1 selects halfwords
   const int elemSizeCode = isLatin1 ? 0x0 : 0x1;

   // Replicate the last element of the vector, which holds the search character, into every element
   const int32_t lastElement = (vectorBytes / (1 << elemSizeCode)) - 1;
   generateVRIcInstruction(cg, TR::InstOpCode::VREP, node, needleVec, needleVec, lastElement, elemSizeCode);

   // Clear the result and data vectors
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, matchVec, 0, 0);
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, dataVec, 0, 0);

   // resultIndex starts out as the start offset
   if (!cg->comp()->target().is64Bit())
      {
      generateRRInstruction(cg, TR::InstOpCode::LR, node, resultIndex, startOffset);
      }
   else
      {
      generateRREInstruction(cg, TR::InstOpCode::LLGFR, node, resultIndex, startOffset);
      }

   // remaining = length - offset, i.e. the number of elements left to inspect
   generateRRInstruction(cg, TR::InstOpCode::SR, node, remaining, startOffset);

   if (!isLatin1)
      {
      // Halfword elements: turn the element count and the element index into byte quantities
      generateRSInstruction(cg, TR::InstOpCode::SLL, node, remaining, 1);
      generateRSInstruction(cg, TR::InstOpCode::SLL, node, resultIndex, 1);
      }

   // residueLen = byte count modulo 16; these bytes are searched first with a partial vector load
   generateRRInstruction(cg, TR::InstOpCode::LR, node, residueLen, remaining);
   generateRILInstruction(cg, TR::InstOpCode::NILF, node, residueLen, 0xF);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, regionBegin);
   regionBegin->setStartInternalControlFlow();

   // The condition code still comes from the NILF above: a zero residue goes straight to the vector loop
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, wholeVectors);

   // VLL expects the index of the last byte to load rather than a byte count
   generateRILInstruction(cg, TR::InstOpCode::SLFI, node, residueLen, 1);

   generateRXInstruction(cg, TR::InstOpCode::LA, node, residueAddr, generateS390MemoryReference(arrayReg, resultIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
   generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, dataVec, residueLen, generateS390MemoryReference(residueAddr, 0, cg));

   // Search the residue; no range check is needed when VFEE reports no match at all
   generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, matchVec, dataVec, needleVec, 0x1, elemSizeCode);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK1, node, residueMiss);

   // A match only counts when its byte offset does not exceed the index of the last loaded byte
   generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, matchOffset, matchVec, generateS390MemoryReference(7, cg), 0);
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node,
                                           matchOffset, residueLen,
                                           TR::InstOpCode::COND_BNH, hitWithOffset);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, residueMiss);

   // Undo the earlier decrement, then step the index past the residue bytes
   generateRIEInstruction(cg, TR::InstOpCode::AHIK, node, residueLen, residueLen, 1);
   generateRRInstruction(cg, TR::InstOpCode::AR, node, resultIndex, residueLen);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, wholeVectors);

   // Fewer bytes than one full vector in total means there is nothing left to search
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node,
                                           remaining, vectorBytes,
                                           TR::InstOpCode::COND_BL, miss);

   // tripCount = byte count / 16; the remainder was consumed by the residue handling above
   generateRSInstruction(cg, TR::InstOpCode::SRL, node, tripCount, tripCount, 4);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, vectorLoop);

   // Load one full vector of array data and look for the character
   generateVRXInstruction(cg, TR::InstOpCode::VL, node, dataVec, generateS390MemoryReference(arrayReg, resultIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));

   generateVRRbInstruction(cg, TR::InstOpCode::VFEE, node, matchVec, dataVec, needleVec, 0x1, elemSizeCode);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, hit);

   // No match in this vector: advance by one vector and keep going while the counter is non-zero
   generateRILInstruction(cg, TR::InstOpCode::AFI, node, resultIndex, vectorBytes);

   generateS390BranchInstruction(cg, TR::InstOpCode::BRCT, node, tripCount, vectorLoop);

   // Not found
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, miss);
   generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultIndex, 0xFFFF);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_B, node, regionEnd);

   // Found in a full vector: extract the byte offset of the match
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, hit);
   generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, matchOffset, matchVec, generateS390MemoryReference(7, cg), 0);

   // Found, with the byte offset of the match already sitting in matchOffset
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, hitWithOffset);
   generateRRInstruction(cg, TR::InstOpCode::AR, node, resultIndex, matchOffset);

   if (!isLatin1)
      {
      // Convert the byte index back into a halfword element index
      generateRSInstruction(cg, TR::InstOpCode::SRL, node, resultIndex, resultIndex, 1);
      }

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, regionEnd, postDeps);
   regionEnd->setEndInternalControlFlow();

   cg->stopUsingRegister(tripCount);
   cg->stopUsingRegister(residueLen);
   cg->stopUsingRegister(residueAddr);

   cg->stopUsingRegister(dataVec);
   cg->stopUsingRegister(matchVec);
   cg->stopUsingRegister(needleVec);

   node->setRegister(resultIndex);

   // Child 0 was never evaluated, so its whole subtree has to be released
   cg->recursivelyDecReferenceCount(node->getChild(0));

   for (int32_t childIndex = 1; childIndex <= 4; ++childIndex)
      {
      cg->decReferenceCount(node->getChild(childIndex));
      }

   return resultIndex;
   }
1817
1818
/**
 * \brief
 *    Generates an inlined scalar loop which copies 16-bit characters from the input to the output until
 *    either the input is exhausted or a surrogate character (0xD800 - 0xDFFF) is encountered.
 *
 * \param node
 *    The call node. Child 0 is evaluated as the input address, child 1 as the output address and child 2 as
 *    the input length in characters.
 *
 * \param cg
 *    The code generator used to emit the instructions.
 *
 * \return
 *    The register (also set on \p node) holding the number of characters that were copied.
 *
 * \note
 *    Earlier revisions emitted an additional loop in front of this one which tried to copy four characters
 *    (8 bytes) per iteration. That loop was removed because it was incorrect in two ways:
 *
 *    1. Its trip counter was compared against zero but never decremented, so the loop could not terminate
 *       on the counter.
 *    2. Its surrogate test was inverted. After masking every halfword with 0xF800 and XORing with 0xD800 a
 *       halfword is zero exactly when it IS a surrogate, so CC1 (result not zero) of XIHF / XILF means "at
 *       least one character is not a surrogate". Branching out on CC1 made the loop exit immediately for
 *       ordinary text and reach the store only when all four characters were surrogates, which is
 *       precisely the data that must not be copied.
 *
 *    Whenever the first four characters are not all surrogates the old loop exited before storing anything,
 *    so dropping it does not change the result for such input.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineUTF16BEEncode(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation* comp = cg->comp();

   TR::Node* inputLenNode = node->getChild(2);

   // Create the necessary registers
   TR::Register* output = cg->gprClobberEvaluate(node->getChild(1));
   TR::Register* input = cg->gprClobberEvaluate(node->getChild(0));

   TR::Register* inputLen = cg->gprClobberEvaluate(inputLenNode);

   // Holds the character currently being examined
   TR::Register* temp1 = cg->allocateRegister();

   // Number of bytes currently translated (also used as a stride register)
   TR::Register* translated = cg->allocateRegister();

   // Convert input length in number of characters to number of bytes
   generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, inputLen, inputLen, 1);

   // Sign extend the value if needed so that the full width compare against the stride register below is
   // well defined. This mirrors what inlineUTF16BEEncodeSIMD does for the same operand.
   if (comp->target().is64Bit() && !(inputLenNode->getOpCode().isLong()))
      {
      generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, inputLen, inputLen);
      }

   // Initialize the number of translated bytes to 0
   generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, translated, translated);

   // Create the necessary labels
   TR::LabelSymbol * processChar1 = generateLabelSymbol(cg);
   TR::LabelSymbol * processChar1End = generateLabelSymbol(cg);
   TR::LabelSymbol * processChar1Copy = generateLabelSymbol(cg);

   // Inclusive bounds of the surrogate character range
   const uint16_t surrogateRange1 = 0xD800;
   const uint16_t surrogateRange2 = 0xDFFF;

   TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);

   // ----------------- Incoming branch -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1);
   processChar1->setStartInternalControlFlow();

   // Branch to the end if there are no more characters left to process
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, translated, inputLen, TR::InstOpCode::COND_BNL, processChar1End, false, false);

   // Load an input character from memory
   generateRXInstruction(cg, TR::InstOpCode::LLH, node, temp1, generateS390MemoryReference(input, translated, 0, cg));

   // Compare the input character against the lower bound surrogate character range
   generateRILInstruction(cg, TR::InstOpCode::getCmpImmOpCode(), node, temp1, surrogateRange1);

   // Branch if < (non-surrogate char)
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK4, node, processChar1Copy);

   // Compare the input character against the upper bound surrogate character range
   generateRILInstruction(cg, TR::InstOpCode::getCmpImmOpCode(), node, temp1, surrogateRange2);

   // Branch if > (non-surrogate char)
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK2, node, processChar1Copy);

   // If we get here it must be a surrogate char
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processChar1End);

   // ----------------- Incoming branch -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1Copy);

   // Store the (halfword sized) character into the output buffer
   generateRXInstruction(cg, TR::InstOpCode::STH, node, temp1, generateS390MemoryReference(output, translated, 0, cg));

   // Advance the number of bytes processed
   generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, translated, 2);

   // Branch back to the start of the loop
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK15, node, processChar1);

   // Set up the proper register dependencies
   dependencies->addPostCondition(input, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(inputLen, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(temp1, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(output, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(translated, TR::RealRegister::AssignAny);

   // ----------------- Incoming branch -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processChar1End, dependencies);
   processChar1End->setEndInternalControlFlow();

   // Convert translated length in number of bytes to number of characters
   generateRSInstruction(cg, TR::InstOpCode::getShiftRightLogicalSingleOpCode(), node, translated, translated, 1);

   // Cleanup nodes before returning
   cg->decReferenceCount(node->getChild(0));
   cg->decReferenceCount(node->getChild(1));
   cg->decReferenceCount(node->getChild(2));

   // Cleanup registers before returning
   cg->stopUsingRegister(input);
   cg->stopUsingRegister(inputLen);
   cg->stopUsingRegister(temp1);
   cg->stopUsingRegister(output);

   return node->setRegister(translated);
   }
1963
1964
/**
 * \brief
 *    Generates an inlined vector loop which copies 16-bit characters from the input to the output, 8
 *    characters (16 bytes) at a time followed by a partial vector for the residue, stopping at the first
 *    character found in the surrogate range (0xD800 - 0xDFFF).
 *
 * \param node
 *    The call node. Child 0 is evaluated as the input address, child 1 as the output address and child 2
 *    supplies the input length in characters (a load-constant length is materialized directly).
 *
 * \param cg
 *    The code generator used to emit the instructions.
 *
 * \return
 *    The register (also set on \p node) holding the number of characters that were copied.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineUTF16BEEncodeSIMD(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation* comp = cg->comp();

   // Both addresses are advanced further down, hence the clobber evaluation
   TR::Register* dst = cg->gprClobberEvaluate(node->getChild(1));
   TR::Register* src = cg->gprClobberEvaluate(node->getChild(0));

   // Input length in bytes; later reused for the residue / partial store length
   TR::Register* lenBytes;

   // Input length in bytes rounded down to a multiple of 16
   TR::Register* lenBytes16 = cg->allocateRegister();

   // Shares a register with lenBytes16: index of the last byte for the VLL / VSTL instructions
   TR::Register* lastByteIndex = lenBytes16;

   // Number of bytes processed so far; doubles as the stride into both buffers and is only converted to a
   // character count right before returning
   TR::Register* bytesDone = cg->allocateRegister();

   // bytesDone = 0
   generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, bytesDone, bytesDone);

   TR::Node* lenNode = node->getChild(2);

   // A constant length lets both length registers be loaded as immediates
   bool lenIsConst = lenNode->getOpCode().isLoadConst() && performTransformation(comp, "O^O [%p] Reduce input length to constant.\n", lenNode);

   if (!lenIsConst)
      {
      lenBytes = cg->gprClobberEvaluate(lenNode, true);

      // Characters to bytes
      generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, lenBytes, lenBytes, 1);

      if (cg->comp()->target().is64Bit() && !(lenNode->getOpCode().isLong()))
         {
         // Widen the 32-bit length while copying it
         generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, lenBytes, lenBytes);
         generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, lenBytes16, lenBytes);
         }
      else
         {
         generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, lenBytes16, lenBytes);
         }

      // Drop the 4 right most bits to round down to a multiple of 16
      generateRIInstruction(cg, TR::InstOpCode::NILL, node, lenBytes16, static_cast <int16_t> (0xFFF0));
      }
   else
      {
      lenBytes = cg->allocateRegister();

      // Characters to bytes, and the same value rounded down to a multiple of 16
      generateLoad32BitConstant(cg, lenNode, ((getIntegralValue(lenNode) * 2)), lenBytes, true);
      generateLoad32BitConstant(cg, lenNode, ((getIntegralValue(lenNode) * 2) >> 4) << 4, lenBytes16, true);
      }

   // Vector registers: the loaded characters, the VSTRC result (byte index of the first surrogate), and
   // the range bounds together with their comparison controls
   TR::Register* vChars = cg->allocateRegister(TR_VRF);
   TR::Register* vFirstSurrogate = cg->allocateRegister(TR_VRF);

   TR::Register* vBounds = cg->allocateRegister(TR_VRF);
   TR::Register* vBoundsControl = cg->allocateRegister(TR_VRF);

   const uint16_t surrogateLow = 0xD800;
   const uint16_t surrogateHigh = 0xDFFF;

   const uint16_t controlGreaterEqual = 0xA000; // >= comparison
   const uint16_t controlLessEqual = 0xC000;    // <= comparison

   // Zero both vectors, then place the range bounds and the controls into halfword elements 0 and 1
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vBounds, 0, 0 /*unused*/);
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vBoundsControl, 0, 0 /*unused*/);

   generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vBounds, surrogateLow, 0);
   generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vBounds, surrogateHigh, 1);

   generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vBoundsControl, controlGreaterEqual, 0);
   generateVRIaInstruction(cg, TR::InstOpCode::VLEIH, node, vBoundsControl, controlLessEqual, 1);

   TR::LabelSymbol * vectorLoop = generateLabelSymbol(cg);
   TR::LabelSymbol * vectorLoopEnd = generateLabelSymbol(cg);

   TR::LabelSymbol * residue = generateLabelSymbol(cg);
   TR::LabelSymbol * finished = generateLabelSymbol(cg);

   TR::LabelSymbol * surrogateFound = generateLabelSymbol(cg);
   TR::LabelSymbol * partialStore = generateLabelSymbol(cg);

   // Skip the vector loop entirely when there is not even one full group of 8 chars
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, lenBytes16, 0, TR::InstOpCode::COND_MASK8, vectorLoopEnd, false, false);

   // ----------------- Full vectors -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, vectorLoop);
   vectorLoop->setStartInternalControlFlow();

   // Load 16 bytes (8 chars)
   generateVRXInstruction(cg, TR::InstOpCode::VL, node, vChars, generateS390MemoryReference(src, bytesDone, 0, cg));

   // Look for a surrogate; on a hit only the bytes in front of it get copied
   generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, vFirstSurrogate, vChars, vBounds, vBoundsControl, 0x1, 1);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, surrogateFound);

   // No surrogate: store all 16 bytes
   generateVRXInstruction(cg, TR::InstOpCode::VST, node, vChars, generateS390MemoryReference(dst, bytesDone, 0, cg));

   // Advance the stride
   generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, bytesDone, 16);

   // Keep looping while at least 8 chars remain
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, bytesDone, lenBytes16, TR::InstOpCode::COND_BL, vectorLoop, false, false);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, vectorLoopEnd);
   vectorLoopEnd->setEndInternalControlFlow();

   // ----------------- Residue (fewer than 8 chars) -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, residue);
   residue->setStartInternalControlFlow();

   // lenBytes = number of residue bytes
   generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, lenBytes, bytesDone);

   // Nothing left to do when the subtraction produced zero
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC0, node, finished);

   // VLL and VSTL take the index of the last byte, so subtract 1 from the count
   generateRIEInstruction(cg, TR::InstOpCode::getAddLogicalRegRegImmediateOpCode(), node, lastByteIndex, lenBytes, -1);

   // Clear the vector first to avoid an invalid VSTRC result
   generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vChars, 0, 0 /*unused*/);

   // VLL only accepts D(B) memory references, so fold the stride into the input address
   generateRRInstruction (cg, TR::InstOpCode::getAddRegOpCode(), node, src, bytesDone);

   // Load the residue bytes
   generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vChars, lastByteIndex, generateS390MemoryReference(src, 0, cg));

   // Look for a surrogate; without one the whole residue is stored
   generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, vFirstSurrogate, vChars, vBounds, vBoundsControl, 0x1, 1);

   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC3, node, partialStore);

   // ----------------- Surrogate located (also entered from the vector loop) -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, surrogateFound);

   // lenBytes = byte index of the first surrogate, i.e. the number of bytes that may still be copied
   generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, lenBytes, vFirstSurrogate, generateS390MemoryReference(7, cg), 0);

   // A surrogate at index 0 leaves nothing to copy
   generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, lenBytes, 0, TR::InstOpCode::COND_CC0, finished, false, false);

   // VLL and VSTL take the index of the last byte, so subtract 1 from the count
   generateRIEInstruction(cg, TR::InstOpCode::getAddLogicalRegRegImmediateOpCode(), node, lastByteIndex, lenBytes, -1);

   // ----------------- Partial store -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, partialStore);

   // VSTL only accepts D(B) memory references, so fold the stride into the output address
   generateRRInstruction (cg, TR::InstOpCode::getAddRegOpCode(), node, dst, bytesDone);

   // Store the bytes up to and including lastByteIndex
   generateVRSbInstruction(cg, TR::InstOpCode::VSTL, node, vChars, lastByteIndex, generateS390MemoryReference(dst, 0, cg), 0);

   // Account for the bytes just stored
   generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, bytesDone, lenBytes);

   // Register dependencies attached to the final label
   TR::RegisterDependencyConditions* postDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg);

   postDeps->addPostCondition(src, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(lenBytes, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(lenBytes16, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(dst, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(bytesDone, TR::RealRegister::AssignAny);

   postDeps->addPostCondition(vChars, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(vFirstSurrogate, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(vBounds, TR::RealRegister::AssignAny);
   postDeps->addPostCondition(vBoundsControl, TR::RealRegister::AssignAny);

   // ----------------- Finished -----------------

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, finished, postDeps);
   finished->setEndInternalControlFlow();

   // Bytes to characters
   generateRSInstruction(cg, TR::InstOpCode::getShiftRightLogicalSingleOpCode(), node, bytesDone, bytesDone, 1);

   // Release the children
   for (int32_t childIndex = 0; childIndex <= 2; ++childIndex)
      {
      cg->decReferenceCount(node->getChild(childIndex));
      }

   // Release every register except the result
   cg->stopUsingRegister(src);
   cg->stopUsingRegister(lenBytes);
   cg->stopUsingRegister(lenBytes16);
   cg->stopUsingRegister(dst);

   cg->stopUsingRegister(vChars);
   cg->stopUsingRegister(vFirstSurrogate);
   cg->stopUsingRegister(vBounds);
   cg->stopUsingRegister(vBoundsControl);

   return node->setRegister(bytesDone);
   }
2172
2173
TR::Register*
2174
J9::Z::TreeEvaluator::inlineStringHashCode(TR::Node* node, TR::CodeGenerator* cg, bool isCompressed)
2175
{
2176
TR::Compilation* comp = cg->comp();
2177
//stringSize = Number of bytes to load to process 4 characters in SIMD loop
2178
//terminateVal = SIMD loop controller allowing characters in multiples of 4 to be processed by the loop
2179
//VLLEZ instruction will load word(compressed String) or double word (decompressed String), elementSize is used for that
2180
const short stringSize = (isCompressed ? 4 : 8);
2181
const short terminateVal = (isCompressed ? 3 : 6);
2182
const short elementSize = (isCompressed ? 2 : 3);
2183
2184
TR::Node* nodeValue = node->getChild(0);
2185
TR::Node* nodeIndex = node->getChild(1);
2186
TR::Node* nodeCount = node->getChild(2);
2187
2188
// Create the necessary labels
2189
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
2190
2191
TR::LabelSymbol * labelVector = generateLabelSymbol(cg);
2192
TR::LabelSymbol * labelVectorLoop = generateLabelSymbol(cg);
2193
TR::LabelSymbol * labelVectorReduce = generateLabelSymbol(cg);
2194
2195
TR::LabelSymbol * labelSerial = generateLabelSymbol(cg);
2196
2197
TR::LabelSymbol * labelSerialLoop = generateLabelSymbol(cg);
2198
2199
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
2200
2201
// Create the necessary registers
2202
TR::Register* registerHash = cg->allocateRegister();
2203
2204
TR::Register* registerValue = cg->evaluate(nodeValue);
2205
TR::Register* registerIndex = cg->gprClobberEvaluate(nodeIndex);
2206
TR::Register* registerCount = cg->gprClobberEvaluate(nodeCount);
2207
2208
if (cg->comp()->target().is64Bit())
2209
{
2210
generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, registerIndex, registerIndex);
2211
generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, registerCount, registerCount);
2212
}
2213
2214
TR::Register* registerVA = cg->allocateRegister(TR_VRF);
2215
TR::Register* registerVB = cg->allocateRegister(TR_VRF);
2216
TR::Register* registerVC = cg->allocateRegister(TR_VRF);
2217
2218
TR::Register* registerEnd = cg->allocateRegister(TR_GPR);
2219
2220
TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 12, cg);
2221
2222
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
2223
cFlowRegionStart->setStartInternalControlFlow();
2224
2225
if(!isCompressed)
2226
{
2227
// registerIndex *= 2 and registerCount *= 2
2228
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerIndex, registerIndex, 1);
2229
generateRSInstruction(cg, TR::InstOpCode::getShiftLeftLogicalSingleOpCode(), node, registerCount, registerCount, 1);
2230
}
2231
2232
// registerEnd = registerIndex + registerCount
2233
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerEnd, generateS390MemoryReference(registerIndex, registerCount, 0, cg));
2234
2235
// registerHash = 0
2236
generateRREInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, registerHash, registerHash);
2237
2238
// Branch to labelSerial if registerCount < stringSize
2239
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, registerCount, static_cast<int32_t>(stringSize), TR::InstOpCode::COND_MASK4, labelSerial, false, false);
2240
2241
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVector);
2242
2243
// registerEnd -= terminateVal
2244
generateRILInstruction(cg, TR::InstOpCode::getSubtractLogicalImmOpCode(), node, registerEnd, terminateVal);
2245
2246
// snippetData1 = [31^4, 31^4, 31^4, 31^4]
2247
int32_t snippetData1[4] = {923521, 923521, 923521, 923521};
2248
2249
TR::MemoryReference* memrefSnippet1 = generateS390MemoryReference(cg->findOrCreateConstant(node, snippetData1, 16), cg, 0, node);
2250
2251
dependencies->addAssignAnyPostCondOnMemRef(memrefSnippet1);
2252
2253
// registerVA = snippetData1
2254
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVA, memrefSnippet1);
2255
2256
// registerVB = 0
2257
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, registerVB, 0, 0 /*unused*/);
2258
2259
// ----------------- Incoming branch -----------------
2260
2261
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVectorLoop);
2262
2263
// registerVC = 4 consecutive chars (16 bit shorts or 8 bit bytes depending on String Compression) at the current index
2264
generateVRXInstruction(cg, TR::InstOpCode::VLLEZ, node, registerVC, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), elementSize);
2265
2266
if (!isCompressed)
2267
{
2268
// registerVC = unpack 4 (16 bit) short elements into 4 (32 bit) int elements
2269
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 1);
2270
}
2271
else
2272
{
2273
// registerVC = unpack 4 (8 bit) byte elements into 4 (32 bit) int elements
2274
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, registerVC, registerVC, 0, 0, 0);
2275
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, registerVC, registerVC, 0, 0, 1);
2276
}
2277
2278
// registerVB = registerVB * registerVA + registerVC
2279
generateVRRdInstruction(cg, TR::InstOpCode::VMAL, node, registerVB, registerVB, registerVA, registerVC, 0, 2);
2280
2281
// registerIndex += stringSize
2282
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, stringSize, cg));
2283
2284
// Branch to labelVectorLoop if registerIndex < registerEnd
2285
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, registerIndex, registerEnd, TR::InstOpCode::COND_MASK4, labelVectorLoop, false, false);
2286
2287
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelVectorReduce);
2288
2289
// snippetData2 = [31^3, 31^2, 31^1, 31^0]
2290
int32_t snippetData2[4] = {29791, 961, 31, 1};
2291
2292
TR::MemoryReference* memrefSnippet2 = generateS390MemoryReference(cg->findOrCreateConstant(node, snippetData2, 16), cg, 0, node);
2293
2294
dependencies->addAssignAnyPostCondOnMemRef(memrefSnippet2);
2295
2296
// registerVA = snippetData2
2297
generateVRXInstruction(cg, TR::InstOpCode::VL, node, registerVA, memrefSnippet2);
2298
2299
// registerVB = registerVB * registerVA
2300
generateVRRcInstruction(cg, TR::InstOpCode::VML, node, registerVB, registerVB, registerVA, 2);
2301
2302
// registerVA = 0
2303
generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, registerVA, 0, 0 /*unused*/);
2304
2305
// registerVA = sum of 4 (32 bit) int elements
2306
generateVRRcInstruction(cg, TR::InstOpCode::VSUMQ, node, registerVA, registerVB, registerVA, 0, 0, 2);
2307
2308
// registerEnd += terminateVal
2309
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerEnd, generateS390MemoryReference(registerEnd, terminateVal, cg));
2310
2311
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, registerHash, registerVA, generateS390MemoryReference(3, cg), 2);
2312
2313
// ----------------- Incoming branch -----------------
2314
2315
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelSerial);
2316
labelSerial->setEndInternalControlFlow();
2317
2318
// Branch to labelEnd if registerIndex >= registerEnd
2319
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, registerIndex, registerEnd, TR::InstOpCode::COND_MASK10, cFlowRegionEnd, false, false);
2320
2321
// ----------------- Incoming branch -----------------
2322
2323
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelSerialLoop);
2324
labelSerialLoop->setStartInternalControlFlow();
2325
2326
TR::Register* registerTemp = registerCount;
2327
2328
// registerTemp = registerHash << 5
2329
generateRSInstruction(cg, TR::InstOpCode::SLLK, node, registerTemp, registerHash, 5);
2330
2331
// registerTemp -= registerHash
2332
generateRRInstruction(cg, TR::InstOpCode::getSubstractRegOpCode(), node, registerTemp, registerHash);
2333
2334
// registerHash = char at registerIndex
2335
if(isCompressed)
2336
generateRXInstruction(cg, TR::InstOpCode::LLGC, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2337
else
2338
generateRXInstruction(cg, TR::InstOpCode::LLH, node, registerHash, generateS390MemoryReference(registerValue, registerIndex, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg));
2339
2340
if(isCompressed) //registerIndex += 1
2341
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 1, cg));
2342
else //registerIndex += 2
2343
generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, registerIndex, generateS390MemoryReference(registerIndex, 2, cg));
2344
2345
2346
// registerHash += registerTemp
2347
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, registerHash, registerTemp);
2348
2349
// Branch to labelSerialLoop if registerIndex < registerEnd
2350
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, registerIndex, registerEnd, TR::InstOpCode::COND_MASK4, labelSerialLoop, false, false);
2351
2352
// Set up the proper register dependencies
2353
dependencies->addPostConditionIfNotAlreadyInserted(registerValue, TR::RealRegister::AssignAny);
2354
dependencies->addPostConditionIfNotAlreadyInserted(registerIndex, TR::RealRegister::AssignAny);
2355
dependencies->addPostConditionIfNotAlreadyInserted(registerCount, TR::RealRegister::AssignAny);
2356
2357
dependencies->addPostConditionIfNotAlreadyInserted(registerHash, TR::RealRegister::AssignAny);
2358
dependencies->addPostConditionIfNotAlreadyInserted(registerEnd, TR::RealRegister::AssignAny);
2359
2360
dependencies->addPostConditionIfNotAlreadyInserted(registerVA, TR::RealRegister::AssignAny);
2361
dependencies->addPostConditionIfNotAlreadyInserted(registerVB, TR::RealRegister::AssignAny);
2362
dependencies->addPostConditionIfNotAlreadyInserted(registerVC, TR::RealRegister::AssignAny);
2363
2364
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
2365
cFlowRegionEnd->setEndInternalControlFlow();
2366
2367
// Cleanup nodes before returning
2368
cg->decReferenceCount(nodeValue);
2369
cg->decReferenceCount(nodeIndex);
2370
cg->decReferenceCount(nodeCount);
2371
2372
// Cleanup registers before returning
2373
cg->stopUsingRegister(registerValue);
2374
cg->stopUsingRegister(registerIndex);
2375
cg->stopUsingRegister(registerCount);
2376
2377
cg->stopUsingRegister(registerEnd);
2378
2379
cg->stopUsingRegister(registerVA);
2380
cg->stopUsingRegister(registerVB);
2381
cg->stopUsingRegister(registerVC);
2382
2383
return node->setRegister(registerHash);
2384
}
2385
2386
/**
 * Intrinsic entry point for the "to upper" case conversion.
 *
 * Bumps the "z13/simd/toUpper" debug counter and then delegates to
 * caseConversionHelper, passing `true` as the helper's third argument
 * (toLowerIntrinsic passes `false` in the same position).
 *
 * @param node               node forwarded to the helper
 * @param cg                 code generator used for the counter and the helper
 * @param isCompressedString forwarded unchanged to the helper
 * @return the register returned by caseConversionHelper
 */
TR::Register*
J9::Z::TreeEvaluator::toUpperIntrinsic(TR::Node *node, TR::CodeGenerator *cg, bool isCompressedString)
   {
   const bool helperSelector = true;

   cg->generateDebugCounter("z13/simd/toUpper", 1, TR::DebugCounter::Free);

   TR::Register *convertedReg = caseConversionHelper(node, cg, helperSelector, isCompressedString);
   return convertedReg;
   }
2392
2393
/**
 * Intrinsic entry point for the "to lower" case conversion.
 *
 * Bumps the "z13/simd/toLower" debug counter and then delegates to
 * caseConversionHelper, passing `false` as the helper's third argument
 * (toUpperIntrinsic passes `true` in the same position).
 *
 * @param node               node forwarded to the helper
 * @param cg                 code generator used for the counter and the helper
 * @param isCompressedString forwarded unchanged to the helper
 * @return the register returned by caseConversionHelper
 */
TR::Register*
J9::Z::TreeEvaluator::toLowerIntrinsic(TR::Node *node, TR::CodeGenerator *cg, bool isCompressedString)
   {
   const bool helperSelector = false;

   cg->generateDebugCounter("z13/simd/toLower", 1, TR::DebugCounter::Free);

   TR::Register *convertedReg = caseConversionHelper(node, cg, helperSelector, isCompressedString);
   return convertedReg;
   }
2399
2400
/**
 * Inline expansion entry point for the double "max" operation.
 *
 * Bumps the "z13/simd/doubleMax" debug counter and delegates to
 * doubleMaxMinHelper with `true` as the third argument (inlineDoubleMin
 * passes `false`).
 *
 * @param node node forwarded to the helper
 * @param cg   code generator used for the counter and the helper
 * @return the register returned by doubleMaxMinHelper
 */
TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMax(TR::Node *node, TR::CodeGenerator *cg)
   {
   const bool helperSelector = true;

   cg->generateDebugCounter("z13/simd/doubleMax", 1, TR::DebugCounter::Free);

   TR::Register *selectedReg = doubleMaxMinHelper(node, cg, helperSelector);
   return selectedReg;
   }
2406
2407
/**
 * Inline expansion entry point for the double "min" operation.
 *
 * Bumps the "z13/simd/doubleMin" debug counter and delegates to
 * doubleMaxMinHelper with `false` as the third argument (inlineDoubleMax
 * passes `true`).
 *
 * @param node node forwarded to the helper
 * @param cg   code generator used for the counter and the helper
 * @return the register returned by doubleMaxMinHelper
 */
TR::Register*
J9::Z::TreeEvaluator::inlineDoubleMin(TR::Node *node, TR::CodeGenerator *cg)
   {
   const bool helperSelector = false;

   cg->generateDebugCounter("z13/simd/doubleMin", 1, TR::DebugCounter::Free);

   TR::Register *selectedReg = doubleMaxMinHelper(node, cg, helperSelector);
   return selectedReg;
   }
2413
2414
/**
 * Inline expansion of a three-operand fused multiply-add node.
 *
 * The node must have exactly three children (fatal assertion otherwise).
 * A fresh TR_FPR register is allocated for the result, the three children
 * are evaluated in order, and a single VFMA instruction is emitted whose
 * element-size mask is derived from the size of the node's data type.
 * The result register is attached to the node and each child's reference
 * count is decremented.
 *
 * @param node the three-child node being evaluated
 * @param cg   code generator
 * @return the register holding the VFMA result
 */
TR::Register *
J9::Z::TreeEvaluator::inlineMathFma(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT_FATAL(node->getNumChildren() == 3,
      "In function inlineMathFma, the node at address %p should have exactly 3 children, but got %u instead", node, node->getNumChildren());

   TR::Node *firstOperand  = node->getFirstChild();
   TR::Node *secondOperand = node->getSecondChild();
   TR::Node *thirdOperand  = node->getThirdChild();

   // The result register is allocated before any child is evaluated.
   TR::Register *resultReg = cg->allocateRegister(TR_FPR);

   TR::Register *firstReg  = cg->evaluate(firstOperand);
   TR::Register *secondReg = cg->evaluate(secondOperand);
   TR::Register *thirdReg  = cg->evaluate(thirdOperand);

   // Element-size mask follows the width of the node's own data type.
   const uint8_t elementSizeMask = getVectorElementSizeMask(TR::DataType::getSize(node->getDataType()));
   generateVRReInstruction(cg, TR::InstOpCode::VFMA, node, resultReg, firstReg, secondReg, thirdReg, elementSizeMask, 0);

   node->setRegister(resultReg);

   cg->decReferenceCount(firstOperand);
   cg->decReferenceCount(secondOperand);
   cg->decReferenceCount(thirdOperand);

   return resultReg;
   }
2437
2438
/*
2439
* J9 S390 specific tree evaluator table overrides
2440
*/
2441
extern void TEMPORARY_initJ9S390TreeEvaluatorTable(TR::CodeGenerator *cg)
2442
{
2443
TR_TreeEvaluatorFunctionPointer *tet = cg->getTreeEvaluatorTable();
2444
2445
tet[TR::monent] = TR::TreeEvaluator::monentEvaluator;
2446
tet[TR::monexit] = TR::TreeEvaluator::monexitEvaluator;
2447
tet[TR::monexitfence] = TR::TreeEvaluator::monexitfenceEvaluator;
2448
tet[TR::asynccheck] = TR::TreeEvaluator::asynccheckEvaluator;
2449
tet[TR::instanceof] = TR::TreeEvaluator::instanceofEvaluator;
2450
tet[TR::checkcast] = TR::TreeEvaluator::checkcastEvaluator;
2451
tet[TR::checkcastAndNULLCHK] = TR::TreeEvaluator::checkcastAndNULLCHKEvaluator;
2452
tet[TR::New] = TR::TreeEvaluator::newObjectEvaluator;
2453
tet[TR::variableNew] = TR::TreeEvaluator::newObjectEvaluator;
2454
tet[TR::newarray] = TR::TreeEvaluator::newArrayEvaluator;
2455
tet[TR::anewarray] = TR::TreeEvaluator::anewArrayEvaluator;
2456
tet[TR::variableNewArray] = TR::TreeEvaluator::anewArrayEvaluator;
2457
tet[TR::multianewarray] = TR::TreeEvaluator::multianewArrayEvaluator;
2458
tet[TR::arraylength] = TR::TreeEvaluator::arraylengthEvaluator;
2459
tet[TR::ResolveCHK] = TR::TreeEvaluator::resolveCHKEvaluator;
2460
tet[TR::DIVCHK] = TR::TreeEvaluator::DIVCHKEvaluator;
2461
tet[TR::BNDCHK] = TR::TreeEvaluator::BNDCHKEvaluator;
2462
tet[TR::ArrayCopyBNDCHK] = TR::TreeEvaluator::ArrayCopyBNDCHKEvaluator;
2463
tet[TR::BNDCHKwithSpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
2464
tet[TR::SpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
2465
tet[TR::ArrayStoreCHK] = TR::TreeEvaluator::ArrayStoreCHKEvaluator;
2466
tet[TR::ArrayCHK] = TR::TreeEvaluator::ArrayCHKEvaluator;
2467
tet[TR::MethodEnterHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
2468
tet[TR::MethodExitHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
2469
2470
tet[TR::tstart] = TR::TreeEvaluator::tstartEvaluator;
2471
tet[TR::tfinish] = TR::TreeEvaluator::tfinishEvaluator;
2472
tet[TR::tabort] = TR::TreeEvaluator::tabortEvaluator;
2473
2474
tet[TR::NULLCHK] = TR::TreeEvaluator::NULLCHKEvaluator;
2475
tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;
2476
}
2477
2478
2479
/**
 * Emits a single RX load of an object-header word from `tempMR` into `reg`.
 *
 * The opcode is LLGF when TR::Compiler->om.compressObjectReferences() is true,
 * and TR::InstOpCode::getLoadOpCode() otherwise.
 *
 * @param cg      code generator
 * @param node    node the instruction is attributed to
 * @param reg     target register
 * @param tempMR  memory reference to load from
 * @param iCursor instruction cursor passed through to generateRXInstruction
 * @return the generated load instruction
 */
TR::Instruction *
J9::Z::TreeEvaluator::genLoadForObjectHeaders(TR::CodeGenerator *cg, TR::Node *node, TR::Register *reg, TR::MemoryReference *tempMR, TR::Instruction *iCursor)
   {
   const TR::InstOpCode::Mnemonic headerLoadOp =
      TR::Compiler->om.compressObjectReferences() ? TR::InstOpCode::LLGF
                                                  : TR::InstOpCode::getLoadOpCode();

   return generateRXInstruction(cg, headerLoadOp, node, reg, tempMR, iCursor);
   }
2486
2487
/**
 * Emits the load of an object-header word from `tempMR` into `reg` with the
 * low-order byte cleared, so that only the J9Class part of the header remains.
 *
 * Opcode selection:
 *  - z13 or newer: one load-and-mask style instruction (LLZRGF with compressed
 *    references, TR::InstOpCode::getLoadAndMaskOpCode() otherwise), plus a
 *    "z13/LoadAndMask" debug counter.
 *  - older targets: a plain load (LLGF with compressed references,
 *    TR::InstOpCode::getLoadOpCode() otherwise) followed by NILL with 0xFF00.
 *
 * For read-barrier / write-barrier nodes the load itself is additionally
 * registered as an implicit exception point and given a GC map.
 *
 * @param cg      code generator
 * @param node    node the instructions are attributed to
 * @param reg     target register
 * @param tempMR  memory reference of the header word
 * @param iCursor instruction cursor the sequence is chained after
 * @return the last instruction chained through the cursor (the NILL on
 *         pre-z13 targets, the load otherwise)
 */
TR::Instruction *
J9::Z::TreeEvaluator::genLoadForObjectHeadersMasked(TR::CodeGenerator *cg, TR::Node *node, TR::Register *reg, TR::MemoryReference *tempMR, TR::Instruction *iCursor)
   {
   TR::Compilation *comp = cg->comp();

   const bool compressedRefs = TR::Compiler->om.compressObjectReferences();
   const bool hasLoadAndMask = comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z13);

   TR::InstOpCode::Mnemonic headerLoadOp;
   if (compressedRefs)
      {
      // LLGF zeroes the top 32 bits and loads the unmasked class word; LLZRGF is the z13 replacement.
      headerLoadOp = hasLoadAndMask ? TR::InstOpCode::LLZRGF : TR::InstOpCode::LLGF;
      }
   else
      {
      headerLoadOp = hasLoadAndMask ? TR::InstOpCode::getLoadAndMaskOpCode() : TR::InstOpCode::getLoadOpCode();
      }

   TR::Instruction *loadInstr = generateRXInstruction(cg, headerLoadOp, node, reg, tempMR, iCursor);
   iCursor = loadInstr;

   if (hasLoadAndMask)
      {
      cg->generateDebugCounter("z13/LoadAndMask", 1, TR::DebugCounter::Free);
      }
   else
      {
      // Bit-mask for masking J9Object header to extract J9Class
      const uint16_t classMask = 0xFF00;
      iCursor = generateRIInstruction(cg, TR::InstOpCode::NILL, node, reg, classMask, iCursor);
      }

   // rdbar/wrtbar evaluators first report the field access to the VM (field watch) and only
   // then perform the real load/store. The reporting helpers may call this routine to find out
   // whether field watch is enabled for the class, which means the class load emitted here can
   // happen before the actual indirect field access. A null object therefore faults on this
   // load, so it must carry the exception point and the GC map itself.
   const bool isBarrierNode = node->getOpCode().isReadBar() || node->getOpCode().isWrtBar();
   if (isBarrierNode)
      {
      cg->setImplicitExceptionPoint(loadInstr);
      loadInstr->setNeedsGCMap(0x0000FFFF);

      // NOTE(review): this test sits inside the barrier-only path; confirm whether a
      // barrier node can ever have the checkcastAndNULLCHK opcode.
      if (node->getOpCodeValue() == TR::checkcastAndNULLCHK)
         {
         loadInstr->setNode(comp->findNullChkInfo(node));
         }
      }

   return iCursor;
   }
2548
2549
// max number of cache slots used by checkcast/instanceof
2550
#define NUM_PICS 3
2551
2552
// Emits the inline "is castClass one of objClass' superclasses" test and returns the
// last instruction generated through `cursor`.
//
// Shape of the generated sequence, as built below:
//   1. Dynamic cast class only (castClassDepth == -1): load castClass->romClass->modifiers,
//      AND it with (J9AccInterface | J9AccClassArray) and either branch to callHelperLabel
//      when a bit is set, or (addDataSnippetAsSecondaryCache) compare objClassReg/castClassReg
//      against the two snippet registers and branch to trueLabel / callHelperLabel.
//   2. Unless the depth check is eliminated: load the object class' depth halfword into
//      scratch1Reg, compare it with the cast class depth and branch to failLabel on COND_BNH.
//      resultReg (when supplied) is set to 0 before this check.
//   3. Set resultReg (when supplied) to 1, load objClass->superclasses into scratch1Reg and
//      compare castClassReg with the entry at the cast class depth.
//
// No branch is emitted for the final compare; the returned cursor is that compare
// (presumably the caller branches on its condition code -- confirm at the call sites).
//
// Parameters:
//   objClassReg / castClassReg   registers holding the two classes being compared
//   scratch1Reg / scratch2Reg    clobbered temporaries
//   resultReg                    optional (may be NULL) 0/1 result register
//   litPoolBaseReg, conditions   forwarded to the constant-loading helpers
//   castClassDepth               compile-time depth of the cast class, or -1 when unknown
//   failLabel / trueLabel / callHelperLabel  branch targets used as described above
//   classObjectClazzSnippetReg / instanceOfClazzSnippetReg  base registers of the two
//                                cached-class snippets, used only with the secondary cache
static TR::Instruction *
2553
genTestIsSuper(TR::CodeGenerator * cg, TR::Node * node,
2554
TR::Register * objClassReg, TR::Register * castClassReg,
2555
TR::Register * scratch1Reg, TR::Register * scratch2Reg, TR::Register * resultReg,
2556
TR::Register * litPoolBaseReg, int32_t castClassDepth,
2557
TR::LabelSymbol * failLabel, TR::LabelSymbol * trueLabel, TR::LabelSymbol * callHelperLabel,
2558
TR::RegisterDependencyConditions * conditions, TR::Instruction * cursor,
2559
bool addDataSnippetAsSecondaryCache,
2560
TR::Register * classObjectClazzSnippetReg,
2561
TR::Register * instanceOfClazzSnippetReg
2562
)
2563
{
2564
TR::Compilation *comp = cg->comp();
2565
TR_Debug * debugObj = cg->getDebug();
2566
2567
// Byte offset of the cast class' slot inside the superclasses array.
int32_t superClassOffset = castClassDepth * TR::Compiler->om.sizeofReferenceAddress();
2568
// True when that offset lies outside [MIN_IMMEDIATE_VAL, MAX_IMMEDIATE_VAL]; the depth is then
// materialized in scratch2Reg instead of being used as an immediate / displacement.
bool outOfBound = (superClassOffset > MAX_IMMEDIATE_VAL || superClassOffset < MIN_IMMEDIATE_VAL) ? true : false;
2569
// For the scenario where a call to Class.isInstance() is converted to instanceof,
2570
// we need to load the class depth at runtime because we don't have it at compile time
2571
bool dynamicCastClass = (castClassDepth == -1);
2572
// The depth comparison is skipped for a statically known depth below the
// _minimumSuperclassArraySize option.
bool eliminateSuperClassArraySizeCheck = (!dynamicCastClass && (castClassDepth < comp->getOptions()->_minimumSuperclassArraySize));
2573
2574
2575
#ifdef OMR_GC_COMPRESSED_POINTERS
2576
// objClassReg contains the class offset, so we may need to
2577
// convert this offset to a real J9Class pointer
2578
#endif
2579
if (dynamicCastClass)
2580
{
2581
TR::LabelSymbol * notInterfaceLabel = generateLabelSymbol(cg);
2582
TR_ASSERT((node->getOpCodeValue() == TR::instanceof &&
2583
node->getSecondChild()->getOpCodeValue() != TR::loadaddr), "genTestIsSuper: castClassDepth == -1 is only supported for transformed isInstance calls.");
2584
2585
// check if cast class is an interface
2586
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratch1Reg,
2587
generateS390MemoryReference(castClassReg, offsetof(J9Class, romClass), cg), cursor);
2588
2589
cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, scratch1Reg,
2590
generateS390MemoryReference(scratch1Reg, offsetof(J9ROMClass, modifiers), cg), cursor);
2591
2592
2593
TR_ASSERT(((J9AccInterface | J9AccClassArray) < UINT_MAX && (J9AccInterface | J9AccClassArray) > 0),
2594
"genTestIsSuper::(J9AccInterface | J9AccClassArray) is not a 32-bit number\n");
2595
2596
// Keep only the interface / array bits; the branches below test whether any survived.
cursor = generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratch1Reg, static_cast<int32_t>((J9AccInterface | J9AccClassArray)), cursor);
2597
2598
if (debugObj)
2599
debugObj->addInstructionComment(cursor, "Check if castClass is an interface or class array and jump to helper sequence");
2600
2601
// insert snippet check
2602
if ( addDataSnippetAsSecondaryCache )
2603
{
2604
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, notInterfaceLabel, cursor);
2605
// classObjectClazzSnippet and instanceOfClazzSnippet stores values of currentObject and cast Object when
2606
// the helper call returns success.
2607
// test if class is interface or not.
2608
// if interface, we do the following.
2609
//
2610
// insert instanceof site snippet test
2611
// cmp objectClassReg, classObjectClazzSnippet
2612
// jne helper call
2613
// cmp castclassreg, instanceOfClazzSnippet
2614
// je true_label
2615
// jump to outlined label
2616
// test jitInstanceOf results
2617
// JE fail_label // instanceof result is not true
2618
//
2619
// the following will be done at the end of instanceof evaluation when we do helperCall
2620
// cmp snippet1 with value -1
2621
// jne true_label // snippet already updated
2622
// update classObjectClazzSnippet, instanceOfClazzSnippet with object class and instance of class
2623
// jmp true_label
2624
//NO need for cache test for z, if it is dynamic we will already have failed cache test if we got here.
2625
cursor = generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, objClassReg, generateS390MemoryReference(classObjectClazzSnippetReg,0,cg), cursor);
2626
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callHelperLabel, cursor);
2627
cursor = generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, castClassReg, generateS390MemoryReference(instanceOfClazzSnippetReg,0,cg), cursor);
2628
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, trueLabel, cursor);
2629
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, callHelperLabel, cursor);
2630
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, notInterfaceLabel, cursor);
2631
}
2632
else
2633
{
2634
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callHelperLabel, cursor);
2635
}
2636
}
2637
2638
2639
// Halfword load used for the class depth: LLGH at byte offset 6 of classDepthAndFlags on
// 64-bit targets, LLH at byte offset 2 otherwise.
TR::InstOpCode::Mnemonic loadOp;
2640
int32_t bytesOffset;
2641
2642
if (comp->target().is64Bit())
2643
{
2644
loadOp = TR::InstOpCode::LLGH;
2645
bytesOffset = 6;
2646
}
2647
else
2648
{
2649
loadOp = TR::InstOpCode::LLH;
2650
bytesOffset = 2;
2651
}
2652
2653
if (dynamicCastClass)
2654
{
2655
// Runtime depth of the cast class goes to scratch2Reg; it is reused for the depth
// comparison and for indexing the superclasses array further down.
cursor = generateRXInstruction(cg, loadOp, node, scratch2Reg,
2656
generateS390MemoryReference(castClassReg, offsetof(J9Class, classDepthAndFlags) + bytesOffset, cg), cursor);
2657
2658
TR_ASSERT(sizeof(((J9Class*)0)->classDepthAndFlags) == sizeof(uintptr_t),
2659
"genTestIsSuper::J9Class->classDepthAndFlags is wrong size\n");
2660
}
2661
2662
if (!eliminateSuperClassArraySizeCheck)
2663
{
2664
if (resultReg)
2665
{
2666
cursor = generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultReg, 0, cursor);
2667
}
2668
2669
cursor = generateRXInstruction(cg, loadOp, node, scratch1Reg,
2670
generateS390MemoryReference(objClassReg, offsetof(J9Class, classDepthAndFlags) + bytesOffset, cg) , cursor);
2671
TR_ASSERT(sizeof(((J9Class*)0)->classDepthAndFlags) == sizeof(uintptr_t),
2672
"genTestIsSuper::J9Class->classDepthAndFlags is wrong size\n");
2673
2674
// Set when the cast depth is available in scratch2Reg, so a register
// compare-and-branch can be used instead of compare-immediate + BRC.
bool generateCompareAndBranchIsPossible = false;
2675
2676
if (dynamicCastClass)
2677
generateCompareAndBranchIsPossible = true;
2678
else if (outOfBound)
2679
{
2680
if (comp->target().is64Bit())
2681
{
2682
cursor = genLoadLongConstant(cg, node, castClassDepth, scratch2Reg, cursor, conditions, litPoolBaseReg);
2683
}
2684
else
2685
{
2686
cursor = generateLoad32BitConstant(cg, node, castClassDepth, scratch2Reg, false, cursor, conditions, litPoolBaseReg);
2687
}
2688
generateCompareAndBranchIsPossible = true;
2689
}
2690
else
2691
{
2692
cursor = generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), node, scratch1Reg, castClassDepth, cursor);
2693
}
2694
2695
// NOTE(review): unlike every other instruction in this routine, the compare-and-branch
// call below is not given `cursor` as its preceding instruction -- confirm this is intended.
if (generateCompareAndBranchIsPossible)
2696
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, scratch1Reg, scratch2Reg, TR::InstOpCode::COND_BNH, failLabel, false, false);
2697
else
2698
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, failLabel, cursor);
2699
2700
// NOTE(review): the branch to failLabel uses COND_BNH on (object depth, cast depth), which
// reads as "object depth not higher than cast depth"; the instruction comment text below
// says ">" -- confirm which one is meant.
if (debugObj)
2701
debugObj->addInstructionComment(cursor, "Fail if depth(obj) > depth(castClass)");
2702
2703
}
2704
2705
if (resultReg)
2706
{
2707
cursor = generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultReg, 1, cursor);
2708
}
2709
#ifdef OMR_GC_COMPRESSED_POINTERS
2710
// objClassReg contains the class offset, so we may need to
2711
// convert this offset to a real J9Class pointer
2712
#endif
2713
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratch1Reg,
2714
generateS390MemoryReference(objClassReg, offsetof(J9Class, superclasses), cg), cursor);
2715
2716
// NOTE(review): for the outOfBound (non-dynamic) case scratch2Reg is only loaded inside the
// !eliminateSuperClassArraySizeCheck path above; this relies on outOfBound never being
// combined with an eliminated depth check -- confirm.
if (outOfBound || dynamicCastClass)
2717
{
2718
// Scale the depth in scratch2Reg to a byte offset (shift by 3 on 64-bit, by 2 otherwise).
if (comp->target().is64Bit())
2719
{
2720
cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, scratch2Reg, scratch2Reg, 3, cursor);
2721
}
2722
else
2723
{
2724
cursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, scratch2Reg, 2, cursor);
2725
}
2726
#ifdef OMR_GC_COMPRESSED_POINTERS
2727
// castClassReg contains the class offset, but the memory reference below will
2728
// generate a J9Class pointer. We may need to convert this pointer to an offset
2729
#endif
2730
cursor = generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, castClassReg,
2731
generateS390MemoryReference(scratch1Reg, scratch2Reg, 0, cg), cursor);
2732
}
2733
else
2734
{
2735
#ifdef OMR_GC_COMPRESSED_POINTERS
2736
// castClassReg contains the class offset, but the memory reference below will
2737
// generate a J9Class pointer. We may need to convert this pointer to an offset
2738
#endif
2739
cursor = generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, castClassReg,
2740
generateS390MemoryReference(scratch1Reg, superClassOffset, cg), cursor);
2741
}
2742
2743
if (debugObj)
2744
debugObj->addInstructionComment(cursor, "Check if objClass is subclass of castClass");
2745
2746
return cursor;
2747
}
2748
2749
// Checks for the scenario where a call to Class.isInstance() is converted to instanceof,
2750
// and we need to load the j9class of the cast class at runtime because we don't have it at compile time
2751
static bool isDynamicCastClassPointer(TR::Node * castOrInstanceOfNode)
2752
{
2753
if (castOrInstanceOfNode->getOpCodeValue() == TR::instanceof)
2754
{
2755
TR::Node * castClassNode = castOrInstanceOfNode->getSecondChild();
2756
TR_OpaqueClassBlock* castClassAddr = TR::TreeEvaluator::getCastClassAddress(castClassNode);
2757
2758
bool isUnresolved = castOrInstanceOfNode->getOpCode().hasSymbolReference() && castOrInstanceOfNode->getSymbolReference()->isUnresolved();
2759
2760
// came from transformed call isInstance to node instanceof, can't resolve at compile time
2761
return !castClassAddr && !isUnresolved;
2762
}
2763
return false;
2764
}
2765
2766
/*
2767
* generate test if object class is reference array
2768
* testerReg = load (objectClassReg+offset_romClass)
2769
* andImmediate with J9AccClassArray(0x10000)
2770
* MASK6 failLabel(If not Array we Fail)
2771
* testerReg = load (objectClassReg + leafcomponent_offset)
2772
* testerReg = load (objectClassReg + offset_romClass)
2773
* testerReg = load (objectClassReg + offset_modifiers)
2774
* andImmediate with J9AccClassInternalPrimitiveType(0x20000)
2775
* MASK6 trueLabel(if equal we fail, not equal we succeed)
2776
*/
2777
static void genIsReferenceArrayTest(TR::Node *node,
2778
TR::Register *objectClassReg,
2779
TR::Register *scratchReg1,
2780
TR::Register *scratchReg2,
2781
TR::Register *resultReg,
2782
TR::LabelSymbol *failLabel,
2783
TR::LabelSymbol *trueLabel,
2784
bool needsResult,
2785
bool trueFallThrough,
2786
TR::CodeGenerator *cg)
2787
{
2788
if (needsResult)
2789
{
2790
generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultReg, 0);
2791
}
2792
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1,
2793
generateS390MemoryReference(objectClassReg, offsetof(J9Class,romClass), cg));
2794
generateRXInstruction(cg, TR::InstOpCode::L, node, scratchReg1,
2795
generateS390MemoryReference(scratchReg1, offsetof(J9ROMClass, modifiers), cg));
2796
generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratchReg1, static_cast<int32_t>(J9AccClassArray));
2797
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, failLabel);
2798
2799
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1,
2800
generateS390MemoryReference(objectClassReg, offsetof(J9ArrayClass,componentType), cg));
2801
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1,
2802
generateS390MemoryReference(scratchReg1, offsetof(J9Class,romClass), cg));
2803
generateRXInstruction(cg, TR::InstOpCode::L, node, scratchReg1,
2804
generateS390MemoryReference(scratchReg1, offsetof(J9ROMClass, modifiers), cg));
2805
generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratchReg1, static_cast<int32_t>(J9AccClassInternalPrimitiveType));
2806
2807
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, failLabel);
2808
if (needsResult)
2809
{
2810
generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultReg, 1);
2811
}
2812
if (!trueFallThrough)
2813
{
2814
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, trueLabel);
2815
}
2816
}
2817
// only need a helper call if the class is not super and not final, otherwise
2818
// it can be determined without a call-out
2819
static bool needHelperCall(TR::Node * castOrInstanceOfNode, bool testCastClassIsSuper, bool isFinalClass)
2820
{
2821
return (!testCastClassIsSuper || isDynamicCastClassPointer(castOrInstanceOfNode)) && !isFinalClass;
2822
}
2823
2824
// The cache test is generated only when caching is enabled, a helper call is needed,
// and no superclass test is being generated.
static bool needTestCache(bool cachingEnabled, bool needsHelperCall, bool superClassTest)
   {
   if (superClassTest)
      return false;

   return cachingEnabled && needsHelperCall;
   }
2828
2829
// Returns the register of the literal-pool base child of a checkcast/instanceof node.
// The literal-pool base is taken to be the last child of a three-child node (asserted
// to be an aload or aRegLoad); any other child count yields NULL.
static TR::Register * establishLitPoolBaseReg(TR::Node * castOrInstanceOfNode, TR::CodeGenerator * cg)
   {
   if (castOrInstanceOfNode->getNumChildren() == 3)
      {
      TR::Node* litPoolBaseChild = castOrInstanceOfNode->getLastChild();
      TR_ASSERT((litPoolBaseChild->getOpCodeValue()==TR::aload) || (litPoolBaseChild->getOpCodeValue()==TR::aRegLoad),
                "Literal pool base child expected\n");
      return cg->evaluate(litPoolBaseChild);
      }

   return NULL;
   }
2843
2844
// Rough upper bound on the number of post dependencies an instance-of sequence
// can need: there can be no more than 10.
static int maxInstanceOfPostDependencies()
   {
   const int postDependencyLimit = 10;
   return postDependencyLimit;
   }
2850
2851
// similarly yucky... instanceof takes 2 parms and kills the return address
2852
bool killedByInstanceOfHelper(int32_t regIndex, TR::Node * node, TR::CodeGenerator * cg)
2853
{
2854
if (regIndex == -1)
2855
{
2856
return false; // not mapped to a specific register
2857
}
2858
2859
TR::Compilation *comp = cg->comp();
2860
int realReg = cg->getGlobalRegister(regIndex);
2861
2862
#if defined(TR_TARGET_64BIT)
2863
bool needsHelperCall = false;
2864
#if defined(J9ZOS390)
2865
if (comp->getOption(TR_EnableRMODE64))
2866
#endif
2867
{
2868
TR::Node * castClassNode = node->getSecondChild();
2869
TR::SymbolReference * castClassSymRef = castClassNode->getSymbolReference();
2870
bool testCastClassIsSuper = TR::TreeEvaluator::instanceOfOrCheckCastNeedSuperTest(node, cg);
2871
bool isFinalClass = (castClassSymRef == NULL) ? false : castClassSymRef->isNonArrayFinal(comp);
2872
needsHelperCall = needHelperCall(node, testCastClassIsSuper, isFinalClass);
2873
}
2874
2875
#endif
2876
2877
if (realReg == TR::RealRegister::GPR1 ||
2878
realReg == TR::RealRegister::GPR2 ||
2879
realReg == cg->getReturnAddressRegister()
2880
#if defined(TR_TARGET_64BIT)
2881
|| (needsHelperCall &&
2882
#if defined(J9ZOS390)
2883
comp->getOption(TR_EnableRMODE64) &&
2884
#endif
2885
realReg == cg->getEntryPointRegister())
2886
#endif
2887
)
2888
{
2889
return true;
2890
}
2891
else
2892
{
2893
return false;
2894
}
2895
}
2896
2897
static bool generateInlineTest(TR::CodeGenerator * cg, TR::Node * node, TR::Node * castClassNode,
2898
TR::Register * objClassReg, TR::Register * resultReg,
2899
TR::Register * scratchReg, TR::Register * litPoolReg,
2900
bool needsResult, TR::LabelSymbol * falseLabel,
2901
TR::LabelSymbol * trueLabel, TR::LabelSymbol * doneLabel, bool isCheckCast, int32_t maxNum_PICS = NUM_PICS)
2902
{
2903
TR::Compilation *comp = cg->comp();
2904
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
2905
TR_OpaqueClassBlock* guessClassArray[NUM_PICS];
2906
TR_OpaqueClassBlock* castClassAddr = TR::TreeEvaluator::getCastClassAddress(castClassNode);
2907
uint8_t num_PICs = 0, i;
2908
2909
if (!castClassAddr)
2910
{
2911
return false;
2912
}
2913
2914
if (isCheckCast)
2915
{
2916
TR_OpaqueClassBlock *tempGuessClassArray[NUM_PICS];
2917
uint8_t numberOfGuessClasses = TR::TreeEvaluator::interpreterProfilingInstanceOfOrCheckCastInfo(cg, node, tempGuessClassArray);
2918
if (numberOfGuessClasses > 0)
2919
{
2920
for (i = 0; i < numberOfGuessClasses; i++)
2921
{
2922
if (fej9->instanceOfOrCheckCast((J9Class*)tempGuessClassArray[i], (J9Class*)castClassAddr))
2923
{
2924
guessClassArray[num_PICs++] = tempGuessClassArray[i];
2925
if (maxNum_PICS == num_PICs) break;
2926
}
2927
}
2928
}
2929
}
2930
else
2931
{
2932
num_PICs = TR::TreeEvaluator::interpreterProfilingInstanceOfOrCheckCastInfo(cg, node, guessClassArray);
2933
}
2934
2935
// defect 92901
2936
// if test fails, in case of checkcast, there is no need to generate inline check for guess value
2937
if (num_PICs == 0)
2938
return false;
2939
2940
bool result_bool;
2941
TR::LabelSymbol *result_label;
2942
TR::Instruction * unloadableConstInstr[NUM_PICS];
2943
num_PICs = ((num_PICs > maxNum_PICS) ? maxNum_PICS : num_PICs);
2944
for (i = 0; i < num_PICs; i++)
2945
{
2946
dumpOptDetails(comp, "inline test with guess class address of %p\n", guessClassArray[i]);
2947
if (cg->needClassAndMethodPointerRelocations())
2948
unloadableConstInstr[i] = generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg,(uintptr_t) guessClassArray[i], TR_ClassPointer, NULL, NULL, NULL);
2949
else
2950
unloadableConstInstr[i] = generateRILInstruction(cg, TR::InstOpCode::LARL, node, scratchReg, guessClassArray[i]);
2951
2952
if (fej9->isUnloadAssumptionRequired((TR_OpaqueClassBlock *)(guessClassArray[i]), comp->getCurrentMethod()))
2953
comp->getStaticPICSites()->push_front(unloadableConstInstr[i]);
2954
2955
if (cg->wantToPatchClassPointer(guessClassArray[i], node))
2956
comp->getStaticHCRPICSites()->push_front(unloadableConstInstr[i]);
2957
2958
result_bool = fej9->instanceOfOrCheckCast((J9Class*)(guessClassArray[i]), (J9Class*)castClassAddr);
2959
result_label = (falseLabel != trueLabel ) ? (result_bool ? trueLabel : falseLabel) : doneLabel;
2960
2961
if (needsResult)
2962
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, (int32_t)result_bool);
2963
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, objClassReg, scratchReg, TR::InstOpCode::COND_BE, result_label);
2964
2965
}
2966
return true;
2967
}
2968
static void
2969
generateTestBitFlag(
2970
TR::CodeGenerator *cg,
2971
TR::Node *node,
2972
TR::Register *mdReg,
2973
int32_t offset,
2974
int32_t size,
2975
uint64_t bitFlag)
2976
{
2977
TR::MemoryReference * tempMR;
2978
int shiftForFlag = TR::TreeEvaluator::checkNonNegativePowerOfTwo((int64_t) bitFlag);
2979
TR_ASSERT(shiftForFlag > 0, "generateTestBitFlag: flag is assumed to be power of 2\n");
2980
2981
// point offset to the end of the word we point to, so we can make a byte comparison using tm
2982
offset += size - 1;
2983
2984
// TM tests the bits for one byte, so we calculate several displacements for different flags
2985
// Even though TM does not require the flag to be a power of two, the following code and the previous assumption require it
2986
if (shiftForFlag < 8)
2987
{
2988
tempMR = generateS390MemoryReference(mdReg, offset, cg);
2989
}
2990
else if (shiftForFlag < 16)
2991
{
2992
tempMR = generateS390MemoryReference(mdReg, offset - 1, cg);
2993
bitFlag = bitFlag >> 8;
2994
}
2995
else if (shiftForFlag < 24)
2996
{
2997
tempMR = generateS390MemoryReference(mdReg, offset - 2, cg);
2998
bitFlag = bitFlag >> 16;
2999
}
3000
else if (shiftForFlag < 32)
3001
{
3002
tempMR = generateS390MemoryReference(mdReg, offset - 3, cg);
3003
bitFlag = bitFlag >> 24;
3004
}
3005
#if defined(TR_TARGET_64BIT)
3006
else if (shiftForFlag < 40)
3007
{
3008
tempMR = generateS390MemoryReference(mdReg, offset - 4, cg);
3009
bitFlag = bitFlag >> 32;
3010
}
3011
else if (shiftForFlag < 48)
3012
{
3013
tempMR = generateS390MemoryReference(mdReg, offset - 5, cg);
3014
bitFlag = bitFlag >> 40;
3015
}
3016
else if (shiftForFlag < 56)
3017
{
3018
tempMR = generateS390MemoryReference(mdReg, offset - 6, cg);
3019
bitFlag = bitFlag >> 48;
3020
}
3021
else if (shiftForFlag < 64)
3022
{
3023
tempMR = generateS390MemoryReference(mdReg, offset - 7, cg);
3024
bitFlag = bitFlag >> 56;
3025
}
3026
#endif
3027
else
3028
{
3029
TR_ASSERT(0, "generateTestBitFlag: flag size assumption incorrect\n");
3030
}
3031
3032
generateSIInstruction(cg, TR::InstOpCode::TM, node, tempMR, (uint32_t) bitFlag);
3033
}
3034
3035
static void
3036
VMnonNullSrcWrtBarCardCheckEvaluator(
3037
TR::Node * node,
3038
TR::Register * owningObjectReg,
3039
TR::Register * srcReg,
3040
TR::Register *temp1Reg,
3041
TR::Register *temp2Reg,
3042
TR::LabelSymbol *doneLabel,
3043
TR::SymbolReference *wbRef ,
3044
TR::RegisterDependencyConditions *conditions,
3045
TR::CodeGenerator *cg,
3046
bool doCompileTimeCheckForHeapObj = true)
3047
{
3048
TR::Compilation *comp = cg->comp();
3049
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
3050
auto gcMode = TR::Compiler->om.writeBarrierType();
3051
bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
3052
//We need to do a runtime check on cardmarking for gencon policy if our owningObjReg is in tenure
3053
bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck);
3054
3055
TR_ASSERT(srcReg != NULL, "VMnonNullSrcWrtBarCardCheckEvaluator: Cannot send in a null source object...look at the fcn name\n");
3056
TR_ASSERT(doWrtBar == true,"VMnonNullSrcWrtBarCardCheckEvaluator: Invalid call to VMnonNullSrcWrtBarCardCheckEvaluator\n");
3057
3058
TR::Node * wrtbarNode = NULL;
3059
TR::LabelSymbol * helperSnippetLabel = generateLabelSymbol(cg);
3060
if (node->getOpCodeValue() == TR::awrtbari || node->getOpCodeValue() == TR::awrtbar)
3061
wrtbarNode = node;
3062
else if (node->getOpCodeValue() == TR::ArrayStoreCHK)
3063
wrtbarNode = node->getFirstChild();
3064
if (gcMode != gc_modron_wrtbar_always)
3065
{
3066
bool is64Bit = comp->target().is64Bit();
3067
bool isConstantHeapBase = !comp->getOptions()->isVariableHeapBaseForBarrierRange0();
3068
bool isConstantHeapSize = !comp->getOptions()->isVariableHeapSizeForBarrierRange0();
3069
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
3070
TR::InstOpCode::Mnemonic opLoadReg = TR::InstOpCode::getLoadRegOpCode();
3071
TR::InstOpCode::Mnemonic opSubtractReg = TR::InstOpCode::getSubstractRegOpCode();
3072
TR::InstOpCode::Mnemonic opSubtract = TR::InstOpCode::getSubstractOpCode();
3073
TR::InstOpCode::Mnemonic opCmpLog = TR::InstOpCode::getCmpLogicalOpCode();
3074
bool disableSrcObjCheck = true; //comp->getOption(TR_DisableWrtBarSrcObjCheck);
3075
bool constantHeapCase = ((!comp->compileRelocatableCode()) && isConstantHeapBase && isConstantHeapSize && shiftAmount == 0 && (!is64Bit || TR::Compiler->om.generateCompressedObjectHeaders()));
3076
if (constantHeapCase)
3077
{
3078
// these return uintptr_t but because of the if(constantHeapCase) they are guaranteed to be <= MAX(uint32_t). The uses of heapSize, heapBase, and heapSum need to be uint32_t.
3079
uint32_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
3080
uint32_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
3081
3082
if (!doCrdMrk && !disableSrcObjCheck)
3083
{
3084
uint32_t heapSum = heapBase + heapSize;
3085
generateRRInstruction(cg, opLoadReg, node, temp1Reg, owningObjectReg);
3086
generateRILInstruction(cg, TR::InstOpCode::IILF, node, temp2Reg, heapSum);
3087
generateRRInstruction(cg, opSubtractReg, node, temp1Reg, temp2Reg);
3088
generateRRInstruction(cg, opSubtractReg, node, temp2Reg, srcReg);
3089
generateRRInstruction(cg, is64Bit ? TR::InstOpCode::NGR : TR::InstOpCode::NR, node, temp1Reg, temp2Reg);
3090
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, doneLabel);
3091
}
3092
else
3093
{
3094
generateRRInstruction(cg, opLoadReg, node, temp1Reg, owningObjectReg); //copy owning into temp
3095
generateRILInstruction(cg, is64Bit ? TR::InstOpCode::SLGFI : TR::InstOpCode::SLFI, node, temp1Reg, heapBase); //temp = temp - heapbase
3096
generateS390CompareAndBranchInstruction(cg, is64Bit ? TR::InstOpCode::CLG : TR::InstOpCode::CL, node, temp1Reg, static_cast<int64_t>(heapSize), TR::InstOpCode::COND_BH, doneLabel, false, false, NULL, conditions);
3097
}
3098
}
3099
else
3100
{
3101
TR::MemoryReference * offset = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
3102
TR::MemoryReference * size = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
3103
generateRRInstruction(cg, opLoadReg, node, temp1Reg, owningObjectReg);
3104
generateRXInstruction(cg, opSubtract, node, temp1Reg, offset);
3105
generateRXInstruction(cg, opCmpLog, node, temp1Reg, size);
3106
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, doneLabel);
3107
}
3108
3109
TR::LabelSymbol *noChkLabel = generateLabelSymbol(cg);
3110
3111
if (!comp->getOptions()->realTimeGC())
3112
{
3113
bool isDefinitelyNonHeapObj = false, isDefinitelyHeapObj = false;
3114
if (wrtbarNode != NULL && doCompileTimeCheckForHeapObj)
3115
{
3116
isDefinitelyNonHeapObj = wrtbarNode->isNonHeapObjectWrtBar();
3117
isDefinitelyHeapObj = wrtbarNode->isHeapObjectWrtBar();
3118
}
3119
if (doCrdMrk && !isDefinitelyNonHeapObj)
3120
{
3121
TR::LabelSymbol *srcObjChkLabel = generateLabelSymbol(cg);
3122
// CompileTime check for heap object
3123
// SRLG r2, rHeapAddr, cardSize
3124
// L r1, cardTableVirtualStartOffset(metaData)
3125
// LHI r3,0x1
3126
// STC r3,0x0(r1,r2)
3127
uintptr_t cardSize = comp->getOptions()->getGcCardSize();
3128
int32_t shiftValue = TR::TreeEvaluator::checkNonNegativePowerOfTwo((int32_t) cardSize);
3129
TR::Register * cardOffReg = temp1Reg;
3130
TR::Register * mdReg = cg->getMethodMetaDataRealRegister();
3131
3132
// If conditions are NULL, we handle early assignment here.
3133
// O.w. caller is responsible for handling early assignment and making sure GPR1, GPR2 and RAREG are
3134
// available in conditions
3135
TR_ASSERT(shiftValue > 0,"VMnonNullSrcWrtBarCardCheckEvaluator: Card size must be power of 2");
3136
static_assert(CARD_DIRTY <= MAX_IMMEDIATE_VAL, "VMCardCheckEvaluator: CARD_DIRTY flag is assumed to be small enough for an imm op");
3137
3138
// If it is tarok balanced policy, we must generate card marking sequence.
3139
//
3140
auto gcMode = TR::Compiler->om.writeBarrierType();
3141
if (!(gcMode == gc_modron_wrtbar_cardmark_incremental || gcMode == gc_modron_wrtbar_satb))
3142
{
3143
generateTestBitFlag(cg, node, mdReg, offsetof(J9VMThread, privateFlags), sizeof(UDATA), J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE);
3144
// If the flag is not set, then we skip card marking
3145
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, srcObjChkLabel);
3146
}
3147
// dirty(activeCardTableBase + temp3Reg >> card_size_shift)
3148
if (comp->target().is64Bit())
3149
generateRSInstruction(cg, TR::InstOpCode::SRLG, node, cardOffReg, cardOffReg, shiftValue);
3150
else
3151
generateRSInstruction(cg, TR::InstOpCode::SRL, node, cardOffReg, shiftValue);
3152
3153
generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, cardOffReg,
3154
generateS390MemoryReference(mdReg, offsetof(J9VMThread, activeCardTableBase), cg));
3155
// Store the flag to the card's byte.
3156
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390MemoryReference(cardOffReg,0x0,cg), CARD_DIRTY);
3157
3158
if (!disableSrcObjCheck)
3159
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, noChkLabel);
3160
// If condition is NULL, the early assignment is handled by caller.
3161
// If not, early assignment handled here
3162
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, srcObjChkLabel, conditions);
3163
}
3164
}
3165
else
3166
TR_ASSERT(0, "card marking not supported for RT");
3167
3168
//Either if cardmarking is not on at compile time or runtime, we want to test srcobj because if its not in nursery, then
3169
//we don't have to do wrtbarrier
3170
if (!disableSrcObjCheck && !(!doCrdMrk && constantHeapCase))
3171
{
3172
generateRRInstruction(cg, opLoadReg, node, temp1Reg, srcReg);
3173
if (constantHeapCase)
3174
{
3175
// these return uintptr_t but because of the if(constantHeapCase) they are guaranteed to be <= MAX(uint32_t). The uses of heapSize, heapBase, and heapSum need to be uint32_t.
3176
uint32_t heapBase = comp->getOptions()->getHeapBaseForBarrierRange0();
3177
uint32_t heapSize = comp->getOptions()->getHeapSizeForBarrierRange0();
3178
generateRILInstruction(cg, is64Bit ? TR::InstOpCode::SLGFI : TR::InstOpCode::SLFI, node, temp1Reg, heapBase);
3179
generateS390CompareAndBranchInstruction(cg, is64Bit ? TR::InstOpCode::CLG : TR::InstOpCode::CL, node, temp1Reg, static_cast<int64_t>(heapSize), TR::InstOpCode::COND_BL, doneLabel, false);
3180
}
3181
else
3182
{
3183
TR::MemoryReference *offset = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(),
3184
offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
3185
TR::MemoryReference *size = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(),
3186
offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
3187
generateRXInstruction(cg, opSubtract, node, temp1Reg, offset);
3188
generateRXInstruction(cg, opCmpLog, node, temp1Reg, size);
3189
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, node, doneLabel);
3190
}
3191
}
3192
//If cardmarking is on at compile time (mode=wrtbaroldcrdmrkcheck) then need a label for when cardmarking is done
3193
//in which case we need to skip the srcobj check
3194
if (doCrdMrk)
3195
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, noChkLabel, conditions);
3196
3197
// inline checking remembered bit for generational or (gencon+cardmarking is inlined).
3198
static_assert(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST <= 0xFF, "The constant is too big");
3199
int32_t offsetToAgeBits = TR::Compiler->om.offsetOfHeaderFlags() + 3;
3200
#if defined(J9VM_INTERP_FLAGS_IN_CLASS_SLOT) && defined(TR_TARGET_64BIT)
3201
if (!TR::Compiler->om.compressObjectReferences())
3202
offsetToAgeBits += 4;
3203
#endif
3204
TR::MemoryReference * tempMR = generateS390MemoryReference(owningObjectReg, offsetToAgeBits, cg);
3205
generateSIInstruction(cg, TR::InstOpCode::TM, node, tempMR, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST);
3206
//Need to do wrtbarrer, go to the snippet
3207
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, helperSnippetLabel);
3208
}
3209
else
3210
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, helperSnippetLabel);
3211
3212
//Create a snipper to make the call so the fall through path is to doneLabel, we expect to call the helper less, this would remove a
3213
//branch
3214
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, helperSnippetLabel, wbRef, doneLabel));
3215
}
3216
3217
static void
3218
VMCardCheckEvaluator(
3219
TR::Node * node,
3220
TR::Register * owningObjectReg,
3221
TR::Register * tempReg,
3222
TR::RegisterDependencyConditions * conditions,
3223
TR::CodeGenerator * cg,
3224
bool clobberDstReg,
3225
TR::LabelSymbol *doneLabel = NULL,
3226
bool doCompileTimeCheckForHeapObj = true)
3227
{
3228
TR::Compilation *comp = cg->comp();
3229
if (!comp->getOptions()->realTimeGC())
3230
{
3231
TR::Node * wrtbarNode = NULL;
3232
if (node->getOpCodeValue() == TR::awrtbari || node->getOpCodeValue() == TR::awrtbar)
3233
wrtbarNode = node;
3234
else if (node->getOpCodeValue() == TR::ArrayStoreCHK)
3235
wrtbarNode = node->getFirstChild();
3236
3237
// CompileTime check for heap object
3238
bool isDefinitelyNonHeapObj = false, isDefinitelyHeapObj = false;
3239
3240
if (wrtbarNode != NULL && doCompileTimeCheckForHeapObj)
3241
{
3242
isDefinitelyNonHeapObj = wrtbarNode->isNonHeapObjectWrtBar();
3243
isDefinitelyHeapObj = wrtbarNode->isHeapObjectWrtBar();
3244
}
3245
3246
// 83613: We used to do inline CM for Old&CM Objects.
3247
// However, since all Old objects will go through the wrtbar helper,
3248
// which will CM too, our inline CM would become redundant.
3249
TR_ASSERT( (TR::Compiler->om.writeBarrierType()==gc_modron_wrtbar_cardmark || TR::Compiler->om.writeBarrierType()==gc_modron_wrtbar_cardmark_incremental) && !isDefinitelyNonHeapObj,
3250
"VMCardCheckEvaluator: Invalid call to cardCheckEvaluator\n");
3251
TR_ASSERT(doneLabel, "VMCardCheckEvaluator: doneLabel must be defined\n");
3252
TR_ASSERT((conditions && tempReg || clobberDstReg), "VMCardCheckEvaluator: Either a tempReg must be sent in to be used, or we should be able to clobber the owningObjReg\n");
3253
TR_ASSERT(!(clobberDstReg && tempReg), "VMCardCheckEvaluator: If owningObjReg is clobberable, don't allocate a tempReg\n");
3254
3255
// We do not card-mark non-heap objects.
3256
if (!isDefinitelyNonHeapObj)
3257
{
3258
// SRLG r2, rHeapAddr, cardSize
3259
// L r1, cardTableVirtualStartOffset(metaData)
3260
// LHI r3,0x1
3261
// STC r3,0x0(r1,r2)
3262
3263
uintptr_t cardSize = comp->getOptions()->getGcCardSize();
3264
int32_t shiftValue = TR::TreeEvaluator::checkNonNegativePowerOfTwo((int32_t) cardSize);
3265
3266
TR::Register * cardOffReg;
3267
TR::Register * mdReg = cg->getMethodMetaDataRealRegister();
3268
3269
if (!clobberDstReg)
3270
cardOffReg = tempReg;
3271
else if (clobberDstReg)
3272
cardOffReg = owningObjectReg;
3273
3274
TR_ASSERT(shiftValue > 0,"VMCardCheckEvaluator: Card size must be power of 2");
3275
static_assert(CARD_DIRTY <= MAX_IMMEDIATE_VAL, "VMCardCheckEvaluator: CARD_DIRTY flag is assumed to be small enough for an imm op");
3276
3277
// If it is tarok balanced policy, we must generate card marking sequence.
3278
auto gcMode = TR::Compiler->om.writeBarrierType();
3279
if (!(gcMode == gc_modron_wrtbar_cardmark_incremental || gcMode == gc_modron_wrtbar_satb))
3280
{
3281
generateTestBitFlag(cg, node, mdReg, offsetof(J9VMThread, privateFlags), sizeof(UDATA), J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE);
3282
// If the flag is not set, then we skip card marking
3283
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
3284
}
3285
3286
// cardOffReg (Temp) = owningObjectReg - heapBaseForBarrierRange0
3287
// Defect 91242 - If we can clobber the destination reg, then use owningObjectReg instead of cardOffReg.
3288
if (!clobberDstReg)
3289
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, cardOffReg, owningObjectReg);
3290
generateRXInstruction(cg, TR::InstOpCode::getSubstractOpCode(), node, cardOffReg,
3291
generateS390MemoryReference(mdReg, offsetof(J9VMThread, heapBaseForBarrierRange0), cg));
3292
3293
// Unless we know it's definitely a heap object, we need to check if offset
3294
// from base is less than heap size to determine if object resides in heap.
3295
if (!isDefinitelyHeapObj)
3296
{
3297
// if (cardOffReg(Temp) >= heapSizeForBarrierRage0), object not in the heap
3298
generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, cardOffReg,
3299
generateS390MemoryReference(mdReg, offsetof(J9VMThread, heapSizeForBarrierRange0), cg));
3300
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNL, node, doneLabel);
3301
}
3302
3303
// dirty(activeCardTableBase + temp3Reg >> card_size_shift)
3304
if (comp->target().is64Bit())
3305
generateRSInstruction(cg, TR::InstOpCode::SRLG, node, cardOffReg, cardOffReg, shiftValue);
3306
else
3307
generateRSInstruction(cg, TR::InstOpCode::SRL, node, cardOffReg, shiftValue);
3308
3309
//add the ActiveCardTableBase to the card offset
3310
generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, cardOffReg,
3311
generateS390MemoryReference(mdReg, offsetof(J9VMThread, activeCardTableBase), cg));
3312
// Store the flag to the card's byte.
3313
generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390MemoryReference(cardOffReg, 0x0, cg), CARD_DIRTY);
3314
}
3315
}
3316
else
3317
TR_ASSERT(0, "VMCardCheckEvaluator not supported for RT");
3318
}
3319
3320
static void
3321
VMwrtbarEvaluator(
3322
TR::Node * node,
3323
TR::Register * srcReg,
3324
TR::Register * owningObjectReg,
3325
bool srcNonNull,
3326
TR::CodeGenerator * cg)
3327
{
3328
TR::Instruction * cursor;
3329
TR::Compilation *comp = cg->comp();
3330
auto gcMode = TR::Compiler->om.writeBarrierType();
3331
bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
3332
bool doCrdMrk = ((gcMode == gc_modron_wrtbar_cardmark ||gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental)&& !node->isNonHeapObjectWrtBar());
3333
3334
// See VM Design 2048 for when wrtbar can be skipped, as determined by VP.
3335
if ( (node->getOpCode().isWrtBar() && node->skipWrtBar()) ||
3336
((node->getOpCodeValue() == TR::ArrayStoreCHK) && node->getFirstChild()->getOpCode().isWrtBar() && node->getFirstChild()->skipWrtBar() ) )
3337
return;
3338
TR::RegisterDependencyConditions * conditions;
3339
TR::LabelSymbol * doneLabel = generateLabelSymbol(cg);
3340
if (doWrtBar)
3341
conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);
3342
else
3343
conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 1, cg);
3344
3345
if (doWrtBar) // generational or gencon
3346
{
3347
TR::SymbolReference * wbRef = NULL;
3348
if (gcMode == gc_modron_wrtbar_always)
3349
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
3350
else // use jitWriteBarrierStoreGenerational for both generational and gencon, because we inline card marking.
3351
{
3352
static char *disable = feGetEnv("TR_disableGenWrtBar");
3353
wbRef = disable ?
3354
comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef() :
3355
comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();
3356
}
3357
TR::Register *epReg, *raReg;
3358
epReg = cg->allocateRegister();
3359
raReg = cg->allocateRegister();
3360
conditions->addPostCondition(raReg, cg->getReturnAddressRegister());
3361
conditions->addPostCondition(owningObjectReg, TR::RealRegister::GPR1);
3362
conditions->addPostCondition(srcReg, TR::RealRegister::GPR2);
3363
conditions->addPostCondition(epReg, cg->getEntryPointRegister());
3364
if (srcNonNull == false)
3365
{
3366
// If object is NULL, done
3367
generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, srcReg, srcReg);
3368
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
3369
}
3370
// Inlines cardmarking and remembered bit check for gencon.
3371
VMnonNullSrcWrtBarCardCheckEvaluator(node, owningObjectReg, srcReg, epReg, raReg, doneLabel, wbRef, conditions, cg, false);
3372
cg->stopUsingRegister(epReg);
3373
cg->stopUsingRegister(raReg);
3374
}
3375
else if (doCrdMrk) // -Xgc:optavgpause, concurrent marking only
3376
{
3377
conditions->addPostCondition(owningObjectReg, TR::RealRegister::AssignAny);
3378
VMCardCheckEvaluator(node, owningObjectReg, NULL, conditions, cg, true, doneLabel);
3379
}
3380
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
3381
}
3382
3383
///////////////////////////////////////////////////////////////////////////////////////
3384
// monentEvaluator: acquire lock for synchronising method
3385
///////////////////////////////////////////////////////////////////////////////////////
3386
TR::Register *
3387
J9::Z::TreeEvaluator::monentEvaluator(TR::Node * node, TR::CodeGenerator * cg)
3388
{
3389
return TR::TreeEvaluator::VMmonentEvaluator(node, cg);
3390
}
3391
3392
///////////////////////////////////////////////////////////////////////////////////////
3393
// monexitEvaluator: release lock for synchronising method
3394
///////////////////////////////////////////////////////////////////////////////////////
3395
TR::Register *
3396
J9::Z::TreeEvaluator::monexitEvaluator(TR::Node * node, TR::CodeGenerator * cg)
3397
{
3398
return TR::TreeEvaluator::VMmonexitEvaluator(node, cg);
3399
}
3400
3401
///////////////////////////////////////////////////////////////////////////////////////
3402
// asynccheckEvaluator: GC point
3403
///////////////////////////////////////////////////////////////////////////////////////
3404
TR::Register *
3405
J9::Z::TreeEvaluator::asynccheckEvaluator(TR::Node * node, TR::CodeGenerator * cg)
3406
{
3407
// used by asynccheck
3408
// The child contains an inline test.
3409
//
3410
TR::Node * testNode = node->getFirstChild();
3411
TR::Node * firstChild = testNode->getFirstChild();
3412
TR::Node * secondChild = testNode->getSecondChild();
3413
TR::Compilation *comp = cg->comp();
3414
intptr_t value = comp->target().is64Bit() ? secondChild->getLongInt() : secondChild->getInt();
3415
3416
TR_ASSERT( testNode->getOpCodeValue() == (comp->target().is64Bit() ? TR::lcmpeq : TR::icmpeq), "asynccheck bad format");
3417
TR_ASSERT( secondChild->getOpCode().isLoadConst() && secondChild->getRegister() == NULL, "asynccheck bad format");
3418
3419
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg);
3420
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
3421
TR::Instruction * gcPoint;
3422
3423
TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg);
3424
3425
// (0) asynccheck #4[0x004d7a88]Method[jitCheckAsyncMessages]
3426
// (1) icmpeq
3427
// (1) iload #281[0x00543138] MethodMeta[stackOverflowMark]+28
3428
// (1) iconst -1
3429
3430
if (comp->target().is32Bit() &&
3431
(firstChild->getOpCodeValue() == TR::iload) &&
3432
firstChild->getRegister() == NULL && value < 0)
3433
{
3434
// instead of comparing to the value itself, we can compare to 0
3435
// and, if the value is less than zero, we know it must be an async-check
3436
// since non-code addresses are always positive in 31-bit 390 code so the only
3437
// negative address we could have would be the 'bogus' -1 address to force
3438
// async-check.
3439
// (the VM ensures that all malloc'ed storage has the high-order-bit cleared)
3440
TR::Register * testRegister = cg->allocateRegister();
3441
TR::MemoryReference * tempMR = TR::MemoryReference::create(cg, firstChild);
3442
3443
TR_ASSERT( getIntegralValue(secondChild) == -1, "asynccheck bad format");
3444
TR_ASSERT( comp->target().is32Bit(), "ICM can be used for 32bit code-gen only!");
3445
3446
static char * dontUseTM = feGetEnv("TR_DONTUSETMFORASYNC");
3447
if (firstChild->getReferenceCount()>1 || dontUseTM)
3448
{
3449
generateRSInstruction(cg, TR::InstOpCode::ICM, firstChild, testRegister, (uint32_t) 0xF, tempMR);
3450
gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, node, snippetLabel);
3451
}
3452
else
3453
{
3454
generateSIInstruction(cg, TR::InstOpCode::TM, firstChild, tempMR, 0xFF);
3455
gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BO, node, snippetLabel);
3456
}
3457
3458
firstChild->setRegister(testRegister);
3459
tempMR->stopUsingMemRefRegister(cg);
3460
}
3461
else
3462
{
3463
if (value >= MIN_IMMEDIATE_VAL && value <= MAX_IMMEDIATE_VAL)
3464
{
3465
TR::MemoryReference * tempMR = TR::MemoryReference::create(cg, firstChild);
3466
3467
if (tempMR->getIndexRegister() != NULL && tempMR->getBaseRegister() != NULL)
3468
{
3469
TR::SymbolReference * symRef = firstChild->getSymbolReference();
3470
TR::Symbol * symbol = symRef->getSymbol();
3471
TR::Register * src1Reg = NULL;
3472
if (firstChild->getDataType() == TR::Address &&
3473
!symbol->isInternalPointer() &&
3474
!symbol->isNotCollected() &&
3475
!symbol->isAddressOfClassObject())
3476
{
3477
src1Reg = cg->allocateCollectedReferenceRegister();
3478
}
3479
else
3480
{
3481
src1Reg = cg->allocateRegister();
3482
}
3483
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), firstChild, src1Reg, tempMR);
3484
3485
updateReferenceNode(firstChild, src1Reg);
3486
firstChild->setRegister(src1Reg);
3487
3488
generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), node, src1Reg, value);
3489
}
3490
else
3491
{
3492
generateSILInstruction(cg, TR::InstOpCode::getCmpHalfWordImmToMemOpCode(), node, tempMR, value);
3493
}
3494
tempMR->stopUsingMemRefRegister(cg);
3495
}
3496
else
3497
{
3498
TR::Register * src1Reg = cg->evaluate(firstChild);
3499
TR::Register * tempReg = cg->evaluate(secondChild);
3500
generateRRInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, src1Reg, tempReg);
3501
}
3502
gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, snippetLabel);
3503
}
3504
3505
TR::RegisterDependencyConditions * dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
3506
TR::Register * rRA = cg->allocateRegister();
3507
// only 64bit zLinux and zOS trampoline requires rEP
3508
#if defined(TR_TARGET_64BIT)
3509
TR::Register * rEP = NULL;
3510
#if defined(J9ZOS390)
3511
if (comp->getOption(TR_EnableRMODE64))
3512
#endif
3513
{
3514
rEP = cg->allocateRegister();
3515
dependencies->addPostCondition(rEP, cg->getEntryPointRegister());
3516
}
3517
#endif
3518
3519
dependencies->addPostCondition(rRA, cg->getReturnAddressRegister());
3520
3521
TR_Debug * debugObj = cg->getDebug();
3522
if (debugObj)
3523
debugObj->addInstructionComment(gcPoint, "Branch to OOL asyncCheck sequence");
3524
3525
// starts OOL sequence, replacing the helper call snippet
3526
TR_S390OutOfLineCodeSection *outlinedHelperCall = NULL;
3527
outlinedHelperCall = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(snippetLabel, reStartLabel, cg);
3528
cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);
3529
outlinedHelperCall->swapInstructionListsWithCompilation();
3530
3531
// snippetLabel : OOL Start label
3532
TR::Instruction * cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, snippetLabel);
3533
if (debugObj)
3534
debugObj->addInstructionComment(cursor, "Denotes start of OOL asyncCheck sequence");
3535
3536
// BRASL R14, VMHelper, gc stack map on BRASL
3537
gcPoint = generateDirectCall(cg, node, false, node->getSymbolReference(), dependencies, cursor);
3538
gcPoint->setDependencyConditions(dependencies);
3539
3540
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, reStartLabel);
3541
if (debugObj)
3542
debugObj->addInstructionComment(cursor, "Denotes end of OOL asyncCheck sequence: return to mainline");
3543
3544
// Done using OOL with manual code generation
3545
outlinedHelperCall->swapInstructionListsWithCompilation();
3546
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, reStartLabel);
3547
if (debugObj)
3548
debugObj->addInstructionComment(cursor, "OOL asyncCheck return label");
3549
3550
gcPoint->setNeedsGCMap(0x0000FFFF);
3551
3552
cg->decReferenceCount(firstChild);
3553
cg->decReferenceCount(secondChild);
3554
cg->decReferenceCount(testNode);
3555
#if defined(TR_TARGET_64BIT)
3556
#if defined(J9ZOS390)
3557
if (comp->getOption(TR_EnableRMODE64))
3558
#endif
3559
{
3560
cg->stopUsingRegister(rEP);
3561
}
3562
#endif
3563
cg->stopUsingRegister(rRA);
3564
3565
return NULL;
3566
3567
}
3568
3569
/** \brief Generates ArrayOfJavaLangObjectTest (object class is reference array) for instanceOf or checkCast node
3570
* \details
3571
* scratchReg1 = load (objectClassReg+offset_romClass)
3572
* scratchReg1 = load (ROMClass+J9ROMClass+modifiers)
3573
* andImmediate with J9AccClassArray(0x10000)
3574
* If not Array -> Branch to Fail Label
3575
* testerReg = load (objectClassReg + leafcomponent_offset)
3576
* testerReg = load (objectClassReg + offset_romClass)
3577
* testerReg = load (objectClassReg + offset_modifiers)
3578
* andImmediate with J9AccClassInternalPrimitiveType(0x20000)
3579
* if not arrays of primitive set condition code to Zero indicating true result
3580
*/
3581
static
3582
void genInstanceOfOrCheckcastArrayOfJavaLangObjectTest(TR::Node *node, TR::CodeGenerator *cg, TR::Register *objectClassReg, TR::LabelSymbol *failLabel, TR_S390ScratchRegisterManager *srm)
3583
{
3584
TR::Compilation *comp = cg->comp();
3585
TR_Debug *debugObj = cg->getDebug();
3586
TR::Instruction *cursor = NULL;
3587
TR::Register *scratchReg1 = srm->findOrCreateScratchRegister();
3588
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1, generateS390MemoryReference(objectClassReg, offsetof(J9Class,romClass), cg));
3589
generateRXInstruction(cg, TR::InstOpCode::L, node, scratchReg1, generateS390MemoryReference(scratchReg1, offsetof(J9ROMClass, modifiers), cg));
3590
generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratchReg1, static_cast<int32_t>(J9AccClassArray));
3591
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, failLabel);
3592
if (debugObj)
3593
debugObj->addInstructionComment(cursor,"Fail instanceOf/checkCast if Not Array");
3594
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1, generateS390MemoryReference(objectClassReg, offsetof(J9ArrayClass,componentType), cg));
3595
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchReg1, generateS390MemoryReference(scratchReg1, offsetof(J9Class,romClass), cg));
3596
generateRXInstruction(cg, TR::InstOpCode::L, node, scratchReg1, generateS390MemoryReference(scratchReg1, offsetof(J9ROMClass, modifiers), cg));
3597
generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratchReg1, static_cast<int32_t>(J9AccClassInternalPrimitiveType));
3598
srm->reclaimScratchRegister(scratchReg1);
3599
}
3600
3601
/** \brief Generates Superclass Test for both checkcast and instanceof nodes.
3602
* \details
3603
* It will generate pseudocode as follows.
3604
* if (objectClassDepth <= castClassDepth) call Helper
3605
* else
3606
* load superClassArrReg,superClassOfObjectClass
3607
* cmp superClassArrReg[castClassDepth], castClass
3608
* Here It sets up the condition code for callee to react on.
3609
*/
3610
static
3611
bool genInstanceOfOrCheckcastSuperClassTest(TR::Node *node, TR::CodeGenerator *cg, TR::Register *objClassReg, TR::Register *castClassReg, int32_t castClassDepth,
3612
TR::LabelSymbol *falseLabel, TR::LabelSymbol *callHelperLabel, TR_S390ScratchRegisterManager *srm)
3613
{
3614
TR::Compilation *comp = cg->comp();
3615
int32_t superClassDepth = castClassDepth * TR::Compiler->om.sizeofReferenceAddress();
3616
TR::Register *castClassDepthReg = NULL;
3617
TR::InstOpCode::Mnemonic loadOp;
3618
int32_t byteOffset;
3619
TR::Instruction *cursor = NULL;
3620
if (cg->comp()->target().is64Bit())
3621
{
3622
loadOp = TR::InstOpCode::LLGH;
3623
byteOffset = 6;
3624
}
3625
else
3626
{
3627
loadOp = TR::InstOpCode::LLH;
3628
byteOffset = 2;
3629
}
3630
//Following Changes are for dynamicCastClass only
3631
bool dynamicCastClass = castClassDepth == -1;
3632
bool eliminateSuperClassArraySizeCheck = (!dynamicCastClass && (castClassDepth < cg->comp()->getOptions()->_minimumSuperclassArraySize));
3633
// In case of dynamic Cast Class, We do not know the depth of the cast Class at compile time. So following routine compares depth at run time.
3634
if ( dynamicCastClass )
3635
{
3636
TR::Register *scratchRegister1 = srm->findOrCreateScratchRegister();
3637
//TR::Register *scratchRegister1 = scratch1Reg;
3638
TR_ASSERT((node->getOpCodeValue() == TR::instanceof &&
3639
node->getSecondChild()->getOpCodeValue() != TR::loadaddr), "genTestIsSuper: castClassDepth == -1 is only supported for transformed isInstance calls.");
3640
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, scratchRegister1,
3641
generateS390MemoryReference(castClassReg, offsetof(J9Class, romClass), cg), cursor);
3642
cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, scratchRegister1,
3643
generateS390MemoryReference(scratchRegister1, offsetof(J9ROMClass, modifiers), cg), cursor);
3644
TR_ASSERT(((J9AccInterface | J9AccClassArray) < UINT_MAX && (J9AccInterface | J9AccClassArray) > 0),
3645
"genTestIsSuper::(J9AccInterface | J9AccClassArray) is not a 32-bit number\n");
3646
cursor = generateRILInstruction(cg, TR::InstOpCode::NILF, node, scratchRegister1, static_cast<int32_t>((J9AccInterface | J9AccClassArray)), cursor);
3647
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callHelperLabel, cursor);
3648
castClassDepthReg = srm->findOrCreateScratchRegister();
3649
cursor = generateRXInstruction(cg, loadOp, node, castClassDepthReg,
3650
generateS390MemoryReference(castClassReg, offsetof(J9Class, classDepthAndFlags) + byteOffset, cg), cursor);
3651
3652
srm->reclaimScratchRegister(scratchRegister1);
3653
TR_ASSERT(sizeof(((J9Class*)0)->classDepthAndFlags) == sizeof(uintptr_t),
3654
"genTestIsSuper::J9Class->classDepthAndFlags is wrong size\n");
3655
}
3656
3657
3658
//objectClassDepthReg <- objectClassDepth
3659
if (!eliminateSuperClassArraySizeCheck)
3660
{
3661
TR::Register *objectClassDepthReg = srm->findOrCreateScratchRegister();
3662
cursor = generateRXInstruction(cg, loadOp, node, objectClassDepthReg,
3663
generateS390MemoryReference(objClassReg, offsetof(J9Class, classDepthAndFlags) + byteOffset, cg) , NULL);
3664
3665
//Compare objectClassDepth and castClassDepth
3666
if (dynamicCastClass)
3667
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, objectClassDepthReg, castClassDepthReg, TR::InstOpCode::COND_BNH, falseLabel, false, false);
3668
else
3669
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, objectClassDepthReg, castClassDepth, TR::InstOpCode::COND_BNH, falseLabel, true, false, cursor);
3670
srm->reclaimScratchRegister(objectClassDepthReg);
3671
}
3672
3673
//superClassArrReg <- objectClass->superClasses
3674
TR::Register *superClassArrReg = srm->findOrCreateScratchRegister();
3675
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, superClassArrReg,
3676
generateS390MemoryReference(objClassReg, offsetof(J9Class, superclasses), cg), cursor);
3677
if (dynamicCastClass)
3678
{
3679
if (cg->comp()->target().is64Bit())
3680
{
3681
cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, castClassDepthReg, castClassDepthReg, 3, cursor);
3682
}
3683
else
3684
{
3685
cursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, castClassDepthReg, 2, cursor);
3686
}
3687
cursor = generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, castClassReg,
3688
generateS390MemoryReference(superClassArrReg, castClassDepthReg, 0, cg), cursor);
3689
srm->reclaimScratchRegister(castClassDepthReg);
3690
}
3691
else
3692
{
3693
//CG superClassArrReg[castClassDepth],castClassReg
3694
cursor = generateRXInstruction (cg, TR::InstOpCode::getCmpOpCode(), node, castClassReg,
3695
generateS390MemoryReference(superClassArrReg, superClassDepth, cg), cursor);
3696
}
3697
srm->reclaimScratchRegister(superClassArrReg);
3698
return dynamicCastClass;
3699
//We expect Result of the test reflects in Condition Code. Callee should react on this.
3700
}
3701
3702
///////////////////////////////////////////////////////////////////////////////////////
3703
// instanceofEvaluator: symref is the class object, cp index is in the "int" field,
3704
// child is the object reference
3705
///////////////////////////////////////////////////////////////////////////////////////
3706
TR::Register *
3707
J9::Z::TreeEvaluator::instanceofEvaluator(TR::Node * node, TR::CodeGenerator * cg)
3708
{
3709
TR::Compilation *comp = cg->comp();
3710
static bool initialResult = feGetEnv("TR_instanceOfInitialValue") != NULL;
3711
traceMsg(comp,"Initial result = %d\n",initialResult);
3712
// Complementing Initial Result to True if the floag is not passed.
3713
return VMgenCoreInstanceofEvaluator(node,cg,NULL,NULL,!initialResult,1,NULL,false);
3714
}
3715
3716
/** \brief
3717
* Generates null test of \p objectReg for instanceof or checkcast[AndNULLCHK] \p node. In case a NULLCHK is
3718
* required this function will generate the sequence which throws the appropriate exception.
3719
*
3720
* \param node
3721
* The instanceof, checkcast, or checkcastAndNULLCHK node.
3722
*
3723
* \param cg
3724
* The code generator used to generate the instructions.
3725
*
3726
* \param objectReg
3727
* The object to null test.
3728
*
3729
* \return
3730
* \c true if a boolean condition code is set and the callee is expected to act on it; \c false otherwise, meaning
3731
* a NULLCHK was performed and if \p objectReg was null an exception throwing fallback path will be taken.
3732
*/
3733
static bool
3734
genInstanceOfOrCheckCastNullTest(TR::Node* node, TR::CodeGenerator* cg, TR::Register* objectReg)
3735
{
3736
if (node->getOpCodeValue() == TR::checkcastAndNULLCHK)
3737
{
3738
if (cg->getHasResumableTrapHandler())
3739
{
3740
TR::Instruction* compareAndTrapInstruction = generateRIEInstruction(cg, TR::InstOpCode::getCmpImmTrapOpCode(), node, objectReg, 0, TR::InstOpCode::COND_BE);
3741
compareAndTrapInstruction->setExceptBranchOp();
3742
compareAndTrapInstruction->setNeedsGCMap(0x0000FFFF);
3743
}
3744
else
3745
{
3746
generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, objectReg, objectReg);
3747
3748
TR::Compilation* comp = cg->comp();
3749
TR::LabelSymbol* snippetLabel = generateLabelSymbol(cg);
3750
TR::Node* nullChkInfo = comp->findNullChkInfo(node);
3751
3752
TR::Instruction* branchInstruction = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, nullChkInfo, snippetLabel);
3753
branchInstruction->setExceptBranchOp();
3754
branchInstruction->setNeedsGCMap(0x0000FFFF);
3755
3756
TR::SymbolReference* symRef = comp->getSymRefTab()->findOrCreateNullCheckSymbolRef(comp->getMethodSymbol());
3757
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, nullChkInfo, snippetLabel, symRef));
3758
}
3759
3760
return false;
3761
}
3762
else
3763
{
3764
generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, objectReg, objectReg);
3765
3766
return true;
3767
}
3768
}
3769
3770
///////////////////////////////////////////////////////////////////////////////////////
3771
// checkcastEvaluator - checkcast
3772
///////////////////////////////////////////////////////////////////////////////////////
3773
TR::Register *
3774
J9::Z::TreeEvaluator::checkcastEvaluator(TR::Node * node, TR::CodeGenerator * cg)
3775
{
3776
TR::Compilation *comp = cg->comp();
3777
3778
// TODO: This is not the place to make such checks. If we really want to optimize for space or disable inlining
3779
// of instanceof/checkcast we should still go through the else path to the common infrastructure and it should just
3780
// generate a call to the helper (along with any null tests if needed for checkcastAndNULLCHK). This should be
3781
// handled at the common level.
3782
TR_J9VMBase *fej9 = (TR_J9VMBase *) (comp->fe());
3783
TR_OpaqueClassBlock *profiledClass, *compileTimeGuessClass;
3784
3785
int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();
3786
traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);
3787
InstanceOfOrCheckCastProfiledClasses* profiledClassesList = (InstanceOfOrCheckCastProfiledClasses*)alloca(maxProfiledClasses * sizeof(InstanceOfOrCheckCastProfiledClasses));
3788
InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];
3789
3790
// We use this information to decide if we want to do SuperClassTest inline or not
3791
bool topClassWasCastClass=false;
3792
float topClassProbability=0.0;
3793
bool dynamicCastClass = false;
3794
uint32_t numberOfProfiledClass;
3795
uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);
3796
3797
TR::Node *objectNode = node->getFirstChild();
3798
TR::Node *castClassNode = node->getSecondChild();
3799
TR::Register *objectReg = NULL;
3800
TR::Register *castClassReg = NULL;
3801
TR::Register *objClassReg = NULL;
3802
TR::Register *objectCopyReg = NULL;
3803
TR::Register *castClassCopyReg = NULL;
3804
TR::Register *resultReg = NULL;
3805
3806
// We need here at maximum two scratch registers so forcing scratchRegisterManager to create pool of two registers only.
3807
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager(2);
3808
3809
TR::Instruction *gcPoint = NULL;
3810
TR::Instruction *cursor = NULL;
3811
TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;
3812
TR::LabelSymbol *doneOOLLabel = NULL;
3813
TR::LabelSymbol *startOOLLabel = NULL;
3814
TR::LabelSymbol *helperReturnOOLLabel = NULL;
3815
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
3816
TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
3817
TR::LabelSymbol *resultLabel = doneLabel;
3818
3819
TR_Debug * debugObj = cg->getDebug();
3820
objectReg = cg->evaluate(objectNode);
3821
3822
// When we topProfiledClass in the profiled information is cast class with frequency greater than 0.5, we expect class equality to succeed so we put rest of the test outlined.
3823
bool outLinedTest = numSequencesRemaining >= 2 && sequences[numSequencesRemaining-2] == SuperClassTest && topClassProbability >= 0.5 && topClassWasCastClass;
3824
traceMsg(comp, "Outline Super Class Test: %d\n", outLinedTest);
3825
InstanceOfOrCheckCastSequences *iter = &sequences[0];
3826
3827
while (numSequencesRemaining > 1)
3828
{
3829
switch(*iter)
3830
{
3831
case EvaluateCastClass:
3832
TR_ASSERT(!castClassReg, "Cast class already evaluated");
3833
if (comp->getOption(TR_TraceCG))
3834
traceMsg(comp, "%s: Class Not Evaluated. Evaluating it\n", node->getOpCode().getName());
3835
castClassReg = cg->evaluate(castClassNode);
3836
break;
3837
case LoadObjectClass:
3838
if (comp->getOption(TR_TraceCG))
3839
traceMsg(comp, "%s: Loading Object Class\n",node->getOpCode().getName());
3840
objClassReg = cg->allocateRegister();
3841
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objClassReg, generateS390MemoryReference(objectReg, static_cast<int32_t>(TR::Compiler->om.offsetOfObjectVftField()), cg), NULL);
3842
break;
3843
case GoToTrue:
3844
TR_ASSERT(false, "Doesn't Make sense, GoToTrue should not be part of multiple sequences");
3845
break;
3846
case GoToFalse:
3847
TR_ASSERT(false, "Doesn't make sense, GoToFalse should be the terminal sequence");
3848
break;
3849
case NullTest:
3850
{
3851
//If Object is Null, no need to carry out rest of test and jump to Done Label
3852
if (comp->getOption(TR_TraceCG))
3853
traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());
3854
TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");
3855
const bool isCCSet = genInstanceOfOrCheckCastNullTest(node, cg, objectReg);
3856
3857
if (isCCSet)
3858
{
3859
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
3860
}
3861
}
3862
break;
3863
case ClassEqualityTest:
3864
if (comp->getOption(TR_TraceCG))
3865
traceMsg(comp, "%s: Emitting Class Equality Test\n", node->getOpCode().getName());
3866
if (outLinedTest)
3867
{
3868
// This is the case when we are going to have an Internal Control Flow in the OOL
3869
startOOLLabel = generateLabelSymbol(cg);
3870
doneOOLLabel = doneLabel;
3871
helperReturnOOLLabel = generateLabelSymbol(cg);
3872
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/EqualOOL", comp->signature()),1,TR::DebugCounter::Undetermined);
3873
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassReg, objClassReg, TR::InstOpCode::COND_BNE, startOOLLabel, false, false);
3874
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/EqualOOLPass", comp->signature()),1,TR::DebugCounter::Undetermined);
3875
outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(startOOLLabel,doneOOLLabel,cg);
3876
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
3877
outlinedSlowPath->swapInstructionListsWithCompilation();
3878
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, startOOLLabel);
3879
resultLabel = helperReturnOOLLabel;
3880
}
3881
else
3882
{
3883
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Equal", comp->signature()),1,TR::DebugCounter::Undetermined);
3884
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassReg, objClassReg, TR::InstOpCode::COND_BE, doneLabel, false, false);
3885
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/EqualFail", comp->signature()),1,TR::DebugCounter::Undetermined);
3886
}
3887
break;
3888
case SuperClassTest:
3889
{
3890
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/SuperClass", comp->signature()),1,TR::DebugCounter::Undetermined);
3891
int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);
3892
TR_ASSERT(numSequencesRemaining == 2, "SuperClassTest should always be followed by a GoToFalse and must always be the second last test generated");
3893
if (comp->getOption(TR_TraceCG))
3894
traceMsg(comp, "%s: Emitting Super Class Test, Cast Class Depth=%d\n", node->getOpCode().getName(),castClassDepth);
3895
dynamicCastClass = genInstanceOfOrCheckcastSuperClassTest(node, cg, objClassReg, castClassReg, castClassDepth, callLabel, NULL, srm);
3896
/* outlinedSlowPath will be non-NULL if we have a higher probability of ClassEqualityTest succeeding.
3897
* In such cases we will do rest of the tests in OOL section, and as such we need to skip the helper call
3898
* if the result of SuperClassTest is true and branch to resultLabel which will branch back to the doneLabel from OOL code.
3899
* In normal cases SuperClassTest will be inlined with doneLabel as fallThroughLabel so we need to branch to callLabel to generate CastClassException
3900
* through helper call if result of SuperClassTest turned out to be false.
3901
*/
3902
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, outlinedSlowPath != NULL ? TR::InstOpCode::COND_BE : TR::InstOpCode::COND_BNE, node, outlinedSlowPath ? resultLabel : callLabel);
3903
break;
3904
}
3905
/** Following switch case generates sequence of instructions for profiled class test for this checkCast node
3906
* arbitraryClassReg1 <= profiledClass
3907
* if (arbitraryClassReg1 == objClassReg)
3908
* JMP DoneLabel
3909
* else
3910
* continue to NextTest
3911
*/
3912
case ProfiledClassTest:
3913
{
3914
if (comp->getOption(TR_TraceCG))
3915
traceMsg(comp, "%s: Emitting Profiled Class Test\n", node->getOpCode().getName());
3916
TR::Register *arbitraryClassReg1 = srm->findOrCreateScratchRegister();
3917
uint8_t numPICs = 0;
3918
TR::Instruction *temp= NULL;
3919
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Profiled", comp->signature()),1,TR::DebugCounter::Undetermined);
3920
while (numPICs < numberOfProfiledClass)
3921
{
3922
if (cg->needClassAndMethodPointerRelocations())
3923
temp = generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, arbitraryClassReg1, (uintptr_t) profiledClassesList[numPICs].profiledClass, TR_ClassPointer, NULL, NULL, NULL);
3924
else
3925
temp = generateRILInstruction(cg, TR::InstOpCode::LARL, node, arbitraryClassReg1, profiledClassesList[numPICs].profiledClass);
3926
3927
// Adding profiled classes to static PIC sites
3928
if (fej9->isUnloadAssumptionRequired((TR_OpaqueClassBlock *)(profiledClassesList[numPICs].profiledClass), comp->getCurrentMethod()))
3929
comp->getStaticPICSites()->push_front(temp);
3930
// Adding profiled classes to HCR PIC sites
3931
if (cg->wantToPatchClassPointer(profiledClassesList[numPICs].profiledClass, node))
3932
comp->getStaticHCRPICSites()->push_front(temp);
3933
3934
temp = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, arbitraryClassReg1, objClassReg, TR::InstOpCode::COND_BE, resultLabel, false, false);
3935
numPICs++;
3936
}
3937
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/ProfiledFail", comp->signature()),1,TR::DebugCounter::Undetermined);
3938
srm->reclaimScratchRegister(arbitraryClassReg1);
3939
break;
3940
}
3941
case CompileTimeGuessClassTest:
3942
{
3943
if (comp->getOption(TR_TraceCG))
3944
traceMsg(comp, "%s: Emitting Compile Time Guess Class Test\n", node->getOpCode().getName());
3945
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/CompTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);
3946
TR::Register *arbitraryClassReg2 = srm->findOrCreateScratchRegister();
3947
genLoadAddressConstant(cg, node, (uintptr_t)compileTimeGuessClass, arbitraryClassReg2);
3948
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, arbitraryClassReg2, objClassReg, TR::InstOpCode::COND_BE, resultLabel , false, false);
3949
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/CompTimeFail", comp->signature()),1,TR::DebugCounter::Undetermined);
3950
srm->reclaimScratchRegister(arbitraryClassReg2);
3951
break;
3952
}
3953
case ArrayOfJavaLangObjectTest:
3954
{
3955
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);
3956
if (comp->getOption(TR_TraceCG))
3957
traceMsg(comp,"%s: Emitting ArrayOfJavaLangObjectTest\n",node->getOpCode().getName());
3958
genInstanceOfOrCheckcastArrayOfJavaLangObjectTest(node, cg, objClassReg, callLabel, srm) ;
3959
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
3960
break;
3961
}
3962
/** Following switch case generates sequence of instructions for cast class cache test for this checkCast node
3963
* Load castClassCacheReg, offsetOf(J9Class,castClassCache)
3964
* if castClassCacheReg == castClassReg
3965
* JMP DoneLabel
3966
* else
3967
* continue to NextTest
3968
*/
3969
case CastClassCacheTest:
3970
{
3971
if (comp->getOption(TR_TraceCG))
3972
traceMsg(comp,"%s: Emitting CastClassCacheTest\n",node->getOpCode().getName());
3973
TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();
3974
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/Cache", comp->signature()),1,TR::DebugCounter::Undetermined);
3975
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, castClassCacheReg,
3976
generateS390MemoryReference(objClassReg, offsetof(J9Class, castClassCache), cg));
3977
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassCacheReg, castClassReg, TR::InstOpCode::COND_BE, resultLabel , false, false);
3978
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/CacheFail", comp->signature()),1,TR::DebugCounter::Undetermined);
3979
srm->reclaimScratchRegister(castClassCacheReg);
3980
break;
3981
}
3982
case HelperCall:
3983
TR_ASSERT(false, "Doesn't make sense, HelperCall should be the terminal sequence");
3984
break;
3985
default:
3986
break;
3987
}
3988
--numSequencesRemaining;
3989
++iter;
3990
}
3991
3992
TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7+srm->numAvailableRegisters(), cg);
3993
TR::RegisterDependencyConditions *outlinedConditions = NULL;
3994
3995
// In case of Higher probability of quality test to pass, we put rest of the test outlined
3996
if (!outlinedSlowPath)
3997
outlinedConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);
3998
else
3999
outlinedConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4+srm->numAvailableRegisters(), cg);
4000
4001
conditions->addPostCondition(objectReg, TR::RealRegister::AssignAny);
4002
if (objClassReg)
4003
conditions->addPostCondition(objClassReg, TR::RealRegister::AssignAny);
4004
4005
4006
srm->addScratchRegistersToDependencyList(conditions);
4007
J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
4008
// We will be generating sequence to call Helper if we have either GoToFalse or HelperCall Test
4009
if (numSequencesRemaining > 0 && *iter != GoToTrue)
4010
{
4011
4012
TR_ASSERT(*iter == HelperCall || *iter == GoToFalse, "Expecting helper call or fail here");
4013
bool helperCallForFailure = *iter != HelperCall;
4014
if (comp->getOption(TR_TraceCG))
4015
traceMsg(comp, "%s: Emitting helper call%s\n", node->getOpCode().getName(),helperCallForFailure?" for failure":"");
4016
//Following code is needed to put the Helper Call Outlined.
4017
if (!outlinedSlowPath)
4018
{
4019
// As SuperClassTest is the costliest test and is guaranteed to give results for checkCast node. Hence it will always be second last test
4020
// in iter array followed by GoToFalse as last test for checkCastNode
4021
if ( *(iter-1) != SuperClassTest)
4022
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, callLabel);
4023
doneOOLLabel = doneLabel;
4024
helperReturnOOLLabel = generateLabelSymbol(cg);
4025
outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(callLabel,doneOOLLabel,cg);
4026
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
4027
outlinedSlowPath->swapInstructionListsWithCompilation();
4028
}
4029
4030
4031
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callLabel);
4032
outlinedConditions->addPostCondition(objectReg, TR::RealRegister::AssignAny);
4033
if (outLinedTest)
4034
{
4035
outlinedConditions->addPostCondition(objClassReg, TR::RealRegister::AssignAny);
4036
srm->addScratchRegistersToDependencyList(outlinedConditions);
4037
}
4038
4039
if(!castClassReg)
4040
castClassReg = cg->evaluate(castClassNode);
4041
conditions->addPostCondition(castClassReg, TR::RealRegister::AssignAny);
4042
outlinedConditions->addPostCondition(castClassReg, TR::RealRegister::AssignAny);
4043
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCast/(%s)/Helper", comp->signature()),1,TR::DebugCounter::Undetermined);
4044
TR::RegisterDependencyConditions *deps = NULL;
4045
resultReg = startOOLLabel ? helperLink->buildDirectDispatch(node, &deps) : helperLink->buildDirectDispatch(node);
4046
if (resultReg)
4047
outlinedConditions->addPostCondition(resultReg, TR::RealRegister::AssignAny);
4048
4049
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "checkCastStats/(%s)/HelperCall", comp->signature()),1,TR::DebugCounter::Undetermined);
4050
if(outlinedSlowPath)
4051
{
4052
TR::RegisterDependencyConditions *mergeConditions = NULL;
4053
if (startOOLLabel)
4054
mergeConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(outlinedConditions, deps, cg);
4055
else
4056
mergeConditions = outlinedConditions;
4057
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperReturnOOLLabel, mergeConditions);
4058
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneOOLLabel);
4059
outlinedSlowPath->swapInstructionListsWithCompilation();
4060
}
4061
}
4062
if (resultReg)
4063
cg->stopUsingRegister(resultReg);
4064
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
4065
cg->stopUsingRegister(castClassReg);
4066
if (objClassReg)
4067
cg->stopUsingRegister(objClassReg);
4068
srm->stopUsingRegisters();
4069
cg->decReferenceCount(objectNode);
4070
cg->decReferenceCount(castClassNode);
4071
return NULL;
4072
}
4073
4074
///////////////////////////////////////////////////////////////////////////////////////
4075
// checkcastAndNULLCHKEvaluator - checkcastAndNULLCHK
4076
///////////////////////////////////////////////////////////////////////////////////////
4077
TR::Register *
4078
J9::Z::TreeEvaluator::checkcastAndNULLCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4079
{
4080
return checkcastEvaluator(node, cg);
4081
}
4082
4083
/** \brief Generates helper call sequence for all VMNew nodes.
4084
*
4085
* \param node
4086
* A new allocation node for which helper call is going to be generated
4087
*
4088
* \param cg
4089
* The code generator used to generate the instructions.
4090
*
4091
* \param doInlineAllocation
4092
* A boolean to notify if we have generated inline allocation sequence or not
4093
*
4094
* \param
4095
* A register to store return value from helper
4096
*
4097
* \return
4098
* A register that contains return value from helper.
4099
*
4100
* \details
4101
* Generates a helper call sequence for all new allocation nodes. It also handles special cases where we need to generate 64-bit extended children of call node
4102
*/
4103
TR::Register *
4104
J9::Z::TreeEvaluator::generateHelperCallForVMNewEvaluators(TR::Node *node, TR::CodeGenerator *cg, bool doInlineAllocation, TR::Register *resReg)
4105
{
4106
J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
4107
TR::ILOpCodes opCode = node->getOpCodeValue();
4108
TR::Node *helperCallNode = TR::Node::createWithSymRef(node, TR::acall, (opCode == TR::New || opCode == TR::variableNew) ? 1 : 2, node->getSymbolReference());
4109
TR::Node *firstChild = node->getFirstChild();
4110
if (!(opCode == TR::New || opCode == TR::variableNew))
4111
{
4112
// For 64 bit target we need to make sure we use whole 64 bit register even for loading integers as helper expects arguments like that
4113
// For these scenarios where children of original node is 32-bit we generate a following helper call node
4114
// acall
4115
// #IF (firstChild ->iconst || iRegLoad ) && 64-bit platform
4116
// -> i2l
4117
// -> firstChild
4118
// #ELSE
4119
// ->firstChild
4120
// #ENDIF
4121
// #IF (secondChild -> iconst || iRegLoad) && 64-bit platform
4122
// -> i2l
4123
// -> secondChild
4124
// #ELSE
4125
// ->secondChild
4126
// #ENDIF
4127
// If we generate i2l node, we need to artificially set reference count of node to 1.
4128
// After helper call is generated we decrease reference count of this node so that a register will be marked dead for RA.
4129
TR::Node *secondChild = node->getSecondChild();
4130
if (cg->comp()->target().is64Bit())
4131
{
4132
if (firstChild->getOpCode().isLoadConst() || firstChild->getOpCodeValue() == TR::iRegLoad)
4133
{
4134
firstChild = TR::Node::create(TR::i2l, 1, firstChild);
4135
firstChild->setReferenceCount(1);
4136
}
4137
if (secondChild->getOpCode().isLoadConst() || secondChild->getOpCodeValue() == TR::iRegLoad)
4138
{
4139
secondChild = TR::Node::create(TR::i2l, 1, secondChild);
4140
secondChild->setReferenceCount(1);
4141
}
4142
}
4143
helperCallNode->setChild(1, secondChild);
4144
}
4145
helperCallNode->setChild(0, firstChild);
4146
resReg = helperLink->buildDirectDispatch(helperCallNode, resReg);
4147
for (auto i=0; i < helperCallNode->getNumChildren(); i++)
4148
{
4149
if (helperCallNode->getChild(i)->getOpCodeValue() == TR::i2l)
4150
cg->decReferenceCount(helperCallNode->getChild(i));
4151
}
4152
// For some cases, we can not generate inline allocation sequence such as variableNew*. In these cases only helper call is generated.
4153
// So for these cases we need to decrease reference count of node here.
4154
if (!doInlineAllocation)
4155
{
4156
node->setRegister(resReg);
4157
for (auto i=0; i<node->getNumChildren(); i++)
4158
cg->decReferenceCount(node->getChild(i));
4159
}
4160
return resReg;
4161
}
4162
4163
///////////////////////////////////////////////////////////////////////////////////////
4164
// newObjectEvaluator: new symref is the class object
4165
///////////////////////////////////////////////////////////////////////////////////////
4166
TR::Register *
4167
J9::Z::TreeEvaluator::newObjectEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4168
{
4169
TR::Compilation* comp = cg->comp();
4170
if (cg->comp()->suppressAllocationInlining() ||
4171
TR::TreeEvaluator::requireHelperCallValueTypeAllocation(node, cg))
4172
return generateHelperCallForVMNewEvaluators(node, cg);
4173
else
4174
return TR::TreeEvaluator::VMnewEvaluator(node, cg);
4175
}
4176
4177
///////////////////////////////////////////////////////////////////////////////////////
4178
// newArrayEvaluator: new array of primitives
4179
///////////////////////////////////////////////////////////////////////////////////////
4180
TR::Register *
4181
J9::Z::TreeEvaluator::newArrayEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4182
{
4183
if (cg->comp()->suppressAllocationInlining())
4184
return generateHelperCallForVMNewEvaluators(node, cg);
4185
else
4186
return TR::TreeEvaluator::VMnewEvaluator(node, cg);
4187
}
4188
4189
///////////////////////////////////////////////////////////////////////////////////////
4190
// newArrayEvaluator: new array of objects
4191
///////////////////////////////////////////////////////////////////////////////////////
4192
TR::Register *
4193
J9::Z::TreeEvaluator::anewArrayEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4194
{
4195
if (cg->comp()->suppressAllocationInlining())
4196
return generateHelperCallForVMNewEvaluators(node, cg);
4197
else
4198
return TR::TreeEvaluator::VMnewEvaluator(node, cg);
4199
}
4200
4201
///////////////////////////////////////////////////////////////////////////////////////
4202
// multianewArrayEvaluator: multi-dimensional new array of objects
4203
///////////////////////////////////////////////////////////////////////////////////////
4204
TR::Register *
4205
J9::Z::TreeEvaluator::multianewArrayEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4206
{
4207
#define iComment(str) if (compDebug) compDebug->addInstructionComment(cursor, (const_cast<char*>(str)));
4208
TR::Compilation *comp = cg->comp();
4209
TR_Debug *compDebug = comp->getDebug();
4210
TR_ASSERT_FATAL(comp->target().is64Bit(), "multianewArrayEvaluator is only supported on 64-bit JVMs!");
4211
TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(comp->fe());
4212
TR::Register *targetReg = cg->allocateRegister();
4213
TR::Instruction *cursor = NULL;
4214
4215
TR::Node *firstChild = node->getFirstChild();
4216
TR::Node *secondChild = node->getSecondChild();
4217
TR::Node *thirdChild = node->getThirdChild();
4218
4219
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
4220
TR::LabelSymbol *nonZeroFirstDimLabel = generateLabelSymbol(cg);
4221
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
4222
TR::LabelSymbol *oolFailLabel = generateLabelSymbol(cg);
4223
4224
// oolJumpLabel is a common point that all branches will jump to. From this label, we branch to OOL code.
4225
// We do this instead of jumping directly to OOL code from mainline because the RA can only handle the case where there's
4226
// a single jump point to OOL code.
4227
TR::LabelSymbol *oolJumpLabel = generateLabelSymbol(cg);
4228
4229
cFlowRegionStart->setStartInternalControlFlow();
4230
cFlowRegionEnd->setEndInternalControlFlow();
4231
4232
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
4233
4234
TR::Register *dimsPtrReg = cg->evaluate(firstChild);
4235
TR::Register *dimReg = cg->evaluate(secondChild);
4236
TR::Register *classReg = cg->evaluate(thirdChild);
4237
4238
// In the mainline, first load the first and second dimensions' lengths into registers.
4239
TR::Register *firstDimLenReg = cg->allocateRegister();
4240
cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg));
4241
iComment("Load 1st dim length.");
4242
4243
TR::Register *secondDimLenReg = cg->allocateRegister();
4244
cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg));
4245
iComment("Load 2nd dim length.");
4246
4247
// Check to see if second dimension is indeed 0. If yes, then proceed to handle the case here. Otherwise jump to OOL code.
4248
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, secondDimLenReg, 0, TR::InstOpCode::COND_BNE, oolJumpLabel, false);
4249
iComment("if 2nd dim is 0, we handle it here. Else, jump to oolJumpLabel.");
4250
4251
// Now check to see if first dimension is also 0. If yes, continue below to handle the case when length for both dimensions is 0. Otherwise jump to nonZeroFirstDimLabel.
4252
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, firstDimLenReg, 0, TR::InstOpCode::COND_BNE, nonZeroFirstDimLabel, false);
4253
iComment("if 1st dim is also 0, we handle it here. Else, jump to nonZeroFirstDimLabel.");
4254
4255
// First dimension zero, so only allocate 1 zero-length object array
4256
TR::Register *vmThreadReg = cg->getMethodMetaDataRealRegister();
4257
generateRXInstruction(cg, TR::InstOpCode::LG, node, targetReg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg));
4258
4259
// Take into account alignment requirements for the size of the zero-length array header
4260
int32_t zeroArraySizeAligned = OMR::align(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), TR::Compiler->om.getObjectAlignmentInBytes());
4261
4262
// Branch to OOL if there's not enough space for an array of size 0.
4263
TR::Register *temp1Reg = cg->allocateRegister();
4264
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
4265
{
4266
generateRIEInstruction(cg, TR::InstOpCode::AGHIK, node, temp1Reg, targetReg, zeroArraySizeAligned);
4267
}
4268
else
4269
{
4270
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp1Reg, targetReg);
4271
generateRILInstruction(cg, TR::InstOpCode::AGFI, node, temp1Reg, zeroArraySizeAligned);
4272
}
4273
4274
generateRXInstruction(cg, TR::InstOpCode::CLG, node, temp1Reg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg));
4275
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, oolJumpLabel);
4276
iComment("Branch to oolJumpLabel if there isn't enough space for a 0 size array.");
4277
4278
// If there's enough space, then we can continue to allocate.
4279
generateRXInstruction(cg, TR::InstOpCode::STG, node, temp1Reg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg));
4280
4281
bool use64BitClasses = comp->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();
4282
4283
// Init class field, then jump to end of ICF
4284
generateRXInstruction(cg, use64BitClasses ? TR::InstOpCode::STG : TR::InstOpCode::ST, node, classReg, generateS390MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg));
4285
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
4286
iComment("Init class field and jump to end of ICF.");
4287
4288
// We end up in this region of the ICF if the first dimension is non-zero and the second dimension is zero.
4289
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, nonZeroFirstDimLabel);
4290
iComment("nonZeroFirstDimLabel, 2nd dim length is 0.");
4291
4292
TR::Register *componentClassReg = cg->allocateRegister();
4293
generateRXInstruction(cg, TR::InstOpCode::LG, node, componentClassReg, generateS390MemoryReference(classReg, offsetof(J9ArrayClass, componentType), cg));
4294
4295
// Calculate maximum allowable object size in elements and jump to OOL if firstDimLenReg is higher than it.
4296
int32_t elementSize = TR::Compiler->om.sizeofReferenceField();
4297
uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();
4298
uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;
4299
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, firstDimLenReg, static_cast<int32_t>(maxObjectSizeInElements), TR::InstOpCode::COND_BHR, oolJumpLabel, false);
4300
iComment("Jump to oolJumpLabel if 1st dim len > the num of elements a block can fit.");
4301
4302
// Now check to see if we have enough space to do the allocation. If not then jump to OOL code.
4303
int32_t elementSizeAligned = OMR::align(elementSize, TR::Compiler->om.getObjectAlignmentInBytes());
4304
int32_t alignmentCompensation = (elementSize == elementSizeAligned) ? 0 : elementSizeAligned - 1;
4305
static const uint8_t multiplierToStrideMap[] = {0, 0, 1, 0, 2, 0, 0, 0, 3};
4306
TR_ASSERT_FATAL(elementSize <= 8, "multianewArrayEvaluator - elementSize cannot be greater than 8!");
4307
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, temp1Reg, firstDimLenReg, multiplierToStrideMap[elementSize]);
4308
generateRILInstruction(cg, TR::InstOpCode::AGFI, node, temp1Reg, static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()) + alignmentCompensation);
4309
4310
if (alignmentCompensation != 0)
4311
{
4312
generateRILInstruction(cg, TR::InstOpCode::NILF, node, temp1Reg, -elementSizeAligned);
4313
}
4314
4315
TR::Register *temp2Reg = cg->allocateRegister();
4316
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp2Reg, firstDimLenReg);
4317
generateRILInstruction(cg, TR::InstOpCode::MSGFI, node, temp2Reg, zeroArraySizeAligned);
4318
4319
cursor = generateRRInstruction(cg, TR::InstOpCode::AGR, node, temp2Reg, temp1Reg);
4320
iComment("Calculates (firstDimLen * zeroArraySizeAligned) + (arrayStrideInBytes + arrayHeaderSize)");
4321
4322
generateRXInstruction(cg, TR::InstOpCode::LG, node, targetReg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg));
4323
generateRRInstruction(cg, TR::InstOpCode::AGR, node, temp2Reg, targetReg);
4324
generateRXInstruction(cg, TR::InstOpCode::CLG, node, temp2Reg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg));
4325
4326
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, oolJumpLabel);
4327
iComment("Branch to oolJumpLabel if we don't have enough space for both 1st and 2nd dim.");
4328
4329
// We have enough space, so proceed with the allocation.
4330
generateRXInstruction(cg, TR::InstOpCode::STG, node, temp2Reg, generateS390MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg));
4331
4332
4333
// Init 1st dim array class and size fields.
4334
cursor = generateRXInstruction(cg, use64BitClasses ? TR::InstOpCode::STG : TR::InstOpCode::ST, node, classReg, generateS390MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg));
4335
iComment("Init 1st dim class field.");
4336
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg));
4337
iComment("Init 1st dim size field.");
4338
// temp2 point to end of 1st dim array i.e. start of 2nd dim
4339
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp2Reg, targetReg);
4340
generateRRInstruction(cg, TR::InstOpCode::AGR, node, temp2Reg, temp1Reg);
4341
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
4342
{
4343
generateRIEInstruction(cg, TR::InstOpCode::AGHIK, node, temp1Reg, targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
4344
}
4345
else
4346
{
4347
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp1Reg, targetReg);
4348
generateRILInstruction(cg, TR::InstOpCode::AGFI, node, temp1Reg, static_cast<int32_t>(TR::Compiler->om.contiguousArrayHeaderSizeInBytes()));
4349
}
4350
4351
// Loop start
4352
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
4353
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
4354
iComment("loopLabel: init 2nd dim's class field.");
4355
4356
// Init 2nd dim element's class
4357
cursor = generateRXInstruction(cg, use64BitClasses ? TR::InstOpCode::STG : TR::InstOpCode::ST, node, componentClassReg, generateS390MemoryReference(temp2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg));
4358
iComment("Init 2nd dim class field.");
4359
4360
// Store 2nd dim element into 1st dim array slot, compress temp2 if needed
4361
TR::Register *temp3Reg = cg->allocateRegister();
4362
if (comp->target().is64Bit() && comp->useCompressedPointers())
4363
{
4364
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
4365
generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp3Reg, temp2Reg);
4366
if (shiftAmount != 0)
4367
{
4368
generateRSInstruction(cg, TR::InstOpCode::SRAG, node, temp3Reg, temp3Reg, shiftAmount);
4369
}
4370
generateRXInstruction(cg, TR::InstOpCode::ST, node, temp3Reg, generateS390MemoryReference(temp1Reg, 0, cg));
4371
}
4372
else
4373
{
4374
generateRXInstruction(cg, TR::InstOpCode::STG, node, temp2Reg, generateS390MemoryReference(temp1Reg, 0, cg));
4375
}
4376
4377
// Advance cursors temp1 and temp2. Then branch back or fall through if done.
4378
generateRIInstruction(cg, TR::InstOpCode::AGHI, node, temp2Reg, zeroArraySizeAligned);
4379
generateRIInstruction(cg, TR::InstOpCode::AGHI, node, temp1Reg, elementSize);
4380
4381
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, firstDimLenReg, 1);
4382
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, firstDimLenReg, 0, TR::InstOpCode::COND_BNE, loopLabel, false);
4383
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
4384
4385
TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0,10,cg);
4386
dependencies->addPostCondition(dimReg, TR::RealRegister::AssignAny);
4387
dependencies->addPostCondition(secondDimLenReg, TR::RealRegister::AssignAny);
4388
dependencies->addPostCondition(firstDimLenReg, TR::RealRegister::AssignAny);
4389
dependencies->addPostCondition(targetReg, TR::RealRegister::AssignAny);
4390
dependencies->addPostCondition(dimsPtrReg, TR::RealRegister::AssignAny);
4391
dependencies->addPostCondition(temp1Reg, TR::RealRegister::AssignAny);
4392
dependencies->addPostCondition(classReg, TR::RealRegister::AssignAny);
4393
dependencies->addPostCondition(componentClassReg, TR::RealRegister::AssignAny);
4394
dependencies->addPostCondition(temp2Reg, TR::RealRegister::AssignAny);
4395
dependencies->addPostCondition(temp3Reg, TR::RealRegister::AssignAny);
4396
4397
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolJumpLabel);
4398
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolFailLabel);
4399
4400
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
4401
4402
TR::Register *targetRegisterFinal = cg->allocateCollectedReferenceRegister();
4403
generateRRInstruction(cg, TR::InstOpCode::LGR, node, targetRegisterFinal, targetReg);
4404
4405
// Generate the OOL code before final bookkeeping.
4406
TR_S390OutOfLineCodeSection *outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(oolFailLabel, cFlowRegionEnd, cg);
4407
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
4408
outlinedSlowPath->swapInstructionListsWithCompilation();
4409
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolFailLabel);
4410
4411
TR::ILOpCodes opCode = node->getOpCodeValue();
4412
TR::Node::recreate(node, TR::acall);
4413
TR::Register *targetReg2 = TR::TreeEvaluator::performCall(node, false, cg);
4414
TR::Node::recreate(node, opCode);
4415
4416
generateRRInstruction(cg, TR::InstOpCode::LGR, node, targetReg, targetReg2);
4417
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
4418
outlinedSlowPath->swapInstructionListsWithCompilation();
4419
4420
// Note: We don't decrement the ref count node's children here (i.e. cg->decReferenceCount(node->getFirstChild())) because it is done by the performCall in the OOL code above.
4421
// Doing so here would end up double decrementing the children nodes' ref count.
4422
4423
cg->stopUsingRegister(targetReg);
4424
cg->stopUsingRegister(firstDimLenReg);
4425
cg->stopUsingRegister(secondDimLenReg);
4426
cg->stopUsingRegister(temp1Reg);
4427
cg->stopUsingRegister(temp2Reg);
4428
cg->stopUsingRegister(temp3Reg);
4429
cg->stopUsingRegister(componentClassReg);
4430
4431
node->setRegister(targetRegisterFinal);
4432
return targetRegisterFinal;
4433
#undef iComment
4434
}
4435
4436
/**
 * Evaluates an arraylength node.
 *
 * The contiguous size field of the array object is loaded first; when that field is
 * zero, the length is taken from the discontiguous size field instead.
 *
 * @param node the arraylength node; its first child is the array object
 * @param cg   the code generator
 * @return the register holding the array length (also set on the node)
 */
TR::Register *
J9::Z::TreeEvaluator::arraylengthEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(cg->fe());
   TR::Register *arrayReg = cg->evaluate(node->getFirstChild());
   TR::Register *lengthReg = cg->allocateRegister();

   TR::MemoryReference *contigSizeMR = generateS390MemoryReference(arrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);
   TR::MemoryReference *discontigSizeMR = generateS390MemoryReference(arrayReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg);

   // ICM with mask 0xF loads the contiguous size and sets the condition code, so the
   // "is it zero?" test comes for free.
   generateRSInstruction(cg, TR::InstOpCode::ICM, node, lengthReg, (uint32_t) 0xF, contigSizeMR);

   if (!cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
      {
      // Before z196: branch to an out-of-line sequence that reloads the length from
      // the discontiguous size field, then returns to the mainline.
      TR::LabelSymbol * oolStartLabel = generateLabelSymbol(cg);
      TR::LabelSymbol * oolReturnLabel = generateLabelSymbol(cg);

      // Taken when the contiguous array size is zero
      TR::Instruction * branchToOOL = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, oolStartLabel);

      TR_S390OutOfLineCodeSection *discontigOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(oolStartLabel,oolReturnLabel,cg);
      cg->getS390OutOfLineCodeSectionList().push_front(discontigOOL);
      discontigOOL->swapInstructionListsWithCompilation();

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolStartLabel);

      if (cg->getDebug())
         {
         cg->getDebug()->addInstructionComment(branchToOOL, "Start of OOL arraylength sequence");
         }

      // The contiguous size was zero, so the real length is the discontiguous one
      generateRXInstruction(cg, TR::InstOpCode::L, node, lengthReg, discontigSizeMR);

      TR::Instruction* returnBranch = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolReturnLabel);

      if (cg->getDebug())
         {
         cg->getDebug()->addInstructionComment(returnBranch, "End of OOL arraylength sequence");
         }

      discontigOOL->swapInstructionListsWithCompilation();

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolReturnLabel);
      }
   else
      {
      // z196 and newer: a conditional load replaces the branch. The discontiguous
      // size is loaded only when the contiguous size turned out to be zero.
      generateRSInstruction(cg, TR::InstOpCode::LOC, node, lengthReg, 0x8, discontigSizeMR);
      }

   cg->decReferenceCount(node->getFirstChild());
   node->setRegister(lengthReg);
   return lengthReg;
   }
4492
4493
4494
///////////////////////////////////////////////////////////////////////////////////////
4495
// DIVCHKEvaluator - Divide by zero check. child 1 is the divide. Symbolref indicates
4496
// failure action/destination
4497
///////////////////////////////////////////////////////////////////////////////////////
4498
TR::Register *
4499
J9::Z::TreeEvaluator::DIVCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4500
{
4501
TR::Compilation *comp = cg->comp();
4502
TR::Node * secondChild = node->getFirstChild()->getSecondChild();
4503
TR::DataType dtype = secondChild->getType();
4504
bool constDivisor = secondChild->getOpCode().isLoadConst();
4505
TR::Snippet * snippet;
4506
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg);
4507
TR::Instruction * cursor = NULL; // Point to instruction that will assign targetReg
4508
TR::MemoryReference * divisorMr = NULL;
4509
4510
bool divisorIsFieldAccess = false;
4511
bool willUseIndexAndBaseReg = false;
4512
if (secondChild->getNumChildren() != 0 &&
4513
secondChild->getOpCode().isMemoryReference() &&
4514
secondChild->getReferenceCount() == 1 &&
4515
secondChild->getRegister() == NULL)
4516
{
4517
divisorIsFieldAccess = (secondChild->getFirstChild()->getOpCodeValue() != TR::aladd &&
4518
secondChild->getFirstChild()->getOpCodeValue() != TR::aiadd);
4519
// Defect 151061
4520
// The following comes from com/ibm/oti/vm/BootstrapClassLoader.addPackage
4521
// in hello world with a compressed pointers build
4522
//
4523
// [0x0000020007522994] ( 0) DIVCHK #11[0x000002000752293c] Method[jitThrowArithmeticException]
4524
// [0x0000020007522904] ( 2) irem <flags:"0x8000" (simpleDivCheck )/>
4525
// [0x000002000752262c] ( 1) iand <flags:"0x1100" (X>=0 cannotOverflow )/>
4526
// ( 3) ==>icall at [0x00000200075223f8] (in GPR_0049) <flags:"0x30" (arithmeticPreference invalid8BitGlobalRegister)/>
4527
// [0x00000200075225f4] ( 1) iconst 0x7fffffff <flags:"0x104" (X!=0 X>=0 )/>
4528
// [0x00000200075228cc] ( 1) iiload #251[0x000002000745c940]+12 Shadow[<array-size>] <flags:"0x1100" (X>=0 cannotOverflow )/>
4529
// [0x00000200074665d0] ( 1) l2a
4530
// [0x000002000745c908] ( 1) lshl <flags:"0x800" (compressionSequence )/>
4531
// ( 2) ==>iu2l at [0x000002000745c8d0] (in GPR_0072) <flags:"0x4" (X!=0 )/>
4532
// [0x000002000745c860] ( 2) iconst 1
4533
//
4534
// When generating a memref, because of the shift=1, the memref will use the same register
4535
// for the base and index register in order to avoid generating a shift instruction
4536
// But CLGHSI cannot take a memref which uses the index reg
4537
4538
willUseIndexAndBaseReg = secondChild->getFirstChild() != NULL &&
4539
secondChild->getFirstChild()->getOpCodeValue() == TR::l2a &&
4540
secondChild->getFirstChild()->getFirstChild() != NULL &&
4541
secondChild->getFirstChild()->getFirstChild()->chkCompressionSequence() &&
4542
TR::Compiler->om.compressedReferenceShiftOffset() == 1;
4543
}
4544
4545
bool disableS390CompareAndTrap = comp->getOption(TR_DisableTraps);
4546
4547
// Try to compare directly to memory if the child is a field access (load with no index reg)
4548
if (divisorIsFieldAccess &&
4549
!willUseIndexAndBaseReg &&
4550
(node->getFirstChild()->getOpCodeValue() == TR::idiv ||
4551
node->getFirstChild()->getOpCodeValue() == TR::irem))
4552
{
4553
divisorMr = TR::MemoryReference::create(cg, secondChild);
4554
4555
TR::InstOpCode::Mnemonic op = (dtype.isInt64())? TR::InstOpCode::CLGHSI : TR::InstOpCode::CLFHSI;
4556
generateSILInstruction(cg, op, node, divisorMr, 0);
4557
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, snippetLabel);
4558
4559
cursor->setExceptBranchOp();
4560
4561
TR::Snippet * snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
4562
cg->addSnippet(snippet);
4563
}
4564
else if (cg->getHasResumableTrapHandler() && !disableS390CompareAndTrap)
4565
{
4566
TR::InstOpCode::Mnemonic op = (dtype.isInt64())? TR::InstOpCode::CLGIT : TR::InstOpCode::CLFIT;
4567
TR::Register * srcReg = cg->evaluate(secondChild);
4568
TR::S390RIEInstruction* cursor =
4569
new (cg->trHeapMemory()) TR::S390RIEInstruction(op, node, srcReg, (int16_t)0, TR::InstOpCode::COND_BE, cg);
4570
cursor->setExceptBranchOp();
4571
cg->setCanExceptByTrap(true);
4572
cursor->setNeedsGCMap(0x0000FFFF);
4573
if (cg->comp()->target().isZOS())
4574
{
4575
killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
4576
}
4577
}
4578
// z9 legacy instructions
4579
else
4580
{
4581
// Generate explicit div by 0 test and snippet to jump to
4582
if (!constDivisor || (dtype.isInt32() && secondChild->getInt() == 0) || (dtype.isInt64() && secondChild->getLongInt() == 0))
4583
{
4584
// if divisor is a constant of zero, branch to the snippet to throw exception
4585
if (constDivisor)
4586
{
4587
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, snippetLabel);
4588
cursor->setExceptBranchOp();
4589
}
4590
else
4591
{
4592
// if divisor is non-constant, need explicit test for 0
4593
TR::Register * srcReg;
4594
srcReg = cg->evaluate(secondChild);
4595
TR::InstOpCode::Mnemonic op = dtype.isInt64() ? TR::InstOpCode::LTGR : TR::InstOpCode::LTR;
4596
generateRRInstruction(cg, op, node, srcReg, srcReg);
4597
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, snippetLabel);
4598
cursor->setExceptBranchOp();
4599
}
4600
TR::Snippet * snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
4601
cg->addSnippet(snippet);
4602
}
4603
}
4604
4605
if (divisorMr)
4606
{
4607
switch (node->getFirstChild()->getOpCodeValue())
4608
{
4609
case TR::idiv:
4610
iDivRemGenericEvaluator(node->getFirstChild(), cg, true, divisorMr);
4611
break;
4612
case TR::irem:
4613
iDivRemGenericEvaluator(node->getFirstChild(), cg, false, divisorMr);
4614
break;
4615
}
4616
divisorMr->stopUsingMemRefRegister(cg);
4617
}
4618
else
4619
{
4620
cg->evaluate(node->getFirstChild());
4621
}
4622
cg->decReferenceCount(node->getFirstChild());
4623
4624
return NULL;
4625
}
4626
4627
4628
///////////////////////////////////////////////////////////////////////////////////////
4629
// BNDCHKEvaluator - Array bounds check, checks that child 1 > child 2 >= 0
4630
// (child 1 is bound, 2 is index). Symbolref indicates failure action/destination
4631
///////////////////////////////////////////////////////////////////////////////////////
4632
TR::Register *
4633
J9::Z::TreeEvaluator::BNDCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4634
{
4635
TR::Node * firstChild = node->getFirstChild();
4636
TR::Node * secondChild = node->getSecondChild();
4637
TR::LabelSymbol * boundCheckFailureLabel = generateLabelSymbol(cg);
4638
TR::Snippet * snippet;
4639
bool swap;
4640
TR::Instruction* cursor = NULL;
4641
TR::Compilation *comp = cg->comp();
4642
4643
TR::Register * arrayLengthReg = firstChild->getRegister();
4644
TR::Register * arrayIndexReg = secondChild->getRegister();
4645
4646
// skip l2i. Grab the low order register if it's a register pair.
4647
bool skipArrayLengthReg = false;
4648
bool skipArrayIndexReg = false;
4649
if (firstChild->getOpCodeValue() == TR::l2i &&
4650
firstChild->getReferenceCount() == 1 &&
4651
firstChild->getRegister() == NULL &&
4652
firstChild->getFirstChild() &&
4653
firstChild->getFirstChild()->getRegister())
4654
{
4655
arrayLengthReg = firstChild->getFirstChild()->getRegister();
4656
skipArrayLengthReg = true;
4657
if(arrayLengthReg->getRegisterPair())
4658
{
4659
arrayLengthReg = arrayLengthReg->getRegisterPair()->getLowOrder();
4660
}
4661
}
4662
4663
if (secondChild->getOpCodeValue() == TR::l2i &&
4664
secondChild->getReferenceCount() == 1 &&
4665
secondChild->getRegister() == NULL &&
4666
secondChild->getFirstChild() &&
4667
secondChild->getFirstChild()->getRegister())
4668
{
4669
arrayIndexReg = secondChild->getFirstChild()->getRegister();
4670
skipArrayIndexReg = true;
4671
if(arrayIndexReg->getRegisterPair())
4672
{
4673
arrayIndexReg = arrayIndexReg->getRegisterPair()->getLowOrder();
4674
}
4675
}
4676
4677
// use CLRT (RR) if possible
4678
bool useS390CompareAndTrap = !comp->getOption(TR_DisableTraps) && cg->getHasResumableTrapHandler();
4679
4680
if (useS390CompareAndTrap &&
4681
(arrayIndexReg != NULL && arrayLengthReg != NULL))
4682
{
4683
//arrayIndex/arrayLength are max uint32, so 31 bit logical compare even in 64 bit JIT
4684
// The optimizer does not always fold away the BNDCHK if the index is a negative constant.
4685
// Explicit index<0 check is not needed here because negative array index is interpreted
4686
// as a large positive by the CLRT instruction.
4687
4688
// ** Generate a NOP LR R0,R0. The signal handler has to walk backwards to pattern match
4689
// the trap instructions. All trap instructions besides CRT/CLRT are 6-bytes in length.
4690
// Insert 2-byte NOP in front of the 4-byte CLRT to ensure we do not mismatch accidentally.
4691
cursor = new (cg->trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, node, cg);
4692
4693
TR::Instruction* cursor = generateRRFInstruction(cg, TR::InstOpCode::CLRT,
4694
node, arrayIndexReg, arrayLengthReg,
4695
getMaskForBranchCondition(TR::InstOpCode::COND_BNLR), true);
4696
cursor->setExceptBranchOp();
4697
cursor->setNeedsGCMap(0x0000FFFF);
4698
cg->setCanExceptByTrap(true);
4699
4700
if (cg->comp()->target().isZOS()) killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
4701
4702
if (skipArrayLengthReg)
4703
{
4704
cg->decReferenceCount(firstChild->getFirstChild());
4705
}
4706
if (skipArrayIndexReg)
4707
{
4708
cg->decReferenceCount(secondChild->getFirstChild());
4709
}
4710
cg->decReferenceCount(firstChild);
4711
cg->decReferenceCount(secondChild);
4712
4713
return NULL;
4714
}
4715
else
4716
{
4717
// Perform a bound check.
4718
//
4719
// Value propagation or profile-directed optimization may have determined
4720
// that the array bound is a constant, and lowered TR::arraylength into an
4721
// iconst. In this case, make sure that the constant is the second child.
4722
//
4723
// Only type of scenario where first/second children are const is if we need it to force a branch
4724
// otherwise simplifier should have cleaned it up
4725
4726
/**
4727
* Both Length and Index are constants
4728
*/
4729
if (firstChild->getOpCode().isLoadConst() && secondChild->getOpCode().isLoadConst())
4730
{
4731
int64_t secondChildConstValue = secondChild->get64bitIntegralValue();
4732
if (firstChild->getInt() > secondChildConstValue && secondChildConstValue >= 0)
4733
{
4734
//nothing to do since inside limit
4735
}
4736
else
4737
{
4738
// We must evaluate the non-const child if it has not been evaluated
4739
//
4740
if (!firstChild->getOpCode().isLoadConst() && firstChild->getRegister() == NULL)
4741
{
4742
cg->evaluate(firstChild);
4743
}
4744
4745
// Check will always fail, just jump to failure snippet
4746
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, boundCheckFailureLabel);
4747
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, boundCheckFailureLabel, node->getSymbolReference()));
4748
cursor->setExceptBranchOp();
4749
}
4750
cg->decReferenceCount(firstChild);
4751
cg->decReferenceCount(secondChild);
4752
return NULL;
4753
}
4754
4755
/**
4756
* One of Length and Index is a constant
4757
*/
4758
bool isForward = false;
4759
TR::Node * constNode = NULL;
4760
TR::Node * nonConstNode = NULL;
4761
bool oneConst = false; // exactly one child is a constant
4762
TR::Node * skippedL2iNode = NULL;
4763
4764
if (firstChild->getOpCode().isLoadConst() || secondChild->getOpCode().isLoadConst())
4765
{
4766
oneConst = true;
4767
if (firstChild->getOpCode().isLoadConst())
4768
{
4769
isForward = false;
4770
constNode = firstChild;
4771
nonConstNode = secondChild;
4772
}
4773
else
4774
{
4775
isForward = true;
4776
constNode = secondChild;
4777
nonConstNode = firstChild;
4778
}
4779
4780
skippedL2iNode = NULL;
4781
if (nonConstNode->getOpCodeValue() == TR::l2i &&
4782
nonConstNode->getRegister() == NULL &&
4783
nonConstNode->getReferenceCount() ==1)
4784
{
4785
skippedL2iNode = nonConstNode;
4786
nonConstNode = nonConstNode->getFirstChild();
4787
}
4788
}
4789
4790
int64_t value = -1;
4791
int32_t constValue = -1;
4792
if (constNode)
4793
{
4794
value = getIntegralValue(constNode);
4795
constValue = constNode->getInt();
4796
}
4797
4798
// always fail the BNDCHK if the index is negative.
4799
bool alwaysFailBNDCHK = oneConst && (constValue < 0) && isForward;
4800
4801
if (oneConst &&
4802
constValue <= MAX_UNSIGNED_IMMEDIATE_VAL && // CLFIT takes 16bit unsigned immediate
4803
(constValue & 0xFF00) != 0xB900 && // signal handler might get confused with CLR (opcode 0xB973), etc
4804
useS390CompareAndTrap)
4805
{
4806
// Any constValue <= MAX_UNSIGNED_IMMEDIATE_VAL is taken here.
4807
// The length is assumed to be non-negative and is within [0, max_uint32] range.
4808
// The index can be negative or [0, max_uint32]. An unconditional branch is generated if it's negative.
4809
// No need to use unconditional BRC because it requires a proceeding NO-OP instruction for proper signal
4810
// handling. And NOP+BRC is of the same length as CLFIT.
4811
TR::Register * testRegister = cg->evaluate(nonConstNode);
4812
TR::InstOpCode::S390BranchCondition bc = alwaysFailBNDCHK ? TR::InstOpCode::COND_BRC :
4813
isForward ? TR::InstOpCode::COND_BNH :
4814
TR::InstOpCode::COND_BNL ;
4815
4816
TR::Instruction* cursor = generateRIEInstruction(cg, TR::InstOpCode::CLFIT,
4817
node, testRegister, (int16_t)constValue, bc);
4818
4819
4820
cursor->setExceptBranchOp();
4821
cg->setCanExceptByTrap(true);
4822
cursor->setNeedsGCMap(0x0000FFFF);
4823
4824
if (cg->comp()->target().isZOS())
4825
{
4826
killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
4827
}
4828
4829
if (skippedL2iNode)
4830
{
4831
cg->decReferenceCount(skippedL2iNode);
4832
}
4833
cg->decReferenceCount(constNode);
4834
cg->decReferenceCount(nonConstNode);
4835
4836
return NULL;
4837
}
4838
else if (useS390CompareAndTrap &&
4839
((firstChild->getOpCode().isLoadVar() &&
4840
firstChild->getReferenceCount() == 1 &&
4841
firstChild->getRegister() == NULL) ||
4842
(secondChild->getOpCode().isLoadVar() &&
4843
secondChild->getReferenceCount() == 1 &&
4844
secondChild->getRegister() == NULL)) &&
4845
cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12))
4846
{
4847
// Assume 1st child is the memory operand.
4848
TR::Node * memChild = firstChild;
4849
TR::Node * regChild = secondChild;
4850
TR::InstOpCode::S390BranchCondition compareCondition = TR::InstOpCode::COND_BNL;
4851
4852
// Check if first child is really the memory operand
4853
if (!(firstChild->getOpCode().isLoadVar() &&
4854
firstChild->getReferenceCount() == 1 &&
4855
firstChild->getRegister() == NULL))
4856
{
4857
// Nope... the second child is!
4858
memChild = secondChild;
4859
regChild = firstChild;
4860
compareCondition = TR::InstOpCode::COND_BNH;
4861
}
4862
4863
// Ensure register operand is evaluated into register
4864
if (regChild->getRegister() == NULL)
4865
cg->evaluate(regChild);
4866
4867
TR::InstOpCode::Mnemonic opCode = (regChild->getDataType()==TR::Int64) ? TR::InstOpCode::CLGT :
4868
TR::InstOpCode::CLT;
4869
cursor = generateRSInstruction(cg, opCode,
4870
node, regChild->getRegister(),
4871
getMaskForBranchCondition(compareCondition),
4872
TR::MemoryReference::create(cg, memChild));
4873
cursor->setExceptBranchOp();
4874
cg->setCanExceptByTrap(true);
4875
cursor->setNeedsGCMap(0x0000FFFF);
4876
4877
if (cg->comp()->target().isZOS())
4878
killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
4879
4880
cg->decReferenceCount(memChild);
4881
cg->decReferenceCount(regChild);
4882
4883
return NULL;
4884
}
4885
else if (oneConst)
4886
{
4887
TR::Register * testRegister = cg->evaluate(nonConstNode);
4888
TR::InstOpCode::S390BranchCondition bc = alwaysFailBNDCHK ? TR::InstOpCode::COND_BRC :
4889
isForward ? TR::InstOpCode::COND_BNH :
4890
TR::InstOpCode::COND_BNL;
4891
TR::Instruction* cursor = NULL;
4892
4893
if (alwaysFailBNDCHK)
4894
{
4895
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, bc, node, boundCheckFailureLabel);
4896
}
4897
else
4898
{
4899
cursor = generateS390CompareAndBranchInstruction(cg,
4900
TR::InstOpCode::CL,
4901
node,
4902
testRegister,
4903
constValue,
4904
bc,
4905
boundCheckFailureLabel, false, true);
4906
}
4907
4908
cursor->setExceptBranchOp();
4909
4910
if (skippedL2iNode)
4911
{
4912
cg->decReferenceCount(skippedL2iNode);
4913
}
4914
cg->decReferenceCount(constNode);
4915
cg->decReferenceCount(nonConstNode);
4916
}
4917
4918
4919
// We assume that there is no GRA stuff hanging of this node
4920
TR_ASSERT( node->getNumChildren() < 3,"BNDCHK Eval: We are not expecting a third child on BNDCHK trees");
4921
4922
/**
4923
* Neither Length nor Index is constant
4924
*/
4925
if (!oneConst)
4926
{
4927
// logical compare child1 (bound) and child2 (index).
4928
// Logical because all neg # > any pos # in unsigned form - for check that index > 0.
4929
// if child1 <= child2, branch on not high,
4930
// if the operands are switched, i.e. compare child2 < child1, branch on high
4931
TR_S390BinaryCommutativeAnalyser temp(cg);
4932
temp.genericAnalyser(node, TR::InstOpCode::CLR, TR::InstOpCode::CL, TR::InstOpCode::LR, true);
4933
swap = temp.getReversedOperands();
4934
4935
// There should be no register attached to the BNDCHK node, otherwise
4936
// the register would be kept live longer than it should.
4937
node->unsetRegister();
4938
cg->decReferenceCount(firstChild);
4939
cg->decReferenceCount(secondChild);
4940
4941
// Generate compare code, find out if ops were reversed
4942
// MASK10 - reversed. MASK12 - not reversed.
4943
TR::InstOpCode::Mnemonic brOp = TR::InstOpCode::BRC;
4944
TR::InstOpCode::S390BranchCondition brCond = (swap) ? TR::InstOpCode::COND_BNL : TR::InstOpCode::COND_BNH;
4945
cursor = generateS390BranchInstruction(cg, brOp, brCond, node, boundCheckFailureLabel);
4946
cursor->setExceptBranchOp();
4947
}
4948
4949
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, boundCheckFailureLabel, node->getSymbolReference()));
4950
}
4951
4952
return NULL;
4953
}
4954
4955
4956
4957
///////////////////////////////////////////////////////////////////////////////////////
4958
// ArrayCopyBNDCHKEvaluator - Array bounds check for arraycopy, checks that child 1 >= child 2
4959
///////////////////////////////////////////////////////////////////////////////////////
4960
TR::Register *
4961
J9::Z::TreeEvaluator::ArrayCopyBNDCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
4962
{
4963
// Check that first child >= second child
4964
//
4965
// If the first child is a constant and the second isn't, swap the children.
4966
//
4967
TR::Node * firstChild = node->getFirstChild();
4968
TR::Node * secondChild = node->getSecondChild();
4969
TR::LabelSymbol * boundCheckFailureLabel = generateLabelSymbol(cg);
4970
TR::Instruction * instr = NULL;
4971
bool useCIJ = false;
4972
TR::Compilation *comp = cg->comp();
4973
4974
bool skipL2iArrayTargetLengthReg = false;
4975
bool skipL2iArrayCopyLengthReg = false;
4976
TR::Register * arrayTargetLengthReg = NULL;
4977
TR::Register * arrayCopyLengthReg = NULL;
4978
4979
arrayTargetLengthReg = firstChild->getRegister();
4980
arrayCopyLengthReg = secondChild->getRegister();
4981
4982
if (firstChild->getOpCodeValue() == TR::l2i &&
4983
firstChild->getFirstChild()->getRegister() != NULL &&
4984
firstChild->getReferenceCount() == 1 &&
4985
arrayTargetLengthReg == NULL)
4986
{
4987
skipL2iArrayTargetLengthReg = true;
4988
arrayTargetLengthReg = firstChild->getFirstChild()->getRegister();
4989
}
4990
4991
if (secondChild->getOpCodeValue() == TR::l2i &&
4992
secondChild->getFirstChild()->getRegister() != NULL &&
4993
secondChild->getReferenceCount() == 1 &&
4994
arrayCopyLengthReg == NULL)
4995
{
4996
skipL2iArrayCopyLengthReg = true;
4997
arrayCopyLengthReg = secondChild->getFirstChild()->getRegister();
4998
}
4999
5000
bool disableS390CompareAndTrap = comp->getOption(TR_DisableTraps);
5001
static const char*disableS390CompareAndBranch = feGetEnv("TR_DISABLES390CompareAndBranch");
5002
if (cg->getHasResumableTrapHandler() &&
5003
!disableS390CompareAndTrap &&
5004
arrayTargetLengthReg != NULL &&
5005
arrayCopyLengthReg != NULL )
5006
{
5007
//arrayIndex/arrayLength are max uint32, so 31 bit compare even in 64 bit JIT
5008
5009
// Generate a NOP LR R0,R0. The signal handler has to walk backwards to pattern match
5010
// the trap instructions. All trap instructions besides CRT/CLRT are 6-bytes in length.
5011
// Insert 2-byte NOP in front of the 4-byte CRT to ensure we do not mismatch accidentally.
5012
TR::Instruction *cursor = new (cg->trHeapMemory()) TR::S390NOPInstruction(TR::InstOpCode::NOP, 2, node, cg);
5013
5014
cursor = new (cg->trHeapMemory()) TR::S390RRFInstruction(TR::InstOpCode::CRT, node, arrayCopyLengthReg, arrayTargetLengthReg, getMaskForBranchCondition(TR::InstOpCode::COND_BH), true, cg);
5015
5016
cursor->setExceptBranchOp();
5017
cg->setCanExceptByTrap(true);
5018
cursor->setNeedsGCMap(0x0000FFFF);
5019
if (cg->comp()->target().isZOS()) killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
5020
5021
if (skipL2iArrayTargetLengthReg)
5022
{
5023
cg->decReferenceCount(firstChild->getFirstChild());
5024
}
5025
if (skipL2iArrayCopyLengthReg)
5026
{
5027
cg->decReferenceCount(secondChild->getFirstChild());
5028
}
5029
cg->decReferenceCount(firstChild);
5030
cg->decReferenceCount(secondChild);
5031
5032
return NULL;
5033
}
5034
else
5035
{
5036
if (firstChild->getOpCode().isLoadConst())
5037
{
5038
if (secondChild->getOpCode().isLoadConst())
5039
{
5040
if (firstChild->getInt() < secondChild->getInt())
5041
{
5042
// Check will always fail, just jump to failure snippet
5043
//
5044
instr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, boundCheckFailureLabel);
5045
instr->setExceptBranchOp();
5046
}
5047
else
5048
{
5049
// Check will always succeed, no need for an instruction
5050
//
5051
instr = NULL;
5052
}
5053
cg->decReferenceCount(firstChild);
5054
cg->decReferenceCount(secondChild);
5055
}
5056
else
5057
{
5058
int32_t arrayTargetLengthConst = firstChild->getInt();
5059
5060
// CIT uses 16-bit immediates
5061
if (cg->getHasResumableTrapHandler() &&
5062
arrayTargetLengthConst <= MAX_IMMEDIATE_VAL &&
5063
arrayTargetLengthConst >= MIN_IMMEDIATE_VAL &&
5064
(arrayTargetLengthConst & 0xFF00) != 0xB900 && // signal handler might get confused with CRT (opcode 0xB972), etc
5065
!disableS390CompareAndTrap )
5066
{
5067
if (arrayCopyLengthReg == NULL)
5068
{
5069
arrayCopyLengthReg = cg->evaluate(secondChild);
5070
}
5071
5072
TR::S390RIEInstruction* cursor =
5073
new (cg->trHeapMemory()) TR::S390RIEInstruction(TR::InstOpCode::CIT, node, arrayCopyLengthReg, (int16_t)arrayTargetLengthConst, TR::InstOpCode::COND_BH, cg);
5074
cursor->setExceptBranchOp();
5075
cursor->setNeedsGCMap(0x0000FFFF);
5076
cg->setCanExceptByTrap(true);
5077
if (cg->comp()->target().isZOS()) killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
5078
5079
if (skipL2iArrayCopyLengthReg)
5080
{
5081
cg->decReferenceCount(secondChild->getFirstChild());
5082
}
5083
cg->decReferenceCount(firstChild);
5084
cg->decReferenceCount(secondChild);
5085
5086
return NULL;
5087
}
5088
// check if we can use Compare-and-Branch at least
5089
else if (arrayTargetLengthConst <= MAX_IMMEDIATE_BYTE_VAL &&
5090
arrayTargetLengthConst >= MIN_IMMEDIATE_BYTE_VAL &&
5091
!disableS390CompareAndBranch)
5092
{
5093
useCIJ = true;
5094
if (arrayCopyLengthReg == NULL)
5095
{
5096
arrayCopyLengthReg = cg->evaluate(secondChild);
5097
}
5098
5099
TR::Instruction* cursor =
5100
generateS390CompareAndBranchInstruction(cg,
5101
TR::InstOpCode::C,
5102
node,
5103
arrayCopyLengthReg,
5104
arrayTargetLengthConst,
5105
TR::InstOpCode::COND_BH,
5106
boundCheckFailureLabel,
5107
false,
5108
false,
5109
NULL,
5110
NULL);
5111
cursor->setExceptBranchOp();
5112
5113
if (skipL2iArrayCopyLengthReg)
5114
{
5115
cg->decReferenceCount(secondChild->getFirstChild());
5116
}
5117
cg->decReferenceCount(firstChild);
5118
cg->decReferenceCount(secondChild);
5119
}
5120
// z9 Instructions
5121
else
5122
{
5123
node->swapChildren();
5124
instr = generateS390CompareBranchLabel(node, cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, TR::InstOpCode::COND_BL, boundCheckFailureLabel);
5125
node->swapChildren();
5126
instr->setExceptBranchOp();
5127
}
5128
}
5129
}
5130
else
5131
{
5132
// The first child is not loadConstant
5133
// CIT uses 16-bit immediates
5134
if (secondChild->getOpCode().isLoadConst() &&
5135
cg->getHasResumableTrapHandler() &&
5136
secondChild->getInt() <= MAX_IMMEDIATE_VAL &&
5137
secondChild->getInt() >= MIN_IMMEDIATE_VAL &&
5138
(secondChild->getInt() & 0xFF00) != 0xB900 && // signal handler might get confused with CRT (opcode 0xB972), etc
5139
!disableS390CompareAndTrap )
5140
{
5141
int32_t arrayCopyLengthConst = secondChild->getInt();
5142
if (arrayTargetLengthReg == NULL)
5143
{
5144
arrayTargetLengthReg = cg->evaluate(firstChild);
5145
}
5146
5147
TR::S390RIEInstruction* cursor =
5148
new (cg->trHeapMemory()) TR::S390RIEInstruction(TR::InstOpCode::CIT, node, arrayTargetLengthReg, (int16_t)arrayCopyLengthConst, TR::InstOpCode::COND_BL, cg);
5149
cursor->setExceptBranchOp();
5150
cursor->setNeedsGCMap(0x0000FFFF);
5151
cg->setCanExceptByTrap(true);
5152
if (cg->comp()->target().isZOS()) killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
5153
5154
if (skipL2iArrayTargetLengthReg)
5155
{
5156
cg->decReferenceCount(firstChild->getFirstChild());
5157
}
5158
cg->decReferenceCount(firstChild);
5159
cg->decReferenceCount(secondChild);
5160
5161
return NULL;
5162
}
5163
// check if we can use Compare-and-Branch at least
5164
else if (secondChild->getOpCode().isLoadConst() &&
5165
secondChild->getInt() <= MAX_IMMEDIATE_BYTE_VAL &&
5166
secondChild->getInt() >= MIN_IMMEDIATE_BYTE_VAL &&
5167
!disableS390CompareAndBranch)
5168
{
5169
int32_t arrayCopyLengthConst = secondChild->getInt();
5170
if (arrayTargetLengthReg == NULL)
5171
{
5172
arrayTargetLengthReg = cg->evaluate(firstChild);
5173
}
5174
5175
useCIJ = true;
5176
TR::Instruction* cursor =
5177
generateS390CompareAndBranchInstruction(cg,
5178
TR::InstOpCode::C,
5179
node,
5180
arrayTargetLengthReg,
5181
arrayCopyLengthConst,
5182
TR::InstOpCode::COND_BL,
5183
boundCheckFailureLabel,
5184
false,
5185
false,
5186
NULL,
5187
NULL);
5188
5189
cursor->setExceptBranchOp();
5190
5191
if (skipL2iArrayTargetLengthReg)
5192
{
5193
cg->decReferenceCount(firstChild->getFirstChild());
5194
}
5195
cg->decReferenceCount(firstChild);
5196
cg->decReferenceCount(secondChild);
5197
}
5198
// z9
5199
else
5200
{
5201
instr = generateS390CompareOps(node, cg, TR::InstOpCode::COND_BL, TR::InstOpCode::COND_BH, boundCheckFailureLabel);
5202
5203
instr->setExceptBranchOp();
5204
}
5205
}
5206
5207
if (instr || useCIJ)
5208
{
5209
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, boundCheckFailureLabel, node->getSymbolReference()));
5210
}
5211
}
5212
5213
return NULL;
5214
}
5215
5216
void
5217
J9::Z::TreeEvaluator::generateFillInDataBlockSequenceForUnresolvedField(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister)
5218
{
5219
TR::LabelSymbol *unresolvedLabel = generateLabelSymbol(cg);
5220
TR::LabelSymbol *mergePointLabel = generateLabelSymbol(cg);
5221
TR::SymbolReference *symRef = node->getSymbolReference();
5222
bool isStatic = symRef->getSymbol()->getKind() == TR::Symbol::IsStatic;
5223
5224
TR::Register *offsetReg = cg->allocateRegister();
5225
TR::Register *dataBlockReg = cg->allocateRegister();
5226
5227
generateRILInstruction(cg, TR::InstOpCode::LARL, node, dataBlockReg, dataSnippet);
5228
5229
intptr_t offsetInDataBlock = isStatic ? offsetof(J9JITWatchedStaticFieldData, fieldAddress) : offsetof(J9JITWatchedInstanceFieldData, offset);
5230
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, offsetReg, generateS390MemoryReference(dataBlockReg, offsetInDataBlock, cg));
5231
// If the offset is not -1 then the field is already resolved. No more work is required and we can fall through to end (mergePointLabel).
5232
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, offsetReg, -1, TR::InstOpCode::COND_BE, unresolvedLabel, false, false, NULL, NULL);
5233
5234
// If the offset is -1, then we must call a VM helper routine (indicated by helperLink below) to resolve this field. The OOL code (below) inside unresolvedLabel
5235
// will prepare the registers and generate a directCall to the VM helper routine.
5236
TR_S390OutOfLineCodeSection *outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(unresolvedLabel, mergePointLabel, cg);
5237
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
5238
outlinedSlowPath->swapInstructionListsWithCompilation();
5239
5240
// OOL code start.
5241
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, unresolvedLabel);
5242
5243
if (isStatic)
5244
{
5245
// Fills in J9JITWatchedStaticFieldData.fieldClass.
5246
TR::Register *fieldClassReg;
5247
if (isWrite)
5248
{
5249
fieldClassReg = cg->allocateRegister();
5250
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, fieldClassReg, generateS390MemoryReference(sideEffectRegister, cg->comp()->fej9()->getOffsetOfClassFromJavaLangClassField(), cg));
5251
}
5252
else
5253
{
5254
fieldClassReg = sideEffectRegister;
5255
}
5256
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, fieldClassReg, generateS390MemoryReference(dataBlockReg, offsetof(J9JITWatchedStaticFieldData, fieldClass), cg));
5257
if (isWrite)
5258
{
5259
cg->stopUsingRegister(fieldClassReg);
5260
}
5261
}
5262
5263
// These will be used as argument registers for the direct call to the VM helper.
5264
TR::Register *cpAddressReg = cg->allocateRegister();
5265
TR::Register *cpIndexReg = cg->allocateRegister();
5266
5267
// Populate the argument registers.
5268
TR::ResolvedMethodSymbol *methodSymbol = node->getByteCodeInfo().getCallerIndex() == -1 ? cg->comp()->getMethodSymbol() : cg->comp()->getInlinedResolvedMethodSymbol(node->getByteCodeInfo().getCallerIndex());
5269
generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, cpAddressReg, reinterpret_cast<uintptr_t>(methodSymbol->getResolvedMethod()->constantPool()), TR_ConstantPool, NULL, 0, 0);
5270
generateRILInstruction(cg, TR::InstOpCode::LGFI, node, cpIndexReg, symRef->getCPIndex());
5271
5272
TR_RuntimeHelper helperIndex = isWrite? (isStatic ? TR_jitResolveStaticFieldSetterDirect: TR_jitResolveFieldSetterDirect) :
5273
(isStatic ? TR_jitResolveStaticFieldDirect: TR_jitResolveFieldDirect);
5274
J9::Z::HelperLinkage *helperLink = static_cast<J9::Z::HelperLinkage*>(cg->getLinkage(runtimeHelperLinkage(helperIndex)));
5275
5276
5277
// We specify 2 preConditions because we need to provide 2 register arguments.
5278
// We specify 4 postConditions because both of the argument registers need to be specified as
5279
// register dependencies (GPR 1 as a dummy dependency and GPR2 is a return register), and we
5280
// need to specify 2 more register dependencies for Entry Point and Return Address register
5281
// when making a direct call.
5282
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(2, 4, cg);
5283
int numArgs = 0;
5284
5285
// The VM helper routine that we call expects cpAddress to be in GPR1 and cpIndex inside GPR2.
5286
// So we set those dependencies here.
5287
deps->addPreCondition(cpAddressReg, helperLink->getIntegerArgumentRegister(numArgs));
5288
deps->addPostCondition(cpAddressReg, helperLink->getIntegerArgumentRegister(numArgs));
5289
numArgs++;
5290
5291
// Add pre and post condition because GPR2 is an argument register as well as return register.
5292
deps->addPreCondition(cpIndexReg, helperLink->getIntegerArgumentRegister(numArgs));
5293
deps->addPostCondition(cpIndexReg, helperLink->getIntegerReturnRegister()); // cpIndexReg (i.e. GPR2) will also hold the return value of the helper routine call.
5294
5295
// These two registers are used for Return Address and Entry Point registers. These dependencies are required when generating directCalls on Z.
5296
TR::Register *scratchReg1 = cg->allocateRegister();
5297
TR::Register *scratchReg2 = cg->allocateRegister();
5298
deps->addPostCondition(scratchReg1, cg->getEntryPointRegister());
5299
deps->addPostCondition(scratchReg2, cg->getReturnAddressRegister());
5300
5301
// Now make the call. Return value of the call is in GPR2 (cpIndexReg).
5302
TR::Instruction *call = generateDirectCall(cg, node, false /*myself*/, cg->symRefTab()->findOrCreateRuntimeHelper(helperIndex), deps);
5303
call->setNeedsGCMap(0x0000FFFF);
5304
call->setDependencyConditions(deps);
5305
5306
// For instance fields, the offset (i.e. result value) returned by the vmhelper includes the header size.
5307
// We subtract the header size from the return value here to get the actual offset.
5308
if (!isStatic)
5309
{
5310
generateRILInstruction(cg, TR::InstOpCode::getSubtractLogicalImmOpCode(), node, cpIndexReg, static_cast<uint32_t>(TR::Compiler->om.objectHeaderSizeInBytes()));
5311
}
5312
5313
// Store the field value into the data snippet to resolve it.
5314
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, cpIndexReg, generateS390MemoryReference(dataBlockReg, offsetInDataBlock, cg));
5315
5316
// End of OOL code. Branch back to mainline.
5317
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergePointLabel);
5318
outlinedSlowPath->swapInstructionListsWithCompilation();
5319
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, mergePointLabel);
5320
5321
cg->stopUsingRegister(cpIndexReg);
5322
cg->stopUsingRegister(scratchReg1);
5323
cg->stopUsingRegister(scratchReg2);
5324
cg->stopUsingRegister(cpAddressReg);
5325
cg->stopUsingRegister(dataBlockReg);
5326
cg->stopUsingRegister(offsetReg);
5327
}
5328
5329
/*
5330
* This method will prepare the registers and then make a VM Helper call to report that a fieldwatch event has occurred
5331
* in a Java class with field watch enabled.
5332
*
5333
* The possible VM Helpers are:
5334
*
5335
* For indirect nodes (i.e. instance fields):
5336
* jitReportInstanceFieldRead (if node is indirect)
5337
* arg1 pointer to static data block
5338
* arg2 object being read
5339
*
5340
* jitReportInstanceFieldWrite (if node is indirect)
5341
* arg1 pointer to static data block
5342
* arg2 object being written to (represented by sideEffectRegister)
5343
* arg3 pointer to value being written
5344
*
5345
* For direct nodes (i.e. static fields):
5346
* jitReportStaticFieldRead (for direct/static nodes)
5347
* arg1 pointer to static data block
5348
*
5349
* jitReportStaticFieldWrite
5350
* arg1 pointer to static data block
5351
* arg2 pointer to value being written
5352
*/
5353
void generateReportFieldAccessOutlinedInstructions(TR::Node *node, TR::LabelSymbol *fieldReportLabel, TR::LabelSymbol *mergePointLabel, TR::Snippet *dataSnippet, bool isWrite, TR::CodeGenerator *cg, TR::Register *sideEffectRegister, TR::Register *valueReg)
5354
{
5355
bool isInstanceField = node->getSymbolReference()->getSymbol()->getKind() != TR::Symbol::IsStatic;
5356
// Figure out the VM Helper we need to call.
5357
TR_RuntimeHelper helperIndex = isWrite ? (isInstanceField ? TR_jitReportInstanceFieldWrite: TR_jitReportStaticFieldWrite):
5358
(isInstanceField ? TR_jitReportInstanceFieldRead: TR_jitReportStaticFieldRead);
5359
5360
// Figure out the number of dependencies needed to make the VM Helper call.
5361
// numPreConditions is equal to the number of arguments required by the VM Helper.
5362
uint8_t numPreConditions = 1; // All helpers need at least one parameter.
5363
if (helperIndex == TR_jitReportInstanceFieldWrite)
5364
{
5365
numPreConditions = 3;
5366
}
5367
else if (helperIndex == TR_jitReportInstanceFieldRead || helperIndex == TR_jitReportStaticFieldWrite)
5368
{
5369
numPreConditions = 2;
5370
}
5371
// Note: All preConditions need to be added as post dependencies (dummy dependencies). We also need to specify 2 more
5372
// post dependencies for Return Address register and Entry Point register.
5373
TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(numPreConditions, numPreConditions + 2, cg);
5374
J9::Z::HelperLinkage *helperLink = static_cast<J9::Z::HelperLinkage*>(cg->getLinkage(runtimeHelperLinkage(helperIndex)));
5375
int numArgs = 0;
5376
5377
// Initialize OOL path and generate label that marks beginning of the OOL code.
5378
TR_S390OutOfLineCodeSection *outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(fieldReportLabel, mergePointLabel, cg);
5379
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
5380
outlinedSlowPath->swapInstructionListsWithCompilation();
5381
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fieldReportLabel);
5382
5383
// Populate the first argument needed by the VM Helper (address to the data snippet), and set the dependencies.
5384
TR::Register *dataBlockReg = cg->allocateRegister();
5385
generateRILInstruction(cg, TR::InstOpCode::LARL, node, dataBlockReg, dataSnippet);
5386
dependencies->addPreCondition(dataBlockReg, helperLink->getIntegerArgumentRegister(numArgs));
5387
dependencies->addPostCondition(dataBlockReg, helperLink->getIntegerArgumentRegister(numArgs));
5388
dataBlockReg->setPlaceholderReg();
5389
numArgs++;
5390
5391
// Populate the next argument if needed.
5392
TR::Register *objectReg = NULL;
5393
if (isInstanceField)
5394
{
5395
dependencies->addPreCondition(sideEffectRegister, helperLink->getIntegerArgumentRegister(numArgs));
5396
dependencies->addPostCondition(sideEffectRegister, helperLink->getIntegerArgumentRegister(numArgs));
5397
sideEffectRegister->setPlaceholderReg();
5398
numArgs++;
5399
}
5400
5401
// Populate the final argument if needed.
5402
// Note: In the event that we have to write to a value, the VM helper routine expects that a pointer to the value being written to
5403
// is passed in as a parameter. So we must store the value into memory and then load the address back into a register in order
5404
// to pass the address of that value as an argument. We prepare the register below.
5405
if (isWrite)
5406
{
5407
TR::Node *valueNode = node->getFirstChild();
5408
if (isInstanceField)
5409
{
5410
// Pass in valueNode so it can be set to the correct node.
5411
TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, valueNode, false);
5412
}
5413
5414
// First load the actual value into the register.
5415
TR::Register *valueReferenceReg = valueReg;
5416
5417
TR::DataType nodeType = valueNode->getDataType();
5418
TR::SymbolReference *sr = cg->allocateLocalTemp(nodeType);
5419
TR::MemoryReference *valueMR = generateS390MemoryReference(valueNode, sr, cg);
5420
if (valueReferenceReg->getKind() == TR_GPR)
5421
{
5422
// Use STG if the dataType is an uncompressed TR::Address or TR::Int64. ST otherwise.
5423
auto mnemonic = TR::DataType::getSize(nodeType) == 8 ? TR::InstOpCode::STG : TR::InstOpCode::ST;
5424
// Now store the value onto the stack.
5425
generateRXInstruction(cg, mnemonic, node, valueReferenceReg, valueMR);
5426
}
5427
else if (valueReferenceReg->getKind() == TR_FPR)
5428
{
5429
auto mnemonic = nodeType == TR::Float ? TR::InstOpCode::STE : TR::InstOpCode::STD;
5430
// Now store the value onto the stack.
5431
generateRXInstruction(cg, mnemonic, node, valueReferenceReg, valueMR);
5432
}
5433
else
5434
{
5435
TR_ASSERT_FATAL(false, "Unsupported register kind (%d) for fieldwatch.", valueReferenceReg->getKind());
5436
}
5437
valueReferenceReg = cg->allocateRegister();
5438
5439
// Now load the memory location back into the register so that it can be used
5440
// as an argument register for the VM helper call.
5441
TR::MemoryReference *tempMR = generateS390MemoryReference(*valueMR, 0, cg);
5442
generateRXInstruction(cg, TR::InstOpCode::LA, node, valueReferenceReg, tempMR);
5443
5444
dependencies->addPreCondition(valueReferenceReg, helperLink->getIntegerArgumentRegister(numArgs));
5445
dependencies->addPostCondition(valueReferenceReg, helperLink->getIntegerArgumentRegister(numArgs));
5446
valueReferenceReg->setPlaceholderReg();
5447
5448
cg->stopUsingRegister(valueReferenceReg);
5449
}
5450
5451
// These registers will hold Entry Point and Return Address registers, which are required when generating a directCall.
5452
TR::Register *scratch1 = cg->allocateRegister();
5453
TR::Register *scratch2 = cg->allocateRegister();
5454
dependencies->addPostCondition(scratch1, cg->getEntryPointRegister());
5455
dependencies->addPostCondition(scratch2, cg->getReturnAddressRegister());
5456
5457
// Now generate the call to VM Helper to report the fieldwatch.
5458
TR::Instruction *call = generateDirectCall(cg, node, false /*myself*/, cg->symRefTab()->findOrCreateRuntimeHelper(helperIndex), dependencies);
5459
call->setNeedsGCMap(0x0000FFFF);
5460
call->setDependencyConditions(dependencies);
5461
5462
// After returning from the VM Helper, branch back to mainline code.
5463
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergePointLabel);
5464
// End of OOL.
5465
outlinedSlowPath->swapInstructionListsWithCompilation();
5466
5467
cg->stopUsingRegister(scratch1);
5468
cg->stopUsingRegister(scratch2);
5469
5470
cg->stopUsingRegister(dataBlockReg);
5471
}
5472
5473
void
5474
J9::Z::TreeEvaluator::generateTestAndReportFieldWatchInstructions(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *valueReg, TR::Register *dataSnippetRegister)
5475
{
5476
bool isResolved = !node->getSymbolReference()->isUnresolved();
5477
TR::LabelSymbol *mergePointLabel = generateLabelSymbol(cg);
5478
TR::LabelSymbol *fieldReportLabel = generateLabelSymbol(cg);
5479
5480
TR::Register *fieldClassReg;
5481
TR::Register *fieldClassFlags = cg->allocateRegister();
5482
bool opCodeIsIndirect = node->getOpCode().isIndirect();
5483
5484
if (opCodeIsIndirect)
5485
{
5486
// Load the class of the instance object into fieldClassReg.
5487
fieldClassReg = cg->allocateRegister();
5488
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, fieldClassReg, generateS390MemoryReference(sideEffectRegister, static_cast<int32_t>(TR::Compiler->om.offsetOfObjectVftField()), cg), NULL);
5489
}
5490
else
5491
{
5492
if (isResolved)
5493
{
5494
fieldClassReg = cg->allocateRegister();
5495
J9Class *fieldClass = static_cast<TR::J9WatchedStaticFieldSnippet *>(dataSnippet)->getFieldClass();
5496
if (!(cg->needClassAndMethodPointerRelocations()) && cg->canUseRelativeLongInstructions(reinterpret_cast<int64_t>(fieldClass)))
5497
{
5498
// For non-AOT (JIT and JITServer) compiles we don't need to use sideEffectRegister here as the class information is available to us at compile time.
5499
TR_ASSERT_FATAL(fieldClass != NULL, "A valid J9Class must be provided for direct rdbar/wrtbar opcodes %p\n", node);
5500
generateRILInstruction(cg, TR::InstOpCode::LARL, node, fieldClassReg, static_cast<void *>(fieldClass));
5501
}
5502
else
5503
{
5504
// If this is an AOT compile, we generate instructions to load the fieldClass directly from the snippet because the fieldClass will be invalid
5505
// if we load using the dataSnippet's helper query at compile time.
5506
generateRILInstruction(cg, TR::InstOpCode::LARL, node, fieldClassReg, dataSnippet);
5507
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, fieldClassReg, generateS390MemoryReference(fieldClassReg, offsetof(J9JITWatchedStaticFieldData, fieldClass), cg));
5508
}
5509
}
5510
else
5511
{
5512
if (isWrite)
5513
{
5514
fieldClassReg = cg->allocateRegister();
5515
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, fieldClassReg, generateS390MemoryReference(sideEffectRegister, cg->comp()->fej9()->getOffsetOfClassFromJavaLangClassField(), cg));
5516
}
5517
else
5518
{
5519
fieldClassReg = sideEffectRegister;
5520
}
5521
}
5522
}
5523
// First load the class flags into a register.
5524
generateRXInstruction(cg, TR::InstOpCode::L, node, fieldClassFlags, generateS390MemoryReference(fieldClassReg, cg->comp()->fej9()->getOffsetOfClassFlags(), cg));
5525
// Then test the bit to test with the relevant flag to check if fieldwatch is enabled.
5526
generateRIInstruction(cg, TR::InstOpCode::TMLL, node, fieldClassFlags, J9ClassHasWatchedFields);
5527
// If Condition Code from above test is not 0, then we branch to OOL (instructions) to report the fieldwatch event. Otherwise fall through to mergePointLabel.
5528
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRNZ, node, fieldReportLabel);
5529
5530
// Generate instructions to call a VM Helper and report the fieldwatch event. Also generates an instruction to
5531
// branch back to mainline (mergePointLabel).
5532
generateReportFieldAccessOutlinedInstructions(node, fieldReportLabel, mergePointLabel, dataSnippet, isWrite, cg, sideEffectRegister, valueReg);
5533
5534
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, mergePointLabel);
5535
5536
if (opCodeIsIndirect || isResolved || isWrite)
5537
{
5538
cg->stopUsingRegister(fieldClassReg);
5539
}
5540
5541
cg->stopUsingRegister(fieldClassFlags);
5542
}
5543
5544
/**
 * Evaluates an irdbar node.
 *
 * The first child carries the side effect operand. It is evaluated here, handed to
 * the field watch helper when TR_EnableFieldWatch is set, and released; the load
 * itself is then delegated to iloadEvaluator.
 *
 * @return the register produced by iloadEvaluator
 */
TR::Register *
J9::Z::TreeEvaluator::irdbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *barrierChild = node->getFirstChild();
   TR::Register *barrierReg = cg->evaluate(barrierChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, barrierReg, NULL);

   // The side effect child is released here, before the load evaluator runs.
   cg->decReferenceCount(barrierChild);

   return TR::TreeEvaluator::iloadEvaluator(node, cg);
   }
5560
5561
/**
 * Evaluates an irdbari node.
 *
 * The first child is evaluated for its side effects and handed to the field watch
 * helper when TR_EnableFieldWatch is set; the load itself is delegated to
 * iloadEvaluator.
 *
 * @return the register produced by iloadEvaluator
 */
TR::Register *
J9::Z::TreeEvaluator::irdbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *barrierReg = cg->evaluate(node->getFirstChild());

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, barrierReg, NULL);

   // For indirect rdbar nodes the first child is also used by the load evaluator,
   // which evaluates and decrements it. Its reference count is deliberately left
   // alone here so that it is not decremented twice.
   return TR::TreeEvaluator::iloadEvaluator(node, cg);
   }
5579
5580
/**
 * Evaluates an ardbar node.
 *
 * The first child carries the side effect operand. It is evaluated here, handed to
 * the field watch helper when TR_EnableFieldWatch is set, and released; the load
 * itself is then delegated to aloadEvaluator.
 *
 * @return the register produced by aloadEvaluator
 */
TR::Register *
J9::Z::TreeEvaluator::ardbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *barrierChild = node->getFirstChild();
   TR::Register *barrierReg = cg->evaluate(barrierChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, barrierReg, NULL);

   // The side effect child is released here, before the load evaluator runs.
   cg->decReferenceCount(barrierChild);

   return TR::TreeEvaluator::aloadEvaluator(node, cg);
   }
5596
5597
/**
 * Evaluates an ardbari node.
 *
 * The first child is evaluated for its side effects and handed to the field watch
 * helper when TR_EnableFieldWatch is set. When no read barrier is configured the
 * load is delegated to aloadEvaluator. Otherwise the reference is loaded here,
 * either with a guarded-storage load instruction when the CPU supports that
 * feature or through the software read barrier sequence.
 *
 * @return the register holding the loaded reference
 */
TR::Register *
J9::Z::TreeEvaluator::ardbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *barrierReg = cg->evaluate(node->getFirstChild());

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, barrierReg, NULL);

   // Note: for indirect rdbar nodes the first child is also used by the load
   // evaluator, which evaluates and decrements it. Its reference count is
   // deliberately not decremented in this function, to avoid a double decrement.

   if (TR::Compiler->om.readBarrierType() == gc_modron_readbar_none)
      {
      // No read barrier configured: a plain address load is all that is needed.
      return TR::TreeEvaluator::aloadEvaluator(node, cg);
      }

   bool dynLitPoolLoad = false;
   TR::Register *loadedRefReg = TR::TreeEvaluator::checkAndAllocateReferenceRegister(node, cg, dynLitPoolLoad);

   // MemRef can generate BRCL to unresolved data snippet if needed.
   TR::MemoryReference *sourceMR = TR::MemoryReference::create(cg, node);

   if (!cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_S390_GUARDED_STORAGE))
      {
      TR::TreeEvaluator::generateSoftwareReadBarrier(node, cg, loadedRefReg, sourceMR);
      }
   else
      {
      TR::TreeEvaluator::checkAndSetMemRefDataSnippetRelocationType(node, cg, sourceMR);

      // LLGFSG is used with compressed pointers, LGG otherwise.
      TR::InstOpCode::Mnemonic guardedLoadOp = TR::InstOpCode::LGG;
      if (cg->comp()->useCompressedPointers())
         guardedLoadOp = TR::InstOpCode::LLGFSG;

      generateRXInstruction(cg, guardedLoadOp, node, loadedRefReg, sourceMR);
      }

   node->setRegister(loadedRefReg);
   return loadedRefReg;
   }
5639
5640
/**
 * \brief Evaluates an indirect float write barrier (fwrtbari) node.
 *
 * The value child (second) and the side-effect child (third) are evaluated
 * first so the field watch helper can be invoked when TR_EnableFieldWatch is
 * set. The store itself, and the evaluation of the remaining children, is
 * delegated to fstoreEvaluator.
 *
 * \param node the fwrtbari node being evaluated
 * \param cg   the code generator
 * \return whatever fstoreEvaluator returns for \p node
 */
TR::Register *
J9::Z::TreeEvaluator::fwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // The value is evaluated ahead of the side-effect child; keep this order.
   TR::Register *storedValueReg = cg->evaluate(node->getSecondChild());

   TR::Node *watchedChild = node->getThirdChild();
   TR::Register *watchedReg = cg->evaluate(watchedChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, watchedReg, storedValueReg);

   // Only the side-effect child is released here. The value child is also used
   // by the store evaluator, which evaluates+decrements it; releasing it here
   // too would double decrement it.
   cg->decReferenceCount(watchedChild);

   return TR::TreeEvaluator::fstoreEvaluator(node, cg);
   }
5661
5662
/**
 * \brief Evaluates a direct float write barrier (fwrtbar) node.
 *
 * The value child (first) and the side-effect child (second) are evaluated
 * first so the field watch helper can be invoked when TR_EnableFieldWatch is
 * set. The store itself is delegated to fstoreEvaluator.
 *
 * \param node the fwrtbar node being evaluated
 * \param cg   the code generator
 * \return whatever fstoreEvaluator returns for \p node
 */
TR::Register *
J9::Z::TreeEvaluator::fwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // The value is evaluated ahead of the side-effect child; keep this order.
   TR::Register *storedValueReg = cg->evaluate(node->getFirstChild());

   TR::Node *watchedChild = node->getSecondChild();
   TR::Register *watchedReg = cg->evaluate(watchedChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, watchedReg, storedValueReg);

   // Only the side-effect child is released here. The value child is also used
   // by the store evaluator, which evaluates+decrements it; releasing it here
   // too would double decrement it.
   cg->decReferenceCount(watchedChild);

   return TR::TreeEvaluator::fstoreEvaluator(node, cg);
   }
5683
5684
/**
 * \brief Evaluates an indirect double write barrier (dwrtbari) node.
 *
 * The value child (second) and the side-effect child (third) are evaluated
 * first so the field watch helper can be invoked when TR_EnableFieldWatch is
 * set. The store itself, and the evaluation of the remaining children, is
 * delegated to dstoreEvaluator.
 *
 * \param node the dwrtbari node being evaluated
 * \param cg   the code generator
 * \return whatever dstoreEvaluator returns for \p node
 */
TR::Register *
J9::Z::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // The value is evaluated ahead of the side-effect child; keep this order.
   TR::Register *storedValueReg = cg->evaluate(node->getSecondChild());

   TR::Node *watchedChild = node->getThirdChild();
   TR::Register *watchedReg = cg->evaluate(watchedChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, watchedReg, storedValueReg);

   // Only the side-effect child is released here. The value child is also used
   // by the store evaluator, which evaluates+decrements it; releasing it here
   // too would double decrement it.
   cg->decReferenceCount(watchedChild);

   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
5705
5706
/**
 * \brief Evaluates a direct double write barrier (dwrtbar) node.
 *
 * The value child (first) and the side-effect child (second) are evaluated
 * first so the field watch helper can be invoked when TR_EnableFieldWatch is
 * set. The store itself is delegated to dstoreEvaluator.
 *
 * \param node the dwrtbar node being evaluated
 * \param cg   the code generator
 * \return whatever dstoreEvaluator returns for \p node
 */
TR::Register *
J9::Z::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // The value is evaluated ahead of the side-effect child; keep this order.
   TR::Register *storedValueReg = cg->evaluate(node->getFirstChild());

   TR::Node *watchedChild = node->getSecondChild();
   TR::Register *watchedReg = cg->evaluate(watchedChild);

   const bool fieldWatchEnabled = cg->comp()->getOption(TR_EnableFieldWatch);
   if (fieldWatchEnabled)
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, watchedReg, storedValueReg);

   // Only the side-effect child is released here. The value child is also used
   // by the store evaluator, which evaluates+decrements it; releasing it here
   // too would double decrement it.
   cg->decReferenceCount(watchedChild);

   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
5727
5728
/**
 * \brief Evaluates a direct address write barrier (awrtbar) node.
 *
 * Direct and indirect address write barriers share one implementation:
 * awrtbariEvaluator inspects the opcode and picks the children accordingly.
 *
 * \param node the awrtbar node being evaluated
 * \param cg   the code generator
 * \return the result of awrtbariEvaluator for \p node
 */
TR::Register *
J9::Z::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *sharedResult = TR::TreeEvaluator::awrtbariEvaluator(node, cg);
   return sharedResult;
   }
5733
5734
/**
 * \brief Evaluates an address write barrier node (shared by awrtbar and awrtbari).
 *
 * The sequence is:
 *   1. pick the owning-object and source children based on whether the opcode
 *      is indirect;
 *   2. evaluate the children (clobber-evaluating the owning object when a
 *      barrier will actually be emitted);
 *   3. run the field watch helper if enabled and the symbol is not an array
 *      shadow;
 *   4. emit the reference store;
 *   5. hand over to VMwrtbarEvaluator for the GC write barrier;
 *   6. release children and registers.
 *
 * \param node the awrtbar / awrtbari node being evaluated
 * \param cg   the code generator
 * \return always NULL
 */
TR::Register *
J9::Z::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   const bool isIndirectStore = node->getOpCode().isIndirect();

   TR::Node *destOwnerChild = NULL;
   TR::Node *valueChild = NULL;
   bool usingCompressedPointers = false;

   if (isIndirectStore)
      {
      destOwnerChild = node->getChild(2);
      valueChild = node->getSecondChild();

      // Pass in valueChild so it can be set to the correct node. If valueChild
      // is modified, usingCompressedPointers will be true.
      usingCompressedPointers = TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, valueChild, true);
      }
   else
      {
      destOwnerChild = node->getSecondChild();
      valueChild = node->getFirstChild();
      }

   // Classify the configured write barrier type.
   bool doWrtBar = false;
   bool doCrdMrk = false;
   switch (TR::Compiler->om.writeBarrierType())
      {
      case gc_modron_wrtbar_oldcheck:
      case gc_modron_wrtbar_always:
         doWrtBar = true;
         break;
      case gc_modron_wrtbar_cardmark_and_oldcheck:
         doWrtBar = true;
         doCrdMrk = true;
         break;
      case gc_modron_wrtbar_cardmark:
      case gc_modron_wrtbar_cardmark_incremental:
         doCrdMrk = true;
         break;
      default:
         break;
      }
   // Card marking is not done for non-heap object write barriers.
   doCrdMrk = doCrdMrk && !node->isNonHeapObjectWrtBar();

   // The barrier can be skipped when the node (or the write barrier under an
   // ArrayStoreCHK) is flagged with skipWrtBar.
   bool canSkip = node->getOpCode().isWrtBar() && node->skipWrtBar();
   if (!canSkip && node->getOpCodeValue() == TR::ArrayStoreCHK)
      {
      TR::Node *storeUnderCheck = node->getFirstChild();
      canSkip = storeUnderCheck->getOpCode().isWrtBar() && storeUnderCheck->skipWrtBar();
      }

   const bool barrierWillBeEmitted = (doWrtBar || doCrdMrk) && !canSkip;
   TR::Register *destOwnerReg = barrierWillBeEmitted
      ? cg->gprClobberEvaluate(destOwnerChild)
      : cg->evaluate(destOwnerChild);

   TR::Register *valueReg = (canSkip || isIndirectStore)
      ? cg->evaluate(valueChild)
      : allocateWriteBarrierInternalPointerRegister(cg, valueChild);

   // Register actually written to memory. It differs from valueReg only when
   // the compressed form of the reference is stored; usingCompressedPointers
   // can only be set on the indirect path above.
   TR::Register *storeReg = valueReg;
   if (usingCompressedPointers)
      storeReg = cg->evaluate(node->getSecondChild());

   // Handle fieldwatch side effect first if it's enabled.
   if (comp->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())
      {
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, destOwnerReg /* sideEffectRegister */, valueReg /* valueReg */);
      }

   // We need to evaluate all the children first before we generate memory reference
   // since it will screw up the code sequence for patching when we do symbol resolution.
   TR::MemoryReference *storeMR = TR::MemoryReference::create(cg, node);

   TR::InstOpCode::Mnemonic storeOp = TR::InstOpCode::ST;
   if (!usingCompressedPointers)
      storeOp = TR::InstOpCode::getStoreOpCode();

   generateRXInstruction(cg, storeOp, node, storeReg, storeMR);

   // When a new object is stored into an old object, we need to invoke jitWriteBarrierStore
   // helper to update the remembered sets for GC. Helper call is needed only if the object
   // is in old space or is scanned (black). Since the checking involves control flow, we delay
   // the code gen for write barrier since RA cannot handle control flow.
   VMwrtbarEvaluator(node, valueReg, destOwnerReg, valueChild->isNonNull(), cg);

   if (isIndirectStore && comp->useCompressedPointers())
      node->setStoreAlreadyEvaluated(true);

   // Release the children.
   cg->decReferenceCount(valueChild);
   if (!usingCompressedPointers)
      {
      cg->decReferenceCount(destOwnerChild);
      }
   else
      {
      cg->decReferenceCount(node->getSecondChild());
      cg->recursivelyDecReferenceCount(destOwnerChild);
      }

   // Release the registers.
   if (destOwnerReg)
      cg->stopUsingRegister(destOwnerReg);
   cg->stopUsingRegister(valueReg);
   storeMR->stopUsingMemRefRegister(cg);

   return NULL;
   }
5844
5845
/**
 * \brief Applies the compressed-reference shift to \p reg in place.
 *
 * A shift of 1 is emitted as LA reg,0(reg,reg) (base + index of the same
 * register); a shift of 2 or more is emitted as SLLG. A shift of 0 emits
 * nothing.
 *
 * \param node   node to associate the generated instructions with
 * \param cg     the code generator
 * \param reg    register holding the compressed reference; updated in place
 * \param cursor instruction after which the new instructions are generated
 * \return the last instruction generated, or \p cursor if none was needed
 */
static TR::Instruction *
shiftCompressedRefForSpineCheck(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, TR::Instruction *cursor)
   {
   uint32_t cmpRefsShift = TR::Compiler->om.compressedReferenceShift();
   if (cmpRefsShift == 1)
      {
      TR::MemoryReference *cmpRefsShift1MR = generateS390MemoryReference(reg, reg, 0, cg);
      cursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, reg, cmpRefsShift1MR, cursor);
      }
   else if (cmpRefsShift >= 2)
      {
      cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, reg, reg, cmpRefsShift, cursor);
      }
   return cursor;
   }

/**
 * \brief Evaluates a BNDCHKwithSpineCHK node, or a spine check without a
 *        bound check when the opcode is anything else.
 *
 * Main line:
 *   - with a bound check, the index is compared (CL) against the contiguous
 *     array size and control branches out of line when the index is not lower;
 *   - without a bound check, the contiguous array size is loaded (ICM) and
 *     control branches out of line when it is zero;
 *   - the load, store or address computation child is then evaluated for the
 *     contiguous case.
 *
 * Out of line (discontiguous / arraylet path):
 *   - with a bound check, the helper snippet is reached if the contiguous size
 *     is non-zero or the index is not lower than the discontiguous size;
 *   - the arraylet pointer is loaded from the spine, the offset inside the
 *     arraylet is computed, and the element load / primitive store (or the
 *     element address computation) is emitted before branching back.
 *
 * Children: first = load/store/address child, second = base array. With a
 * bound check: third = array length, fourth = index. Without: third = index.
 *
 * \param node the spine check node
 * \param cg   the code generator
 * \return always NULL
 */
TR::Register *
J9::Z::TreeEvaluator::BNDCHKwithSpineCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   bool needsBoundCheck = (node->getOpCodeValue() == TR::BNDCHKwithSpineCHK);
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   const bool is64Bit = comp->target().is64Bit();

   TR::Node *loadOrStoreChild = node->getFirstChild();
   TR::Node *baseArrayChild = node->getSecondChild();
   TR::Node *arrayLengthChild;
   TR::Node *indexChild;

   if (needsBoundCheck)
      {
      arrayLengthChild = node->getChild(2);
      indexChild = node->getChild(3);
      }
   else
      {
      arrayLengthChild = NULL;
      indexChild = node->getChild(2);
      }

   if (comp->getOption(TR_TraceCG))
      traceMsg(comp,"loadOrStoreChild: %p baseArrayChild: %p arrayLengthChild: %p indexChild: %p\n",loadOrStoreChild, baseArrayChild, arrayLengthChild, indexChild);

   // Order of evaluation dictates that the value to be stored needs to be evaluated first.
   if (loadOrStoreChild->getOpCode().isStore() && !loadOrStoreChild->getRegister())
      {
      TR::Node *valueChild = loadOrStoreChild->getSecondChild();
      cg->evaluate(valueChild);
      }

   TR::Register *baseArrayReg = cg->evaluate(baseArrayChild);
   preEvaluateEscapingNodesForSpineCheck(node, cg);

   // Generate the spine check.
   TR::MemoryReference *contiguousArraySizeMR = generateS390MemoryReference(baseArrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);
   TR::MemoryReference *discontiguousArraySizeMR = generateS390MemoryReference(baseArrayReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg);

   TR::Register *loadOrStoreReg = NULL;
   TR_Debug *debugObj = cg->getDebug();

   TR::LabelSymbol *oolStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *oolReturnLabel = generateLabelSymbol(cg);
   TR::Register *indexReg = cg->evaluate(indexChild);
   TR::Register *valueReg = NULL;

   TR::Instruction *branchToOOL = NULL;

   if (needsBoundCheck)
      {
      generateRXInstruction(cg, TR::InstOpCode::CL, node, indexReg, contiguousArraySizeMR);

      // OOL Will actually throw the AIOB if necessary.
      branchToOOL = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNL, node, oolStartLabel);
      if (debugObj)
         debugObj->addInstructionComment(branchToOOL, "Start of OOL BNDCHKwithSpineCHK sequence");
      }
   else
      {
      // Load the Contiguous Array Size and test if it's zero.
      // The loaded value itself is not needed, only the condition code.
      TR::Register *tmpReg = cg->allocateRegister();
      generateRSInstruction(cg, TR::InstOpCode::ICM, node, tmpReg, (uint32_t) 0xF, contiguousArraySizeMR);
      cg->stopUsingRegister(tmpReg);

      // Branch to OOL if contiguous array size is zero.
      branchToOOL = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, oolStartLabel);
      if (debugObj)
         debugObj->addInstructionComment(branchToOOL, "Start of OOL BNDCHKwithSpineCHK sequence");
      }

   // For reference stores, only evaluate the array element address because the store cannot
   // happen here (it must be done via the array store check).
   //
   // For primitive stores, evaluate them now.
   //
   // For loads, evaluate them now.
   //
   TR::Node *actualLoadOrStoreChild = loadOrStoreChild;
   TR::Node *evaluateConversionNode = loadOrStoreChild; // We want to match the top most conversion node and evaluate that.

   // Top-level check whether a decompression sequence is necessary, because the first child
   // may have been created by a PRE temp.
   bool doLoadDecompress = false;
   if ((loadOrStoreChild->getOpCodeValue() == TR::aload || loadOrStoreChild->getOpCodeValue() == TR::aRegLoad) &&
       node->isSpineCheckWithArrayElementChild() && is64Bit && comp->useCompressedPointers())
      {
      doLoadDecompress = true;
      }

   // Walk down through conversions and compression sequences to reach the
   // underlying load/store node.
   while (actualLoadOrStoreChild->getOpCode().isConversion() ||
          ( ( actualLoadOrStoreChild->getOpCode().isAdd() || actualLoadOrStoreChild->getOpCode().isSub() ||
              actualLoadOrStoreChild->getOpCode().isLeftShift() || actualLoadOrStoreChild->getOpCode().isRightShift()) &&
            actualLoadOrStoreChild->containsCompressionSequence()))
      {
      // If we find a compression sequence, then reset the topmost conversion node to the child of the compression sequence.
      // i.e.   lshl
      //           i2l   <--- set evaluateConversionNode to this node
      //
      if (! (actualLoadOrStoreChild->getOpCode().isConversion()))
         {
         evaluateConversionNode = actualLoadOrStoreChild->getFirstChild();
         }
      actualLoadOrStoreChild = actualLoadOrStoreChild->getFirstChild();
      }

   TR::Node *evaluatedNode = NULL;

   if (actualLoadOrStoreChild->getOpCode().isStore())
      {
      if (actualLoadOrStoreChild->getReferenceCount() > 1)
         {
         // Only the element address (first child of the wrtbar) is evaluated here.
         TR_ASSERT(actualLoadOrStoreChild->getOpCode().isWrtBar(), "Opcode must be wrtbar");
         loadOrStoreReg = cg->evaluate(actualLoadOrStoreChild->getFirstChild());
         cg->decReferenceCount(actualLoadOrStoreChild->getFirstChild());
         evaluatedNode = actualLoadOrStoreChild->getFirstChild();
         }
      else
         {
         loadOrStoreReg = cg->evaluate(actualLoadOrStoreChild);
         valueReg = actualLoadOrStoreChild->getSecondChild()->getRegister();
         evaluatedNode = actualLoadOrStoreChild;
         }
      }
   else
      {
      evaluatedNode = evaluateConversionNode;
      loadOrStoreReg = cg->evaluate(evaluateConversionNode);
      }

   if (comp->getOption(TR_TraceCG))
      traceMsg(comp,"Identified actualLoadOrStoreChild: %p and evaluated node: %p\n",actualLoadOrStoreChild, evaluatedNode);

   // The out-of-line path rejoins the main line here.
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolReturnLabel);

   if (loadOrStoreChild != evaluatedNode)
      cg->evaluate(loadOrStoreChild);

   // ---------------------------------------------
   // OOL Sequence to handle arraylet calculations.
   // ---------------------------------------------
   TR_S390OutOfLineCodeSection *outlinedDiscontigPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(oolStartLabel,oolReturnLabel,cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outlinedDiscontigPath);
   outlinedDiscontigPath->swapInstructionListsWithCompilation();
   TR::Instruction *cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolStartLabel);

   // get the correct liveLocals from the OOL entry branch instruction, so the GC maps can be correct in OOL slow path
   cursor->setLiveLocals(branchToOOL->getLiveLocals());

   // Generate BNDCHK code.
   if (needsBoundCheck)
      {
      TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);

      // Check if contiguous arraysize is zero first. If not, throw AIOB
      TR::MemoryReference *contiguousArraySizeMR2 = generateS390MemoryReference(*contiguousArraySizeMR, 0, cg);
      TR::Register *tmpReg = cg->allocateRegister();
      cursor = generateRSInstruction(cg, TR::InstOpCode::ICM, node, tmpReg, (uint32_t) 0xF, contiguousArraySizeMR2, cursor);
      cg->stopUsingRegister(tmpReg);

      // Throw AIOB if the contiguous array size is not zero.
      cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, boundCheckFailureLabel, cursor);
      cursor->setExceptBranchOp();

      // Don't use CompareAndTrap to save the load of discontiguousArraySize into a register
      cursor = generateRXInstruction(cg, TR::InstOpCode::CL, node, indexReg, discontiguousArraySizeMR, cursor);

      cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNL, node, boundCheckFailureLabel, cursor);
      cursor->setExceptBranchOp();
      cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, boundCheckFailureLabel, node->getSymbolReference()));
      }

   // Arraylet calculation.
   TR::DataType dt = loadOrStoreChild->getDataType();
   int32_t elementSize = 0;
   if (dt == TR::Address)
      {
      elementSize = TR::Compiler->om.sizeofReferenceField();
      }
   else
      {
      elementSize = TR::Symbol::convertTypeToSize(dt);
      }

   int32_t spinePointerSize = (is64Bit && !comp->useCompressedPointers()) ? 8 : 4;
   int32_t arrayHeaderSize = TR::Compiler->om.discontiguousArrayHeaderSizeInBytes();

   // Load the arraylet from the spine.
   int32_t spineShift = fej9->getArraySpineShift(elementSize);
   int32_t spinePtrShift = TR::TreeEvaluator::checkNonNegativePowerOfTwo(spinePointerSize);
   int32_t elementShift = TR::TreeEvaluator::checkNonNegativePowerOfTwo(elementSize);

   // tmpReg = byte offset of the arraylet pointer within the spine.
   TR::Register *tmpReg = cg->allocateRegister();
   if (is64Bit)
      {
      TR::InstOpCode::Mnemonic rotateOp = comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
         ? TR::InstOpCode::RISBGN
         : TR::InstOpCode::RISBG;
      cursor = generateRIEInstruction(cg, rotateOp, node, tmpReg, indexReg,(32+spineShift-spinePtrShift), (128+63-spinePtrShift),(64-spineShift+spinePtrShift),cursor);
      }
   else
      {
      cursor = generateRRInstruction(cg, TR::InstOpCode::LR, node, tmpReg, indexReg, cursor);
      cursor = generateRSInstruction(cg, TR::InstOpCode::SRA, node, tmpReg, tmpReg, spineShift, cursor);
      cursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, tmpReg, tmpReg, spinePtrShift, cursor);
      }

   // Load Arraylet pointer from Spine
   // Pointer is compressed on 64-bit CmpRefs
   bool useCompressedPointers = is64Bit && comp->useCompressedPointers();
   TR::MemoryReference *spineMR = generateS390MemoryReference(baseArrayReg, tmpReg, arrayHeaderSize, cg);
   cursor = generateRXInstruction(cg, (useCompressedPointers)?TR::InstOpCode::LLGF:TR::InstOpCode::getLoadOpCode(), node, tmpReg, spineMR, cursor);

   // Shift the compressed arraylet pointer by the compressed pointers shift amount if necessary.
   if (useCompressedPointers)
      {
      cursor = shiftCompressedRefForSpineCheck(node, cg, tmpReg, cursor);
      }

   // Calculate the offset with the arraylet for the index.
   TR::Register *tmpReg2 = cg->allocateRegister();
   TR::MemoryReference *arrayletMR;
   if (is64Bit)
      {
      TR::InstOpCode::Mnemonic rotateOp = comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
         ? TR::InstOpCode::RISBGN
         : TR::InstOpCode::RISBG;
      cursor = generateRIEInstruction(cg, rotateOp, node, tmpReg2, indexReg,(64-spineShift- elementShift), (128+63-elementShift),(elementShift),cursor);
      }
   else
      {
      // NOTE(review): this call neither takes nor updates `cursor`, unlike every
      // other instruction generated in this OOL sequence. Confirm that later
      // cursor-relative instructions still end up after this sequence on 31-bit.
      generateShiftThenKeepSelected31Bit(node, cg, tmpReg2, indexReg, 32-spineShift - elementShift, 31-elementShift, elementShift);
      }

   arrayletMR = generateS390MemoryReference(tmpReg, tmpReg2, 0, cg);
   cg->stopUsingRegister(tmpReg);
   cg->stopUsingRegister(tmpReg2);

   if (!actualLoadOrStoreChild->getOpCode().isStore())
      {
      TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;

      // Only used for a 64-bit element on a 32-bit target (register pair).
      TR::MemoryReference *highArrayletMR = NULL;
      TR::Register *highRegister = NULL;

      // If we're not loading an array shadow then this must be an effective
      // address computation on the array element (for a write barrier).
      if ((!actualLoadOrStoreChild->getOpCode().hasSymbolReference() ||
           !actualLoadOrStoreChild->getSymbolReference()->getSymbol()->isArrayShadowSymbol()) &&
          !node->isSpineCheckWithArrayElementChild())
         {
         op = TR::InstOpCode::LA;
         }
      else
         {
         switch (dt)
            {
            case TR::Int8:
               if (loadOrStoreChild->isZeroExtendedAtSource())
                  op = (is64Bit ? TR::InstOpCode::LLGC : TR::InstOpCode::LLC);
               else
                  op = (is64Bit ? TR::InstOpCode::LGB : TR::InstOpCode::LB);
               break;
            case TR::Int16:
               if (loadOrStoreChild->isZeroExtendedAtSource())
                  op = (is64Bit ? TR::InstOpCode::LLGH : TR::InstOpCode::LLH);
               else
                  op = (is64Bit ? TR::InstOpCode::LGH : TR::InstOpCode::LH);
               break;
            case TR::Int32:
               if (loadOrStoreChild->isZeroExtendedAtSource())
                  op = (is64Bit ? TR::InstOpCode::LLGF : TR::InstOpCode::L);
               else
                  op = (is64Bit ? TR::InstOpCode::LGF : TR::InstOpCode::L);
               break;
            case TR::Int64:
               if (is64Bit)
                  op = TR::InstOpCode::LG;
               else
                  {
                  TR_ASSERT(loadOrStoreReg->getRegisterPair(), "expecting a register pair");

                  // NOTE(review): the low-order register is loaded from offset 0
                  // and the high-order register from offset 4 — confirm this
                  // pairing is intended for this target.
                  op = TR::InstOpCode::L;
                  highArrayletMR = generateS390MemoryReference(*arrayletMR, 4, cg);
                  highRegister = loadOrStoreReg->getHighOrder();
                  loadOrStoreReg = loadOrStoreReg->getLowOrder();
                  }
               break;

            case TR::Float: op = TR::InstOpCode::LE; break;
            case TR::Double: op = TR::InstOpCode::LD; break;

            case TR::Address:
               if (comp->target().is32Bit())
                  op = TR::InstOpCode::L;
               else if (comp->useCompressedPointers())
                  op = TR::InstOpCode::LLGF;
               else
                  op = TR::InstOpCode::LG;
               break;

            default:
               TR_ASSERT(0, "unsupported array element load type");
            }
         }
      cursor = generateRXInstruction(cg, op, node, loadOrStoreReg, arrayletMR, cursor);

      if (doLoadDecompress)
         {
         TR_ASSERT( dt == TR::Address, "Expecting loads with decompression trees to have data type TR::Address");

         // Shift by compressed pointers shift amount if necessary.
         cursor = shiftCompressedRefForSpineCheck(node, cg, loadOrStoreReg, cursor);
         }

      if (highArrayletMR)
         {
         cursor = generateRXInstruction(cg, op, node, highRegister, highArrayletMR, cursor);
         }
      }
   else
      {
      if (dt != TR::Address)
         {
         TR::InstOpCode::Mnemonic op;
         bool needStore = true;

         switch (dt)
            {
            case TR::Int8: op = TR::InstOpCode::STC; break;
            case TR::Int16: op = TR::InstOpCode::STH; break;
            case TR::Int32: op = TR::InstOpCode::ST; break;
            case TR::Int64:
               if (is64Bit)
                  {
                  op = TR::InstOpCode::STG;
                  }
               else
                  {
                  // The register pair is stored as two words; no single store follows.
                  TR_ASSERT(valueReg->getRegisterPair(), "value must be a register pair");
                  cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, valueReg->getLowOrder(), arrayletMR, cursor);
                  cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, valueReg->getHighOrder(), generateS390MemoryReference(*arrayletMR,4,cg), cursor);
                  needStore = false;
                  }
               break;

            case TR::Float: op = TR::InstOpCode::STE; break;
            case TR::Double: op = TR::InstOpCode::STD; break;

            default:
               TR_ASSERT(0, "unsupported array element store type");
               op = TR::InstOpCode::bad;
            }

         if (needStore)
            cursor = generateRXInstruction(cg, op, node, valueReg, arrayletMR, cursor);
         }
      else
         {
         TR_ASSERT(0, "OOL reference stores not supported yet");
         }
      }

   // Return to the main line.
   cursor = generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,oolReturnLabel, cursor);
   if (debugObj)
      debugObj->addInstructionComment(cursor, "End of OOL BNDCHKwithSpineCHK sequence");

   outlinedDiscontigPath->swapInstructionListsWithCompilation();

   cg->decReferenceCount(loadOrStoreChild);
   cg->decReferenceCount(baseArrayChild);
   cg->decReferenceCount(indexChild);
   if (arrayLengthChild)
      cg->recursivelyDecReferenceCount(arrayLengthChild);

   return NULL;
   }
6257
6258
static void
6259
VMarrayStoreCHKEvaluator(
6260
TR::Node * node,
6261
J9::Z::CHelperLinkage *helperLink,
6262
TR::Node *callNode,
6263
TR::Register * srcReg,
6264
TR::Register * owningObjectReg,
6265
TR::Register * t1Reg,
6266
TR::Register * t2Reg,
6267
TR::Register * litPoolBaseReg,
6268
TR::Register * owningObjectRegVal,
6269
TR::Register * srcRegVal,
6270
TR::LabelSymbol * wbLabel,
6271
TR::RegisterDependencyConditions * conditions,
6272
TR::CodeGenerator * cg)
6273
{
6274
TR::LabelSymbol * helperCallLabel = generateLabelSymbol(cg);
6275
TR::LabelSymbol * startOOLLabel = generateLabelSymbol(cg);
6276
TR::LabelSymbol * exitOOLLabel = generateLabelSymbol(cg);
6277
TR::LabelSymbol * exitPointLabel = wbLabel;
6278
TR::Compilation *comp = cg->comp();
6279
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
6280
6281
TR_S390OutOfLineCodeSection *arrayStoreCHKOOL;
6282
TR_Debug * debugObj = cg->getDebug();
6283
6284
TR::InstOpCode::Mnemonic loadOp;
6285
TR::Instruction * cursor;
6286
TR::Instruction * gcPoint;
6287
J9::Z::PrivateLinkage * linkage = static_cast<J9::Z::PrivateLinkage *>(cg->getLinkage());
6288
int bytesOffset;
6289
6290
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, owningObjectRegVal, generateS390MemoryReference(owningObjectReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
6291
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, srcRegVal, generateS390MemoryReference( srcReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
6292
6293
// may need to convert the class offset from t1Reg into a J9Class pointer
6294
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, t1Reg, generateS390MemoryReference(owningObjectRegVal, (int32_t) offsetof(J9ArrayClass, componentType), cg));
6295
6296
// check if obj.class(in t1Reg) == array.componentClass in t2Reg
6297
if (TR::Compiler->om.compressObjectReferences())
6298
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, t1Reg, srcRegVal, TR::InstOpCode::COND_BER, wbLabel, false, false);
6299
else
6300
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, t1Reg, srcRegVal, TR::InstOpCode::COND_BE, wbLabel, false, false);
6301
6302
if (debugObj)
6303
debugObj->addInstructionComment(cursor, "Check if src.type == array.type");
6304
6305
intptr_t objectClass = (intptr_t) fej9->getSystemClassFromClassName("java/lang/Object", 16, true);
6306
/*
6307
* objectClass is used for Object arrays check optimization: when we are storing to Object arrays we can skip all other array store checks
6308
* However, TR_J9SharedCacheVM::getSystemClassFromClassName can return 0 when it's impossible to relocate j9class later for AOT loads
6309
* in that case we don't want to generate the Object arrays check
6310
*/
6311
bool doObjectArrayCheck = objectClass != 0;
6312
6313
if (doObjectArrayCheck && (cg->wantToPatchClassPointer((TR_OpaqueClassBlock*)objectClass, node) || cg->needClassAndMethodPointerRelocations()))
6314
{
6315
if (cg->isLiteralPoolOnDemandOn())
6316
{
6317
TR::S390ConstantDataSnippet * targetsnippet;
6318
if (cg->comp()->target().is64Bit())
6319
{
6320
targetsnippet = cg->findOrCreate8ByteConstant(node, (int64_t)objectClass);
6321
cursor = (TR::S390RILInstruction *) generateRILInstruction(cg, TR::InstOpCode::CLGRL, node, t1Reg, targetsnippet, 0);
6322
}
6323
else
6324
{
6325
targetsnippet = cg->findOrCreate4ByteConstant(node, (int32_t)objectClass);
6326
cursor = (TR::S390RILInstruction *) generateRILInstruction(cg, TR::InstOpCode::CLRL, node, t1Reg, targetsnippet, 0);
6327
}
6328
6329
if(comp->getOption(TR_EnableHCR))
6330
comp->getSnippetsToBePatchedOnClassRedefinition()->push_front(targetsnippet);
6331
if (cg->needClassAndMethodPointerRelocations())
6332
{
6333
targetsnippet->setReloType(TR_ClassPointer);
6334
AOTcgDiag4(comp, "generateRegLitRefInstruction constantDataSnippet=%x symbolReference=%x symbol=%x reloType=%x\n",
6335
targetsnippet, targetsnippet->getSymbolReference(), targetsnippet->getSymbolReference()->getSymbol(), TR_ClassPointer);
6336
}
6337
}
6338
else
6339
{
6340
if (cg->needClassAndMethodPointerRelocations())
6341
{
6342
generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, t2Reg,(uintptr_t) objectClass, TR_ClassPointer, conditions, NULL, NULL);
6343
}
6344
else
6345
{
6346
genLoadAddressConstantInSnippet(cg, node, (intptr_t)objectClass, t2Reg, cursor, conditions, litPoolBaseReg, true);
6347
}
6348
6349
if (TR::Compiler->om.compressObjectReferences())
6350
generateRRInstruction(cg, TR::InstOpCode::CR, node, t1Reg, t2Reg);
6351
else
6352
generateRRInstruction(cg, TR::InstOpCode::getCmpLogicalRegOpCode(), node, t1Reg, t2Reg);
6353
}
6354
}
6355
else if (doObjectArrayCheck)
6356
{
6357
// make sure that t1Reg contains the class offset and not the J9Class pointer
6358
if (cg->comp()->target().is64Bit())
6359
generateS390ImmOp(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, t1Reg, t1Reg, (int64_t) objectClass, conditions, litPoolBaseReg);
6360
else
6361
generateS390ImmOp(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, t1Reg, t1Reg, (int32_t) objectClass, conditions, litPoolBaseReg);
6362
}
6363
6364
// Bringing back tests from outlined keeping only helper call in outlined section
6365
// TODO Attaching helper call predependency to BRASL instruction and combine ICF conditions with post dependency conditions of
6366
// helper call should fix the issue of unnecessary spillings in ICF. Currently bringing the tests back to main line here but
6367
// check performance of both case.
6368
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, wbLabel);
6369
if (debugObj)
6370
debugObj->addInstructionComment(cursor, "Check if array.type is type object, if yes jump to wbLabel");
6371
6372
generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, t1Reg,
6373
generateS390MemoryReference(srcRegVal, offsetof(J9Class, castClassCache), cg));
6374
6375
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, wbLabel);
6376
if (debugObj)
6377
debugObj->addInstructionComment(cursor, "Check if src.class(in t1Reg).castClassCache == array.componentClass");
6378
6379
// Check to see if array-type is a super-class of the src object
6380
if (cg->comp()->target().is64Bit())
6381
{
6382
loadOp = TR::InstOpCode::LLGH;
6383
bytesOffset = 6;
6384
}
6385
else
6386
{
6387
loadOp = TR::InstOpCode::LLH;
6388
bytesOffset = 2;
6389
}
6390
6391
// Get array element depth
6392
cursor = generateRXInstruction(cg, loadOp, node, owningObjectRegVal,
6393
generateS390MemoryReference(t1Reg, offsetof(J9Class, classDepthAndFlags) + bytesOffset, cg));
6394
6395
// Get src depth
6396
cursor = generateRXInstruction(cg, loadOp, node, t2Reg,
6397
generateS390MemoryReference(srcRegVal, offsetof(J9Class, classDepthAndFlags) + bytesOffset, cg));
6398
6399
TR_ASSERT(sizeof(((J9Class*)0)->classDepthAndFlags) == sizeof(uintptr_t),
6400
"VMarrayStoreCHKEvaluator::J9Class->classDepthAndFlags is wrong size\n");
6401
6402
// Check super class values
6403
static_assert(J9AccClassDepthMask == 0xffff, "VMarrayStoreCHKEvaluator::J9AccClassDepthMask should have be 16 bit of ones");
6404
6405
// Compare depths and makes sure depth(src) >= depth(array-type)
6406
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, owningObjectRegVal, t2Reg, TR::InstOpCode::COND_BH, helperCallLabel, false, false);
6407
if (debugObj)
6408
debugObj->addInstructionComment(cursor, "Failure if depth(src) < depth(array-type)");
6409
6410
if (cg->comp()->target().is64Bit())
6411
{
6412
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, owningObjectRegVal, owningObjectRegVal, 3);
6413
}
6414
else
6415
{
6416
generateRSInstruction(cg, TR::InstOpCode::SLL, node, owningObjectRegVal, 2);
6417
}
6418
6419
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, t2Reg,
6420
generateS390MemoryReference(srcRegVal, offsetof(J9Class, superclasses), cg));
6421
6422
generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, t1Reg,
6423
generateS390MemoryReference(t2Reg, owningObjectRegVal, 0, cg));
6424
6425
if (debugObj)
6426
debugObj->addInstructionComment(cursor, "Check if src.type is subclass");
6427
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRNE, node, helperCallLabel);
6428
// FAIL
6429
arrayStoreCHKOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(helperCallLabel,wbLabel,cg);
6430
cg->getS390OutOfLineCodeSectionList().push_front(arrayStoreCHKOOL);
6431
arrayStoreCHKOOL->swapInstructionListsWithCompilation();
6432
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperCallLabel);
6433
TR::Register *dummyResReg = helperLink->buildDirectDispatch(callNode);
6434
if (dummyResReg)
6435
cg->stopUsingRegister(dummyResReg);
6436
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, wbLabel);
6437
arrayStoreCHKOOL->swapInstructionListsWithCompilation();
6438
}
6439
6440
///////////////////////////////////////////////////////////////////////////////////////
6441
// ArrayStoreCHKEvaluator - Array store check. child 1 is object, 2 is array.
6442
// Symbolref indicates failure action/destination
6443
///////////////////////////////////////////////////////////////////////////////////////
6444
TR::Register *
6445
J9::Z::TreeEvaluator::ArrayStoreCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
6446
{
6447
// Note: we take advantages of the register conventions of the helpers by limiting register usages on
6448
// the fast-path (most likely 4 registers; at most, 6 registers)
6449
6450
TR::Compilation * comp = cg->comp();
6451
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
6452
TR::Node * firstChild = node->getFirstChild();
6453
auto gcMode = TR::Compiler->om.writeBarrierType();
6454
// As arguments to ArrayStoreCHKEvaluator helper function is children of first child,
6455
// We need to create a dummy call node for helper call with children containing arguments to helper call.
6456
bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck ||
6457
gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||
6458
gcMode == gc_modron_wrtbar_always);
6459
6460
bool doCrdMrk = ((gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental) && !firstChild->isNonHeapObjectWrtBar());
6461
6462
TR::Node * litPoolBaseChild=NULL;
6463
TR::Node * sourceChild = firstChild->getSecondChild();
6464
TR::Node * classChild = firstChild->getChild(2);
6465
6466
bool nopASC = false;
6467
if (comp->performVirtualGuardNOPing() && node->getArrayStoreClassInNode() &&
6468
!fej9->classHasBeenExtended(node->getArrayStoreClassInNode()))
6469
nopASC = true;
6470
6471
bool usingCompressedPointers = false;
6472
if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())
6473
{
6474
usingCompressedPointers = true;
6475
while (sourceChild->getNumChildren() > 0
6476
&& sourceChild->getOpCodeValue() != TR::a2l)
6477
{
6478
sourceChild = sourceChild->getFirstChild();
6479
}
6480
if (sourceChild->getOpCodeValue() == TR::a2l)
6481
{
6482
sourceChild = sourceChild->getFirstChild();
6483
}
6484
// artificially bump up the refCount on the value so
6485
// that different registers are allocated for the actual
6486
// and compressed values
6487
//
6488
sourceChild->incReferenceCount();
6489
}
6490
TR::Node * memRefChild = firstChild->getFirstChild();
6491
6492
TR::Register * srcReg, * classReg, * txReg, * tyReg, * baseReg, * indexReg, *litPoolBaseReg=NULL,*memRefReg;
6493
TR::MemoryReference * mr1, * mr2;
6494
TR::LabelSymbol * wbLabel, * cFlowRegionEnd, * simpleStoreLabel, * cFlowRegionStart;
6495
TR::RegisterDependencyConditions * conditions;
6496
J9::Z::PrivateLinkage * linkage = static_cast<J9::Z::PrivateLinkage *>(cg->getLinkage());
6497
TR::Register * tempReg = NULL;
6498
TR::Instruction *cursor;
6499
6500
cFlowRegionStart = generateLabelSymbol(cg);
6501
wbLabel = generateLabelSymbol(cg);
6502
cFlowRegionEnd = generateLabelSymbol(cg);
6503
simpleStoreLabel = generateLabelSymbol(cg);
6504
6505
txReg = cg->allocateRegister();
6506
tyReg = cg->allocateRegister();
6507
6508
TR::Register * owningObjectRegVal = cg->allocateRegister();
6509
TR::Register * srcRegVal = cg->allocateRegister();
6510
6511
// dst reg is read-only when we don't do wrtbar or crdmark
6512
// if destination node is the same as source node we also
6513
// need to create a copy because destination & source
6514
// are 1st and 2nd arguments to the call and as such
6515
// they need to be in 2 different registers
6516
if (doWrtBar || doCrdMrk || (classChild==sourceChild))
6517
{
6518
classReg = cg->gprClobberEvaluate(classChild);
6519
// evaluate using load and test
6520
if (sourceChild->getOpCode().isLoadVar() && sourceChild->getRegister()==NULL && !sourceChild->isNonNull())
6521
{
6522
srcReg = cg->allocateCollectedReferenceRegister();
6523
6524
generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), sourceChild, srcReg, TR::MemoryReference::create(cg, sourceChild));
6525
6526
sourceChild->setRegister(srcReg);
6527
}
6528
else
6529
{
6530
srcReg = cg->gprClobberEvaluate(sourceChild);
6531
}
6532
}
6533
else
6534
{
6535
classReg = cg->evaluate(classChild);
6536
srcReg = cg->evaluate(sourceChild);
6537
}
6538
TR::Node *callNode = TR::Node::createWithSymRef(node, TR::call, 2, node->getSymbolReference());
6539
callNode->setChild(0, sourceChild);
6540
callNode->setChild(1, classChild);
6541
mr1 = TR::MemoryReference::create(cg, firstChild);
6542
6543
TR::Register *compressedReg = srcReg;
6544
if (usingCompressedPointers)
6545
compressedReg = cg->evaluate(firstChild->getSecondChild());
6546
6547
// We need deps to setup args for arrayStoreCHK helper and/or wrtBAR helper call.
6548
// We need 2 more regs for inline version of arrayStoreCHK (txReg & tyReg). We use RA/EP for these
6549
// We then need two extra regs for memref for the actual store.
6550
// A seventh, eighth and ninth post dep may be needed to manufacture imm values
6551
// used by the inlined version of arrayStoreCHK
6552
// The tenth post dep may be needed to generateDirectCall if it creates a RegLitRefInstruction.
6553
conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 11, cg);
6554
conditions->addPostCondition(classReg, linkage->getIntegerArgumentRegister(0));
6555
conditions->addPostCondition(srcReg, linkage->getIntegerArgumentRegister(1));
6556
if (usingCompressedPointers)
6557
{
6558
conditions->addPostConditionIfNotAlreadyInserted(compressedReg, TR::RealRegister::AssignAny);
6559
}
6560
conditions->addPostCondition(txReg, linkage->getReturnAddressRegister());
6561
conditions->addPostCondition(tyReg, linkage->getEntryPointRegister());
6562
conditions->addPostCondition(srcRegVal, TR::RealRegister::AssignAny);
6563
conditions->addPostCondition(owningObjectRegVal, TR::RealRegister::AssignAny);
6564
6565
TR::Instruction *current = cg->getAppendInstruction();
6566
TR_ASSERT( current != NULL, "Could not get current instruction");
6567
6568
if (node->getNumChildren()==2)
6569
{
6570
litPoolBaseChild=node->getSecondChild();
6571
TR_ASSERT((litPoolBaseChild->getOpCodeValue()==TR::aload) || (litPoolBaseChild->getOpCodeValue()==TR::aRegLoad),
6572
"Literal pool base child expected\n");
6573
litPoolBaseReg=cg->evaluate(litPoolBaseChild);
6574
conditions->addPostCondition(litPoolBaseReg, TR::RealRegister::AssignAny);
6575
}
6576
6577
if (!sourceChild->isNonNull())
6578
{
6579
// Note the use of 64-bit compare for compressedRefs and use of the decompressed `srcReg` register
6580
// Compare object with NULL. If NULL, branch around ASC, WrtBar and CrdMrk as they are not required
6581
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, srcReg, 0, TR::InstOpCode::COND_BE, (doWrtBar || doCrdMrk)?simpleStoreLabel:wbLabel, false, true);
6582
}
6583
6584
J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
6585
if (nopASC)
6586
{
6587
// Speculatively NOP the array store check if VP is able to prove that the ASC
6588
// would always succeed given the current state of the class hierarchy.
6589
//
6590
TR::LabelSymbol * oolASCLabel = generateLabelSymbol(cg);
6591
TR_VirtualGuard *virtualGuard = TR_VirtualGuard::createArrayStoreCheckGuard(comp, node, node->getArrayStoreClassInNode());
6592
TR::Instruction *vgnopInstr = generateVirtualGuardNOPInstruction(cg, node, virtualGuard->addNOPSite(), NULL, oolASCLabel);
6593
6594
// nopASC assumes OOL is enabled
6595
TR_S390OutOfLineCodeSection *outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(oolASCLabel, wbLabel, cg);
6596
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
6597
outlinedSlowPath->swapInstructionListsWithCompilation();
6598
6599
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolASCLabel);
6600
TR::Register *dummyResReg = helperLink->buildDirectDispatch(callNode);
6601
if (dummyResReg)
6602
cg->stopUsingRegister(dummyResReg);
6603
6604
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, wbLabel);
6605
outlinedSlowPath->swapInstructionListsWithCompilation();
6606
}
6607
else
6608
VMarrayStoreCHKEvaluator(node, helperLink, callNode, srcReg, classReg, txReg, tyReg, litPoolBaseReg, owningObjectRegVal, srcRegVal, wbLabel, conditions, cg);
6609
6610
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, wbLabel);
6611
6612
if (mr1->getBaseRegister())
6613
{
6614
conditions->addPostConditionIfNotAlreadyInserted(mr1->getBaseRegister(), TR::RealRegister::AssignAny);
6615
}
6616
if (mr1->getIndexRegister())
6617
{
6618
conditions->addPostConditionIfNotAlreadyInserted(mr1->getIndexRegister(), TR::RealRegister::AssignAny);
6619
}
6620
6621
if (usingCompressedPointers)
6622
generateRXInstruction(cg, TR::InstOpCode::ST, node, compressedReg, mr1);
6623
else
6624
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, srcReg, mr1);
6625
6626
if (doWrtBar)
6627
{
6628
TR::SymbolReference *wbRef ;
6629
if (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_oldcheck)
6630
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef(comp->getMethodSymbol());
6631
else
6632
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef(comp->getMethodSymbol());
6633
6634
// Cardmarking is not inlined for gencon. Consider doing so when perf issue arises.
6635
VMnonNullSrcWrtBarCardCheckEvaluator(firstChild, classReg, srcReg, tyReg, txReg, cFlowRegionEnd, wbRef, conditions, cg, false);
6636
}
6637
else if (doCrdMrk)
6638
{
6639
VMCardCheckEvaluator(firstChild, classReg, NULL, conditions, cg, true, cFlowRegionEnd);
6640
}
6641
6642
// Store for case where we have a NULL ptr detected at runtime and
6643
// branches around the wrtbar
6644
//
6645
// For the non-NULL case we chose to simply exec the ST twice as this is
6646
// cheaper than branching around the a single ST inst.
6647
//
6648
if (!sourceChild->isNonNull() && (doWrtBar || doCrdMrk))
6649
{
6650
// As we could hit a gc when doing the gencon wrtbar, we have to not
6651
// re-do the ST. We must branch around the second store.
6652
//
6653
if (doWrtBar)
6654
{
6655
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
6656
}
6657
6658
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, simpleStoreLabel);
6659
6660
mr2 = generateS390MemoryReference(*mr1, 0, cg);
6661
if (usingCompressedPointers)
6662
generateRXInstruction(cg, TR::InstOpCode::ST, node, compressedReg, mr2);
6663
else
6664
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, srcReg, mr2);
6665
}
6666
6667
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, conditions);
6668
cFlowRegionEnd->setEndInternalControlFlow();
6669
6670
if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())
6671
firstChild->setStoreAlreadyEvaluated(true);
6672
6673
cg->decReferenceCount(sourceChild);
6674
cg->decReferenceCount(classChild);
6675
if (litPoolBaseChild!=NULL) cg->decReferenceCount(litPoolBaseChild);
6676
cg->decReferenceCount(firstChild);
6677
if (usingCompressedPointers)
6678
{
6679
cg->decReferenceCount(firstChild->getSecondChild());
6680
cg->stopUsingRegister(compressedReg);
6681
}
6682
mr1->stopUsingMemRefRegister(cg);
6683
cg->stopUsingRegister(txReg);
6684
cg->stopUsingRegister(tyReg);
6685
cg->stopUsingRegister(classReg);
6686
cg->stopUsingRegister(srcReg);
6687
cg->stopUsingRegister(owningObjectRegVal);
6688
cg->stopUsingRegister(srcRegVal);
6689
6690
if (tempReg)
6691
{
6692
cg->stopUsingRegister(tempReg);
6693
}
6694
6695
// determine where internal control flow begins by looking for the first branch
6696
// instruction after where the label instruction would have been inserted
6697
TR::Instruction *next = current->getNext();
6698
while(next != NULL && !next->isBranchOp())
6699
next = next->getNext();
6700
TR_ASSERT( next != NULL && next->getPrev() != NULL, "Could not find branch instruction where internal control flow begins");
6701
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, next->getPrev());
6702
cFlowRegionStart->setStartInternalControlFlow();
6703
6704
return NULL;
6705
}
6706
6707
///////////////////////////////////////////////////////////////////////////////////////
6708
// ArrayCHKEvaluator - Array compatibility check. child 1 is object1, 2 is object2.
6709
// Symbolref indicates failure action/destination
6710
///////////////////////////////////////////////////////////////////////////////////////
6711
TR::Register *
6712
J9::Z::TreeEvaluator::ArrayCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
6713
{
6714
return TR::TreeEvaluator::VMarrayCheckEvaluator(node, cg);
6715
}
6716
6717
/**
 * Evaluates a node whose first child is an inline integer comparison and which calls
 * the helper named by the node's symbol reference, through a helper call snippet, when
 * the comparison selects the helper path. Used by MethodEnterHook and MethodExitHook.
 *
 * The generated sequence is: compare the two children of the test node, branch to the
 * snippet (on equal when the test is icmpeq, on not-equal otherwise), then a restart
 * label carrying the register dependencies.
 *
 * @param node The hook node; its first child is the comparison test.
 * @param cg   The code generator.
 * @return Always NULL.
 */
TR::Register *
J9::Z::TreeEvaluator::conditionalHelperEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   // Decrement the reference count on the constant placeholder parameter to
   // the MethodEnterHook call. An evaluation isn't necessary because the
   // constant value isn't used here.
   //
   if (node->getOpCodeValue() == TR::MethodEnterHook)
      {
      if (node->getSecondChild()->getOpCode().isCall() && node->getSecondChild()->getNumChildren() > 1)
         {
         cg->decReferenceCount(node->getSecondChild()->getFirstChild());
         }
      }

   // The child contains an inline test.
   //
   TR::Node * testNode = node->getFirstChild();
   TR::Node * firstChild = testNode->getFirstChild();
   TR::Node * secondChild = testNode->getSecondChild();
   TR::Register * src1Reg = cg->evaluate(firstChild);
   if (secondChild->getOpCode().isLoadConst())
      {
      int32_t value = secondChild->getInt();

      if (value >= MIN_IMMEDIATE_VAL && value <= MAX_IMMEDIATE_VAL)
         {
         // The constant fits the immediate range, so compare against it directly.
         generateRIInstruction(cg, TR::InstOpCode::CHI, node, src1Reg, value);
         }
      else
         {
         TR::Register * tempReg = cg->evaluate(secondChild);
         generateRRInstruction(cg, TR::InstOpCode::CR, node, src1Reg, tempReg);
         }
      }
   else
      {
      TR::Register * src2Reg = cg->evaluate(secondChild);
      generateRRInstruction(cg, TR::InstOpCode::CR, node, src1Reg, src2Reg);
      }

   TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
   TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg);

   // Two scratch registers are tied to the entry point and return address registers
   // through the post dependencies attached to the restart label below.
   TR::Register * tempReg1 = cg->allocateRegister();
   TR::Register * tempReg2 = cg->allocateRegister();
   TR::RegisterDependencyConditions * dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
   dependencies->addPostCondition(tempReg1, cg->getEntryPointRegister());
   dependencies->addPostCondition(tempReg2, cg->getReturnAddressRegister());
   snippetLabel->setEndInternalControlFlow();

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
   cFlowRegionStart->setStartInternalControlFlow();
   TR::Instruction * gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, testNode->getOpCodeValue() == TR::icmpeq ? TR::InstOpCode::COND_BE : TR::InstOpCode::COND_BNE, node, snippetLabel);

   TR::LabelSymbol * reStartLabel = generateLabelSymbol(cg);
   TR::Snippet * snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference(), reStartLabel);
   cg->addSnippet(snippet);
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, reStartLabel, dependencies);

   gcPoint->setNeedsGCMap(0x0000FFFF);

   cg->decReferenceCount(firstChild);
   cg->decReferenceCount(secondChild);
   cg->decReferenceCount(testNode);
   cg->stopUsingRegister(tempReg1);
   cg->stopUsingRegister(tempReg2);

   return NULL;
   }
6792
6793
/**
6794
* Null check a pointer. child 1 is indirect reference. Symbolref
6795
* indicates failure action/destination
6796
*/
6797
TR::Register *
6798
J9::Z::TreeEvaluator::NULLCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
6799
{
6800
return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, false, cg);
6801
}
6802
6803
/**
6804
* resolveAndNULLCHKEvaluator - Resolve check a static, field or method and Null check
6805
* the underlying pointer. child 1 is reference to be resolved. Symbolref indicates
6806
* failure action/destination
6807
*/
6808
TR::Register *
6809
J9::Z::TreeEvaluator::resolveAndNULLCHKEvaluator(TR::Node * node, TR::CodeGenerator * cg)
6810
{
6811
return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, true, cg);
6812
}
6813
6814
/**
6815
* This is a helper function used to generate the snippet
6816
*/
6817
TR::Register *
6818
J9::Z::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(TR::Node * node, bool needsResolve, TR::CodeGenerator * cg)
6819
{
6820
// NOTE:
6821
// If no code is generated for the null check, just evaluate the
6822
// child and decrement its use count UNLESS the child is a pass-through node
6823
// in which case some kind of explicit test or indirect load must be generated
6824
// to force the null check at this point.
6825
6826
TR::Node * firstChild = node->getFirstChild();
6827
TR::ILOpCode & opCode = firstChild->getOpCode();
6828
TR::Compilation *comp = cg->comp();
6829
TR::Node * reference = NULL;
6830
TR::InstOpCode::S390BranchCondition branchOpCond = TR::InstOpCode::COND_BZ;
6831
6832
bool hasCompressedPointers = false;
6833
6834
TR::Node * n = firstChild;
6835
6836
// NULLCHK has a special case with compressed pointers.
6837
// In the scenario where the first child is TR::l2a, the
6838
// node to be null checked is not the iiload, but its child.
6839
// i.e. aload, aRegLoad, etc.
6840
if (comp->useCompressedPointers()
6841
&& firstChild->getOpCodeValue() == TR::l2a)
6842
{
6843
hasCompressedPointers = true;
6844
TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);
6845
TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);
6846
while (n->getOpCodeValue() != loadOp && n->getOpCodeValue() != rdbarOp)
6847
n = n->getFirstChild();
6848
reference = n->getFirstChild();
6849
}
6850
else
6851
{
6852
reference = node->getNullCheckReference();
6853
}
6854
6855
// Skip the NULLCHK for TR::loadaddr nodes.
6856
//
6857
if (cg->getSupportsImplicitNullChecks()
6858
&& reference->getOpCodeValue() == TR::loadaddr)
6859
{
6860
cg->evaluate(firstChild);
6861
cg->decReferenceCount(firstChild);
6862
return NULL;
6863
}
6864
6865
bool needExplicitCheck = true;
6866
bool needLateEvaluation = true;
6867
6868
// Add the explicit check after this instruction
6869
//
6870
TR::Instruction *appendTo = NULL;
6871
6872
// determine if an explicit check is needed
6873
if (cg->getSupportsImplicitNullChecks()
6874
&& !firstChild->isUnneededIALoad())
6875
{
6876
if (opCode.isLoadVar()
6877
|| (cg->comp()->target().is64Bit() && opCode.getOpCodeValue()==TR::l2i)
6878
|| (hasCompressedPointers && firstChild->getFirstChild()->getOpCode().getOpCodeValue() == TR::i2l))
6879
{
6880
TR::SymbolReference *symRef = NULL;
6881
6882
if (opCode.getOpCodeValue()==TR::l2i)
6883
symRef = n->getFirstChild()->getSymbolReference();
6884
else
6885
symRef = n->getSymbolReference();
6886
6887
// We prefer to generate an explicit NULLCHK vs an implicit one
6888
// to prevent potential costs of a cache miss on an unnecessary load.
6889
if (n->getReferenceCount() == 1
6890
&& !n->getSymbolReference()->isUnresolved())
6891
{
6892
// If the child is only used here, we don't need to evaluate it
6893
// since all we need is the grandchild which will be evaluated by
6894
// the generation of the explicit check below.
6895
needLateEvaluation = false;
6896
6897
// at this point, n is the raw iiload (created by lowerTrees) and
6898
// reference is the aload of the object. node->getFirstChild is the
6899
// l2a sequence; as a result, n's refCount will always be 1
6900
// and node->getFirstChild's refCount will be at least 2 (one under the nullchk
6901
// and the other under the translate treetop)
6902
//
6903
if (hasCompressedPointers
6904
&& node->getFirstChild()->getReferenceCount() > 2)
6905
needLateEvaluation = true;
6906
}
6907
6908
// Check if offset from a NULL reference will fall into the inaccessible bytes,
6909
// resulting in an implicit trap being raised.
6910
else if (symRef
6911
&& ((symRef->getSymbol()->getOffset() + symRef->getOffset()) < cg->getNumberBytesReadInaccessible()))
6912
{
6913
needExplicitCheck = false;
6914
6915
// If the child is an arraylength which has been reduced to an iiload,
6916
// and is only going to be used immediately in a BNDCHK, combine the checks.
6917
//
6918
TR::TreeTop *nextTreeTop = cg->getCurrentEvaluationTreeTop()->getNextTreeTop();
6919
if (n->getReferenceCount() == 2 && nextTreeTop)
6920
{
6921
TR::Node *nextTopNode = nextTreeTop->getNode();
6922
6923
if (nextTopNode)
6924
{
6925
if (nextTopNode->getOpCode().isBndCheck())
6926
{
6927
if ((nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getChild(2) == n))
6928
|| (!nextTopNode->getOpCode().isSpineCheck() && (nextTopNode->getFirstChild() == n)))
6929
{
6930
needLateEvaluation = false;
6931
nextTopNode->setHasFoldedImplicitNULLCHK(true);
6932
traceMsg(comp, "\nMerging NULLCHK [%p] and BNDCHK [%p] of load child [%p]", node, nextTopNode, n);
6933
}
6934
}
6935
else if (nextTopNode->getOpCode().isIf()
6936
&& nextTopNode->isNonoverriddenGuard()
6937
&& nextTopNode->getFirstChild() == firstChild)
6938
{
6939
needLateEvaluation = false;
6940
needExplicitCheck = true;
6941
reference->incReferenceCount(); // will be decremented again later
6942
}
6943
}
6944
}
6945
}
6946
}
6947
else if (opCode.isStore())
6948
{
6949
TR::SymbolReference *symRef = n->getSymbolReference();
6950
if (n->getOpCode().hasSymbolReference()
6951
&& (symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesWriteInaccessible()))
6952
{
6953
needExplicitCheck = false;
6954
}
6955
}
6956
else if (opCode.isCall()
6957
&& opCode.isIndirect()
6958
&& (cg->getNumberBytesReadInaccessible() > TR::Compiler->om.offsetOfObjectVftField()))
6959
{
6960
needExplicitCheck = false;
6961
}
6962
else if (opCode.getOpCodeValue() == TR::iushr
6963
&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))
6964
{
6965
// If the child is an arraylength which has been reduced to an iushr,
6966
// we must evaluate it here so that the implicit exception will occur
6967
// at the right point in the program.
6968
//
6969
// This can occur when the array length is represented in bytes, not elements.
6970
// The optimizer must intervene for this to happen.
6971
//
6972
cg->evaluate(n->getFirstChild());
6973
needExplicitCheck = false;
6974
}
6975
else if (opCode.getOpCodeValue() == TR::monent
6976
|| opCode.getOpCodeValue() == TR::monexit)
6977
{
6978
// The child may generate inline code that provides an implicit null check
6979
// but we won't know until the child is evaluated.
6980
//
6981
reference->incReferenceCount(); // will be decremented again later
6982
needLateEvaluation = false;
6983
cg->evaluate(reference);
6984
appendTo = cg->getAppendInstruction();
6985
cg->evaluate(firstChild);
6986
6987
if (cg->getImplicitExceptionPoint()
6988
&& (cg->getNumberBytesReadInaccessible() > cg->fe()->getOffsetOfContiguousArraySizeField()))
6989
{
6990
needExplicitCheck = false;
6991
cg->decReferenceCount(reference);
6992
}
6993
}
6994
}
6995
6996
// Generate the code for the null check
6997
//
6998
if(needExplicitCheck)
6999
{
7000
TR::Register * targetRegister = NULL;
7001
if (cg->getHasResumableTrapHandler())
7002
{
7003
// Use Load-And-Trap on zHelix if available.
7004
// This loads the field and performance a NULLCHK on the field value.
7005
// i.e. o.f == NULL
7006
if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
7007
&& reference->getOpCode().isLoadVar()
7008
&& (reference->getOpCodeValue() != TR::ardbari)
7009
&& reference->getRegister() == NULL)
7010
{
7011
targetRegister = cg->allocateCollectedReferenceRegister();
7012
appendTo = generateRXInstruction(cg, TR::InstOpCode::getLoadAndTrapOpCode(), node, targetRegister, TR::MemoryReference::create(cg, reference), appendTo);
7013
reference->setRegister(targetRegister);
7014
}
7015
else if (cg->comp()->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12)
7016
&& reference->getRegister() == NULL
7017
&& comp->useCompressedPointers()
7018
&& reference->getOpCodeValue() == TR::l2a
7019
&& reference->getFirstChild()->getOpCodeValue() == TR::iu2l
7020
&& reference->getFirstChild()->getFirstChild()->getOpCode().isLoadVar()
7021
&& TR::Compiler->om.compressedReferenceShiftOffset() == 0)
7022
{
7023
targetRegister = cg->evaluate(reference);
7024
appendTo = cg->getAppendInstruction();
7025
if (appendTo->getOpCodeValue() == TR::InstOpCode::LLGF)
7026
{
7027
appendTo->setOpCodeValue(TR::InstOpCode::LLGFAT);
7028
appendTo->setNode(node);
7029
}
7030
else
7031
{
7032
appendTo = generateRIEInstruction(cg, TR::InstOpCode::getCmpImmTrapOpCode(), node, targetRegister, (int16_t)0, TR::InstOpCode::COND_BE, appendTo);
7033
}
7034
}
7035
else
7036
{
7037
targetRegister = reference->getRegister();
7038
7039
if (targetRegister == NULL)
7040
targetRegister = cg->evaluate(reference);
7041
7042
appendTo = generateRIEInstruction(cg, TR::InstOpCode::getCmpImmTrapOpCode(), node, targetRegister, (int16_t)0, TR::InstOpCode::COND_BE, appendTo);
7043
}
7044
7045
TR::Instruction* cursor = appendTo;
7046
cursor->setThrowsImplicitException();
7047
cursor->setExceptBranchOp();
7048
cg->setCanExceptByTrap(true);
7049
cursor->setNeedsGCMap(0x0000FFFF);
7050
if (cg->comp()->target().isZOS())
7051
killRegisterIfNotLocked(cg, TR::RealRegister::GPR4, cursor);
7052
}
7053
else
7054
{
7055
// NULLCHK snippet label.
7056
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg);
7057
TR::SymbolReference *symRef = node->getSymbolReference();
7058
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, symRef));
7059
7060
if (!firstChild->getOpCode().isCall()
7061
&& reference->getOpCode().isLoadVar()
7062
&& (reference->getOpCodeValue() != TR::ardbari) // ardbari needs to be evaluated before being NULLCHK'ed because of its side effect.
7063
&& reference->getOpCode().hasSymbolReference()
7064
&& reference->getRegister() == NULL)
7065
{
7066
bool isInternalPointer = reference->getSymbolReference()->getSymbol()->isInternalPointer();
7067
if ((reference->getOpCode().isLoadIndirect() || reference->getOpCodeValue() == TR::aload)
7068
&& !isInternalPointer)
7069
{
7070
targetRegister = cg->allocateCollectedReferenceRegister();
7071
}
7072
else
7073
{
7074
targetRegister = cg->allocateRegister();
7075
if (isInternalPointer)
7076
{
7077
targetRegister->setPinningArrayPointer(reference->getSymbolReference()->getSymbol()->castToInternalPointerAutoSymbol()->getPinningArrayPointer());
7078
targetRegister->setContainsInternalPointer();
7079
}
7080
}
7081
7082
reference->setRegister(targetRegister);
7083
TR::MemoryReference * tempMR = TR::MemoryReference::create(cg, reference);
7084
appendTo = generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), reference, targetRegister, tempMR, appendTo);
7085
tempMR->stopUsingMemRefRegister(cg);
7086
7087
appendTo = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, snippetLabel, appendTo);
7088
TR::Instruction *brInstr = appendTo;
7089
brInstr->setExceptBranchOp();
7090
}
7091
else
7092
{
7093
TR::Node *n = NULL;
7094
7095
// After the NULLCHK is generated, the nodes are guaranteed
7096
// to be non-zero. Mark the nodes, so that subsequent
7097
// evaluations can be optimized.
7098
if (comp->useCompressedPointers()
7099
&& reference->getOpCodeValue() == TR::l2a)
7100
{
7101
reference->setIsNonNull(true);
7102
n = reference->getFirstChild();
7103
TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);
7104
TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);
7105
while (n->getOpCodeValue() != loadOp && n->getOpCodeValue() != rdbarOp)
7106
{
7107
n->setIsNonZero(true);
7108
n = n->getFirstChild();
7109
}
7110
n->setIsNonZero(true);
7111
}
7112
7113
TR::InstOpCode::Mnemonic cmpOpCode = TR::InstOpCode::bad;
7114
7115
// For compressed pointers case, if we find the compressed value,
7116
// and it has already been evaluated into a register,
7117
// we can take advantage of the uncompressed value and evaluate
7118
// the compare result earlier.
7119
//
7120
// If it hasn't been evaluated yet, we want to evaluate the entire
7121
// l2a tree, which might generate LLGF. In that case, the better
7122
// choice is to perform the NULLCHK on the decompressed address.
7123
if (n != NULL && n->getRegister() != NULL)
7124
{
7125
targetRegister = n->getRegister();
7126
cg->evaluate(reference);
7127
7128
// For concurrent scavenge the source is loaded and shifted by the guarded load, thus we need to use CG
7129
// here for a non-zero compressedrefs shift value
7130
if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
7131
{
7132
cmpOpCode = TR::InstOpCode::getCmpOpCode();
7133
}
7134
else
7135
{
7136
cmpOpCode = (n->getOpCode().getSize() > 4) ? TR::InstOpCode::CG: TR::InstOpCode::C;
7137
}
7138
}
7139
else
7140
{
7141
targetRegister = cg->evaluate(reference);
7142
cmpOpCode = TR::InstOpCode::getCmpOpCode(); // reference is always an address type
7143
}
7144
appendTo = generateS390CompareAndBranchInstruction(cg, cmpOpCode, node, targetRegister, NULLVALUE, branchOpCond, snippetLabel, false, true, appendTo);
7145
TR::Instruction * cursor = appendTo;
7146
cursor->setExceptBranchOp();
7147
}
7148
}
7149
}
7150
7151
if (needLateEvaluation)
7152
{
7153
cg->evaluate(firstChild);
7154
}
7155
else if (needExplicitCheck)
7156
{
7157
cg->decReferenceCount(reference);
7158
}
7159
7160
if (comp->useCompressedPointers())
7161
cg->decReferenceCount(node->getFirstChild());
7162
else
7163
cg->decReferenceCount(firstChild);
7164
7165
// If an explicit check has not been generated for the null check, there is
7166
// an instruction that will cause a hardware trap if the exception is to be
7167
// taken. If this method may catch the exception, a GC stack map must be
7168
// created for this instruction. All registers are valid at this GC point
7169
// TODO - if the method may not catch the exception we still need to note
7170
// that the GC point exists, since maps before this point and after it cannot
7171
// be merged.
7172
//
7173
if (cg->getSupportsImplicitNullChecks() && !needExplicitCheck)
7174
{
7175
TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
7176
if (faultingInstruction)
7177
{
7178
faultingInstruction->setNeedsGCMap(0x0000FFFF);
7179
faultingInstruction->setThrowsImplicitNullPointerException();
7180
cg->setCanExceptByTrap(true);
7181
7182
TR_Debug * debugObj = cg->getDebug();
7183
if (debugObj)
7184
{
7185
debugObj->addInstructionComment(faultingInstruction, "Throws Implicit Null Pointer Exception");
7186
}
7187
}
7188
}
7189
7190
if (comp->useCompressedPointers()
7191
&& reference->getOpCodeValue() == TR::l2a)
7192
{
7193
reference->setIsNonNull(true);
7194
TR::Node *n = NULL;
7195
n = reference->getFirstChild();
7196
TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);
7197
TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);
7198
while (n->getOpCodeValue() != loadOp && n->getOpCodeValue() != rdbarOp)
7199
{
7200
n->setIsNonZero(true);
7201
n = n->getFirstChild();
7202
}
7203
n->setIsNonZero(true);
7204
}
7205
7206
reference->setIsNonNull(true);
7207
7208
return NULL;
7209
}
7210
7211
/** \brief
 *     Generates the lock reservation sequence for entering a monitor.
 *
 *  \details
 *     The mainline loads the lock word and compares it against
 *     (metadata register | LOCK_RESERVATION_BIT). On a match the recursion count is
 *     bumped by LOCK_INC_DEC_VALUE and stored back (skipped for primitive locked
 *     regions). On a mismatch control branches to an out-of-line (OOL) section which
 *     tries, in order: a compare-and-swap on a zero lock word, a recursive enter on a
 *     lock already reserved by this thread, and finally a call to the VM helper built
 *     through \p helperLink.
 *
 *  \param node
 *     The node being evaluated; its first child supplies the object register when
 *     \p objectClassReg is NULL, and its reference count is decremented on exit.
 *
 *  \param lwOffset
 *     Displacement of the lock word from the object register.
 *
 *  \param objectClassReg
 *     If non-NULL, used as the base register for the lock word instead of the first
 *     child's register.
 *
 *  \param cg
 *     The code generator used to emit instructions.
 *
 *  \param helperLink
 *     Linkage used to build the direct dispatch to the helper on the slow path.
 *
 *  \return
 *     Always NULL.
 */
static TR::Register *
reservationLockEnter(TR::Node *node, int32_t lwOffset, TR::Register *objectClassReg, TR::CodeGenerator *cg, J9::Z::CHelperLinkage *helperLink)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   int numICFDeps = 6 + (comp->getOptions()->enableDebugCounters() ? 4 : 0);
   TR::RegisterDependencyConditions *ICFConditions =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numICFDeps, cg);

   TR::Register *objReg = objectClassReg ? objectClassReg : node->getFirstChild()->getRegister();
   TR::Register *metaReg = cg->getMethodMetaDataRealRegister();

   TR::Register *monitorReg = cg->allocateRegister();
   TR::Register *valReg = cg->allocateRegister();
   TR::Register *tempReg = cg->allocateRegister();

   TR::LabelSymbol *resLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   // TODO - primitive monitors are disabled. Enable it after testing
   //TR::TreeEvaluator::isPrimitiveMonitor(node, cg);
   //
   TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
   TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;
   TR_Debug *debugObj = cg->getDebug();
   TR::Instruction *instr = NULL;

   // This is just for test. (may not work in all cases)
   static bool enforcePrimitive = feGetEnv("EnforcePrimitiveLockRes") ? 1 : 0;
   bool isPrimitive = enforcePrimitive ? 1 : node->isPrimitiveLockedRegion();

   // Opcodes: 64-bit forms are only used on a 64-bit target whose lock word is not compressed.
   const bool use64b = comp->target().is64Bit() && !fej9->generateCompressedLockWord();
   TR::InstOpCode::Mnemonic loadOp       = use64b ? TR::InstOpCode::LG   : TR::InstOpCode::L;
   TR::InstOpCode::Mnemonic loadRegOp    = use64b ? TR::InstOpCode::LGR  : TR::InstOpCode::LR;
   TR::InstOpCode::Mnemonic orImmOp      = TR::InstOpCode::OILF;
   TR::InstOpCode::Mnemonic compareOp    = use64b ? TR::InstOpCode::CGR  : TR::InstOpCode::CR;
   TR::InstOpCode::Mnemonic compareImmOp = use64b ? TR::InstOpCode::CG   : TR::InstOpCode::C;
   TR::InstOpCode::Mnemonic addImmOp     = use64b ? TR::InstOpCode::AGHI : TR::InstOpCode::AHI;
   TR::InstOpCode::Mnemonic storeOp      = use64b ? TR::InstOpCode::STG  : TR::InstOpCode::ST;
   TR::InstOpCode::Mnemonic xorOp        = use64b ? TR::InstOpCode::XGR  : TR::InstOpCode::XR;
   TR::InstOpCode::Mnemonic casOp        = use64b ? TR::InstOpCode::CSG  : TR::InstOpCode::CS;
   TR::InstOpCode::Mnemonic loadImmOp    = use64b ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI;
   TR::InstOpCode::Mnemonic andOp        = use64b ? TR::InstOpCode::NGR  : TR::InstOpCode::NR;

   // ICF RA constraints
   //////////////
   ICFConditions->addPostConditionIfNotAlreadyInserted(objReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(monitorReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(valReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(tempReg, TR::RealRegister::AssignAny);
   //////////////

   // Main path instruction sequence (non-primitive).
   //    L     monitorReg, #lwOffset(objectReg)
   //    LR    valReg,     metaReg
   //    OILF  valReg,     LR-Bit
   //    CRJ   valReg,     monitorReg, MASK6, callLabel
   //    AHI   monitorReg, INC_DEC_VALUE
   //    ST    monitorReg, #lwOffset(objectReg)

   // load monitor reg
   generateRXInstruction(cg, loadOp, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg));
   // load r13|LOCK_RESERVATION_BIT
   generateRRInstruction(cg, loadRegOp, node, valReg, metaReg);
   generateRILInstruction(cg, orImmOp, node, valReg, LOCK_RESERVATION_BIT);

   // Jump to OOL path if lock is not reserved (monReg != r13|LOCK_RESERVATION_BIT)
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
   cFlowRegionStart->setStartInternalControlFlow();
   instr = generateS390CompareAndBranchInstruction(cg, compareOp, node, valReg, monitorReg,
      TR::InstOpCode::COND_BNE, resLabel, false, false);

   TR::LabelSymbol *helperReturnOOLLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg);
   if (debugObj)
      debugObj->addInstructionComment(instr, "Branch to OOL reservation enter sequence");
   outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(resLabel, doneOOLLabel, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);

   cg->generateDebugCounter("LockEnt/LR/LRSuccessfull", 1, TR::DebugCounter::Undetermined);
   if (!isPrimitive)
      {
      generateRIInstruction(cg, addImmOp, node, monitorReg, (uintptr_t) LOCK_INC_DEC_VALUE);
      generateRXInstruction(cg, storeOp, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg));
      }

   if (outlinedSlowPath) // Means we have OOL
      {
      TR::LabelSymbol *reserved_checkLabel = generateLabelSymbol(cg);
      outlinedSlowPath->swapInstructionListsWithCompilation(); // Toggle instruction list
      TR::Instruction *temp = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, resLabel);
      if (debugObj)
         {
         if (isPrimitive)
            debugObj->addInstructionComment(temp, "Denotes start of OOL primitive reservation enter sequence");
         else
            debugObj->addInstructionComment(temp, "Denotes start of OOL non-primitive reservation enter sequence");
         }
      // XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.
      TR_ASSERT(!temp->getLiveLocals() && !temp->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");
      temp->setLiveLocals(instr->getLiveLocals());
      temp->setLiveMonitors(instr->getLiveMonitors());

      // Non-Primitive lockReservation enter sequence:         Primitive lockReservation enter sequence:
      //
      //    CIJ   monitorReg, 0, MASK6, checkLabel             TODO - Add Primitive lockReservation enter sequence
      //    AHI   valReg, INC_DEC_VALUE
      //    XR    monitorReg, monitorReg
      //    CS    monitorReg, valReg, #lwOffset(objectReg)
      //    BRC   MASK6, callHelper
      //    BRC   returnLabel
      // checkLabel:
      //    LGFI  tempReg, LOCK_RES_NON_PRIMITIVE_ENTER_MASK
      //    NR    tempReg, monitorReg
      //    CRJ   tempReg, valReg, MASK6, callHelper
      //    AHI   monitorReg, INC_DEC_VALUE
      //    ST    monitorReg, #lwOffset(objectReg)
      //    BRC   returnLabel
      // callHelper:
      //    BRASL R14, jitMonitorEntry
      // returnLabel:

      // Avoid CAS in case lock value is not zero
      generateS390CompareAndBranchInstruction(cg, compareImmOp, node, monitorReg, 0, TR::InstOpCode::COND_BNE, reserved_checkLabel, false);
      if (!isPrimitive)
         {
         generateRIInstruction(cg, addImmOp, node, valReg, (uintptr_t) LOCK_INC_DEC_VALUE);
         }
      // Try to acquire the lock using CAS
      generateRRInstruction(cg, xorOp, node, monitorReg, monitorReg);
      generateRSInstruction(cg, casOp, node, monitorReg, valReg, generateS390MemoryReference(objReg, lwOffset, cg));
      // Call VM helper if the CAS fails (contention)
      instr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callLabel);

      cg->generateDebugCounter("LockEnt/LR/CASSuccessfull", 1, TR::DebugCounter::Undetermined);

      // Lock is acquired successfully
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, helperReturnOOLLabel);

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, reserved_checkLabel);
      // Mask the counter
      // Mask is 8 bit value which will be sign extended, We will be using cheaper instruction like LGHI or LHI
      generateRIInstruction(cg, loadImmOp, node, tempReg, ~(isPrimitive ? LOCK_RES_PRIMITIVE_ENTER_MASK : LOCK_RES_NON_PRIMITIVE_ENTER_MASK));
      generateRRInstruction(cg, andOp, node, tempReg, monitorReg);

      // Call VM helper if the R13 != (masked MonReg)
      generateS390CompareAndBranchInstruction(cg, compareOp, node, tempReg, valReg,
         TR::InstOpCode::COND_BNE, callLabel, false, false);

      cg->generateDebugCounter("LockEnt/LR/Recursive", 1, TR::DebugCounter::Undetermined);

      // Recursive lock. Increment the counter
      if (!isPrimitive)
         {
         generateRIInstruction(cg, addImmOp, node, monitorReg, (uintptr_t) LOCK_INC_DEC_VALUE);
         generateRXInstruction(cg, storeOp, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg));
         }
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, helperReturnOOLLabel);

      // call to jithelper
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callLabel);
      cg->generateDebugCounter("LockEnt/LR/VMHelper", 1, TR::DebugCounter::Undetermined);

      // We are calling helper within ICF so we need to combine dependency from ICF and helper call at merge label
      TR::RegisterDependencyConditions *deps = NULL;
      helperLink->buildDirectDispatch(node, &deps);
      TR::RegisterDependencyConditions *mergeConditions =
         new (cg->trHeapMemory()) TR::RegisterDependencyConditions(ICFConditions, deps, cg);

      // OOL return label
      instr = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperReturnOOLLabel, mergeConditions);
      helperReturnOOLLabel->setEndInternalControlFlow();
      if (debugObj)
         {
         debugObj->addInstructionComment(instr, "OOL reservation enter VMHelper return label");
         }

      instr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneOOLLabel);
      if (debugObj)
         {
         if (isPrimitive)
            debugObj->addInstructionComment(instr, "Denotes end of OOL primitive reservation enter sequence: return to mainline");
         else
            debugObj->addInstructionComment(instr, "Denotes end of OOL non-primitive reservation enter sequence: return to mainline");
         }

      outlinedSlowPath->swapInstructionListsWithCompilation(); // Toggle instruction list

      instr = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneOOLLabel);
      if (debugObj)
         debugObj->addInstructionComment(instr, "OOL reservation enter return label");
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
      }
   else
      {
      TR_ASSERT(0, "Not implemented:Lock reservation with Disable OOL.");
      }

   if (monitorReg)
      cg->stopUsingRegister(monitorReg);
   if (valReg)
      cg->stopUsingRegister(valReg);
   if (tempReg)
      cg->stopUsingRegister(tempReg);

   cg->decReferenceCount(node->getFirstChild());
   return NULL;
   }
7431
7432
/** \brief
 *     Generates the lock reservation sequence for exiting a monitor.
 *
 *  \details
 *     The mainline loads the lock word and checks that it is reserved by the current
 *     thread. For a non-primitive region the lock word must equal
 *     (metadata register | LOCK_RESERVATION_BIT + LOCK_INC_DEC_VALUE); in that case
 *     LOCK_INC_DEC_VALUE is subtracted and the result stored back. For a primitive
 *     region the lock word masked with LOCK_RES_PRIMITIVE_EXIT_MASK must equal
 *     LOCK_RESERVATION_BIT and nothing is stored. Otherwise control branches to an
 *     out-of-line (OOL) section which either adjusts the recursion count in place or
 *     calls the VM helper built through \p helperLink.
 *
 *  \param node
 *     The node being evaluated; its first child supplies the object register when
 *     \p objectClassReg is NULL, and its reference count is decremented on exit.
 *
 *  \param lwOffset
 *     Displacement of the lock word from the object register.
 *
 *  \param objectClassReg
 *     If non-NULL, used as the base register for the lock word instead of the first
 *     child's register.
 *
 *  \param cg
 *     The code generator used to emit instructions.
 *
 *  \param helperLink
 *     Linkage used to build the direct dispatch to the helper on the slow path.
 *
 *  \return
 *     Always NULL.
 */
static TR::Register *
reservationLockExit(TR::Node *node, int32_t lwOffset, TR::Register *objectClassReg, TR::CodeGenerator *cg, J9::Z::CHelperLinkage *helperLink )
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   int numICFDeps = 6 + (comp->getOptions()->enableDebugCounters() ? 4 : 0);
   TR::RegisterDependencyConditions *ICFConditions =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numICFDeps, cg);

   TR::Register *objReg = objectClassReg ? objectClassReg : node->getFirstChild()->getRegister();
   TR::Register *metaReg = cg->getMethodMetaDataRealRegister();

   TR::Register *monitorReg = cg->allocateRegister();
   TR::Register *valReg = cg->allocateRegister();
   TR::Register *tempReg = cg->allocateRegister();

   // ICF RA constraints
   //////////////
   ICFConditions->addPostConditionIfNotAlreadyInserted(objReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(monitorReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(valReg, TR::RealRegister::AssignAny);
   ICFConditions->addPostConditionIfNotAlreadyInserted(tempReg, TR::RealRegister::AssignAny);

   TR::LabelSymbol *resLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);

   TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;
   TR_Debug *debugObj = cg->getDebug();
   TR::Instruction *instr = NULL;

   static bool enforcePrimitive = feGetEnv("EnforcePrimitiveLockRes") ? 1 : 0;
   bool isPrimitive = enforcePrimitive ? 1 : node->isPrimitiveLockedRegion();

   // Opcodes: 64-bit forms are only used on a 64-bit target whose lock word is not compressed.
   const bool use64b = comp->target().is64Bit() && !fej9->generateCompressedLockWord();
   TR::InstOpCode::Mnemonic loadOp       = use64b ? TR::InstOpCode::LG   : TR::InstOpCode::L;
   TR::InstOpCode::Mnemonic loadRegOp    = use64b ? TR::InstOpCode::LGR  : TR::InstOpCode::LR;
   TR::InstOpCode::Mnemonic orImmOp      = TR::InstOpCode::OILF;
   TR::InstOpCode::Mnemonic compareOp    = use64b ? TR::InstOpCode::CGR  : TR::InstOpCode::CR;
   TR::InstOpCode::Mnemonic compareImmOp = use64b ? TR::InstOpCode::CG   : TR::InstOpCode::C;
   TR::InstOpCode::Mnemonic addImmOp     = use64b ? TR::InstOpCode::AGHI : TR::InstOpCode::AHI;
   TR::InstOpCode::Mnemonic storeOp      = use64b ? TR::InstOpCode::STG  : TR::InstOpCode::ST;
   TR::InstOpCode::Mnemonic loadImmOp    = use64b ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI;
   TR::InstOpCode::Mnemonic andOp        = use64b ? TR::InstOpCode::NGR  : TR::InstOpCode::NR;
   TR::InstOpCode::Mnemonic andImmOp     = TR::InstOpCode::NILF;

   // Main path instruction sequence (non-primitive).
   //    L     monitorReg, #lwOffset(objectReg)
   //    LR    valReg,     metaReg
   //    OILF  valReg,     INC_DEC_VALUE | LR-Bit
   //    CRJ   valReg,     monitorReg, BNE, callLabel
   //    AHI   valReg,     -INC_DEC_VALUE
   //    ST    valReg,     #lwOffset(objectReg)

   generateRXInstruction(cg, loadOp, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg));
   if (!isPrimitive)
      {
      generateRRInstruction(cg, loadRegOp, node, tempReg, metaReg);
      generateRILInstruction(cg, orImmOp, node, tempReg, LOCK_RESERVATION_BIT + LOCK_INC_DEC_VALUE);
      instr = generateS390CompareAndBranchInstruction(cg, compareOp, node, tempReg, monitorReg,
         TR::InstOpCode::COND_BNE, resLabel, false, false);
      cg->generateDebugCounter("LockExit/LR/LRSuccessfull", 1, TR::DebugCounter::Undetermined);
      }
   else
      {
      generateRRInstruction(cg, loadRegOp, node, tempReg, monitorReg);
      generateRILInstruction(cg, andImmOp, node, tempReg, LOCK_RES_PRIMITIVE_EXIT_MASK);
      instr = generateS390CompareAndBranchInstruction(cg, compareImmOp, node, tempReg, LOCK_RESERVATION_BIT,
         TR::InstOpCode::COND_BNE, resLabel, false);
      }

   TR::LabelSymbol *helperReturnOOLLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg);
   if (debugObj)
      debugObj->addInstructionComment(instr, "Branch to OOL reservation exit sequence");
   outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(resLabel, doneOOLLabel, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);

   if (!isPrimitive)
      {
      // tempReg still holds (metaReg | LR-Bit + INC_DEC_VALUE); drop one recursion level and store it.
      generateRIInstruction(cg, addImmOp, node, tempReg, -LOCK_INC_DEC_VALUE);
      generateRXInstruction(cg, storeOp, node, tempReg, generateS390MemoryReference(objReg, lwOffset, cg));
      }

   if (outlinedSlowPath) // Means we have OOL
      {
      outlinedSlowPath->swapInstructionListsWithCompilation(); // Toggle instruction list
      TR::Instruction *temp = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, resLabel);
      if (debugObj)
         {
         if (isPrimitive)
            debugObj->addInstructionComment(temp, "Denotes start of OOL primitive reservation exit sequence");
         else
            debugObj->addInstructionComment(temp, "Denotes start of OOL non-primitive reservation exit sequence");
         }
      // XXX: Temporary fix, OOL instruction stream does not pick up live locals or monitors correctly.
      TR_ASSERT(!temp->getLiveLocals() && !temp->getLiveMonitors(), "Expecting first OOL instruction to not have live locals/monitors info");
      temp->setLiveLocals(instr->getLiveLocals());
      temp->setLiveMonitors(instr->getLiveMonitors());

      // Non-PRIMITIVE reservationLock exit sequence              PRIMITIVE reservationLock exit sequence
      //    LGFI  tempReg, LOCK_RES_OWNING                        TODO - PRIMITIVE reservationLock exit sequence
      //    NR    tempReg, monitorReg
      //    LR    valReg, metaReg
      //    AHI   valReg, LR-Bit
      //    CRJ   tempReg, valReg, BNE, callHelper
      //    LR    tempReg, monitorReg
      //    NILF  tempReg, LOCK_RES_NON_PRIMITIVE_EXIT_MASK
      //    BRC   BERC, callHelper
      //    AHI   monitorReg, -INC_DEC_VALUE
      //    ST    monitorReg, #lwOffset(objectReg)
      //    BRC   returnLabel
      // callHelper:
      //    BRASL R14, jitMonitorExit
      // returnLabel:

      generateRIInstruction(cg, loadImmOp, node, tempReg, ~(LOCK_RES_OWNING_COMPLEMENT));
      generateRRInstruction(cg, andOp, node, tempReg, monitorReg);
      generateRRInstruction(cg, loadRegOp, node, valReg, metaReg);
      generateRIInstruction(cg, addImmOp, node, valReg, (uintptr_t) LOCK_RESERVATION_BIT);

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
      cFlowRegionStart->setStartInternalControlFlow();
      generateS390CompareAndBranchInstruction(cg, compareOp, node, tempReg, valReg,
         TR::InstOpCode::COND_BNE, callLabel, false, false);

      generateRRInstruction(cg, loadRegOp, node, tempReg, monitorReg);
      generateRILInstruction(cg, andImmOp, node, tempReg,
         isPrimitive ? OBJECT_HEADER_LOCK_RECURSION_MASK : LOCK_RES_NON_PRIMITIVE_EXIT_MASK);

      if (isPrimitive)
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperReturnOOLLabel);
      else
         {
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, callLabel/*,conditions*/);
         }
      cg->generateDebugCounter("LockExit/LR/Recursive", 1, TR::DebugCounter::Undetermined);
      generateRIInstruction(cg, addImmOp, node, monitorReg,
         (uintptr_t) (isPrimitive ? LOCK_INC_DEC_VALUE : -LOCK_INC_DEC_VALUE) & 0x0000FFFF);
      generateRXInstruction(cg, storeOp, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg));

      if (!isPrimitive)
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, helperReturnOOLLabel);

      // call to jithelper
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callLabel);
      cg->generateDebugCounter("LockExit/LR/VMHelper", 1, TR::DebugCounter::Undetermined);

      // The helper is called within ICF, so combine the ICF and helper call dependencies at the merge label
      TR::RegisterDependencyConditions *deps = NULL;
      helperLink->buildDirectDispatch(node, &deps);
      TR::RegisterDependencyConditions *mergeConditions =
         new (cg->trHeapMemory()) TR::RegisterDependencyConditions(ICFConditions, deps, cg);

      // OOL return label
      instr = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperReturnOOLLabel, mergeConditions);
      helperReturnOOLLabel->setEndInternalControlFlow();
      if (debugObj)
         {
         debugObj->addInstructionComment(instr, "OOL reservation exit VMHelper return label");
         }

      instr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneOOLLabel);
      if (debugObj)
         {
         if (isPrimitive)
            debugObj->addInstructionComment(instr, "Denotes end of OOL primitive reservation exit sequence: return to mainline");
         else
            debugObj->addInstructionComment(instr, "Denotes end of OOL non-primitive reservation exit sequence: return to mainline");
         }

      outlinedSlowPath->swapInstructionListsWithCompilation(); // Toggle instruction list

      instr = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneOOLLabel);
      if (debugObj)
         debugObj->addInstructionComment(instr, "OOL reservation exit return label");

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
      }
   else
      {
      TR_ASSERT(0, "Not implemented: Lock reservation with Disable OOL.");
      }

   if (monitorReg)
      cg->stopUsingRegister(monitorReg);
   if (valReg)
      cg->stopUsingRegister(valReg);
   if (tempReg)
      cg->stopUsingRegister(tempReg);

   cg->decReferenceCount(node->getFirstChild());
   return NULL;
   }
7639
7640
// the following routine is a bit grotty - it has to determine if there are any GRA
7641
// assigned real registers that will conflict with real registers required by
7642
// instance-of generation.
7643
// it also has to verify that instance-of won't require more registers than are
7644
// available.
7645
static bool graDepsConflictWithInstanceOfDeps(TR::Node * depNode, TR::Node * node, TR::CodeGenerator * cg)
7646
{
7647
TR::Node * castClassNode = node->getSecondChild();
7648
TR::SymbolReference * castClassSymRef = castClassNode->getSymbolReference();
7649
TR::Compilation *comp = cg->comp();
7650
7651
bool testCastClassIsSuper = TR::TreeEvaluator::instanceOfOrCheckCastNeedSuperTest(node, cg);
7652
bool isFinalClass = (castClassSymRef == NULL) ? false : castClassSymRef->isNonArrayFinal(comp);
7653
bool needsHelperCall = needHelperCall(node, testCastClassIsSuper, isFinalClass);
7654
7655
if (maxInstanceOfPostDependencies() + depNode->getNumChildren() > cg->getMaximumNumberOfAssignableGPRs())
7656
{
7657
return true;
7658
}
7659
if (!needsHelperCall)
7660
{
7661
return false;
7662
}
7663
7664
for (int i=0; i<depNode->getNumChildren(); i++)
7665
{
7666
TR::Node * child = depNode->getChild(i);
7667
if ((child->getOpCodeValue() == TR::lRegLoad || child->getOpCodeValue() == TR::PassThrough)
7668
&& cg->comp()->target().is32Bit())
7669
{
7670
int32_t regIndex = child->getHighGlobalRegisterNumber();
7671
if (killedByInstanceOfHelper(regIndex, node, cg))
7672
{
7673
return true;
7674
}
7675
7676
regIndex = child->getLowGlobalRegisterNumber();
7677
if (killedByInstanceOfHelper(regIndex, node, cg))
7678
{
7679
return true;
7680
}
7681
}
7682
else
7683
{
7684
int32_t regIndex = child->getGlobalRegisterNumber();
7685
if (killedByInstanceOfHelper(regIndex, node, cg))
7686
{
7687
return true;
7688
}
7689
}
7690
}
7691
return false;
7692
}
7693
7694
/** \brief
7695
* Generates a dynamicCache test with helper call for instanceOf/ifInstanceOf node
7696
*
7697
* \details
7698
* This function generates a sequence to check per site cache for object class and cast class before calling out to jitInstanceOf helper
7699
*/
7700
static
7701
void genInstanceOfDynamicCacheAndHelperCall(TR::Node *node, TR::CodeGenerator *cg, TR::Register *castClassReg, TR::Register *objClassReg, TR::Register *resultReg, TR::RegisterDependencyConditions *deps, TR_S390ScratchRegisterManager *srm, TR::LabelSymbol *doneLabel, TR::LabelSymbol *helperCallLabel, TR::LabelSymbol *dynamicCacheTestLabel, TR::LabelSymbol *branchLabel, TR::LabelSymbol *trueLabel, TR::LabelSymbol *falseLabel, bool dynamicCastClass, bool generateDynamicCache, bool cacheCastClass, bool ifInstanceOf, bool trueFallThrough )
7702
{
7703
TR::Compilation *comp = cg->comp();
7704
bool needResult = resultReg != NULL;
7705
if (!castClassReg)
7706
castClassReg = cg->gprClobberEvaluate(node->getSecondChild());
7707
7708
int32_t maxOnsiteCacheSlots = comp->getOptions()->getMaxOnsiteCacheSlotForInstanceOf();
7709
int32_t sizeofJ9ClassFieldWithinReference = TR::Compiler->om.sizeofReferenceField();
7710
bool isTarget64Bit = comp->target().is64Bit();
7711
bool isCompressedRef = comp->useCompressedPointers();
7712
/* Layout of the writable data snippet
7713
* Case - 1 : Cast class is runtime variable
7714
* Case - 1A: 64 Bit Compressedrefs / 31-Bit JVM
7715
* -----------------------------------------------------------------------------------------
7716
* |Header | ObjectClassSlot-0 | CastClassSlot-0 |...| ObjectClassSlot-N | CastClassSlot-N |
7717
* -----------------------------------------------------------------------------------------
7718
* 0 8 12 ... 8n 8n+4
7719
* Case - 1B: 64 Bit Non Compressedrefs
7720
* -----------------------------------------------------------------------------------------
7721
* |Header | ObjectClassSlot-0 | CastClassSlot-0 |...| ObjectClassSlot-N | CastClassSlot-N |
7722
* -----------------------------------------------------------------------------------------
7723
* 0 16 24 ... 16n 16n+8
7724
* Case - 2 : Cast Class is resolved
7725
* Case - 2A: 64 Bit Compressedrefs / 31-Bit JVM
7726
* --------------------------------------------------------------------------
7727
* | Header | ObjectClassSlot-0 | ObjectClassSlot-1 |...| ObjectClassSlot-N |
7728
* --------------------------------------------------------------------------
7729
* 0 4 8 ... 4n
7730
* Case - 2B: 64 Bit Non Compressedrefs
7731
* --------------------------------------------------------------------------
7732
* | Header | ObjectClassSlot-0 | ObjectClassSlot-1 |...| ObjectClassSlot-N |
7733
* --------------------------------------------------------------------------
7734
* 0 8 16 ... 8n
7735
*
7736
* If there is only one cache slot, we will not have header.
7737
* Last bit of cached objectClass will set to 1 indicating false cast
7738
*
7739
* We can request the snippet size of power 2. Following Table summarizes bytes needed for corresponding number of cache slots.
7740
*
7741
* Following is the table for the number of bytes in snippet needed by each of the Cases mentioned above
7742
*
7743
* Number Of Slots | Case 1A | Case 1B | Case 2A | Case 2B |
7744
* 1 | 8 | 16 | 4 | 8 |
7745
* 2 | 16 | 64 | 16 | 32 |
7746
* 3 | 32 | 64 | 16 | 32 |
7747
* 4 | 64 | 128 | 32 | 64 |
7748
* 5 | 64 | 128 | 32 | 64 |
7749
* 6 | 64 | 128 | 32 | 64 |
7750
*
7751
*/
7752
7753
int32_t snippetSizeInBytes = ((cacheCastClass ? 2 : 1) * maxOnsiteCacheSlots * sizeofJ9ClassFieldWithinReference) + (sizeofJ9ClassFieldWithinReference * (maxOnsiteCacheSlots != 1) * (cacheCastClass ? 2 : 1));
7754
TR::Register *dynamicCacheReg = NULL;
7755
TR::Register *cachedObjectClass = NULL;
7756
TR::Register *cachedCastClass = NULL;
7757
TR::RegisterPair *cachedClassDataRegPair = NULL;
7758
7759
if (generateDynamicCache)
7760
{
7761
TR::S390WritableDataSnippet *dynamicCacheSnippet = NULL;
7762
int32_t requestedBytes = 1 << (int) (log2(snippetSizeInBytes-1)+1);
7763
if (comp->getOption(TR_TraceCG))
7764
{
7765
traceMsg(comp, "Number Of Dynamic Cache Slots = %d, Caching CastClass: %s\n"
7766
"Bytes needed for Snippet = %d, requested Bytes = %d\n",maxOnsiteCacheSlots, cacheCastClass ? "true" : "false", snippetSizeInBytes, requestedBytes);
7767
}
7768
7769
TR_ASSERT_FATAL(maxOnsiteCacheSlots <= 7, "Maximum 7 slots per site allowed because we use a fixed stack allocated buffer to construct the snippet\n");
7770
U_32 initialSnippet[32] = { 0 };
7771
initialSnippet[0] = static_cast<U_32>( sizeofJ9ClassFieldWithinReference * (cacheCastClass ? 2 : 1) );
7772
dynamicCacheSnippet = (TR::S390WritableDataSnippet*)cg->CreateConstant(node, initialSnippet, requestedBytes, true);
7773
7774
int32_t currentIndex = maxOnsiteCacheSlots > 1 ? sizeofJ9ClassFieldWithinReference * (cacheCastClass ? 2 : 1) : 0;
7775
7776
dynamicCacheReg = srm->findOrCreateScratchRegister();
7777
7778
// Start of the Dynamic Cache Test
7779
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, dynamicCacheTestLabel);
7780
generateRILInstruction(cg, TR::InstOpCode::LARL, node, dynamicCacheReg, dynamicCacheSnippet, 0);
7781
7782
// For a 64-bit non-compressedrefs JVM, we need to make sure that the load of the associated class data from the cache appears quadword concurrent as observed by other CPUs.
7783
// For that reason, We need to use LPQ/STPQ instruction which needs register pair.
7784
// In case of 64 bit compressedrefs or 31-Bit JVM, size of J9Class pointer takes 4 bytes only, so in loading associated class data from the cache we can use instruction for 8 byte load/store.
7785
if (cacheCastClass && isTarget64Bit && !isCompressedRef)
7786
{
7787
cachedObjectClass = cg->allocateRegister();
7788
cachedCastClass = cg->allocateRegister();
7789
cachedClassDataRegPair = cg->allocateConsecutiveRegisterPair(cachedCastClass, cachedObjectClass);
7790
deps->addPostCondition(cachedObjectClass, TR::RealRegister::LegalEvenOfPair);
7791
deps->addPostCondition(cachedCastClass, TR::RealRegister::LegalOddOfPair);
7792
deps->addPostCondition(cachedClassDataRegPair, TR::RealRegister::EvenOddPair);
7793
}
7794
else
7795
{
7796
cachedObjectClass = srm->findOrCreateScratchRegister();
7797
}
7798
/**
7799
* Instructions generated for dynamicCache Test are as follows.
7800
* dynamicCacheTestLabel :
7801
* LARL dynamicCacheReg, dynamicCacheSnippet
7802
* if (cacheCastClass)
7803
* if (isCompressedRef || targetIs31Bit)
7804
* LG cachedData, @(dynamicCacheReg, currentIndex)
7805
* CLRJ castClass, cachedData, COND_BNE, gotoNextTest
7806
* RISBG cachaedData, cachedData, 32, 191, 32 // cachedData >> 32
7807
* else
7808
* LPQ cachedObjectClass:cachedCastClass, @(dynamicCacheReg, currentIndex)
7809
* CLGRJ castClass, cachedCastClass, COND_BNE, gotoNextTest
7810
* else
7811
* Load cachedObjectClass, @(dynamicCacheReg, currentIndex)
7812
* XOR cachedData/cachedObjectClass, objClass
7813
* if (cachedData/cachedObjectClass == 0) gotoTrueLabel
7814
* else if (cachedData/cachedObjectClass == 1) gotoFalseLabel
7815
* gotoNextTest:
7816
*/
7817
7818
TR::LabelSymbol *gotoNextTest = NULL;
7819
for (auto i=0; i<maxOnsiteCacheSlots; i++)
7820
{
7821
if (cacheCastClass)
7822
{
7823
gotoNextTest = (i+1 == maxOnsiteCacheSlots) ? helperCallLabel : generateLabelSymbol(cg);
7824
if (isTarget64Bit && !isCompressedRef)
7825
{
7826
generateRXInstruction(cg, TR::InstOpCode::LPQ, node, cachedClassDataRegPair, generateS390MemoryReference(dynamicCacheReg, currentIndex, cg));
7827
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassReg, cachedCastClass, TR::InstOpCode::COND_BNE, gotoNextTest, false);
7828
}
7829
else
7830
{
7831
generateRXInstruction(cg, TR::InstOpCode::LG, node, cachedObjectClass, generateS390MemoryReference(dynamicCacheReg, currentIndex, cg));
7832
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CLR, node, castClassReg, cachedObjectClass, TR::InstOpCode::COND_BNE, gotoNextTest, false);
7833
generateRIEInstruction(cg, TR::InstOpCode::RISBG, node, cachedObjectClass, cachedObjectClass, 32, 191, 32);
7834
}
7835
}
7836
else
7837
{
7838
generateRXInstruction(cg, isTarget64Bit ? (isCompressedRef ? TR::InstOpCode::LLGF : TR::InstOpCode::LG) : TR::InstOpCode::L, node, cachedObjectClass, generateS390MemoryReference(dynamicCacheReg,currentIndex,cg));
7839
}
7840
7841
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node,cachedObjectClass, objClassReg);
7842
7843
if (i+1 == maxOnsiteCacheSlots)
7844
{
7845
if (trueFallThrough)
7846
{
7847
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, cachedObjectClass, 1, TR::InstOpCode::COND_BE, falseLabel, false, false);
7848
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperCallLabel);
7849
}
7850
else
7851
{
7852
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, trueLabel);
7853
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, cachedObjectClass, 1, TR::InstOpCode::COND_BNE, helperCallLabel, false, false);
7854
}
7855
}
7856
else
7857
{
7858
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, trueLabel);
7859
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, cachedObjectClass, 1, TR::InstOpCode::COND_BE, falseLabel, false, false);
7860
}
7861
7862
if (gotoNextTest && gotoNextTest != helperCallLabel)
7863
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, gotoNextTest);
7864
7865
currentIndex += ( cacheCastClass ? 2 : 1 ) * sizeofJ9ClassFieldWithinReference;
7866
}
7867
if (!cacheCastClass || !isTarget64Bit || isCompressedRef)
7868
srm->reclaimScratchRegister(cachedObjectClass);
7869
}
7870
else if (!dynamicCastClass)
7871
{
7872
// If dynamic Cache Test is not generated and it is not dynamicCastClass, we need to generate following branch
7873
// In cases of dynamic cache test / dynamic Cast Class, we would have a branch to helper call at appropriate location.
7874
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, helperCallLabel);
7875
}
7876
7877
TR_S390OutOfLineCodeSection *outlinedSlowPath = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(helperCallLabel, doneLabel, cg);
7878
cg->getS390OutOfLineCodeSectionList().push_front(outlinedSlowPath);
7879
outlinedSlowPath->swapInstructionListsWithCompilation();
7880
7881
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperCallLabel);
7882
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOf/(%s)/Helper", comp->signature()),1,TR::DebugCounter::Undetermined);
7883
J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
7884
resultReg = helperLink->buildDirectDispatch(node, resultReg);
7885
7886
if (generateDynamicCache)
7887
{
7888
TR::RegisterDependencyConditions *OOLConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 9, cg);
7889
if (cacheCastClass && isTarget64Bit && !comp->useCompressedPointers())
7890
{
7891
OOLConditions->addPostCondition(cachedObjectClass, TR::RealRegister::LegalEvenOfPair);
7892
OOLConditions->addPostCondition(cachedCastClass, TR::RealRegister::LegalOddOfPair);
7893
OOLConditions->addPostCondition(cachedClassDataRegPair, TR::RealRegister::EvenOddPair);
7894
}
7895
OOLConditions->addPostCondition(objClassReg, TR::RealRegister::AssignAny);
7896
OOLConditions->addPostCondition(castClassReg, TR::RealRegister::AssignAny);
7897
OOLConditions->addPostCondition(resultReg, TR::RealRegister::AssignAny);
7898
OOLConditions->addPostCondition(dynamicCacheReg, TR::RealRegister::AssignAny);
7899
7900
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
7901
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
7902
cFlowRegionStart->setStartInternalControlFlow();
7903
TR::LabelSymbol *skipSettingBitForFalseResult = generateLabelSymbol(cg);
7904
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, resultReg, 1, TR::InstOpCode::COND_BE, skipSettingBitForFalseResult, false);
7905
// We will set the last bit of objectClassRegister to 1 if helper returns false.
7906
generateRIInstruction(cg, TR::InstOpCode::OILL, node, objClassReg, 0x1);
7907
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, skipSettingBitForFalseResult);
7908
TR::MemoryReference *updateMemRef = NULL;
7909
// Update cache sequence
7910
7911
TR::Register *offsetRegister = NULL;
7912
if (maxOnsiteCacheSlots == 1)
7913
{
7914
updateMemRef = generateS390MemoryReference(dynamicCacheReg, 0, cg);
7915
}
7916
else
7917
{
7918
offsetRegister = cg->allocateRegister();
7919
OOLConditions->addPostCondition(offsetRegister, TR::RealRegister::AssignAny);
7920
generateRXInstruction(cg, TR::InstOpCode::LLGF, node, offsetRegister, generateS390MemoryReference(dynamicCacheReg,0,cg));
7921
updateMemRef = generateS390MemoryReference(dynamicCacheReg, offsetRegister, 0, cg);
7922
}
7923
7924
if (cacheCastClass)
7925
{
7926
if (isTarget64Bit && !isCompressedRef)
7927
{
7928
generateRRInstruction(cg, TR::InstOpCode::LGR, node, cachedObjectClass, objClassReg);
7929
generateRRInstruction(cg, TR::InstOpCode::LGR, node, cachedCastClass, castClassReg);
7930
generateRXInstruction(cg, TR::InstOpCode::STPQ, node, cachedClassDataRegPair, updateMemRef);
7931
}
7932
else
7933
{
7934
TR::Register *storeDataCacheReg = castClassReg;
7935
if (!isTarget64Bit)
7936
{
7937
storeDataCacheReg = cg->allocateRegister();
7938
OOLConditions->addPostCondition(storeDataCacheReg, TR::RealRegister::AssignAny);
7939
generateRRInstruction(cg, TR::InstOpCode::LGFR, node, storeDataCacheReg, castClassReg);
7940
}
7941
generateRIEInstruction(cg, TR::InstOpCode::RISBG, node, storeDataCacheReg, objClassReg, 0, 31, 32);
7942
generateRXInstruction(cg, TR::InstOpCode::STG, node, storeDataCacheReg, updateMemRef);
7943
if (!isTarget64Bit)
7944
cg->stopUsingRegister(storeDataCacheReg);
7945
}
7946
}
7947
else
7948
{
7949
generateRXInstruction(cg, sizeofJ9ClassFieldWithinReference == 8 ? TR::InstOpCode::STG : TR::InstOpCode::ST, node, objClassReg, updateMemRef);
7950
}
7951
7952
if (maxOnsiteCacheSlots != 1)
7953
{
7954
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, offsetRegister, static_cast<int32_t>(cacheCastClass?sizeofJ9ClassFieldWithinReference*2:sizeofJ9ClassFieldWithinReference));
7955
TR::LabelSymbol *skipResetOffsetLabel = generateLabelSymbol(cg);
7956
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, offsetRegister, snippetSizeInBytes, TR::InstOpCode::COND_BNE, skipResetOffsetLabel, false);
7957
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode() , node, offsetRegister, sizeofJ9ClassFieldWithinReference * (cacheCastClass ? 2 : 1));
7958
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, skipResetOffsetLabel);
7959
generateRXInstruction(cg, TR::InstOpCode::ST, node, offsetRegister, generateS390MemoryReference(dynamicCacheReg,0,cg));
7960
}
7961
7962
TR::LabelSymbol *doneCacheUpdateLabel = generateLabelSymbol(cg);
7963
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneCacheUpdateLabel, OOLConditions);
7964
doneCacheUpdateLabel->setEndInternalControlFlow();
7965
srm->reclaimScratchRegister(dynamicCacheReg);
7966
if (offsetRegister != NULL)
7967
cg->stopUsingRegister(offsetRegister);
7968
}
7969
7970
// WARNING: It is not recommended to have two exit point in OOL section
7971
// In this case we need it in case of ifInstanceOf to save additional complex logic in mainline section
7972
// In case if there is GLRegDeps attached to ifInstanceOf node, it will be evaluated and attached as post dependency conditions
7973
// at the end of node
7974
// We can take a risk of having two exit points in OOL here as there is no other register instruction between them
7975
if (ifInstanceOf)
7976
{
7977
generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, resultReg, resultReg);
7978
if (trueFallThrough)
7979
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, branchLabel);
7980
else
7981
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, branchLabel);
7982
}
7983
7984
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneLabel);
7985
outlinedSlowPath->swapInstructionListsWithCompilation();
7986
if (!needResult)
7987
cg->stopUsingRegister(resultReg);
7988
}
7989
7990
/** \brief Generates inlined sequence of tests for instanceOf/ifInstanceOf node.
7991
* \details
7992
* It calls common function to generate list of inlined tests and generates instructions handling both instanceOf and ifInstanceOf case.
7993
*/
7994
TR::Register *
7995
J9::Z::TreeEvaluator::VMgenCoreInstanceofEvaluator(TR::Node * node, TR::CodeGenerator * cg, TR::LabelSymbol *trueLabel, TR::LabelSymbol *falseLabel,
7996
bool initialResult, bool needResult, TR::RegisterDependencyConditions *graDeps, bool ifInstanceOf)
7997
{
7998
TR::Compilation *comp = cg->comp();
7999
TR_J9VMBase *fej9 = (TR_J9VMBase *) (comp->fe());
8000
TR_OpaqueClassBlock *compileTimeGuessClass;
8001
int32_t maxProfiledClasses = comp->getOptions()->getCheckcastMaxProfiledClassTests();
8002
traceMsg(comp, "%s:Maximum Profiled Classes = %d\n", node->getOpCode().getName(),maxProfiledClasses);
8003
InstanceOfOrCheckCastProfiledClasses* profiledClassesList = (InstanceOfOrCheckCastProfiledClasses*)alloca(maxProfiledClasses * sizeof(InstanceOfOrCheckCastProfiledClasses));
8004
8005
TR::Node *objectNode = node->getFirstChild();
8006
TR::Node *castClassNode = node->getSecondChild();
8007
8008
TR::Register *objectReg = cg->evaluate(objectNode);
8009
TR::Register *objClassReg = NULL;
8010
TR::Register *resultReg = NULL;
8011
TR::Register *castClassReg = NULL;
8012
8013
// In the evaluator, We need at maximum two scratch registers, so creating a pool of scratch registers with 2 size.
8014
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager(2);
8015
bool topClassWasCastClass=false;
8016
float topClassProbability=0.0;
8017
InstanceOfOrCheckCastSequences sequences[InstanceOfOrCheckCastMaxSequences];
8018
uint32_t numberOfProfiledClass;
8019
uint32_t numSequencesRemaining = calculateInstanceOfOrCheckCastSequences(node, sequences, &compileTimeGuessClass, cg, profiledClassesList, &numberOfProfiledClass, maxProfiledClasses, &topClassProbability, &topClassWasCastClass);
8020
bool outLinedSuperClass = false;
8021
TR::Instruction *cursor = NULL;
8022
TR::Instruction *gcPoint = NULL;
8023
8024
// We load resultReg with the parameter initialResult when we need result as outcome for routine
8025
if (needResult)
8026
{
8027
resultReg = cg->allocateRegister();
8028
cursor = generateRIInstruction(cg,TR::InstOpCode::getLoadHalfWordImmOpCode(),node,resultReg,static_cast<int32_t>(initialResult));
8029
}
8030
8031
TR_S390OutOfLineCodeSection *outlinedSlowPath = NULL;
8032
8033
TR::LabelSymbol *doneOOLLabel = NULL;
8034
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
8035
TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
8036
TR::LabelSymbol *doneTestCacheLabel = NULL;
8037
TR::LabelSymbol *oppositeResultLabel = generateLabelSymbol(cg);
8038
TR::LabelSymbol *helperTrueLabel = NULL;
8039
TR::LabelSymbol *helperFalseLabel = NULL;
8040
TR::LabelSymbol *helperReturnLabel = NULL;
8041
TR::LabelSymbol *dynamicCacheTestLabel = NULL;
8042
TR::LabelSymbol *branchLabel = NULL;
8043
TR::LabelSymbol *jmpLabel = NULL;
8044
8045
TR::InstOpCode::S390BranchCondition branchCond;
8046
TR_Debug *debugObj = cg->getDebug();
8047
bool trueFallThrough;
8048
bool dynamicCastClass = false;
8049
bool generateGoToFalseBRC = true;
8050
8051
if (ifInstanceOf)
8052
{
8053
if (trueLabel)
8054
{
8055
traceMsg(comp,"IfInstanceOf Node : Branch True\n");
8056
falseLabel = (needResult) ? oppositeResultLabel : doneLabel;
8057
branchLabel = trueLabel;
8058
branchCond = TR::InstOpCode::COND_BE;
8059
jmpLabel = falseLabel;
8060
trueFallThrough = false;
8061
}
8062
else
8063
{
8064
traceMsg(comp,"IfInstanceOf Node : Branch False\n");
8065
trueLabel = (needResult)? oppositeResultLabel : doneLabel;
8066
branchLabel = falseLabel;
8067
branchCond = TR::InstOpCode::COND_BNE;
8068
jmpLabel = trueLabel;
8069
trueFallThrough = true;
8070
}
8071
}
8072
else
8073
{
8074
if (initialResult)
8075
{
8076
trueLabel = doneLabel;
8077
falseLabel = oppositeResultLabel;
8078
branchCond = TR::InstOpCode::COND_BE;
8079
trueFallThrough = false;
8080
}
8081
else
8082
{
8083
trueLabel = oppositeResultLabel;
8084
falseLabel = doneLabel;
8085
branchCond = TR::InstOpCode::COND_BNE;
8086
trueFallThrough = true;
8087
}
8088
branchLabel = doneLabel;
8089
jmpLabel = oppositeResultLabel;
8090
}
8091
8092
bool generateDynamicCache = false;
8093
bool cacheCastClass = false;
8094
InstanceOfOrCheckCastSequences *iter = &sequences[0];
8095
while (numSequencesRemaining > 1 || (numSequencesRemaining==1 && *iter!=HelperCall))
8096
{
8097
switch (*iter)
8098
{
8099
case EvaluateCastClass:
8100
TR_ASSERT(!castClassReg, "Cast class already evaluated");
8101
if (comp->getOption(TR_TraceCG))
8102
traceMsg(comp, "%s: Class Not Evaluated. Evaluating it\n", node->getOpCode().getName());
8103
castClassReg = cg->gprClobberEvaluate(node->getSecondChild());
8104
break;
8105
case LoadObjectClass:
8106
if (comp->getOption(TR_TraceCG))
8107
traceMsg(comp, "%s: Loading Object Class\n",node->getOpCode().getName());
8108
objClassReg = cg->allocateRegister();
8109
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objClassReg, generateS390MemoryReference(objectReg, static_cast<int32_t>(TR::Compiler->om.offsetOfObjectVftField()), cg), NULL);
8110
break;
8111
case GoToTrue:
8112
traceMsg(comp, "%s: Emitting GoToTrue\n", node->getOpCode().getName());
8113
// If fall through in True (Initial Result False)
8114
//if (trueLabel != oppositeResultLabel)
8115
if (trueLabel != oppositeResultLabel || (ifInstanceOf && !trueFallThrough))
8116
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BC, node, trueLabel);
8117
break;
8118
case GoToFalse:
8119
traceMsg(comp, "%s: Emitting GoToFalse\n", node->getOpCode().getName());
8120
// There is only one case when we generate a GoToFalse branch here, when we have a primitive Cast Class other wise all tests take care of generating terminating sequence
8121
if (generateGoToFalseBRC)
8122
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BC, node, falseLabel);
8123
break;
8124
case NullTest:
8125
{
8126
if (comp->getOption(TR_TraceCG))
8127
traceMsg(comp, "%s: Emitting NullTest\n", node->getOpCode().getName());
8128
TR_ASSERT(!objectNode->isNonNull(), "Object is known to be non-null, no need for a null test");
8129
const bool isCCSet = genInstanceOfOrCheckCastNullTest(node, cg, objectReg);
8130
8131
if (isCCSet)
8132
{
8133
// If object is Null, and initialResult is true, go to oppositeResultLabel else goto done Label
8134
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, falseLabel);
8135
}
8136
}
8137
break;
8138
case ClassEqualityTest:
8139
if (comp->getOption(TR_TraceCG))
8140
traceMsg(comp, "%s: Emitting Class Equality Test\n", node->getOpCode().getName());
8141
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Equality", comp->signature()),1,TR::DebugCounter::Undetermined);
8142
/* #IF NextTest = GoToFalse
8143
* branchCond = ifInstanceOf ? (!trueFallThrough ? COND_BE : COND_BNE ) : (init=true ? COND_BE : COND_BNE )
8144
* branchLabel = ifInstanceOf ? (!trueFallThrough ? trueLabel : falseLabel ) : doneLabel
8145
* CGRJ castClassReg, objClassReg, branchCond, branchLabel
8146
* #ELSE
8147
* CGRJ castClassReg, objClassReg, COND_BE, trueLabel
8148
*/
8149
if ( *(iter+1) == GoToFalse )
8150
{
8151
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassReg, objClassReg, branchCond, branchLabel, false, false);
8152
generateGoToFalseBRC = false;
8153
}
8154
else
8155
{
8156
cursor = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, castClassReg, objClassReg, TR::InstOpCode::COND_BE, trueLabel, false, false);
8157
generateGoToFalseBRC = true;
8158
}
8159
if (debugObj)
8160
debugObj->addInstructionComment(cursor, "ClassEqualityTest");
8161
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/EqualityFail", comp->signature()),1,TR::DebugCounter::Undetermined);
8162
break;
8163
case SuperClassTest:
8164
{
8165
/*** genInstanceOfOrCheckcastSuperClassTest generates sequences for Super Class Test handling all cases when we have a normal static class or dynamic class
8166
* Mostly this will be last test except in case of dynamic cast class.
8167
* case-1 instanceof , initial Result = false: BRC 0x8, doneLabel
8168
* case-2 instanceof , initial Result = true: BRC 0x6, doneLabel
8169
* case-3 ifInstanceOf , trueLabel == branchLabel : BRC 0x8, branchLabel
8170
* case-4 ifInstanceOf , falseLabel == branchLabel : BRC 0x6, branchLabel
8171
*/
8172
int32_t castClassDepth = castClassNode->getSymbolReference()->classDepth(comp);
8173
dynamicCacheTestLabel = generateLabelSymbol(cg);
8174
if (comp->getOption(TR_TraceCG))
8175
traceMsg(comp, "%s: Emitting Super Class Test, Cast Class Depth = %d\n", node->getOpCode().getName(),castClassDepth);
8176
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/SuperClassTest", comp->signature()),1,TR::DebugCounter::Undetermined);
8177
// For dynamic cast class genInstanceOfOrCheckcastSuperClassTest will generate branch to either helper call or dynamicCacheTest depending on the next generated test.
8178
dynamicCastClass = genInstanceOfOrCheckcastSuperClassTest(node, cg, objClassReg, castClassReg, castClassDepth, falseLabel, *(iter+1) == DynamicCacheDynamicCastClassTest ? dynamicCacheTestLabel : callLabel, srm);
8179
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, branchCond, node, branchLabel);
8180
// If next test is dynamicCacheTest then generate a Branch to Skip it.
8181
if (*(iter+1) == DynamicCacheDynamicCastClassTest)
8182
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BC, node, jmpLabel);
8183
generateGoToFalseBRC=false;
8184
break;
8185
}
8186
/** Following switch case generates sequence of instructions for profiled class test for instanceOf node
8187
* arbitraryClassReg1 <= profiledClass
8188
* if (arbitraryClassReg1 == objClassReg)
8189
* profiledClassIsInstanceOfCastClass ? return true : return false
8190
* else
8191
* continue to NextTest
8192
*/
8193
case ProfiledClassTest:
8194
{
8195
if (comp->getOption(TR_TraceCG))
8196
traceMsg(comp, "%s: Emitting ProfiledClass Test\n", node->getOpCode().getName());
8197
TR::Register *arbitraryClassReg1 = srm->findOrCreateScratchRegister();
8198
uint8_t numPICs = 0;
8199
TR::Instruction *temp= NULL;
8200
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/Profile", comp->signature()),1,TR::DebugCounter::Undetermined);
8201
while (numPICs < numberOfProfiledClass)
8202
{
8203
if (cg->needClassAndMethodPointerRelocations())
8204
temp = generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, arbitraryClassReg1, (uintptr_t) profiledClassesList[numPICs].profiledClass, TR_ClassPointer, NULL, NULL, NULL);
8205
else
8206
temp = generateRILInstruction(cg, TR::InstOpCode::LARL, node, arbitraryClassReg1, profiledClassesList[numPICs].profiledClass);
8207
8208
// Adding profiled class to the static PIC slots.
8209
if (fej9->isUnloadAssumptionRequired((TR_OpaqueClassBlock *)(profiledClassesList[numPICs].profiledClass), comp->getCurrentMethod()))
8210
comp->getStaticPICSites()->push_front(temp);
8211
// Adding profiled class to static HCR PIC sites.
8212
if (cg->wantToPatchClassPointer(profiledClassesList[numPICs].profiledClass, node))
8213
comp->getStaticHCRPICSites()->push_front(temp);
8214
8215
if (profiledClassesList[numPICs].isProfiledClassInstanceOfCastClass)
8216
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, arbitraryClassReg1, objClassReg, TR::InstOpCode::COND_BE, trueLabel, false, false);
8217
else
8218
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, arbitraryClassReg1, objClassReg, TR::InstOpCode::COND_BE, falseLabel, false, false);
8219
numPICs++;
8220
}
8221
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/ProfileFail", comp->signature()),1,TR::DebugCounter::Undetermined);
8222
srm->reclaimScratchRegister(arbitraryClassReg1);
8223
break;
8224
}
8225
/** In case of Single Implementer of the Interface,
8226
* arbitraryClassReg1 <= compileTimeGuessClass
8227
* CGRJ arbitraryClassReg,objClassReg,0x8,trueLabel
8228
*/
8229
case CompileTimeGuessClassTest:
8230
{
8231
TR::Register *arbitraryClassReg2 = srm->findOrCreateScratchRegister();
8232
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/compTimeGuess", comp->signature()),1,TR::DebugCounter::Undetermined);
8233
genLoadAddressConstant(cg, node, (uintptr_t)compileTimeGuessClass, arbitraryClassReg2);
8234
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, arbitraryClassReg2, objClassReg, TR::InstOpCode::COND_BE, trueLabel, false, false);
8235
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/compTimeGuessFail", comp->signature()),1,TR::DebugCounter::Undetermined);
8236
srm->reclaimScratchRegister(arbitraryClassReg2);
8237
break;
8238
}
8239
case ArrayOfJavaLangObjectTest:
8240
{
8241
if (comp->getOption(TR_TraceCG))
8242
traceMsg(comp,"Emitting ArrayOfJavaLangObjectTest\n",node->getOpCode().getName());
8243
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "instanceOfStats/(%s)/ArrayTest", comp->signature()),1,TR::DebugCounter::Undetermined);
8244
genInstanceOfOrCheckcastArrayOfJavaLangObjectTest(node, cg, objClassReg, falseLabel, srm) ;
8245
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, branchCond, node, branchLabel);
8246
generateGoToFalseBRC = false;
8247
break;
8248
}
8249
/** Following switch case generates sequence of instructions for cast class cache test
8250
* Load castClassCacheReg, offsetOf(J9Class,castClassCache)
8251
* castClassCacheReg <= castClassCacheReg XOR castClassReg
8252
* if castClassCacheReg == 0 (Success)
8253
* return true
8254
* else if castClassCacheReg == 1 (Failed instanceOf)
8255
* return false
8256
* else
8257
* continue
8258
*/
8259
case CastClassCacheTest:
8260
{
8261
doneTestCacheLabel = generateLabelSymbol(cg);
8262
if (comp->getOption(TR_TraceCG))
8263
traceMsg(comp,"Emitting CastClassCacheTest\n",node->getOpCode().getName());
8264
TR::Register *castClassCacheReg = srm->findOrCreateScratchRegister();
8265
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, castClassCacheReg,
8266
generateS390MemoryReference(objClassReg, offsetof(J9Class, castClassCache), cg));
8267
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, castClassCacheReg, castClassReg);
8268
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, trueLabel);
8269
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, castClassCacheReg, 1, TR::InstOpCode::COND_BE, falseLabel, false, false);
8270
srm->reclaimScratchRegister(castClassCacheReg);
8271
break;
8272
}
8273
case DynamicCacheObjectClassTest:
8274
{
8275
generateDynamicCache = true;
8276
dynamicCacheTestLabel = generateLabelSymbol(cg);
8277
if (comp->getOption(TR_TraceCG))
8278
traceMsg(comp,"Emitting Dynamic Cache for ObjectClass only\n",node->getOpCode().getName());
8279
break;
8280
}
8281
case DynamicCacheDynamicCastClassTest:
8282
{
8283
generateDynamicCache = true;
8284
cacheCastClass = true;
8285
TR_ASSERT(dynamicCacheTestLabel!=NULL, "DynamicCacheDynamicCastClassTest: dynamicCacheTestLabel should be generated by SuperClassTest before reaching this point");
8286
if (comp->getOption(TR_TraceCG))
8287
traceMsg(comp,"Emitting Dynamic Cache for CastClass and ObjectClass\n",node->getOpCode().getName());
8288
break;
8289
}
8290
case HelperCall:
8291
TR_ASSERT(false, "Doesn't make sense, HelperCall should be the terminal sequence");
8292
break;
8293
default:
8294
break;
8295
}
8296
--numSequencesRemaining;
8297
++iter;
8298
}
8299
8300
TR::RegisterDependencyConditions *conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(graDeps, 0, 8+srm->numAvailableRegisters(), cg);
8301
if (numSequencesRemaining > 0 && *iter == HelperCall)
8302
genInstanceOfDynamicCacheAndHelperCall(node, cg, castClassReg, objClassReg, resultReg, conditions, srm, doneLabel, callLabel, dynamicCacheTestLabel, branchLabel, trueLabel, falseLabel, dynamicCastClass, generateDynamicCache, cacheCastClass, ifInstanceOf, trueFallThrough);
8303
8304
if (needResult)
8305
{
8306
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oppositeResultLabel);
8307
generateRIInstruction(cg,TR::InstOpCode::getLoadHalfWordImmOpCode(),node,resultReg,static_cast<int32_t>(!initialResult));
8308
}
8309
8310
if (objClassReg)
8311
conditions->addPostConditionIfNotAlreadyInserted(objClassReg, TR::RealRegister::AssignAny);
8312
if (needResult)
8313
conditions->addPostCondition(resultReg, TR::RealRegister::AssignAny);
8314
conditions->addPostConditionIfNotAlreadyInserted(objectReg, TR::RealRegister::AssignAny);
8315
if (castClassReg)
8316
conditions->addPostConditionIfNotAlreadyInserted(castClassReg, TR::RealRegister::AssignAny);
8317
srm->addScratchRegistersToDependencyList(conditions);
8318
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
8319
if (objClassReg)
8320
cg->stopUsingRegister(objClassReg);
8321
if (castClassReg)
8322
cg->stopUsingRegister(castClassReg);
8323
srm->stopUsingRegisters();
8324
cg->decReferenceCount(objectNode);
8325
cg->decReferenceCount(castClassNode);
8326
TR::Register *ret = needResult ? resultReg : NULL;
8327
conditions->stopUsingDepRegs(cg, objectReg, ret);
8328
if (needResult)
8329
node->setRegister(resultReg);
8330
return resultReg;
8331
}
8332
8333
/** \brief Sets up parameters for VMgenCoreInstanceOfEvaluator when we have a ifInstanceOf node
8334
* \details
8335
* For ifInstanceOf node, it checks if the node has GRA dependency node as third child and if it has, calls normal instanceOf
8336
* Otherwise calls VMgenCoreInstanceOfEvaluator with parameters to generate instructions for ifInstanceOf.
8337
*/
8338
TR::Register *
8339
J9::Z::TreeEvaluator::VMifInstanceOfEvaluator(TR::Node * node, TR::CodeGenerator * cg)
8340
{
8341
TR::Node * graDepNode = NULL;
8342
TR::ILOpCodes opCode = node->getOpCodeValue();
8343
TR::Node * instanceOfNode = node->getFirstChild();
8344
TR::Node * valueNode = node->getSecondChild();
8345
int32_t value = valueNode->getInt();
8346
TR::LabelSymbol * branchLabel = node->getBranchDestination()->getNode()->getLabel();
8347
TR::RegisterDependencyConditions * graDeps = NULL;
8348
8349
TR::LabelSymbol * falseLabel = NULL;
8350
TR::LabelSymbol * trueLabel = NULL;
8351
8352
if (node->getNumChildren() == 3)
8353
{
8354
graDepNode = node->getChild(2);
8355
}
8356
8357
if (graDepNode && graDepsConflictWithInstanceOfDeps(graDepNode, instanceOfNode, cg))
8358
{
8359
return (TR::Register*) 1;
8360
}
8361
8362
bool needResult = (instanceOfNode->getReferenceCount() > 1);
8363
8364
if ((opCode == TR::ificmpeq && value == 1) || (opCode != TR::ificmpeq && value == 0))
8365
trueLabel = branchLabel;
8366
else
8367
falseLabel = branchLabel;
8368
8369
if (graDepNode)
8370
{
8371
cg->evaluate(graDepNode);
8372
graDeps = generateRegisterDependencyConditions(cg, graDepNode, 0);
8373
}
8374
bool initialResult = trueLabel != NULL;
8375
8376
VMgenCoreInstanceofEvaluator(instanceOfNode, cg, trueLabel, falseLabel, initialResult, needResult, graDeps, true);
8377
8378
cg->decReferenceCount(instanceOfNode);
8379
node->setRegister(NULL);
8380
8381
return NULL;
8382
}
8383
8384
/**
8385
* Generates a quick runtime test for a valueType/valueBased object and, if the object is a value type or is value based, generates a branch to the helper call
8386
*
8387
* @param node monent/monexit node
8388
* @param mergeLabel Label pointing to merge point
8389
* @param helperCallLabel Label pointing to helper call dispatch sequence.
8390
* @param cg Codegenerator object
8391
* @return Returns a register containing objectClassPointer
8392
*/
8393
static TR::Register*
8394
generateCheckForValueMonitorEnterOrExit(TR::Node *node, TR::LabelSymbol* mergeLabel, TR::LabelSymbol *helperCallLabel, TR::CodeGenerator *cg)
8395
{
8396
TR::Register *objReg = cg->evaluate(node->getFirstChild());
8397
TR::Register *objectClassReg = cg->allocateRegister();
8398
8399
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objectClassReg, generateS390MemoryReference(objReg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
8400
8401
TR::Register *tempReg = cg->allocateRegister();
8402
generateLoad32BitConstant(cg, node, J9_CLASS_DISALLOWS_LOCKING_FLAGS, tempReg, false);
8403
8404
TR::MemoryReference *classFlagsMemRef = generateS390MemoryReference(objectClassReg, static_cast<uint32_t>(static_cast<TR_J9VMBase *>(cg->comp()->fe())->getOffsetOfClassFlags()), cg);
8405
generateRXInstruction(cg, TR::InstOpCode::N, node, tempReg, classFlagsMemRef);
8406
8407
bool generateOOLSection = helperCallLabel == NULL;
8408
if (generateOOLSection)
8409
helperCallLabel = generateLabelSymbol(cg);
8410
8411
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRNZ, node, helperCallLabel);
8412
8413
// TODO: There is now the possibility of multiple distinct OOL sections with helper calls to be generated when
8414
// evaluating the TR::monent or TR::monexit nodes:
8415
//
8416
// 1. Monitor cache lookup OOL
8417
// 2. Lock reservation OOL
8418
// 3. Value types or value based object OOL
8419
// 4. Recursive CAS sequence for Locking
8420
//
8421
// These distinct OOL sections may perform non-trivial logic but what they all have in common is they all have a
8422
// call to the same JIT helper which acts as a fall back. This complexity exists because of the way the evaluators
8423
// are currently architected and due to the restriction that we cannot have nested OOL code sections. Whenever
8424
// making future changes to these evaluators we should consider refactoring them to reduce the complexity and
8425
// attempt to consolidate the calls to the JIT helper so as to not have multiple copies.
8426
if (generateOOLSection)
8427
{
8428
TR_S390OutOfLineCodeSection *helperCallOOLSection = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(helperCallLabel, mergeLabel, cg);
8429
cg->getS390OutOfLineCodeSectionList().push_front(helperCallOOLSection);
8430
helperCallOOLSection->swapInstructionListsWithCompilation();
8431
8432
TR::Instruction *cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperCallLabel);
8433
8434
TR_Debug *debugObj = cg->getDebug();
8435
if (debugObj)
8436
debugObj->addInstructionComment(cursor, "Denotes Start of OOL for ValueType or ValueBased Node");
8437
8438
cg->getLinkage(TR_CHelper)->buildDirectDispatch(node);
8439
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergeLabel);
8440
8441
if (debugObj)
8442
debugObj->addInstructionComment(cursor, "Denotes End of OOL for ValueType or ValueBased Node");
8443
8444
helperCallOOLSection->swapInstructionListsWithCompilation();
8445
}
8446
8447
cg->stopUsingRegister(tempReg);
8448
return objectClassReg;
8449
}
8450
8451
/**
 * Generates the code for a monitor enter (TR::monent) node.
 *
 * Outline of what is emitted:
 *  - If the monitor object is known to be a value type / value based object, or TR_DisableInlineMonEnt or
 *    TR_FullSpeedDebug is set, only a call to the helper through the C helper linkage is generated.
 *  - Otherwise the lock word is acquired inline with a compare-and-swap of 0 -> method meta data register.
 *    A failed compare-and-swap branches to an out-of-line (OOL) section that optionally handles recursive
 *    entry inline and otherwise calls the helper.
 *  - When the class has no statically known lock word offset (lwOffset <= 0) the offset is loaded from the
 *    J9Class at run time and, with TR_EnableMonitorCacheLookup, a second OOL section probes the per-thread
 *    object monitor lookup cache.
 *  - When lock reservation is selected the work is delegated to reservationLockEnter().
 *
 * @param node The monent node; its first child is the object being locked
 * @param cg   Code generator object
 * @return NULL after the inline sequence, the register returned by the helper dispatch on the call-only path,
 *         or the result of reservationLockEnter() when lock reservation is used
 */
TR::Register *
J9::Z::TreeEvaluator::VMmonentEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   int32_t lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
   J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
   TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);

   if ((isMonitorValueBasedOrValueType == TR_yes) ||
       comp->getOption(TR_DisableInlineMonEnt) ||
       comp->getOption(TR_FullSpeedDebug))  // Required for Live Monitor Meta Data in FSD.
      {
      // Call-only path: temporarily turn the node into a call so the helper linkage can dispatch it,
      // then restore the original opcode.
      TR::ILOpCodes opCode = node->getOpCodeValue();
      TR::Node::recreate(node, TR::call);
      TR::Register *targetRegister = helperLink->buildDirectDispatch(node);
      cg->decReferenceCount(node->getFirstChild());
      TR::Node::recreate(node, opCode);
      return targetRegister;
      }

   // NOTE(review): srm is not used anywhere below. The call is kept because generateScratchRegisterManager()
   // is defined elsewhere and it is not visible here whether it has side effects.
   TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager();

   TR::Node *objNode = node->getFirstChild();
   TR::Register *objReg = cg->evaluate(objNode);
   TR::Register *baseReg = objReg;               // base register used to address the lock word
   TR::Register *monitorReg = cg->allocateRegister();
   TR::Register *objectClassReg = NULL;
   TR::Register *lookupOffsetReg = NULL;
   TR::Register *tempRegister = NULL;
   TR::Register *metaReg = cg->getMethodMetaDataRealRegister();
   TR::Register *wasteReg = NULL;
   TR::Register *lockPreservingReg = NULL;
   TR::Register *dummyResultReg = NULL;

   TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
   TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);
   TR::Instruction *startICF = NULL;
   static char * disableInlineRecursiveMonitor = feGetEnv("TR_DisableInlineRecursiveMonitor");

   // Recursive monitor entry is handled inline unless the environment variable above is set
   // (it is also switched off further down when there is no static lock word offset).
   bool inlineRecursive = (disableInlineRecursiveMonitor == NULL);

   int32_t numDeps = 4;

   if (lwOffset <= 0)
      {
      numDeps += 2;
      if (comp->getOption(TR_EnableMonitorCacheLookup))
         {
         numDeps += 2; // extra one for lit pool reg in disableZ9 mode
         }
      }

   if (comp->getOptions()->enableDebugCounters())
      numDeps += 5;

   bool simpleLocking = false;
   bool reserveLocking = false;
   bool normalLockWithReservationPreserving = false;

   if (isMonitorValueBasedOrValueType == TR_maybe)
      {
      numDeps += 1;
      // If we are generating code for MonitorCacheLookup then we will not have a separate OOL for inlineRecursive, and callLabel points
      // to the OOL Containing only helper call. Otherwise, OOL will have other code apart from helper call which we do not want to execute
      // for ValueType or ValueBased object and in that scenario we will need to generate another OOL that just contains helper call.
      objectClassReg = generateCheckForValueMonitorEnterOrExit(node, cFlowRegionEnd, lwOffset <= 0 ? callLabel : NULL, cg);
      }
   TR::RegisterDependencyConditions * conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numDeps, cg);

   TR_Debug * debugObj = cg->getDebug();

   conditions->addPostCondition(objReg, TR::RealRegister::AssignAny);
   conditions->addPostCondition(monitorReg, TR::RealRegister::AssignAny);
   if (objectClassReg != NULL)
      conditions->addPostCondition(objectClassReg, TR::RealRegister::AssignAny);

   static const char * peekFirst = feGetEnv("TR_PeekingMonEnter");
   // This debug option is for printing the locking mechanism.
   static int printMethodSignature = feGetEnv("PrintMethodSignatureForLockResEnt")? 1 : 0;
   if (lwOffset <= 0)
      {
      inlineRecursive = false;
      // should not happen often, only on a subset of objects that don't have a lockword
      // set with option -Xlockword

      TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *helperReturnOOLLabel = generateLabelSymbol(cg);
      TR::MemoryReference * tempMR = NULL;
      if (objectClassReg == NULL)
         {
         // The class pointer was not already loaded by the value type check, so load it now.
         tempMR = generateS390MemoryReference(objReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
         // TODO We don't need objectClassReg except in this ifCase. We can use scratchRegisterManager to allocate one here.
         objectClassReg = cg->allocateRegister();
         conditions->addPostCondition(objectClassReg, TR::RealRegister::AssignAny);
         TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objectClassReg, tempMR, NULL);
         }
      int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);
      tempMR = generateS390MemoryReference(objectClassReg, offsetOfLockOffset, cg);

      tempRegister = cg->allocateRegister();
      TR::LabelSymbol *targetLabel = callLabel;
      if (comp->getOption(TR_EnableMonitorCacheLookup))
         targetLabel = monitorLookupCacheLabel;

      // tempRegister <- J9Class.lockOffset; a value that is not greater than zero leaves the inline path.
      generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, tempRegister, tempMR);

      TR::Instruction *cmpInstr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, targetLabel);

      // tempRegister <- objReg + lockOffset, i.e. the address of the lock word.
      if (comp->target().is64Bit())
         generateRXInstruction(cg, TR::InstOpCode::LA, node, tempRegister, generateS390MemoryReference(objReg, tempRegister, 0, cg));
      else
         generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, tempRegister, objReg);

      if (comp->getOption(TR_EnableMonitorCacheLookup))
         {
         TR::RegisterDependencyConditions * OOLConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
         OOLConditions->addPostCondition(objReg, TR::RealRegister::AssignAny);
         OOLConditions->addPostCondition(monitorReg, TR::RealRegister::AssignAny);
         OOLConditions->addPostCondition(tempRegister, TR::RealRegister::AssignAny);
         // pulling this chunk of code into OOL sequence for better Register allocation and avoid branches
         TR_S390OutOfLineCodeSection *monitorCacheLookupOOL;
         monitorCacheLookupOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(monitorLookupCacheLabel,cFlowRegionEnd,cg);
         cg->getS390OutOfLineCodeSectionList().push_front(monitorCacheLookupOOL);
         monitorCacheLookupOOL->swapInstructionListsWithCompilation();

         TR::Instruction *cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, monitorLookupCacheLabel);

         if (debugObj)
            {
            debugObj->addInstructionComment(cmpInstr, "Branch to OOL monent monitorLookupCache");
            debugObj->addInstructionComment(cursor, "Denotes start of OOL monent monitorLookupCache");
            }

         lookupOffsetReg = cg->allocateRegister();
         OOLConditions->addPostCondition(lookupOffsetReg, TR::RealRegister::AssignAny);

         int32_t offsetOfMonitorLookupCache = offsetof(J9VMThread, objectMonitorLookupCache);
         int32_t t = trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes());
         int32_t shiftAmount = trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField()) - t;
         int32_t end = 63 - trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField());
         int32_t start = end - trailingZeroes(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE) + 1;

         // lookupOffsetReg <- byte offset of the cache slot selected by the object address.
         if (comp->target().is64Bit())
            {
            // Single rotate-then-insert; RISBGN is used when the processor is at least zEC12.
            TR::InstOpCode::Mnemonic rotateOp = comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12) ? TR::InstOpCode::RISBGN : TR::InstOpCode::RISBG;
            generateRIEInstruction(cg, rotateOp, node, lookupOffsetReg, objReg, start, end+0x80, shiftAmount);
            }
         else
            {
            // Not a 64-bit target: shift right, mask with (cache size - 1), shift left, using the 32-bit forms.
            // (The 64-bit SRAG/SLLG variants could never be reached from this branch.)
            generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, lookupOffsetReg, objReg);
            generateRSInstruction(cg, TR::InstOpCode::SRA, node, lookupOffsetReg, t);

            if (comp->target().is32Bit())
               generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, lookupOffsetReg, lookupOffsetReg, (int32_t) J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, OOLConditions, 0);
            else
               generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, lookupOffsetReg, lookupOffsetReg, (int64_t) J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, OOLConditions, 0);

            generateRSInstruction(cg, TR::InstOpCode::SLL, node, lookupOffsetReg, trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField()));
            }

         TR::MemoryReference * temp2MR = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), lookupOffsetReg, offsetOfMonitorLookupCache, cg);

         // tempRegister <- cached J9ObjectMonitor pointer; an empty slot goes to the helper call.
         if (TR::Compiler->om.compressObjectReferences())
            {
            generateRXInstruction(cg, TR::InstOpCode::LLGF, node, tempRegister, temp2MR, NULL);
            startICF = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, tempRegister, NULLVALUE, TR::InstOpCode::COND_BE, helperCallLabel, false, true);
            }
         else
            {
            generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, tempRegister, temp2MR);
            startICF = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, helperCallLabel);
            }

         // The cached entry must belong to this object, otherwise call the helper.
         int32_t offsetOfMonitor = offsetof(J9ObjectMonitor, monitor);
         temp2MR = generateS390MemoryReference(tempRegister, offsetOfMonitor, cg);
         generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, objReg, temp2MR);
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperCallLabel);

         int32_t offsetOfAlternateLockWord = offsetof(J9ObjectMonitor, alternateLockword);

         // Inside this OOL section the lock word is the monitor's alternate lock word.
         baseReg = tempRegister;
         lwOffset = 0 + offsetOfAlternateLockWord;

         if (comp->target().is64Bit() && fej9->generateCompressedLockWord())
            generateRRInstruction(cg, TR::InstOpCode::XR, node, monitorReg, monitorReg);
         else
            generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, monitorReg, monitorReg);

         if (peekFirst)
            {
            generateRXInstruction(cg, TR::InstOpCode::C, node, monitorReg, generateS390MemoryReference(baseReg, lwOffset, cg));
            generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperCallLabel);
            }

         if (comp->target().is64Bit() && fej9->generateCompressedLockWord())
            generateRSInstruction(cg, TR::InstOpCode::CS, node, monitorReg, metaReg,
                                  generateS390MemoryReference(baseReg, lwOffset, cg));
         else
            generateRSInstruction(cg, TR::InstOpCode::getCmpAndSwapOpCode(), node, monitorReg, metaReg,
                                  generateS390MemoryReference(baseReg, lwOffset, cg));

         // Successful compare-and-swap skips the helper call.
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, helperReturnOOLLabel);
         generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperCallLabel );
         TR::RegisterDependencyConditions *deps = NULL;
         dummyResultReg = helperLink->buildDirectDispatch(node, &deps);
         TR::RegisterDependencyConditions *mergeConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(OOLConditions, deps, cg);
         generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperReturnOOLLabel , mergeConditions);

         cursor = generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,cFlowRegionEnd);
         if (debugObj)
            debugObj->addInstructionComment(cursor, "Denotes end of OOL monent monitorCacheLookup: return to mainline");

         // Done using OOL with manual code generation
         monitorCacheLookupOOL->swapInstructionListsWithCompilation();
         }

      // Back on the main line the lock word is addressed directly through tempRegister.
      simpleLocking = true;
      lwOffset = 0;
      baseReg = tempRegister;
      }

   // Lock Reservation happens only for objects with lockword.
   // evaluateLockForReservation may output three different results:
   // 1- Lock Reservation: (reserveLocking = true)
   // 2- ReservationPreserving: (normalLockWithReservationPreserving = true)
   // 3- Normal lock: otherwise
   if (!simpleLocking && comp->getOption(TR_ReservingLocks))
      TR::TreeEvaluator::evaluateLockForReservation(node, &reserveLocking, &normalLockWithReservationPreserving, cg);

   if (printMethodSignature)
      printf("%s:\t%s\t%s\n",simpleLocking ? "lwOffset <= 0" : reserveLocking ? "Lock Reservation" :
             normalLockWithReservationPreserving ? "Reservation Preserving" : "Normal Lock",
             comp->signature(),comp->getHotnessName(comp->getMethodHotness()));

   if (reserveLocking)
      {
      // TODO - ScratchRegisterManager Should Manage these temporary Registers.
      // wasteReg has not been allocated yet at this point (it is only allocated in the inline recursive
      // sequence further down), so monitorReg is the only temporary that needs to be released here.
      cg->stopUsingRegister(monitorReg);
      // NOTE(review): when the value type check above created its own OOL section, that section branches to
      // cFlowRegionEnd, which this early return never places. Confirm against reservationLockEnter().
      // TODO : objectClassReg contains the J9Class for object which is set in lwOffset <= 0 case. Usually that is NULL in the following function call
      return reservationLockEnter(node, lwOffset, objectClassReg, cg, helperLink);
      }

   if (normalLockWithReservationPreserving)
      {
      lockPreservingReg = cg->allocateRegister();
      conditions->addPostCondition(lockPreservingReg, TR::RealRegister::AssignAny);
      }
   const char* debugCounterNamePrefix = normalLockWithReservationPreserving? "LockEnt/Preserving": "LockEnt/Normal";

   // Opcodes:
   // The 64-bit forms are used only on a 64-bit target that does not use a compressed lock word.
   bool use64b = comp->target().is64Bit() && !fej9->generateCompressedLockWord();
   TR::InstOpCode::Mnemonic loadOp = use64b ? TR::InstOpCode::LG : TR::InstOpCode::L;
   TR::InstOpCode::Mnemonic loadRegOp = use64b ? TR::InstOpCode::LGR : TR::InstOpCode::LR;
   TR::InstOpCode::Mnemonic orImmOp = TR::InstOpCode::OILF;
   TR::InstOpCode::Mnemonic compareOp = use64b ? TR::InstOpCode::CGR : TR::InstOpCode::CR;
   TR::InstOpCode::Mnemonic addImmOp = use64b ? TR::InstOpCode::AGHI : TR::InstOpCode::AHI;
   TR::InstOpCode::Mnemonic storeOp = use64b ? TR::InstOpCode::STG : TR::InstOpCode::ST;
   TR::InstOpCode::Mnemonic xorOp = use64b ? TR::InstOpCode::XGR : TR::InstOpCode::XR;
   TR::InstOpCode::Mnemonic casOp = use64b ? TR::InstOpCode::CSG : TR::InstOpCode::CS;
   TR::InstOpCode::Mnemonic andOp = use64b ? TR::InstOpCode::NGR : TR::InstOpCode::NR;
   TR::InstOpCode::Mnemonic loadHalfWordImmOp = use64b ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI;

   // MonitorReg = 0
   generateRRInstruction(cg, xorOp, node, monitorReg, monitorReg);

   // PeekFirst option read the lock value first and then issue CAS only the lock value is zero.
   // This causes an extra load operation when the lock is free, but it leads to avoidance of unnecessary CAS operations.
   // NOTE(review): the peek always uses the 32-bit compare (C), also when use64b selects a 64-bit lock word;
   // confirm that comparing only 32 bits is intended.
   if (peekFirst)
      {
      generateRXInstruction(cg, TR::InstOpCode::C, node, monitorReg, generateS390MemoryReference(baseReg, lwOffset, cg));
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callLabel);
      }
   // Main path instruction sequence.
   // This sequence is the same for both normal locks and lock preservation.
   //    XR      monitorReg,monitorReg
   //    CS      monitorReg,GPR13,#lwOffset(objectReg)
   //    BRC     BLRC(0x4), callLabel (OOL path)

   //Compare and Swap the lock value with R13 if the lock value is 0.
   generateRSInstruction(cg, casOp, node, monitorReg, metaReg, generateS390MemoryReference(baseReg, lwOffset, cg));

   // Jump to OOL branch in case that the CAS is unsuccessful (Lockword had contained a non-zero value before CAS)
   // Both TR::InstOpCode::MASK6 and TR::InstOpCode::MASK4 are ok here. TR::InstOpCode::MASK4 is directly testing failure condition.
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, node, callLabel);
   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "%s/CSSuccessfull", debugCounterNamePrefix), 1, TR::DebugCounter::Undetermined);
   TR_S390OutOfLineCodeSection *outlinedHelperCall = NULL;
   TR::Instruction *cursor;
   TR::LabelSymbol *returnLabel = generateLabelSymbol(cg);

   outlinedHelperCall = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(callLabel, cFlowRegionEnd, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);
   outlinedHelperCall->swapInstructionListsWithCompilation();
   cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callLabel);
   if (debugObj)
      debugObj->addInstructionComment(cursor, "Denotes start of OOL monent sequence");

   if (inlineRecursive)
      {
      TR::LabelSymbol * callHelper = generateLabelSymbol(cg);

      //Using OOL but generating code manually
      //Tasuki lock, inlined nested monitor handling
      //(on entry objectReg has been set up)

      //    Normal Lock                                        Lock reservation preserving
      // L     monitorReg, #lwOffset(objectReg)                L     monitorReg, #lwOffset(objectReg)
      // LHI   wasteReg, NON_INC_DEC_MASK               DIFF   LHI   wasteReg, LOCK_RES_PRESERVE_ENTER
      // AHI   monitorReg, INC_DEC_VALUE                DIFF
      // NR    wasteReg, monitorReg                            NR    wasteReg, monitorReg
      //                                                DIFF   LR    lockPreservingReg, metaReg
      //                                                DIFF   OILF  lockPreservingReg, LR-BIT
      // CRJ   wasteReg, metaReg, MASK6, callHelper     DIFF   CRJ   wasteReg, lockPreservingReg, MASK6, callHelper
      //                                                DIFF   AHI   monitorReg,INC_DEC_VALUE
      // ST    monitorReg, #lwOffset(objectReg)                ST    monitorReg, #lwOffset(objectReg)
      // BRC   returnLabel                                     BRC   returnLabel
      // callHelper:                                           callHelper:
      // BRASL R14, jitMonitorEnter                            BRASL R14, jitMonitorEnter
      // returnLabel:                                          returnLabel:

      TR::MemoryReference * tempMR = generateS390MemoryReference(baseReg, lwOffset, cg);
      TR::MemoryReference * tempMR1 = generateS390MemoryReference(baseReg, lwOffset, cg);
      wasteReg = cg->allocateRegister();
      conditions->addPostCondition(wasteReg, TR::RealRegister::AssignAny);
      // Loading Lock value into monitorReg
      generateRXInstruction(cg, loadOp, node, monitorReg, tempMR);
      generateRIInstruction(cg, loadHalfWordImmOp, node, wasteReg,
            normalLockWithReservationPreserving ? ~LOCK_RES_PRESERVE_ENTER_COMPLEMENT : ~OBJECT_HEADER_LOCK_RECURSION_MASK);

      // In normal lock, we first increment the counter and then do the mask and comparison.
      // However, in lock preserving first we do mask and compare and then we increment the counter
      // We can do the same technique for both. The reason for current implementation is to expose less differences between
      // this implementation and other architecture implementations.
      if (!normalLockWithReservationPreserving)
         generateRIInstruction(cg, addImmOp, node, monitorReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
      // Mask out the counter value from lockword.
      generateRRInstruction(cg, andOp, node, wasteReg, monitorReg);
      if (normalLockWithReservationPreserving)
         {
         generateRRInstruction(cg,loadRegOp, node, lockPreservingReg, metaReg);
         generateRILInstruction(cg, orImmOp, node, lockPreservingReg, LOCK_RESERVATION_BIT);
         }

      // The lock value (after masking out the counter) is being compared with R13 (or R13|LRbit for reservation preserving case)
      // to check whether the same thread has acquired the lock before.
      // if comparison fails (masked lock value != R13) that means another thread owns the lock.
      // In this case we call helper function and let the VM handle the situation.
      startICF = generateS390CompareAndBranchInstruction(cg, compareOp, node, wasteReg, normalLockWithReservationPreserving ? lockPreservingReg : metaReg, TR::InstOpCode::COND_BNE, callHelper, false, false);

      cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "%s/Recursive", debugCounterNamePrefix), 1, TR::DebugCounter::Undetermined);
      // In case of recursive lock, the counter should be incremented.
      if (normalLockWithReservationPreserving)
         generateRIInstruction(cg, addImmOp, node, monitorReg, OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
      generateRXInstruction(cg, storeOp, node, monitorReg, tempMR1);

      generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,returnLabel);

      tempMR->stopUsingMemRefRegister(cg);
      tempMR1->stopUsingMemRefRegister(cg);

      // Helper Call
      cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callHelper);
      }

   cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "%s/VMHelper", debugCounterNamePrefix), 1, TR::DebugCounter::Undetermined);
   TR::RegisterDependencyConditions *deps = NULL;
   dummyResultReg = inlineRecursive ? helperLink->buildDirectDispatch(node, &deps) : helperLink->buildDirectDispatch(node);
   // With the inline recursive sequence the helper's dependencies are merged with ours on the return label;
   // otherwise the return label carries our own conditions.
   TR::RegisterDependencyConditions *mergeConditions = NULL;
   if (inlineRecursive)
      mergeConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(conditions, deps, cg);
   else
      mergeConditions = conditions;
   generateS390LabelInstruction(cg,TR::InstOpCode::label,node,returnLabel,mergeConditions);

   // End of OOL path.
   cursor = generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,cFlowRegionEnd);
   if (debugObj)
      {
      debugObj->addInstructionComment(cursor, "Denotes end of OOL monent: return to mainline");
      }

   // Done using OOL with manual code generation
   outlinedHelperCall->swapInstructionListsWithCompilation();

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, conditions);

   // Release every temporary that was allocated along the way.
   cg->stopUsingRegister(monitorReg);
   if (wasteReg)
      cg->stopUsingRegister(wasteReg);
   if (objectClassReg)
      cg->stopUsingRegister(objectClassReg);
   if (lookupOffsetReg)
      cg->stopUsingRegister(lookupOffsetReg);
   if (tempRegister && (tempRegister != objectClassReg))
      cg->stopUsingRegister(tempRegister);
   if (lockPreservingReg)
      cg->stopUsingRegister(lockPreservingReg);
   cg->decReferenceCount(objNode);
   return NULL;
   }
8870
8871
TR::Register *
8872
J9::Z::TreeEvaluator::VMmonexitEvaluator(TR::Node * node, TR::CodeGenerator * cg)
8873
{
8874
TR::Compilation *comp = cg->comp();
8875
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
8876
int32_t lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
8877
J9::Z::CHelperLinkage *helperLink = static_cast<J9::Z::CHelperLinkage*>(cg->getLinkage(TR_CHelper));
8878
TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);
8879
8880
if ((isMonitorValueBasedOrValueType == TR_yes) ||
8881
comp->getOption(TR_DisableInlineMonExit) ||
8882
comp->getOption(TR_FullSpeedDebug)) // Required for Live Monitor Meta Data in FSD.
8883
{
8884
TR::ILOpCodes opCode = node->getOpCodeValue();
8885
TR::Node::recreate(node, TR::call);
8886
TR::Register * targetRegister = helperLink->buildDirectDispatch(node);
8887
cg->decReferenceCount(node->getFirstChild());
8888
TR::Node::recreate(node, opCode);
8889
return targetRegister;
8890
}
8891
8892
TR::Node *objNode = node->getFirstChild();
8893
8894
8895
//TODO Use scratchRegisterManager here to avoid allocating un-necessary registers
8896
TR::Register *dummyResultRegister = NULL;
8897
TR::Register *objReg = cg->evaluate(objNode);
8898
TR::Register *baseReg = objReg;
8899
TR::Register *objectClassReg = NULL;
8900
TR::Register *lookupOffsetReg = NULL;
8901
TR::Register *tempRegister = NULL;
8902
TR::Register *monitorReg = cg->allocateRegister();
8903
TR::Register *metaReg = cg->getMethodMetaDataRealRegister();
8904
TR::Register *scratchRegister = NULL;
8905
TR::Instruction *startICF = NULL;
8906
8907
static char * disableInlineRecursiveMonitor = feGetEnv("TR_DisableInlineRecursiveMonitor");
8908
bool inlineRecursive = true;
8909
if (disableInlineRecursiveMonitor)
8910
inlineRecursive = false;
8911
8912
TR::LabelSymbol *callLabel = generateLabelSymbol(cg);
8913
TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);
8914
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
8915
TR::LabelSymbol *callHelper = generateLabelSymbol(cg);
8916
TR::LabelSymbol *returnLabel = generateLabelSymbol(cg);
8917
8918
int32_t numDeps = 4;
8919
if (lwOffset <=0)
8920
{
8921
numDeps +=2;
8922
if (comp->getOption(TR_EnableMonitorCacheLookup))
8923
{
8924
numDeps +=2; // extra one for lit pool reg in disableZ9 mode
8925
}
8926
}
8927
8928
if (comp->getOptions()->enableDebugCounters())
8929
numDeps += 4;
8930
8931
if (isMonitorValueBasedOrValueType == TR_maybe)
8932
{
8933
numDeps += 1;
8934
objectClassReg = generateCheckForValueMonitorEnterOrExit(node, cFlowRegionEnd, lwOffset <= 0 ? callLabel : NULL, cg);
8935
}
8936
TR::RegisterDependencyConditions * conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numDeps, cg);
8937
8938
8939
TR::Instruction * gcPoint;
8940
TR_Debug * debugObj = cg->getDebug();
8941
8942
bool reserveLocking = false;
8943
bool normalLockWithReservationPreserving = false;
8944
bool simpleLocking = false;
8945
8946
8947
conditions->addPostCondition(objReg, TR::RealRegister::AssignAny);
8948
conditions->addPostCondition(monitorReg, TR::RealRegister::AssignAny);
8949
if (objectClassReg != NULL)
8950
conditions->addPostCondition(objectClassReg, TR::RealRegister::AssignAny);
8951
8952
8953
if (lwOffset <= 0)
8954
{
8955
inlineRecursive = false; // should not happen often, only on a subset of objects that don't have a lockword, set with option -Xlockword
8956
8957
TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);
8958
TR::LabelSymbol *helperReturnOOLLabel = generateLabelSymbol(cg);
8959
TR::MemoryReference *tempMR = NULL;
8960
8961
if (objectClassReg == NULL)
8962
{
8963
tempMR = generateS390MemoryReference(objReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8964
objectClassReg = cg->allocateRegister();
8965
conditions->addPostCondition(objectClassReg, TR::RealRegister::AssignAny);
8966
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objectClassReg, tempMR, NULL);
8967
}
8968
int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);
8969
tempMR = generateS390MemoryReference(objectClassReg, offsetOfLockOffset, cg);
8970
8971
tempRegister = cg->allocateRegister();
8972
TR::LabelSymbol *targetLabel = callLabel;
8973
if (comp->getOption(TR_EnableMonitorCacheLookup))
8974
targetLabel = monitorLookupCacheLabel;
8975
8976
generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, tempRegister, tempMR);
8977
8978
TR::Instruction *cmpInstr = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, targetLabel);
8979
8980
if(comp->target().is64Bit())
8981
generateRXInstruction(cg, TR::InstOpCode::LA, node, tempRegister, generateS390MemoryReference(objReg, tempRegister, 0, cg));
8982
else
8983
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, tempRegister, objReg);
8984
8985
if (comp->getOption(TR_EnableMonitorCacheLookup))
8986
{
8987
lookupOffsetReg = cg->allocateRegister();
8988
TR::RegisterDependencyConditions * OOLConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
8989
OOLConditions->addPostCondition(objReg, TR::RealRegister::AssignAny);
8990
OOLConditions->addPostCondition(monitorReg, TR::RealRegister::AssignAny);
8991
// TODO Should be using SRM for tempRegister
8992
OOLConditions->addPostCondition(tempRegister, TR::RealRegister::AssignAny);
8993
OOLConditions->addPostCondition(lookupOffsetReg, TR::RealRegister::AssignAny);
8994
8995
8996
// pulling this chunk of code into OOL sequence for better Register allocation and avoid branches
8997
TR_S390OutOfLineCodeSection *monitorCacheLookupOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(monitorLookupCacheLabel,cFlowRegionEnd,cg);
8998
cg->getS390OutOfLineCodeSectionList().push_front(monitorCacheLookupOOL);
8999
monitorCacheLookupOOL->swapInstructionListsWithCompilation();
9000
9001
TR::Instruction *cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, monitorLookupCacheLabel);
9002
9003
if (debugObj)
9004
{
9005
debugObj->addInstructionComment(cmpInstr, "Branch to OOL monexit monitorLookupCache");
9006
debugObj->addInstructionComment(cursor, "Denotes start of OOL monexit monitorLookupCache");
9007
}
9008
9009
9010
int32_t offsetOfMonitorLookupCache = offsetof(J9VMThread, objectMonitorLookupCache);
9011
int32_t t = trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes());
9012
int32_t shiftAmount = trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField()) - t;
9013
int32_t end = 63 - trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField());
9014
int32_t start = end - trailingZeroes(J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE) + 1;
9015
9016
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_ZEC12) && comp->target().is64Bit())
9017
generateRIEInstruction(cg, TR::InstOpCode::RISBGN, node, lookupOffsetReg, objReg, start, end+0x80, shiftAmount);
9018
else if(comp->target().is64Bit())
9019
generateRIEInstruction(cg, TR::InstOpCode::RISBG, node, lookupOffsetReg, objReg, start, end+0x80, shiftAmount);
9020
else
9021
{
9022
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, lookupOffsetReg, objReg);
9023
9024
if (comp->target().is64Bit())
9025
generateRSInstruction(cg, TR::InstOpCode::SRAG, node, lookupOffsetReg, lookupOffsetReg, t);
9026
else
9027
generateRSInstruction(cg, TR::InstOpCode::SRA, node, lookupOffsetReg, t);
9028
9029
J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;
9030
9031
if (comp->target().is32Bit())
9032
generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, lookupOffsetReg, lookupOffsetReg, (int32_t) J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, OOLConditions, 0);
9033
else
9034
generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, lookupOffsetReg, lookupOffsetReg, (int64_t) J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, OOLConditions, 0);
9035
9036
if (comp->target().is64Bit())
9037
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, lookupOffsetReg, lookupOffsetReg, trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField()));
9038
else
9039
generateRSInstruction(cg, TR::InstOpCode::SLL, node, lookupOffsetReg, trailingZeroes((int32_t) TR::Compiler->om.sizeofReferenceField()));
9040
}
9041
9042
// TODO No Need to use Memory Reference Here. Combine it with generateRXInstruction
9043
TR::MemoryReference * temp2MR = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), lookupOffsetReg, offsetOfMonitorLookupCache, cg);
9044
9045
if (TR::Compiler->om.compressObjectReferences())
9046
{
9047
generateRXInstruction(cg, TR::InstOpCode::LLGF, node, tempRegister, temp2MR, NULL);
9048
startICF = generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, tempRegister, NULLVALUE, TR::InstOpCode::COND_BE, helperCallLabel, false, true);
9049
}
9050
else
9051
{
9052
generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, tempRegister, temp2MR);
9053
startICF = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, helperCallLabel);
9054
}
9055
9056
int32_t offsetOfMonitor = offsetof(J9ObjectMonitor, monitor);
9057
// TODO No Need to use Memory Reference Here. Combine it with generateRXInstruction
9058
temp2MR = generateS390MemoryReference(tempRegister, offsetOfMonitor, cg);
9059
generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, objReg, temp2MR);
9060
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperCallLabel);
9061
9062
int32_t offsetOfAlternateLockWord = offsetof(J9ObjectMonitor, alternateLockword);
9063
9064
baseReg = tempRegister;
9065
lwOffset = 0 + offsetOfAlternateLockWord;
9066
9067
// Check if the lockWord in the object contains our VMThread
9068
if (comp->target().is64Bit() && fej9->generateCompressedLockWord())
9069
generateRXInstruction(cg, TR::InstOpCode::C, node, metaReg, generateS390MemoryReference(baseReg, lwOffset, cg));
9070
else
9071
generateRXInstruction(cg, TR::InstOpCode::getCmpOpCode(), node, metaReg, generateS390MemoryReference(baseReg, lwOffset, cg));
9072
9073
// If VMThread does not match, call helper.
9074
TR::Instruction* helperBranch = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, helperCallLabel);
9075
9076
// If VMThread matches, we can safely perform the monitor exit by zero'ing
9077
// out the lockWord on the object
9078
if (comp->target().is64Bit() && fej9->generateCompressedLockWord())
9079
gcPoint = generateSILInstruction(cg, TR::InstOpCode::MVHI, node, generateS390MemoryReference(baseReg, lwOffset, cg), 0);
9080
else
9081
gcPoint = generateSILInstruction(cg, TR::InstOpCode::getMoveHalfWordImmOpCode(), node, generateS390MemoryReference(baseReg, lwOffset, cg), 0);
9082
9083
generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,helperReturnOOLLabel);
9084
9085
generateS390LabelInstruction(cg, TR::InstOpCode::label , node, helperCallLabel );
9086
TR::RegisterDependencyConditions *deps = NULL;
9087
helperLink->buildDirectDispatch(node, &deps);
9088
TR::RegisterDependencyConditions *mergeConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(OOLConditions, deps, cg);
9089
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, helperReturnOOLLabel , mergeConditions);
9090
9091
cursor = generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,cFlowRegionEnd);
9092
if (debugObj)
9093
debugObj->addInstructionComment(cursor, "Denotes end of OOL monexit monitorCacheLookup: return to mainline");
9094
9095
// Done using OOL with manual code generation
9096
monitorCacheLookupOOL->swapInstructionListsWithCompilation();
9097
}
9098
9099
lwOffset = 0;
9100
baseReg = tempRegister;
9101
simpleLocking = true;
9102
}
9103
9104
// Lock Reservation happens only for objects with lockword.
9105
if (!simpleLocking && comp->getOption(TR_ReservingLocks))
9106
TR::TreeEvaluator::evaluateLockForReservation(node, &reserveLocking, &normalLockWithReservationPreserving, cg);
9107
if (reserveLocking)
9108
{
9109
// TODO - It would be much better to find a way not allocating these registers at the first place.
9110
cg->stopUsingRegister(monitorReg);
9111
return reservationLockExit(node, lwOffset, objectClassReg, cg, helperLink);
9112
}
9113
////////////
9114
// Opcodes:
9115
bool use64b = true;
9116
if (comp->target().is64Bit() && fej9->generateCompressedLockWord())
9117
use64b = false;
9118
else if (!comp->target().is64Bit())
9119
use64b = false;
9120
TR::InstOpCode::Mnemonic loadOp = use64b ? TR::InstOpCode::LG : TR::InstOpCode::L;
9121
TR::InstOpCode::Mnemonic loadRegOp = use64b ? TR::InstOpCode::LGR : TR::InstOpCode::LR;
9122
TR::InstOpCode::Mnemonic orImmOp = TR::InstOpCode::OILF;
9123
TR::InstOpCode::Mnemonic compareOp = use64b ? TR::InstOpCode::CGR : TR::InstOpCode::CR;
9124
TR::InstOpCode::Mnemonic compareImmOp = use64b ? TR::InstOpCode::CG : TR::InstOpCode::C;
9125
TR::InstOpCode::Mnemonic addImmOp = use64b ? TR::InstOpCode::AGHI : TR::InstOpCode::AHI;
9126
TR::InstOpCode::Mnemonic storeOp = use64b ? TR::InstOpCode::STG : TR::InstOpCode::ST;
9127
TR::InstOpCode::Mnemonic xorOp = use64b ? TR::InstOpCode::XGR : TR::InstOpCode::XR;
9128
TR::InstOpCode::Mnemonic casOp = use64b ? TR::InstOpCode::CSG : TR::InstOpCode::CS;
9129
TR::InstOpCode::Mnemonic loadImmOp = TR::InstOpCode::LGFI;
9130
TR::InstOpCode::Mnemonic andOp = use64b ? TR::InstOpCode::NGR : TR::InstOpCode::NR;
9131
TR::InstOpCode::Mnemonic andImmOp = TR::InstOpCode::NILF;
9132
TR::InstOpCode::Mnemonic moveImmOp = use64b ? TR::InstOpCode::MVGHI : TR::InstOpCode::MVHI;
9133
TR::InstOpCode::Mnemonic loadHalfWordImmOp = use64b ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI;
9134
9135
// Main path instruction sequence.
9136
// This sequence is the same for both normal locks and lock preservation.
9137
// C metaReg, #lwOffset(objectReg)
9138
// BRC MASK6, callLabel
9139
// MVHI #lwOffset(objectReg), 0
9140
9141
//TODO - use compareAndBranch instruction
9142
// Check if the lockWord in the object contains our VMThread
9143
generateRXInstruction(cg, compareImmOp, node, metaReg, generateS390MemoryReference(baseReg, lwOffset, cg));
9144
// If VMThread does not match, call helper.
9145
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callLabel);
9146
if (normalLockWithReservationPreserving)
9147
cg->generateDebugCounter("LockExit/Preserving/MVHISuccessfull", 1, TR::DebugCounter::Undetermined);
9148
else
9149
cg->generateDebugCounter("LockExit/Normal/MVHISuccessfull", 1, TR::DebugCounter::Undetermined);
9150
// If VMThread matches, we can safely perform the monitor exit by zero'ing
9151
// out the lockWord on the object
9152
generateSILInstruction(cg, moveImmOp, node, generateS390MemoryReference(baseReg, lwOffset, cg), 0);
9153
9154
TR_S390OutOfLineCodeSection *outlinedHelperCall = NULL;
9155
9156
outlinedHelperCall = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(callLabel,cFlowRegionEnd,cg);
9157
cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);
9158
outlinedHelperCall->swapInstructionListsWithCompilation();
9159
9160
TR::Instruction *cursor = generateS390LabelInstruction(cg,TR::InstOpCode::label,node,callLabel);
9161
9162
if (inlineRecursive)
9163
{
9164
// inlineRecursive is only enabled when OOL is enabled
9165
if (debugObj)
9166
{
9167
debugObj->addInstructionComment(cursor, "Denotes start of OOL monexit sequence");
9168
}
9169
9170
// (on entry objectReg has been set up)
9171
9172
// Normal Lock Lock reservation preserving
9173
// L monitorReg, #lwOffset(objectReg) L monitorReg, #lwOffset(objectReg)
9174
// LHI wasteReg, ~LOCK_RECURSION_MASK LHI wasteReg, LOCK_OWNING_NON_INFLATED
9175
// AHI monitorReg, -INC_DEC_VALUE
9176
// NR wasteReg, monitorReg NR wasteReg, monitorReg
9177
// CRJ wasteReg, metaReg, MASK6, callHelper CRJ wasteReg, metaReg, MASK6, callHelper
9178
// LHI wasteReg, LOCK_RECURSION_MASK
9179
// NR wasteReg, monitorReg
9180
// BRC BERC, callHelper
9181
// LHI wasteReg, LOCK_OWNING_NON_INFLATED
9182
// NR wasteReg, monitorReg
9183
// CIJ wasteReg, callHelper, BERC, LOCK_RES_CONTENDED_VALUE
9184
// AHI monitorReg, -INC_DEC_VALUE
9185
// ST monitorReg, #lwOffset(objectReg) ST monitorReg, #lwOffset(objectReg)
9186
// BRC returnLabel BRC returnLabel
9187
// callHelper: callHelper:
9188
// BRASL R14,jitMonitorExit BRASL R14, jitMonitorExit
9189
// returnLabel: returnLabel:
9190
scratchRegister = cg->allocateRegister();
9191
conditions->addPostCondition(scratchRegister, TR::RealRegister::AssignAny);
9192
9193
TR::MemoryReference * tempMR = generateS390MemoryReference(baseReg, lwOffset, cg);
9194
TR::MemoryReference * tempMR1 = generateS390MemoryReference(baseReg, lwOffset, cg);
9195
if(!normalLockWithReservationPreserving)
9196
{
9197
generateRXInstruction(cg, loadOp, node, monitorReg, tempMR);
9198
generateRIInstruction(cg, loadHalfWordImmOp, node, scratchRegister, ~OBJECT_HEADER_LOCK_RECURSION_MASK);
9199
generateRIInstruction(cg, addImmOp, node, monitorReg, -OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
9200
generateRRInstruction(cg, andOp, node, scratchRegister, monitorReg);
9201
startICF = generateS390CompareAndBranchInstruction(cg, compareOp, node, scratchRegister, metaReg, TR::InstOpCode::COND_BNE, callHelper, false, false);
9202
cg->generateDebugCounter("LockExit/Normal/Recursive", 1, TR::DebugCounter::Undetermined);
9203
generateRXInstruction(cg, storeOp, node, monitorReg, tempMR1);
9204
}
9205
else
9206
{
9207
generateRXInstruction(cg, loadOp, node, monitorReg, tempMR);
9208
generateRIInstruction(cg, loadHalfWordImmOp, node, scratchRegister, ~LOCK_OWNING_NON_INFLATED_COMPLEMENT);
9209
generateRRInstruction(cg, andOp, node, scratchRegister, monitorReg);
9210
startICF = generateS390CompareAndBranchInstruction(cg, compareOp, node, scratchRegister, metaReg, TR::InstOpCode::COND_BNE, callHelper, false, false);
9211
generateRIInstruction(cg, loadHalfWordImmOp, node, scratchRegister, OBJECT_HEADER_LOCK_RECURSION_MASK);
9212
generateRRInstruction(cg, andOp, node, scratchRegister, monitorReg);
9213
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, callHelper);
9214
generateRIInstruction(cg, loadHalfWordImmOp, node, scratchRegister, LOCK_OWNING_NON_INFLATED_COMPLEMENT);
9215
generateRRInstruction(cg, andOp, node, scratchRegister, monitorReg);
9216
generateS390CompareAndBranchInstruction(cg, compareImmOp, node, scratchRegister, LOCK_RES_CONTENDED_VALUE, TR::InstOpCode::COND_BE, callHelper, false, false);
9217
cg->generateDebugCounter("LockExit/Preserving/Recursive", 1, TR::DebugCounter::Undetermined);
9218
generateRIInstruction(cg, addImmOp, node, monitorReg, -OBJECT_HEADER_LOCK_FIRST_RECURSION_BIT);
9219
generateRXInstruction(cg, storeOp, node, monitorReg, tempMR1);
9220
}
9221
generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,returnLabel);
9222
tempMR->stopUsingMemRefRegister(cg);
9223
tempMR1->stopUsingMemRefRegister(cg);
9224
9225
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callHelper);
9226
if (normalLockWithReservationPreserving)
9227
cg->generateDebugCounter("LockExit/Preserving/VMHelper", 1, TR::DebugCounter::Undetermined);
9228
else
9229
cg->generateDebugCounter("LockExit/Normal/VMHelper", 1, TR::DebugCounter::Undetermined);
9230
}
9231
TR::RegisterDependencyConditions *deps = NULL;
9232
TR::Register *dummyResultReg = inlineRecursive ? helperLink->buildDirectDispatch(node, &deps) : helperLink->buildDirectDispatch(node);
9233
TR::RegisterDependencyConditions *mergeConditions = NULL;
9234
if (inlineRecursive)
9235
mergeConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(conditions, deps, cg);
9236
else
9237
mergeConditions = conditions;
9238
9239
generateS390LabelInstruction(cg,TR::InstOpCode::label,node,returnLabel,mergeConditions);
9240
9241
cursor = generateS390BranchInstruction(cg,TR::InstOpCode::BRC,TR::InstOpCode::COND_BRC,node,cFlowRegionEnd);
9242
if (debugObj)
9243
{
9244
debugObj->addInstructionComment(cursor, "Denotes end of OOL monexit: return to mainline");
9245
}
9246
// Done using OOL with manual code generation
9247
outlinedHelperCall->swapInstructionListsWithCompilation();
9248
9249
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, conditions);
9250
9251
cg->stopUsingRegister(monitorReg);
9252
if (objectClassReg)
9253
cg->stopUsingRegister(objectClassReg);
9254
if (lookupOffsetReg)
9255
cg->stopUsingRegister(lookupOffsetReg);
9256
if (tempRegister && (tempRegister != objectClassReg))
9257
cg->stopUsingRegister(tempRegister);
9258
if (scratchRegister)
9259
cg->stopUsingRegister(scratchRegister);
9260
cg->decReferenceCount(objNode);
9261
9262
return NULL;
9263
}
9264
9265
static void
9266
roundArrayLengthToObjectAlignment(TR::CodeGenerator* cg, TR::Node* node, TR::Instruction*& iCursor, TR::Register* dataSizeReg,
9267
TR::RegisterDependencyConditions* conditions, TR::Register *litPoolBaseReg, int32_t allocSize, int32_t elementSize, TR::Register* sizeReg, TR::LabelSymbol * exitOOLLabel = NULL)
9268
{
9269
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
9270
int32_t alignmentConstant = TR::Compiler->om.getObjectAlignmentInBytes();
9271
if (exitOOLLabel)
9272
{
9273
TR_Debug * debugObj = cg->getDebug();
9274
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, exitOOLLabel);
9275
//TODO if not outline stuff?
9276
if (debugObj)
9277
debugObj->addInstructionComment(iCursor, "Exit OOL, going back to main line");
9278
}
9279
9280
// Size of array is headerSize + dataSize. If either aren't
9281
// multiples of alignment then their sum likely won't be
9282
bool needsAlignment = ( ((allocSize % alignmentConstant) != 0) ||
9283
((elementSize % alignmentConstant) != 0) );
9284
9285
bool canCombineAGRs = ( ((allocSize % alignmentConstant) == 0) &&
9286
(elementSize < alignmentConstant));
9287
9288
if(!canCombineAGRs)
9289
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, sizeReg, dataSizeReg, iCursor);
9290
9291
if(needsAlignment)
9292
{
9293
iCursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, sizeReg, alignmentConstant - 1 + allocSize, iCursor);
9294
if (cg->comp()->target().is64Bit())
9295
iCursor = generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, sizeReg, sizeReg, -((int64_t) (alignmentConstant)), conditions, litPoolBaseReg);
9296
else
9297
iCursor = generateS390ImmOp(cg, TR::InstOpCode::getAndOpCode(), node, sizeReg, sizeReg, -((int32_t) (alignmentConstant)), conditions, litPoolBaseReg);
9298
}
9299
else
9300
{
9301
iCursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, sizeReg, allocSize, iCursor);
9302
}
9303
}
9304
9305
9306
static void
9307
genHeapAlloc(TR::Node * node, TR::Instruction *& iCursor, bool isVariableLen, TR::Register * enumReg, TR::Register * resReg,
9308
TR::Register * zeroReg, TR::Register * dataSizeReg, TR::Register * sizeReg, TR::LabelSymbol * callLabel, int32_t allocSize,
9309
int32_t elementSize, TR::CodeGenerator * cg, TR::Register * litPoolBaseReg, TR::RegisterDependencyConditions * conditions,
9310
TR::Instruction *& firstBRCToOOL, TR::Instruction *& secondBRCToOOL, TR::LabelSymbol * exitOOLLabel = NULL)
9311
{
9312
TR::Compilation *comp = cg->comp();
9313
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9314
if (!comp->getOptions()->realTimeGC())
9315
{
9316
TR::Register *metaReg = cg->getMethodMetaDataRealRegister();
9317
9318
// bool sizeInReg = (isVariableLen || (allocSize > MAX_IMMEDIATE_VAL));
9319
9320
int alignmentConstant = TR::Compiler->om.getObjectAlignmentInBytes();
9321
9322
if (isVariableLen)
9323
{
9324
if (exitOOLLabel)
9325
{
9326
TR_Debug * debugObj = cg->getDebug();
9327
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, exitOOLLabel);
9328
if (debugObj)
9329
debugObj->addInstructionComment(iCursor, "Exit OOL, going back to main line");
9330
}
9331
// Detect large or negative number of elements, and call the helper in that case.
9332
// This 1MB limit comes from the cg.
9333
9334
TR::Register * tmp = sizeReg;
9335
if (allocSize % alignmentConstant == 0 && elementSize < alignmentConstant)
9336
{
9337
tmp = dataSizeReg;
9338
}
9339
9340
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
9341
{
9342
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRAK, node, tmp, enumReg, 16, iCursor);
9343
}
9344
else
9345
{
9346
iCursor = generateRRInstruction(cg, TR::InstOpCode::LR, node, tmp, enumReg, iCursor);
9347
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRA, node, tmp, 16, iCursor);
9348
}
9349
9350
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, callLabel, iCursor);
9351
if(!firstBRCToOOL)
9352
{
9353
firstBRCToOOL = iCursor;
9354
}
9355
else
9356
{
9357
secondBRCToOOL = iCursor;
9358
}
9359
}
9360
9361
// We are loading up a partially constructed object. Don't let GC interfere with us
9362
// at this moment
9363
if (isVariableLen)
9364
{
9365
//Call helper to turn array length into size in bytes and do object alignment if necessary
9366
roundArrayLengthToObjectAlignment(cg, node, iCursor, dataSizeReg, conditions, litPoolBaseReg, allocSize, elementSize, sizeReg);
9367
9368
#if defined(J9VM_INTERP_FLAGS_IN_CLASS_SLOT)
9369
// All arrays in combo builds will always be at least 12 bytes in size in all specs:
9370
//
9371
// 1) class pointer + contig length + one or more elements
9372
// 2) class pointer + 0 + 0 (for zero length arrays)
9373
//
9374
//Since objects are aligned to 8 bytes then the minimum size for an array must be 16 after rounding
9375
9376
static_assert(J9_GC_MINIMUM_OBJECT_SIZE >= 8, "Expecting a minimum object size >= 8");
9377
#endif
9378
}
9379
9380
// Calculate the after-allocation heapAlloc: if the size is huge,
9381
// we need to check address wrap-around also. This is unsigned
9382
// integer arithmetic, checking carry bit is enough to detect it.
9383
// For variable length array, we did an up-front check already.
9384
9385
static char * disableInitClear = feGetEnv("TR_disableInitClear");
9386
static char * disableBatchClear = feGetEnv("TR_DisableBatchClear");
9387
9388
static char * useDualTLH = feGetEnv("TR_USEDUALTLH");
9389
9390
TR::Register * addressReg = NULL, * lengthReg = NULL, * shiftReg = NULL;
9391
if (disableBatchClear && disableInitClear==NULL)
9392
{
9393
addressReg = cg->allocateRegister();
9394
lengthReg = cg->allocateRegister();
9395
shiftReg = cg->allocateRegister();
9396
9397
if (conditions != NULL)
9398
{
9399
conditions->resetIsUsed();
9400
conditions->addPostCondition(addressReg, TR::RealRegister::AssignAny);
9401
conditions->addPostCondition(shiftReg, TR::RealRegister::AssignAny);
9402
conditions->addPostCondition(lengthReg, TR::RealRegister::AssignAny);
9403
}
9404
}
9405
9406
if (isVariableLen)
9407
{
9408
if (disableBatchClear && disableInitClear==NULL)
9409
iCursor = generateRRInstruction(cg, TR::InstOpCode::LGR, node, lengthReg, sizeReg, iCursor);
9410
if (!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
9411
{
9412
iCursor = generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, sizeReg,
9413
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroHeapAlloc), cg), iCursor);
9414
}
9415
else
9416
{
9417
iCursor = generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, sizeReg,
9418
generateS390MemoryReference(metaReg, offsetof(J9VMThread, heapAlloc), cg), iCursor);
9419
}
9420
}
9421
else
9422
{
9423
if (comp->target().is64Bit())
9424
iCursor = genLoadLongConstant(cg, node, allocSize, sizeReg, iCursor, conditions);
9425
else
9426
iCursor = generateLoad32BitConstant(cg, node, allocSize, sizeReg, true, iCursor, conditions);
9427
9428
if (disableBatchClear && disableInitClear==NULL)
9429
iCursor = generateRRInstruction(cg, TR::InstOpCode::LGR, node, lengthReg, sizeReg, iCursor);
9430
9431
if (!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
9432
{
9433
iCursor = generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, sizeReg,
9434
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroHeapAlloc), cg), iCursor);
9435
}
9436
else
9437
{
9438
iCursor = generateRXInstruction(cg, TR::InstOpCode::getAddOpCode(), node, sizeReg,
9439
generateS390MemoryReference(metaReg, offsetof(J9VMThread, heapAlloc), cg), iCursor);
9440
}
9441
9442
}
9443
9444
if (allocSize > cg->getMaxObjectSizeGuaranteedNotToOverflow())
9445
{
9446
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BO, node, callLabel, iCursor);
9447
if(!firstBRCToOOL)
9448
{
9449
firstBRCToOOL = iCursor;
9450
}
9451
else
9452
{
9453
secondBRCToOOL = iCursor;
9454
}
9455
}
9456
9457
if (!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
9458
{
9459
iCursor = generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, sizeReg,
9460
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroHeapTop), cg), iCursor);
9461
9462
// Moving the BRC before load so that the return object can be dead right after BRASL when heap alloc OOL opt is enabled
9463
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, callLabel, iCursor);
9464
if(!firstBRCToOOL)
9465
{
9466
firstBRCToOOL = iCursor;
9467
}
9468
else
9469
{
9470
secondBRCToOOL = iCursor;
9471
}
9472
9473
9474
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, resReg,
9475
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroHeapAlloc), cg), iCursor);
9476
}
9477
else
9478
{
9479
iCursor = generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, sizeReg,
9480
generateS390MemoryReference(metaReg, offsetof(J9VMThread, heapTop), cg), iCursor);
9481
9482
// Moving the BRC before load so that the return object can be dead right after BRASL when heap alloc OOL opt is enabled
9483
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, callLabel, iCursor);
9484
if(!firstBRCToOOL)
9485
{
9486
firstBRCToOOL = iCursor;
9487
}
9488
else
9489
{
9490
secondBRCToOOL = iCursor;
9491
}
9492
9493
9494
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, resReg,
9495
generateS390MemoryReference(metaReg, offsetof(J9VMThread, heapAlloc), cg), iCursor);
9496
}
9497
9498
9499
if (!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
9500
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, sizeReg,
9501
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroHeapAlloc), cg), iCursor);
9502
else
9503
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, sizeReg,
9504
generateS390MemoryReference(metaReg, offsetof(J9VMThread, heapAlloc), cg), iCursor);
9505
TR::LabelSymbol * fillerRemLabel = generateLabelSymbol(cg);
9506
TR::LabelSymbol * doneLabel = generateLabelSymbol(cg);
9507
9508
TR::LabelSymbol * fillerLoopLabel = generateLabelSymbol(cg);
9509
9510
// do this clear, if disableBatchClear is on
9511
if (disableBatchClear && disableInitClear==NULL) //&& (node->getOpCodeValue() == TR::anewarray) && (node->getFirstChild()->getInt()>0) && (node->getFirstChild()->getInt()<6) )
9512
{
9513
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, addressReg, resReg, iCursor);
9514
// Dont overwrite the class
9515
//
9516
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, shiftReg, lengthReg, iCursor);
9517
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRA, node, shiftReg, 8);
9518
9519
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, fillerRemLabel, iCursor);
9520
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fillerLoopLabel);
9521
iCursor = generateSS1Instruction(cg, TR::InstOpCode::XC, node, 255, generateS390MemoryReference(addressReg, 0, cg), generateS390MemoryReference(addressReg, 0, cg), iCursor);
9522
9523
iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, addressReg, generateS390MemoryReference(addressReg, 256, cg), iCursor);
9524
9525
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRCT, node, shiftReg, fillerLoopLabel);
9526
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fillerRemLabel);
9527
9528
// and to only get the right 8 bits (remainder)
9529
iCursor = generateRIInstruction(cg, TR::InstOpCode::NILL, node, lengthReg, 0x00FF);
9530
iCursor = generateRIInstruction(cg, TR::InstOpCode::AHI, node, lengthReg, -1);
9531
// branch to done if length < 0
9532
9533
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, node, doneLabel, iCursor);
9534
9535
iCursor = generateSS1Instruction(cg, TR::InstOpCode::XC, node, 0, generateS390MemoryReference(addressReg, 0, cg), generateS390MemoryReference(addressReg, 0, cg), iCursor);
9536
9537
// minus 1 from lengthreg since xc auto adds 1 to it
9538
9539
iCursor = generateEXDispatch(node, cg, lengthReg, shiftReg, iCursor);
9540
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
9541
}
9542
cg->stopUsingRegister(addressReg);
9543
cg->stopUsingRegister(shiftReg);
9544
cg->stopUsingRegister(lengthReg);
9545
9546
if (zeroReg != NULL)
9547
{
9548
iCursor = generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, zeroReg, zeroReg, iCursor);
9549
}
9550
}
9551
else
9552
{
9553
TR_ASSERT(0, "genHeapAlloc() not supported for RT");
9554
}
9555
}
9556
9557
9558
9559
static void
9560
genInitObjectHeader(TR::Node * node, TR::Instruction *& iCursor, TR_OpaqueClassBlock * classAddress, TR::Register * classReg, TR::Register * resReg,
9561
TR::Register * zeroReg, TR::Register * temp1Reg, TR::Register * litPoolBaseReg,
9562
TR::RegisterDependencyConditions * conditions,
9563
TR::CodeGenerator * cg, TR::Register * enumReg = NULL, bool canUseIIHF = false)
9564
{
9565
TR::Compilation *comp = cg->comp();
9566
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9567
TR_J9VM *fej9vm = (TR_J9VM *)(comp->fe());
9568
if (!comp->getOptions()->realTimeGC())
9569
{
9570
J9ROMClass *romClass = 0;
9571
int32_t staticFlag = 0;
9572
uint32_t orFlag = 0;
9573
TR::Register *metaReg = cg->getMethodMetaDataRealRegister();
9574
TR_ASSERT(classAddress, "Cannot have a null OpaqueClassBlock\n");
9575
romClass = TR::Compiler->cls.romClassOf(classAddress);
9576
staticFlag = romClass->instanceShape;
9577
9578
// a pointer to the virtual register that will actually hold the class pointer.
9579
TR::Register * clzReg = classReg;
9580
// TODO: Following approach for initializing object header for array of objects in AOT is conservative.
9581
// We need support for relocation in generating RIL type instruction. If we have support, we can use
9582
// same sequence generated in JIT which saves us a load and store.
9583
if (comp->compileRelocatableCode())
9584
{
9585
if (node->getOpCodeValue() == TR::newarray)
9586
{
9587
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, temp1Reg,
9588
generateS390MemoryReference(metaReg, offsetof(J9VMThread, javaVM), cg), iCursor);
9589
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, temp1Reg,
9590
generateS390MemoryReference(temp1Reg,
9591
fej9vm->getPrimitiveArrayOffsetInJavaVM(node->getSecondChild()->getInt()), cg),
9592
iCursor);
9593
clzReg = temp1Reg;
9594
}
9595
else if (node->getOpCodeValue() == TR::anewarray)
9596
{
9597
TR_ASSERT(classReg, "must have a classReg for TR::anewarray in AOT mode");
9598
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, temp1Reg,
9599
generateS390MemoryReference(classReg, offsetof(J9Class, arrayClass), cg), iCursor);
9600
clzReg = temp1Reg;
9601
//clzReg = classReg;
9602
}
9603
else
9604
{
9605
TR_ASSERT(node->getOpCodeValue() == TR::New && classReg,
9606
"must have a classReg for TR::New in AOT mode");
9607
clzReg = classReg;
9608
}
9609
}
9610
9611
// Store the class
9612
if (clzReg == NULL)
9613
{
9614
if (cg->wantToPatchClassPointer(classAddress, node))
9615
{
9616
iCursor = genLoadAddressConstantInSnippet(cg, node, (intptr_t) classAddress | (intptr_t)orFlag, temp1Reg, iCursor, conditions, litPoolBaseReg, true);
9617
if (orFlag != 0)
9618
{
9619
if (TR::Compiler->om.compressObjectReferences())
9620
iCursor = generateS390ImmOp(cg, TR::InstOpCode::O, node, temp1Reg, temp1Reg, (int32_t)orFlag, conditions, litPoolBaseReg);
9621
else
9622
{
9623
if (comp->target().is64Bit())
9624
iCursor = generateS390ImmOp(cg, TR::InstOpCode::OG, node, temp1Reg, temp1Reg, (int64_t)orFlag, conditions, litPoolBaseReg);
9625
else
9626
iCursor = generateS390ImmOp(cg, TR::InstOpCode::O, node, temp1Reg, temp1Reg, (int32_t)orFlag, conditions, litPoolBaseReg);
9627
}
9628
}
9629
}
9630
else
9631
{
9632
//case for arraynew and anewarray for compressedrefs and 31 bit
9633
/*
9634
* node->getOpCodeValue() == TR::newarray
9635
[0x484DF88C20] LGFI GPR15,674009856
9636
[0x484DF88DD8] ST GPR15,#613 0(GPR3)
9637
[0x484DF88F60] ST GPR2,#614 4(GPR3)
9638
9639
to
9640
IIHF
9641
STG GPR2,#614 0(GPR3)
9642
9643
*/
9644
9645
if (!canUseIIHF)
9646
iCursor = genLoadAddressConstant(cg, node, (intptr_t) classAddress | (intptr_t)orFlag, temp1Reg, iCursor, conditions, litPoolBaseReg);
9647
}
9648
if (canUseIIHF)
9649
{
9650
iCursor = generateRILInstruction(cg, TR::InstOpCode::IIHF, node, enumReg, static_cast<uint32_t>(reinterpret_cast<uintptr_t>(classAddress)) | orFlag, iCursor);
9651
}
9652
else
9653
{
9654
if (TR::Compiler->om.compressObjectReferences())
9655
// must store just 32 bits (class offset)
9656
9657
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, temp1Reg,
9658
generateS390MemoryReference(resReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), iCursor);
9659
else
9660
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp1Reg,
9661
generateS390MemoryReference(resReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), iCursor);
9662
}
9663
}
9664
else
9665
{
9666
if (TR::Compiler->om.compressObjectReferences())
9667
// must store just 32 bits (class offset)
9668
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, clzReg,
9669
generateS390MemoryReference(resReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), iCursor);
9670
else
9671
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, clzReg,
9672
generateS390MemoryReference(resReg, (int32_t) TR::Compiler->om.offsetOfObjectVftField(), cg), iCursor);
9673
}
9674
9675
#ifndef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
9676
#if defined(J9VM_OPT_NEW_OBJECT_HASH)
9677
9678
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
9679
bool isStaticFlag = fej9->isStaticObjectFlags();
9680
9681
// If the object flags cannot be determined at compile time, we have to add a load
9682
// for it. And then, OR it with temp1Reg.
9683
if (isStaticFlag)
9684
{
9685
// The object flags can be determined at compile time.
9686
staticFlag |= fej9->getStaticObjectFlags();
9687
if (staticFlag != 0)
9688
{
9689
if (staticFlag >= MIN_IMMEDIATE_VAL && staticFlag <= MAX_IMMEDIATE_VAL)
9690
{
9691
iCursor = generateSILInstruction(cg, TR::InstOpCode::MVHI, node, generateS390MemoryReference(resReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg), staticFlag, iCursor);
9692
}
9693
else
9694
{
9695
iCursor = generateLoad32BitConstant(cg, node, staticFlag, temp1Reg, true, iCursor, conditions, litPoolBaseReg);
9696
// Store the flags
9697
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, temp1Reg,
9698
generateS390MemoryReference(resReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg), iCursor);
9699
}
9700
}
9701
}
9702
else
9703
{
9704
// If the object flags cannot be determined at compile time, we add a load for it.
9705
if(!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
9706
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, temp1Reg,
9707
generateS390MemoryReference(metaReg, offsetof(J9VMThread, nonZeroAllocateThreadLocalHeap.objectFlags), cg), iCursor);
9708
else
9709
iCursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, temp1Reg,
9710
generateS390MemoryReference(metaReg, offsetof(J9VMThread, allocateThreadLocalHeap.objectFlags), cg), iCursor);
9711
9712
// OR staticFlag with temp1Reg
9713
if (staticFlag)
9714
iCursor = generateS390ImmOp(cg, TR::InstOpCode::O, node, temp1Reg, temp1Reg, staticFlag, conditions, litPoolBaseReg);
9715
// Store the flags
9716
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, temp1Reg,
9717
generateS390MemoryReference(resReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg), iCursor);
9718
//iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp1Reg,
9719
// generateS390MemoryReference(resReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg), iCursor);
9720
}
9721
#endif /* J9VM_OPT_NEW_OBJECT_HASH */
9722
#endif /* FLAGS_IN_CLASS_SLOT */
9723
}
9724
else
9725
{
9726
TR_ASSERT(0, "genInitObjecHeader not supported for RT");
9727
}
9728
9729
}
9730
9731
9732
static void
9733
genAlignDoubleArray(TR::Node * node, TR::Instruction *& iCursor, bool isVariableLen, TR::Register * resReg, int32_t objectSize,
9734
int32_t dataBegin, TR::Register * dataSizeReg, TR::Register * temp1Reg, TR::Register * temp2Reg, TR::Register * litPoolBaseReg,
9735
TR::RegisterDependencyConditions * conditions, TR::CodeGenerator * cg)
9736
{
9737
TR::LabelSymbol * slotAtStart = generateLabelSymbol(cg);
9738
TR::LabelSymbol * doneAlign = generateLabelSymbol(cg);
9739
9740
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, temp1Reg, resReg, iCursor);
9741
iCursor = generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, temp2Reg, 3, iCursor);
9742
iCursor = generateS390ImmOp(cg, TR::InstOpCode::N, node, temp1Reg, temp1Reg, 7, conditions, litPoolBaseReg);
9743
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNZ, node, slotAtStart, iCursor);
9744
9745
// The slop bytes are at the end of the allocated object.
9746
if (isVariableLen)
9747
{
9748
if (cg->comp()->target().is64Bit())
9749
{
9750
iCursor = generateRRInstruction(cg, TR::InstOpCode::LGFR, node, dataSizeReg, dataSizeReg, iCursor);
9751
}
9752
9753
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp2Reg,
9754
generateS390MemoryReference(resReg, dataSizeReg, dataBegin, cg), iCursor);
9755
}
9756
else if (objectSize >= MAXDISP)
9757
{
9758
iCursor = genLoadAddressConstant(cg, node, (intptr_t) objectSize, temp1Reg, iCursor, conditions);
9759
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp2Reg,
9760
generateS390MemoryReference(resReg, temp1Reg, 0, cg), iCursor);
9761
}
9762
else
9763
{
9764
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp2Reg,
9765
generateS390MemoryReference(resReg, objectSize, cg), iCursor);
9766
}
9767
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneAlign, iCursor);
9768
9769
// the slop bytes are at the start of the allocation
9770
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, slotAtStart, iCursor);
9771
iCursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, temp2Reg,
9772
generateS390MemoryReference(resReg, (int32_t) 0, cg), iCursor);
9773
iCursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, resReg, 4, iCursor);
9774
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneAlign, iCursor);
9775
}
9776
9777
9778
static void
9779
genInitArrayHeader(TR::Node * node, TR::Instruction *& iCursor, bool isVariableLen, TR_OpaqueClassBlock * classAddress, TR::Register * classReg,
9780
TR::Register * resReg, TR::Register * zeroReg, TR::Register * eNumReg, TR::Register * dataSizeReg, TR::Register * temp1Reg,
9781
TR::Register * litPoolBaseReg, TR::RegisterDependencyConditions * conditions, TR::CodeGenerator * cg)
9782
{
9783
TR::Compilation *comp = cg->comp();
9784
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9785
bool canUseIIHF= false;
9786
if (!comp->compileRelocatableCode() && (node->getOpCodeValue() == TR::newarray || node->getOpCodeValue() == TR::anewarray)
9787
&& (TR::Compiler->om.compressObjectReferences() || comp->target().is32Bit())
9788
#ifndef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
9789
#if defined(J9VM_OPT_NEW_OBJECT_HASH)
9790
&& false
9791
#endif /* J9VM_OPT_NEW_OBJECT_HASH */
9792
#endif /* FLAGS_IN_CLASS_SLOT */
9793
)
9794
{
9795
canUseIIHF = true;
9796
}
9797
genInitObjectHeader(node, iCursor, classAddress, classReg, resReg, zeroReg, temp1Reg, litPoolBaseReg, conditions, cg, eNumReg, canUseIIHF);
9798
9799
// Store the array size
9800
if (canUseIIHF)
9801
{
9802
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, eNumReg,
9803
generateS390MemoryReference(resReg, TR::Compiler->om.offsetOfObjectVftField(), cg), iCursor);
9804
}
9805
else
9806
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, eNumReg,
9807
generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousArraySizeField(), cg), iCursor);
9808
9809
static char * allocZeroArrayWithVM = feGetEnv("TR_VMALLOCZEROARRAY");
9810
static char * useDualTLH = feGetEnv("TR_USEDUALTLH");
9811
//write 0
9812
if(!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization() && allocZeroArrayWithVM == NULL)
9813
iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, eNumReg,
9814
generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), iCursor);
9815
}
9816
9817
TR::Register *
9818
J9::Z::TreeEvaluator::VMnewEvaluator(TR::Node * node, TR::CodeGenerator * cg)
9819
{
9820
int32_t allocateSize, objectSize, dataBegin;
9821
TR::ILOpCodes opCode;
9822
TR_OpaqueClassBlock * classAddress = 0;
9823
TR::Register *classReg = NULL;
9824
TR::Register *resReg = NULL;
9825
TR::Register *zeroReg = NULL;
9826
TR::Register *litPoolBaseReg = NULL;
9827
TR::Register *enumReg = NULL;
9828
TR::Register *copyReg = NULL;
9829
TR::Register *classRegAOT = NULL;
9830
TR::Register *temp1Reg = NULL;
9831
TR::Register *callResult = NULL;
9832
TR::Register *dataSizeReg = NULL;
9833
TR::Node *litPoolBaseChild = NULL;
9834
TR::Register *copyClassReg = NULL;
9835
9836
TR_S390ScratchRegisterManager *srm = cg->generateScratchRegisterManager();
9837
9838
TR::LabelSymbol * callLabel, * cFlowRegionEnd;
9839
TR_S390OutOfLineCodeSection* outlinedSlowPath = NULL;
9840
TR::RegisterDependencyConditions * conditions;
9841
TR::Instruction * iCursor = NULL;
9842
bool isArray = false, isDoubleArray = false;
9843
bool isVariableLen;
9844
int32_t litPoolRegTotalUse, temp2RegTotalUse;
9845
int32_t elementSize;
9846
TR::Compilation *comp = cg->comp();
9847
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9848
9849
9850
/* New Evaluator Optimization: Using OOL instead of snippet for heap alloc
9851
* The purpose of moving to an OOL from a snippet is that we don't need to
9852
* hard code dependencies at the merge label, hence it could possibly reduce
9853
* spills. When we have a snippet, then all registers used in between the
9854
* branch to the snippet and the merge label need to be assigned to specific
9855
* registers and added to the dependencies at the merge label.
9856
* Option to disable it: disableHeapAllocOOL */
9857
9858
/* Variables needed for Heap alloc OOL Opt */
9859
TR::Register * tempResReg;//Temporary register used to get the result from the BRASL call in heap alloc OOL
9860
TR::RegisterDependencyConditions * heapAllocDeps1;//Dependencies needed for BRASL call in heap alloc OOL
9861
TR::Instruction *firstBRCToOOL = NULL;
9862
TR::Instruction *secondBRCToOOL = NULL;
9863
9864
bool generateArraylets = comp->generateArraylets();
9865
9866
// in time, the tlh will probably always be batch cleared, and therefore it will not be
9867
// necessary for the JIT-generated inline code to do the clearing of fields. But, 2 things
9868
// have to happen first:
9869
// 1.The JVM has to change it's code so that it has batch clearing on for 390 (it is currently only
9870
// on if turned on as a runtime option)
9871
// 2.The JVM has to support the call - on z/OS, Modron GC is not enabled yet and so batch tlh clearing
9872
// can not be enabled yet.
9873
bool needZeroReg = !fej9->tlhHasBeenCleared();
9874
9875
opCode = node->getOpCodeValue();
9876
9877
// Since calls to canInlineAllocate could result in different results during the same compilation,
9878
// We must be conservative and only do inline allocation if the first call (in LocalOpts.cpp) has succeeded and we have the litPoolBaseChild added.
9879
// Refer to defects 161084 and 87089
9880
if (cg->doInlineAllocate(node)
9881
&& performTransformation(comp, "O^O Inlining Allocation of %s [0x%p].\n", node->getOpCode().getName(), node))
9882
{
9883
objectSize = comp->canAllocateInline(node, classAddress);
9884
isVariableLen = (objectSize == 0);
9885
allocateSize = objectSize;
9886
callLabel = generateLabelSymbol(cg);
9887
cFlowRegionEnd = generateLabelSymbol(cg);
9888
conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(10, 13, cg);
9889
if (!comp->getOption(TR_DisableHeapAllocOOL))
9890
{
9891
heapAllocDeps1 = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(2, 4, cg);
9892
}
9893
TR::Node * firstChild = node->getFirstChild();
9894
TR::Node * secondChild = NULL;
9895
9896
// load literal pool register
9897
if (((node->getNumChildren()==3) && ((node->getOpCodeValue()==TR::anewarray)
9898
|| (node->getOpCodeValue()==TR::newarray))) ||
9899
((node->getNumChildren()==2) && (node->getOpCodeValue()==TR::New)))
9900
{
9901
litPoolBaseChild=node->getLastChild();
9902
TR_ASSERT((litPoolBaseChild->getOpCodeValue()==TR::aload) || (litPoolBaseChild->getOpCodeValue()==TR::aRegLoad),
9903
"Literal pool base child expected\n");
9904
litPoolBaseReg=cg->evaluate(litPoolBaseChild);
9905
litPoolRegTotalUse = litPoolBaseReg->getTotalUseCount();
9906
}
9907
9908
//////////////////////////////////////////////////////////////////////////////////////////////////////
9909
///============================ STAGE 1: Evaluate Children ========================================///
9910
//////////////////////////////////////////////////////////////////////////////////////////////////////
9911
if (opCode == TR::New)
9912
{
9913
classReg = cg->evaluate(firstChild);
9914
dataBegin = TR::Compiler->om.objectHeaderSizeInBytes();
9915
}
9916
else
9917
{
9918
isArray = true;
9919
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
9920
if (generateArraylets || TR::Compiler->om.useHybridArraylets())
9921
{
9922
if (node->getOpCodeValue() == TR::newarray)
9923
elementSize = TR::Compiler->om.getSizeOfArrayElement(node);
9924
else if (comp->useCompressedPointers())
9925
elementSize = TR::Compiler->om.sizeofReferenceField();
9926
else
9927
elementSize = TR::Compiler->om.sizeofReferenceAddress();
9928
9929
if (generateArraylets)
9930
dataBegin = fej9->getArrayletFirstElementOffset(elementSize, comp);
9931
else
9932
dataBegin = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
9933
}
9934
else
9935
{
9936
dataBegin = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
9937
elementSize = TR::Compiler->om.getSizeOfArrayElement(node);
9938
}
9939
secondChild = node->getSecondChild();
9940
// For TR::newarray, classReg is not the real class actually.
9941
if (!comp->getOption(TR_DisableHeapAllocOOL))
9942
{
9943
/* Evaluate the second child node with info about the type of object in the mainline only
9944
* when it's an evaluation for anewarray or packed anewarray or if the second child's opcode
9945
* is not a load const. Otherwise, we evaluate the second child manually in OOL since it's
9946
* not used anywhere in the mainline, hence keeping a register unnecessarily live for a very
9947
* long time before it is killed. */
9948
9949
if (!secondChild->getOpCode().isLoadConst() || node->getOpCodeValue() == TR::anewarray)
9950
{
9951
classReg = cg->evaluate(secondChild);
9952
}
9953
}
9954
else
9955
{
9956
classReg = cg->evaluate(secondChild);
9957
}
9958
9959
// Potential helper call requires us to evaluate the arguments always.
9960
enumReg = cg->evaluate(firstChild);
9961
if (!cg->canClobberNodesRegister(firstChild))
9962
{
9963
copyReg = cg->allocateRegister();
9964
TR::InstOpCode::Mnemonic loadOpCode = (firstChild->getType().isInt64()) ? TR::InstOpCode::LGR : TR::InstOpCode::LR;
9965
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, copyReg, enumReg, iCursor);
9966
enumReg = copyReg;
9967
}
9968
}
9969
9970
//////////////////////////////////////////////////////////////////////////////////////////////////////
9971
///============================ STAGE 1: Setup Register Dependencies===============================///
9972
//////////////////////////////////////////////////////////////////////////////////////////////////////
9973
9974
temp1Reg = srm->findOrCreateScratchRegister();
9975
resReg = cg->allocateCollectedReferenceRegister();
9976
9977
if (needZeroReg)
9978
zeroReg = srm->findOrCreateScratchRegister();
9979
conditions->addPostCondition(classReg, TR::RealRegister::AssignAny);
9980
if (enumReg)
9981
{
9982
conditions->addPostCondition(enumReg, TR::RealRegister::AssignAny);
9983
traceMsg(comp,"enumReg = %s\n", enumReg->getRegisterName(comp));
9984
}
9985
conditions->addPostCondition(resReg, TR::RealRegister::AssignAny);
9986
traceMsg(comp, "classReg = %s , resReg = %s \n", classReg->getRegisterName(comp), resReg->getRegisterName(comp));
9987
/* VM helper function for heap alloc expects these parameters to have these values:
9988
* GPR1 -> Type of Object
9989
* GPR2 -> Size/Number of objects (if applicable) */
9990
// We don't need these many registers dependencies as Outlined path will only contain helper call
9991
TR::Register *copyEnumReg = enumReg;
9992
TR::Register *copyClassReg = classReg;
9993
9994
//////////////////////////////////////////////////////////////////////////////////////////////////////
9995
///============================ STAGE 2: Calculate Allocation Size ================================///
9996
//////////////////////////////////////////////////////////////////////////////////////////////////////
9997
// Three possible outputs:
9998
// if variable-length array - dataSizeReg will contain the (calculated) size
9999
// if outlined - tmpReg will contain the value of
10000
// otherwise - size is in (int) allocateSize
10001
int alignmentConstant = TR::Compiler->om.getObjectAlignmentInBytes();
10002
10003
if (isVariableLen)
10004
allocateSize += dataBegin;
10005
else
10006
allocateSize = (allocateSize + alignmentConstant - 1) & (-alignmentConstant);
10007
10008
TR::LabelSymbol * exitOOLLabel = NULL;
10009
10010
10011
if (isVariableLen)
10012
{
10013
10014
//want to fold some of the
10015
/*
10016
* figure out packed arrays
10017
* LTGFR GPR14,GPR2
10018
* SLLG GPR14,GPR14,1
10019
BRC BE(0x8), Snippet Label [0x484BD04470] <------combine LTGFR + SLLG to RSIBG
10020
10021
LR GPR15,GPR2
10022
SRA GPR15,16
10023
BRC MASK6(0x6), Snippet Label [0x484BD04470] # (Start of internal control flow)
10024
LG GPR3,#511 96(GPR13)
10025
1 AGHI GPR14,7 <---can combine 1 & 3 when allocateSize (8) is multiple of alignmentConstant (8), but need
10026
to re-arrange some of the registers, result is expected in GPR15
10027
2 NILF GPR14,-8
10028
3 LGHI GPR15,8
10029
4 AGR GPR15,GPR14
10030
5 AGR GPR15,GPR3
10031
CLG GPR15,#513 104(GPR13)
10032
10033
final:
10034
10035
*
10036
*/
10037
TR::Register * tmp = NULL;
10038
dataSizeReg = srm->findOrCreateScratchRegister();
10039
if (allocateSize % alignmentConstant == 0 && elementSize < alignmentConstant)
10040
{
10041
tmp = temp1Reg;
10042
}
10043
else
10044
{
10045
tmp = dataSizeReg;
10046
}
10047
10048
/* if (elementSize >= 2)
10049
{
10050
if (comp->target().is64Bit())
10051
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, dataSizeReg, dataSizeReg, trailingZeroes(elementSize), iCursor);
10052
else
10053
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, dataSizeReg, trailingZeroes(elementSize), iCursor);
10054
} */
10055
if (callLabel != NULL && (node->getOpCodeValue() == TR::anewarray ||
10056
node->getOpCodeValue() == TR::newarray))
10057
{
10058
TR_Debug * debugObj = cg->getDebug();
10059
TR::LabelSymbol * startOOLLabel = generateLabelSymbol(cg);
10060
exitOOLLabel = generateLabelSymbol(cg);
10061
TR_S390OutOfLineCodeSection *zeroSizeArrayChckOOL;
10062
if (comp->target().is64Bit())
10063
{
10064
//need 31 bit as well, combining lgfr + sllg into rsibg
10065
int32_t shift_amount = trailingZeroes(elementSize);
10066
iCursor = generateRIEInstruction(cg, TR::InstOpCode::RISBG, node, tmp, enumReg, (int8_t) (32 - shift_amount),
10067
(int8_t)((63 - shift_amount) |0x80), (int8_t) shift_amount);
10068
}
10069
else
10070
{
10071
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegWidenOpCode(), node, tmp, enumReg, iCursor);
10072
if (elementSize >= 2)
10073
{
10074
if (comp->target().is64Bit())
10075
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, tmp, tmp, trailingZeroes(elementSize), iCursor);
10076
else
10077
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, tmp, trailingZeroes(elementSize), iCursor);
10078
}
10079
}
10080
10081
static char * allocZeroArrayWithVM = feGetEnv("TR_VMALLOCZEROARRAY");
10082
// DualTLH: Remove when performance confirmed
10083
static char * useDualTLH = feGetEnv("TR_USEDUALTLH");
10084
10085
if (comp->getOption(TR_DisableDualTLH) && useDualTLH || allocZeroArrayWithVM == NULL)
10086
{
10087
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, startOOLLabel, iCursor);
10088
TR_Debug * debugObj = cg->getDebug();
10089
zeroSizeArrayChckOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(startOOLLabel,exitOOLLabel,cg);
10090
cg->getS390OutOfLineCodeSectionList().push_front(zeroSizeArrayChckOOL);
10091
zeroSizeArrayChckOOL->swapInstructionListsWithCompilation();
10092
// Check to see if array-type is a super-class of the src object
10093
//
10094
TR::Instruction * cursor;
10095
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, startOOLLabel);
10096
if (debugObj)
10097
debugObj->addInstructionComment(cursor, "Denotes start of OOL for allocating zero size arrays");
10098
10099
/* using TR::Compiler->om.discontiguousArrayHeaderSizeInBytes() - TR::Compiler->om.contiguousArrayHeaderSizeInBytes()
10100
* for byte size for discontiguous 0 size arrays because later instructions do ( + 15 & -8) to round it to object size header and adding a j9 class header
10101
*
10102
*
10103
----------- OOL: Beginning of out-of-line code section ---------------
10104
Label [0x484BE2AC80]: ; Denotes start of OOL for allocating zero size arrays
10105
AGHI GPR_0x484BE2A900,16
10106
BRC J(0xf), Label [0x484BE2ACE0]
10107
--------------- OOL: End of out-of-line code section ------------------
10108
10109
Label [0x484BE2ACE0]: ; Exit OOL, going back to main line
10110
LR GPR_0x484BE2AAE0,GPR_0x484BE2A7A0
10111
SRA GPR_0x484BE2AAE0,16
10112
BRC MASK6(0x6), Snippet Label [0x484BE2A530] # (Start of internal control flow)
10113
AGHI GPR_0x484BE2A900,15 <----add 7 + 8
10114
NILF GPR_0x484BE2A900,-8 <---round to object size
10115
AG GPR_0x484BE2A900,#490 96(GPR13)
10116
10117
*/
10118
cursor = generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, tmp,
10119
TR::Compiler->om.discontiguousArrayHeaderSizeInBytes() - TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cursor);
10120
10121
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, exitOOLLabel,cursor);
10122
zeroSizeArrayChckOOL->swapInstructionListsWithCompilation();
10123
}
10124
else
10125
{
10126
iCursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, callLabel, iCursor);
10127
if(!firstBRCToOOL)
10128
{
10129
firstBRCToOOL = iCursor;
10130
}
10131
else
10132
{
10133
secondBRCToOOL = iCursor;
10134
}
10135
}
10136
}
10137
else
10138
{
10139
iCursor = generateRRInstruction(cg, TR::InstOpCode::getLoadRegWidenOpCode(), node, tmp, enumReg, iCursor);
10140
10141
if (elementSize >= 2)
10142
{
10143
if (comp->target().is64Bit())
10144
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, tmp, tmp, trailingZeroes(elementSize), iCursor);
10145
else
10146
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLL, node, tmp, trailingZeroes(elementSize), iCursor);
10147
}
10148
}
10149
10150
}
10151
10152
//////////////////////////////////////////////////////////////////////////////////////////////////////
10153
///============================ STAGE 3: Generate HeapTop Test=====================================///
10154
//////////////////////////////////////////////////////////////////////////////////////////////////////
10155
TR::Instruction *current;
10156
TR::Instruction *firstInstruction;
10157
srm->addScratchRegistersToDependencyList(conditions);
10158
10159
current = cg->getAppendInstruction();
10160
10161
TR_ASSERT(current != NULL, "Could not get current instruction");
10162
10163
static char * useDualTLH = feGetEnv("TR_USEDUALTLH");
10164
//Here we set up backout paths if we overflow nonZeroTLH in genHeapAlloc.
10165
//If we overflow the nonZeroTLH, set the destination to the right VM runtime helper (eg jitNewObjectNoZeroInit, etc...)
10166
//The zeroed-TLH versions have their correct destinations already setup in TR_ByteCodeIlGenerator::genNew, TR_ByteCodeIlGenerator::genNewArray, TR_ByteCodeIlGenerator::genANewArray
10167
//To retrieve the destination node->getSymbolReference() is used below after genHeapAlloc.
10168
if(!comp->getOption(TR_DisableDualTLH) && useDualTLH && node->canSkipZeroInitialization())
10169
{
10170
// For value types, the backout path should call jitNewValue helper call which is set up before code gen
10171
if ((node->getOpCodeValue() == TR::New)
10172
&& (!TR::Compiler->om.areValueTypesEnabled() || (node->getSymbolReference() != comp->getSymRefTab()->findOrCreateNewValueSymbolRef(comp->getMethodSymbol()))))
10173
node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewObjectNoZeroInitSymbolRef(comp->getMethodSymbol()));
10174
else if (node->getOpCodeValue() == TR::newarray)
10175
node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewArrayNoZeroInitSymbolRef(comp->getMethodSymbol()));
10176
else if (node->getOpCodeValue() == TR::anewarray)
10177
node->setSymbolReference(comp->getSymRefTab()->findOrCreateANewArrayNoZeroInitSymbolRef(comp->getMethodSymbol()));
10178
}
10179
10180
if (enumReg == NULL && opCode != TR::New)
10181
{
10182
enumReg = cg->allocateRegister();
10183
conditions->addPostCondition(enumReg, TR::RealRegister::AssignAny);
10184
traceMsg(comp,"enumReg = %s\n", enumReg->getRegisterName(comp));
10185
}
10186
// classReg and enumReg have to be intact still, in case we have to call the helper.
10187
// On return, zeroReg is set to 0, and dataSizeReg is set to the size of data area if
10188
// isVariableLen is true.
10189
genHeapAlloc(node, iCursor, isVariableLen, enumReg, resReg, zeroReg, dataSizeReg, temp1Reg, callLabel, allocateSize, elementSize, cg,
10190
litPoolBaseReg, conditions, firstBRCToOOL, secondBRCToOOL, exitOOLLabel);
10191
10192
//////////////////////////////////////////////////////////////////////////////////////////////////////
10193
///============================ STAGE 4: Generate Fall-back Path ==================================///
10194
//////////////////////////////////////////////////////////////////////////////////////////////////////
10195
/* New Evaluator Optimization: Using OOL instead of snippet for heap alloc */
10196
10197
/* Example of the OOL for newarray
10198
* Outlined Label L0048: ; Denotes start of OOL for heap alloc
10199
* LHI GPR_0120,0x5
10200
* assocreg
10201
* PRE:
10202
* {GPR2:GPR_0112:R} {GPR1:GPR_0120:R}
10203
* BRASL GPR_0117,0x00000000
10204
* POST:
10205
* {GPR1:D_GPR_0116:R}* {GPR14:GPR_0117:R} {GPR2:&GPR_0118:R}
10206
* LR &GPR_0115,&GPR_0118
10207
* BRC J(0xf), Label L0049*/
10208
10209
TR_Debug * debugObj = cg->getDebug();
10210
TR_S390OutOfLineCodeSection *heapAllocOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(callLabel, cFlowRegionEnd, cg);
10211
cg->getS390OutOfLineCodeSectionList().push_front(heapAllocOOL);
10212
heapAllocOOL->swapInstructionListsWithCompilation();
10213
TR::Instruction * cursorHeapAlloc;
10214
// Generating OOL label: Outlined Label L00XX
10215
cursorHeapAlloc = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callLabel);
10216
if (debugObj)
10217
debugObj->addInstructionComment(cursorHeapAlloc, "Denotes start of OOL for heap alloc");
10218
generateHelperCallForVMNewEvaluators(node, cg, true, resReg);
10219
/* Copying the return value from the temporary register to the actual register that is returned */
10220
/* Generating the branch to jump back to the merge label:
10221
* BRCL J(0xf), Label L00YZ, labelTargetAddr=0xZZZZZZZZ*/
10222
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
10223
heapAllocOOL->swapInstructionListsWithCompilation();
10224
//////////////////////////////////////////////////////////////////////////////////////////////////////
10225
///============================ STAGE 5: Initialize the new object header ==========================///
10226
//////////////////////////////////////////////////////////////////////////////////////////////////////
10227
if (isArray)
10228
{
10229
if ( comp->compileRelocatableCode() && opCode == TR::anewarray)
10230
genInitArrayHeader(node, iCursor, isVariableLen, classAddress, classReg, resReg, zeroReg,
10231
enumReg, dataSizeReg, temp1Reg, litPoolBaseReg, conditions, cg);
10232
else
10233
genInitArrayHeader(node, iCursor, isVariableLen, classAddress, NULL, resReg, zeroReg,
10234
enumReg, dataSizeReg, temp1Reg, litPoolBaseReg, conditions, cg);
10235
10236
#ifdef TR_TARGET_64BIT
10237
/* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are
10238
* simple as we just need to check first child of the node for array size. For variable length arrays
10239
* runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.
10240
*
10241
* In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
10242
* discontiguous header layout.
10243
*/
10244
TR::Register *offsetReg = NULL;
10245
TR::MemoryReference *dataAddrMR = NULL;
10246
TR::MemoryReference *dataAddrSlotMR = NULL;
10247
10248
if (isVariableLen && TR::Compiler->om.compressObjectReferences())
10249
{
10250
/* We need to check enumReg (array size) at runtime to determine correct offset of dataAddr field.
10251
* Here we deal only with compressed refs because dataAddr offset for discontiguous
10252
* and contiguous arrays is the same in full refs.
10253
*/
10254
if (comp->getOption(TR_TraceCG))
10255
traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);
10256
10257
TR_ASSERT_FATAL_WITH_NODE(node,
10258
(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
10259
"Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
10260
"But was %d bytes for discontigous and %d bytes for contiguous array.\n",
10261
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
10262
10263
offsetReg = cg->allocateRegister();
10264
// Invert enumReg sign. 0 and negative numbers remain unchanged.
10265
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGFR, node, offsetReg, enumReg, iCursor);
10266
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, temp1Reg, offsetReg, 63, iCursor);
10267
iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, offsetReg, temp1Reg, 3, iCursor);
10268
// Inverting the sign bit will leave us with either -8 (if enumCopyReg > 0) or 0 (if enumCopyReg == 0).
10269
iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGR, node, offsetReg, offsetReg, iCursor);
10270
10271
dataAddrMR = generateS390MemoryReference(resReg, offsetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
10272
dataAddrSlotMR = generateS390MemoryReference(resReg, offsetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
10273
}
10274
else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
10275
{
10276
if (comp->getOption(TR_TraceCG))
10277
traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);
10278
10279
dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
10280
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
10281
}
10282
else
10283
{
10284
if (comp->getOption(TR_TraceCG))
10285
{
10286
traceMsg(comp,
10287
"Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n",
10288
node);
10289
}
10290
10291
if (!TR::Compiler->om.compressObjectReferences())
10292
{
10293
TR_ASSERT_FATAL_WITH_NODE(node,
10294
fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
10295
"dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
10296
"But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
10297
fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
10298
}
10299
10300
dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
10301
dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
10302
}
10303
10304
iCursor = generateRXInstruction(cg, TR::InstOpCode::LAY, node, temp1Reg, dataAddrMR, iCursor);
10305
iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, temp1Reg, dataAddrSlotMR, iCursor);
10306
10307
if (offsetReg)
10308
{
10309
conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny);
10310
cg->stopUsingRegister(offsetReg);
10311
}
10312
#endif /* TR_TARGET_64BIT */
10313
// Write Arraylet Pointer
10314
if (generateArraylets)
10315
{
10316
iCursor = generateS390ImmOp(cg, TR::InstOpCode::getAddOpCode(), node, temp1Reg, resReg, dataBegin, conditions, litPoolBaseReg);
10317
iCursor = generateS390ImmOp(cg, TR::InstOpCode::getAddOpCode(), node, temp1Reg, temp1Reg, -((int64_t)(0)), conditions, litPoolBaseReg);
10318
if(TR::Compiler->om.compressedReferenceShiftOffset() > 0)
10319
iCursor = generateRSInstruction(cg, TR::InstOpCode::SRL, node, temp1Reg, TR::Compiler->om.compressedReferenceShiftOffset(), iCursor);
10320
10321
iCursor = generateRXInstruction(cg, (comp->target().is64Bit()&& !comp->useCompressedPointers()) ? TR::InstOpCode::STG : TR::InstOpCode::ST, node, temp1Reg,
10322
generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousArraySizeField(), cg), iCursor);
10323
10324
}
10325
}
10326
else
10327
{
10328
genInitObjectHeader(node, iCursor, classAddress, classReg , resReg, zeroReg, temp1Reg, litPoolBaseReg, conditions, cg);
10329
}
10330
10331
TR_ASSERT((fej9->tlhHasBeenCleared() || J9JIT_TOSS_CODE), "");
10332
10333
//////////////////////////////////////////////////////////////////////////////////////////////////////
10334
///============================ STAGE 5b: Prefetch after stores ===================================///
10335
//////////////////////////////////////////////////////////////////////////////////////////////////////
10336
if (cg->enableTLHPrefetching())
10337
{
10338
iCursor = generateS390MemInstruction(cg, TR::InstOpCode::PFD, node, 2, generateS390MemoryReference(resReg, 0x100, cg), iCursor);
10339
}
10340
10341
//////////////////////////////////////////////////////////////////////////////////////////////////////
10342
///============================ STAGE 6: AOT Relocation Records ===================================///
10343
//////////////////////////////////////////////////////////////////////////////////////////////////////
10344
if (comp->compileRelocatableCode() && (opCode == TR::New || opCode == TR::anewarray) )
10345
{
10346
firstInstruction = current->getNext();
10347
TR_RelocationRecordInformation *recordInfo =
10348
(TR_RelocationRecordInformation *) comp->trMemory()->allocateMemory(sizeof(TR_RelocationRecordInformation), heapAlloc);
10349
recordInfo->data1 = allocateSize;
10350
recordInfo->data2 = node->getInlinedSiteIndex();
10351
recordInfo->data3 = (uintptr_t) callLabel;
10352
recordInfo->data4 = (uintptr_t) firstInstruction;
10353
TR::SymbolReference * classSymRef;
10354
TR_ExternalRelocationTargetKind reloKind;
10355
TR_OpaqueClassBlock *classToValidate = classAddress;
10356
10357
if (opCode == TR::New)
10358
{
10359
classSymRef = node->getFirstChild()->getSymbolReference();
10360
reloKind = TR_VerifyClassObjectForAlloc;
10361
}
10362
else
10363
{
10364
classSymRef = node->getSecondChild()->getSymbolReference();
10365
reloKind = TR_VerifyRefArrayForAlloc;
10366
// In AOT without SVM, we validate the class by pulling it from the constant pool which is not the array class as anewarray bytecode refers to the component class.
10367
// In the evaluator we directly refer to the array class. In AOT with SVM we need to remember to validate the component class since relocation infrastructure is
10368
// expecting component class.
10369
if (comp->getOption(TR_UseSymbolValidationManager))
10370
classToValidate = comp->fej9()->getComponentClassFromArrayClass(classToValidate);
10371
}
10372
if (comp->getOption(TR_UseSymbolValidationManager))
10373
{
10374
TR_ASSERT_FATAL(classToValidate != NULL, "ClassToValidate Should not be NULL, clazz = %p\n", classAddress);
10375
recordInfo->data5 = (uintptr_t)classToValidate;
10376
}
10377
cg->addExternalRelocation(new (cg->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(firstInstruction,
10378
(uint8_t *) classSymRef,
10379
(uint8_t *) recordInfo,
10380
reloKind, cg),
10381
__FILE__, __LINE__, node);
10382
10383
}
10384
10385
//////////////////////////////////////////////////////////////////////////////////////////////////////
10386
///============================ STAGE 7: Done. Housekeeping items =================================///
10387
//////////////////////////////////////////////////////////////////////////////////////////////////////
10388
10389
// Add these registers to the dep list if they are actually used in the evaluator body
10390
// We detect use by observing if the totalUseCounts on the registers increased since their first
10391
// instance at the top of the evaluator.
10392
//
10393
if (litPoolBaseReg!=NULL && litPoolBaseReg->getTotalUseCount()>litPoolRegTotalUse)
10394
{
10395
// reset the isUSed bit on the condition, this prevents the assertion
10396
// "ERROR: cannot add conditions to an used dependency, create a copy first" from firing up.
10397
conditions->resetIsUsed();
10398
if (comp->getOption(TR_DisableHeapAllocOOL))
10399
conditions->addPostCondition(litPoolBaseReg, TR::RealRegister::AssignAny);
10400
}
10401
10402
if (!comp->getOption(TR_DisableHeapAllocOOL))
10403
{
10404
if (secondBRCToOOL)
10405
{
10406
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
10407
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
10408
10409
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, firstBRCToOOL->getPrev());
10410
cFlowRegionStart->setStartInternalControlFlow();
10411
10412
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, conditions, secondBRCToOOL);
10413
cFlowRegionEnd->setEndInternalControlFlow();
10414
}
10415
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd);
10416
}
10417
else
10418
{
10419
// determine where internal control flow begins by looking for the first branch
10420
// instruction after where the label instruction would have been inserted
10421
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
10422
10423
TR::Instruction *next = current->getNext();
10424
while(next != NULL && !next->isBranchOp())
10425
next = next->getNext();
10426
TR_ASSERT(next != NULL && next->getPrev() != NULL, "Could not find branch instruction where internal control flow begins");
10427
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart, next->getPrev());
10428
cFlowRegionStart->setStartInternalControlFlow();
10429
10430
iCursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, conditions);
10431
cFlowRegionEnd->setEndInternalControlFlow();
10432
}
10433
10434
cg->decReferenceCount(firstChild);
10435
if (secondChild)
10436
{
10437
cg->decReferenceCount(secondChild);
10438
}
10439
if (litPoolBaseChild!=NULL)
10440
{
10441
cg->decReferenceCount(litPoolBaseChild);
10442
}
10443
10444
if (classReg)
10445
cg->stopUsingRegister(classReg);
10446
if (copyClassReg)
10447
cg->stopUsingRegister(copyClassReg);
10448
if (copyEnumReg != enumReg)
10449
cg->stopUsingRegister(copyEnumReg);
10450
if (enumReg)
10451
cg->stopUsingRegister(enumReg);
10452
if (copyReg)
10453
cg->stopUsingRegister(copyReg);
10454
srm->stopUsingRegisters();
10455
node->setRegister(resReg);
10456
return resReg;
10457
}
10458
else
10459
{
10460
// The call to doInlineAllocate may return true during LocalOpts, but subsequent optimizations may prove
10461
// that the anewarray cannot be allocated inline (i.e. it will end up going to helper). An example is
10462
// when arraysize is proven to be 0, which is considered a discontiguous array size in balanced mode GC.
10463
// In such cases, we need to remove the last litpool child before calling directCallEvaluator.
10464
if (((node->getNumChildren()==3) && ((node->getOpCodeValue()==TR::anewarray) || (node->getOpCodeValue()==TR::newarray))) ||
10465
((node->getNumChildren()==2) && (node->getOpCodeValue()==TR::New)))
10466
{
10467
// Remove the last literal pool child.
10468
node->removeLastChild();
10469
}
10470
return generateHelperCallForVMNewEvaluators(node, cg);
10471
}
10472
}
10473
10474
TR::Register *
10475
J9::Z::TreeEvaluator::VMarrayCheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
10476
{
10477
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
10478
TR::Node *object1 = node->getFirstChild();
10479
TR::Node *object2 = node->getSecondChild();
10480
TR::Register *object1Reg = cg->evaluate(object1);
10481
TR::Register *object2Reg = cg->evaluate(object2);
10482
10483
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
10484
TR::LabelSymbol *fallThrough = generateLabelSymbol(cg);
10485
TR::LabelSymbol *snippetLabel = NULL;
10486
TR::Snippet *snippet = NULL;
10487
TR::Register *tempReg = cg->allocateRegister();
10488
TR::Register *tempClassReg = cg->allocateRegister();
10489
TR::InstOpCode::Mnemonic loadOpcode;
10490
TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg);
10491
10492
10493
// If the objects are the same and one of them is known to be an array, they
10494
// are compatible.
10495
//
10496
if (node->isArrayChkPrimitiveArray1() ||
10497
node->isArrayChkReferenceArray1() ||
10498
node->isArrayChkPrimitiveArray2() ||
10499
node->isArrayChkReferenceArray2())
10500
{
10501
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
10502
cFlowRegionStart->setStartInternalControlFlow();
10503
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::getCmpRegOpCode(), node, object1Reg, object2Reg, TR::InstOpCode::COND_BE, fallThrough, false, false);
10504
}
10505
10506
else
10507
{
10508
// Neither object is known to be an array
10509
// Check that object 1 is an array. If not, throw exception.
10510
//
10511
TR::Register * class1Reg = cg->allocateRegister();
10512
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, class1Reg, generateS390MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
10513
10514
// TODO: Can we check the value of J9AccClassRAMArray and use NILF here?
10515
#ifdef TR_HOST_64BIT
10516
genLoadLongConstant(cg, node, J9AccClassRAMArray, tempReg, NULL, deps, NULL);
10517
generateRXInstruction(cg, TR::InstOpCode::NG, node, tempReg,
10518
new (cg->trHeapMemory()) TR::MemoryReference(class1Reg, offsetof(J9Class, classDepthAndFlags), cg));
10519
#else
10520
generateLoad32BitConstant(cg, node, J9AccClassRAMArray, tempReg, true, NULL, deps, NULL);
10521
generateRXInstruction(cg, TR::InstOpCode::N, node, tempReg,
10522
new (cg->trHeapMemory()) TR::MemoryReference(class1Reg, offsetof(J9Class, classDepthAndFlags), cg));
10523
#endif
10524
cg->stopUsingRegister(class1Reg);
10525
10526
if (!snippetLabel)
10527
{
10528
snippetLabel = generateLabelSymbol(cg);
10529
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
10530
cFlowRegionStart->setStartInternalControlFlow();
10531
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, snippetLabel);
10532
10533
snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
10534
cg->addSnippet(snippet);
10535
}
10536
else
10537
{
10538
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
10539
cFlowRegionStart->setStartInternalControlFlow();
10540
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, snippetLabel);
10541
}
10542
}
10543
10544
// Test equality of the object classes.
10545
//
10546
TR::TreeEvaluator::genLoadForObjectHeaders(cg, node, tempReg, generateS390MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
10547
10548
if (TR::Compiler->om.compressObjectReferences())
10549
generateRXInstruction(cg, TR::InstOpCode::X, node, tempReg, generateS390MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg));
10550
else
10551
generateRXInstruction(cg, TR::InstOpCode::getXOROpCode(), node, tempReg, generateS390MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg));
10552
10553
TR::TreeEvaluator::generateVFTMaskInstruction(node, tempReg, cg);
10554
10555
// XOR doesn't set the proper condition codes, so test explicitly
10556
generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), node, tempReg, 0);
10557
10558
// If either object is known to be a primitive array, we are done. Either
10559
// the equality test fails and we throw the exception or it succeeds and
10560
// we finish.
10561
//
10562
if (node->isArrayChkPrimitiveArray1() || node->isArrayChkPrimitiveArray2())
10563
{
10564
if (!snippetLabel)
10565
{
10566
snippetLabel = generateLabelSymbol(cg);
10567
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, snippetLabel);
10568
10569
snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
10570
cg->addSnippet(snippet);
10571
}
10572
else
10573
{
10574
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, snippetLabel);
10575
}
10576
}
10577
10578
// Otherwise, there is more testing to do. If the classes are equal we
10579
// are done, and branch to the fallThrough label.
10580
//
10581
else
10582
{
10583
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, fallThrough);
10584
10585
// If either object is not known to be a reference array type, check it
10586
// We already know that object1 is an array type but we may have to now
10587
// check object2.
10588
//
10589
if (!node->isArrayChkReferenceArray1())
10590
{
10591
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, tempClassReg, generateS390MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
10592
10593
// ramclass->classDepth&flags
10594
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, tempReg,
10595
new (cg->trHeapMemory()) TR::MemoryReference(tempClassReg, offsetof(J9Class, classDepthAndFlags), cg));
10596
10597
// X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift
10598
generateRSInstruction(cg, TR::InstOpCode::SRL, node, tempReg, J9AccClassRAMShapeShift);
10599
10600
// X & OBJECT_HEADER_SHAPE_MASK
10601
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, tempClassReg, tempClassReg);
10602
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, tempClassReg, OBJECT_HEADER_SHAPE_MASK);
10603
generateRRInstruction(cg, TR::InstOpCode::NR, node, tempClassReg, tempReg);
10604
10605
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, tempReg, OBJECT_HEADER_SHAPE_POINTERS);
10606
10607
if (!snippetLabel)
10608
{
10609
snippetLabel = generateLabelSymbol(cg);
10610
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, tempReg, tempClassReg, TR::InstOpCode::COND_BNZ, snippetLabel, false, false);
10611
10612
snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
10613
cg->addSnippet(snippet);
10614
}
10615
else
10616
{
10617
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, tempReg, tempClassReg, TR::InstOpCode::COND_BNZ, snippetLabel, false, false);
10618
}
10619
}
10620
if (!node->isArrayChkReferenceArray2())
10621
{
10622
// Check that object 2 is an array. If not, throw exception.
10623
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, tempClassReg, generateS390MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
10624
10625
// TODO: Can we check the value of J9AccClassRAMArray and use NILF here?
10626
#ifdef TR_HOST_64BIT
10627
{
10628
genLoadLongConstant(cg, node, J9AccClassRAMArray, tempReg, NULL, deps, NULL);
10629
generateRXInstruction(cg, TR::InstOpCode::NG, node, tempReg,
10630
new (cg->trHeapMemory()) TR::MemoryReference(tempClassReg, offsetof(J9Class, classDepthAndFlags), cg));
10631
}
10632
#else
10633
{
10634
generateLoad32BitConstant(cg, node, J9AccClassRAMArray, tempReg, true, NULL, deps, NULL);
10635
generateRXInstruction(cg, TR::InstOpCode::N, node, tempReg,
10636
new (cg->trHeapMemory()) TR::MemoryReference(tempClassReg, offsetof(J9Class, classDepthAndFlags), cg));
10637
}
10638
#endif
10639
if (!snippetLabel)
10640
{
10641
snippetLabel = generateLabelSymbol(cg);
10642
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, snippetLabel);
10643
10644
snippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, snippetLabel, node->getSymbolReference());
10645
cg->addSnippet(snippet);
10646
}
10647
else
10648
{
10649
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BZ, node, snippetLabel);
10650
}
10651
10652
//* Test object2 is reference array
10653
TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, tempClassReg, generateS390MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), NULL);
10654
10655
// ramclass->classDepth&flags
10656
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, tempReg,
10657
new (cg->trHeapMemory()) TR::MemoryReference(tempClassReg, offsetof(J9Class, classDepthAndFlags), cg));
10658
10659
// X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift
10660
generateRSInstruction(cg, TR::InstOpCode::SRL, node, tempReg, J9AccClassRAMShapeShift);
10661
10662
// X & OBJECT_HEADER_SHAPE_MASK
10663
generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, tempClassReg, tempClassReg);
10664
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, tempClassReg,OBJECT_HEADER_SHAPE_MASK);
10665
generateRRInstruction(cg, TR::InstOpCode::NR, node, tempClassReg, tempReg);
10666
10667
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, tempReg, OBJECT_HEADER_SHAPE_POINTERS);
10668
10669
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CR, node, tempReg, tempClassReg, TR::InstOpCode::COND_BNZ, snippetLabel, false, false);
10670
}
10671
10672
// Now both objects are known to be reference arrays, so they are
10673
// compatible for arraycopy.
10674
}
10675
10676
// Now generate the fall-through label
10677
//
10678
deps->addPostCondition(object1Reg, TR::RealRegister::AssignAny);
10679
deps->addPostConditionIfNotAlreadyInserted(object2Reg, TR::RealRegister::AssignAny); // 1st and 2nd object may be the same.
10680
deps->addPostCondition(tempReg, TR::RealRegister::AssignAny);
10681
deps->addPostCondition(tempClassReg, TR::RealRegister::AssignAny);
10682
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, fallThrough, deps);
10683
fallThrough->setEndInternalControlFlow();
10684
10685
cg->stopUsingRegister(tempClassReg);
10686
cg->stopUsingRegister(tempReg);
10687
cg->decReferenceCount(object1);
10688
cg->decReferenceCount(object2);
10689
10690
return 0;
10691
}
10692
10693
10694
10695
/////////////////////////////////////////////////////////////////////////////////
10696
/////////////////////////////////////////////////////////////////////////////////
10697
/**
 * \brief
 *    Tries to generate an inline sequence for a call recognized as
 *    java/lang/Class.isAssignableFrom, with an out-of-line call as the fallback path.
 *
 * \details
 *    Generated sequence:
 *      - If the register of either child is zero, branch to the out-of-line call.
 *      - Load the class field (at fej9->getOffsetOfClassFromJavaLangClassField()) of the
 *        second child and compare it with the same field of the first child; the result
 *        register is preset to 1.
 *      - When the class depth of the first child's class is known at compile time: equal
 *        classes branch to done, otherwise genTestIsSuper is emitted, followed by a branch
 *        to done on equality and a fall-through into the fail label, which sets the result to 0.
 *      - When the class depth is not known: unequal classes branch to the out-of-line call.
 *
 *    The class depth is only known when the first child is an aloadi through the
 *    "java/lang/Class from class" symbol reference of a loadaddr whose symbol is resolved
 *    and has a non-NULL static address.
 *
 * \param node  The call node. The first and second children are the two class objects.
 * \param cg    The code generator.
 *
 * \return
 *    false if nothing was generated: the TR_disableInlineIsAssignableFrom environment
 *    variable is set, or the first child is an aloadi of a loadaddr whose symbol reference
 *    is an interface or abstract class. true otherwise, in which case the node's register
 *    has been set and both children's reference counts have been decremented.
 */
static bool inlineIsAssignableFrom(TR::Node *node, TR::CodeGenerator *cg)
   {
   static char *disable = feGetEnv("TR_disableInlineIsAssignableFrom");
   if (disable)
      return false;

   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   TR::Node *thisClass = node->getFirstChild();
   const bool isLoadThroughClassAddress =
      thisClass->getOpCodeValue() == TR::aloadi &&
      thisClass->getFirstChild()->getOpCodeValue() == TR::loadaddr;

   if (isLoadThroughClassAddress)
      {
      TR::SymbolReference *thisClassSymRef = thisClass->getFirstChild()->getSymbolReference();

      if (thisClassSymRef->isClassInterface(comp) || thisClassSymRef->isClassAbstract(comp))
         {
         return false;
         }
      }

   // Try to determine the class depth at compile time; -1 means it is not known.
   int32_t classDepth = -1;
   if (isLoadThroughClassAddress &&
       thisClass->getSymbolReference() == comp->getSymRefTab()->findJavaLangClassFromClassSymbolRef())
      {
      TR::Node *castClassRef = thisClass->getFirstChild();

      TR::SymbolReference *castClassSymRef = NULL;
      if (castClassRef->getOpCode().hasSymbolReference())
         castClassSymRef = castClassRef->getSymbolReference();

      TR::StaticSymbol *castClassSym = NULL;
      if (castClassSymRef && !castClassSymRef->isUnresolved())
         castClassSym = castClassSymRef->getSymbol()->getStaticSymbol();

      TR_OpaqueClassBlock *clazz = NULL;
      if (castClassSym)
         clazz = (TR_OpaqueClassBlock *) castClassSym->getStaticAddress();

      if (clazz)
         classDepth = (int32_t)TR::Compiler->cls.classDepthOf(clazz);
      }

   const bool classDepthIsKnown = (classDepth != -1);

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   // startLabel->setStartInternalControlFlow();
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *failLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *outlinedCallLabel = generateLabelSymbol(cg);
   // doneLabel->setEndInternalControlFlow();

   TR::Register *thisClassReg = cg->evaluate(node->getFirstChild());
   TR::Register *checkClassReg = cg->evaluate(node->getSecondChild());

   TR::RegisterDependencyConditions *deps = NULL;

   // tempReg holds the result of the inline sequence and becomes the node's register.
   TR::Register *tempReg = cg->allocateRegister();
   TR::Register *objClassReg = NULL;
   TR::Register *castClassReg = NULL;
   TR::Register *scratch1Reg = NULL;
   TR::Register *scratch2Reg = NULL;
   int8_t numOfPostDepConditions = (thisClassReg == checkClassReg) ? 2 : 3;

   if (classDepthIsKnown)
      {
      // Four extra registers are needed by the superclass test.
      deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numOfPostDepConditions+4, cg);
      objClassReg = cg->allocateRegister();
      castClassReg = cg->allocateRegister();
      scratch1Reg = cg->allocateRegister();
      scratch2Reg = cg->allocateRegister();
      deps->addPostCondition(scratch1Reg, TR::RealRegister::AssignAny);
      deps->addPostCondition(scratch2Reg, TR::RealRegister::AssignAny);
      deps->addPostCondition(castClassReg, TR::RealRegister::AssignAny);
      deps->addPostCondition(objClassReg, TR::RealRegister::AssignAny);
      }
   else
      {
      deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numOfPostDepConditions, cg);
      // The loaded class is only needed for the compare below, so the result register
      // doubles as its holder until the result value is loaded.
      objClassReg = tempReg;
      }

   deps->addPostCondition(thisClassReg, TR::RealRegister::AssignAny);
   if (thisClassReg != checkClassReg)
      {
      deps->addPostCondition(checkClassReg, TR::RealRegister::AssignAny);
      }
   deps->addPostCondition(tempReg, TR::RealRegister::AssignAny);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, startLabel);

   // A zero in either class register is handled by the out-of-line call.
   generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, thisClassReg, thisClassReg);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, outlinedCallLabel);
   generateRRInstruction(cg, TR::InstOpCode::getLoadTestRegOpCode(), node, checkClassReg, checkClassReg);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, outlinedCallLabel);

   generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, objClassReg,
      generateS390MemoryReference(checkClassReg, fej9->getOffsetOfClassFromJavaLangClassField(), cg));

   generateRXInstruction(cg, TR::InstOpCode::getCmpLogicalOpCode(), node, objClassReg,
      generateS390MemoryReference(thisClassReg, fej9->getOffsetOfClassFromJavaLangClassField(), cg));

   // Preset the result to 1; the branches below consume the condition code of the compare above.
   generateRIInstruction(cg, TR::InstOpCode::LHI, node, tempReg, 1);

   if (classDepthIsKnown)
      {
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
      generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, castClassReg,
         generateS390MemoryReference(thisClassReg, fej9->getOffsetOfClassFromJavaLangClassField(), cg));

      genTestIsSuper(cg, node, objClassReg, castClassReg, scratch1Reg, scratch2Reg, tempReg, NULL, classDepth, failLabel, doneLabel, NULL, deps, NULL, false, NULL, NULL);

      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
      }
   else
      {
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, outlinedCallLabel);
      }

   // Out-of-line section that performs the original call and merges back at doneLabel.
   TR_S390OutOfLineCodeSection *outlinedHelperCall = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(node, TR::icall, tempReg, outlinedCallLabel, doneLabel, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(outlinedHelperCall);
   outlinedHelperCall->generateS390OutOfLineCodeSectionDispatch();

   cg->decReferenceCount(node->getFirstChild());
   cg->decReferenceCount(node->getSecondChild());

   node->setRegister(tempReg);

   if (classDepthIsKnown)
      {
      // Reached by fall-through from the superclass test or by a branch to failLabel.
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, failLabel, deps);
      generateRIInstruction(cg, TR::InstOpCode::LHI, node, tempReg, 0);

      cg->stopUsingRegister(objClassReg);
      cg->stopUsingRegister(castClassReg);
      cg->stopUsingRegister(scratch1Reg);
      cg->stopUsingRegister(scratch2Reg);
      }
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);

   return true;
   }
10846
10847
10848
bool
10849
J9::Z::TreeEvaluator::VMinlineCallEvaluator(TR::Node * node, bool indirect, TR::CodeGenerator * cg)
10850
{
10851
TR::ResolvedMethodSymbol * methodSymbol = node->getSymbol()->getResolvedMethodSymbol();
10852
10853
if (!methodSymbol)
10854
{
10855
return false;
10856
}
10857
10858
10859
bool callWasInlined = false;
10860
if (methodSymbol)
10861
{
10862
switch (methodSymbol->getRecognizedMethod())
10863
{
10864
case TR::java_lang_Class_isAssignableFrom:
10865
{
10866
callWasInlined = inlineIsAssignableFrom(node, cg);
10867
break;
10868
}
10869
}
10870
}
10871
10872
return callWasInlined;
10873
}
10874
10875
void
10876
J9::Z::TreeEvaluator::genGuardedLoadOOL(TR::Node *node, TR::CodeGenerator *cg,
10877
TR::Register *byteSrcReg, TR::Register *byteDstReg,
10878
TR::Register *byteLenReg, TR::LabelSymbol *mergeLabel,
10879
TR_S390ScratchRegisterManager *srm, bool isForward)
10880
{
10881
TR::LabelSymbol* slowPathLabel = generateLabelSymbol(cg);
10882
TR::Register *vmReg = cg->getMethodMetaDataRealRegister();
10883
auto baseMemRef = generateS390MemoryReference(vmReg, TR::Compiler->vm.thisThreadGetEvacuateBaseAddressOffset(cg->comp()), cg);
10884
generateSILInstruction(cg, cg->comp()->useCompressedPointers() ? TR::InstOpCode::CHSI : TR::InstOpCode::CGHSI, node, baseMemRef, -1);
10885
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, slowPathLabel);
10886
10887
TR_S390OutOfLineCodeSection* outOfLineCodeSection = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(slowPathLabel, mergeLabel, cg);
10888
cg->getS390OutOfLineCodeSectionList().push_front(outOfLineCodeSection);
10889
outOfLineCodeSection->swapInstructionListsWithCompilation();
10890
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, slowPathLabel);
10891
10892
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(cg->comp(), "readBar/arraycopy/OOL"), 1, TR::DebugCounter::Cheap);
10893
10894
// Call to generateMemToMemElementCopy generates core Array Copy sequence and identify starting instruction in ICF.
10895
TR::RegisterDependencyConditions *loopDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 10, cg);
10896
TR::TreeEvaluator::generateMemToMemElementCopy(node, cg, byteSrcReg, byteDstReg, byteLenReg, srm, isForward, true, false, loopDeps);
10897
10898
TR::LabelSymbol *doneOOLLabel = generateLabelSymbol(cg);
10899
loopDeps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(loopDeps, 0, 3+srm->numAvailableRegisters(), cg);
10900
loopDeps->addPostCondition(byteSrcReg, TR::RealRegister::AssignAny);
10901
loopDeps->addPostCondition(byteDstReg, TR::RealRegister::AssignAny);
10902
loopDeps->addPostCondition(byteLenReg, TR::RealRegister::AssignAny);
10903
srm->addScratchRegistersToDependencyList(loopDeps);
10904
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneOOLLabel, loopDeps);
10905
doneOOLLabel->setEndInternalControlFlow();
10906
10907
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergeLabel);
10908
outOfLineCodeSection->swapInstructionListsWithCompilation();
10909
}
10910
10911
void
10912
J9::Z::TreeEvaluator::genArrayCopyWithArrayStoreCHK(TR::Node* node,
10913
TR::Register *srcObjReg,
10914
TR::Register *dstObjReg,
10915
TR::Register *srcAddrReg,
10916
TR::Register *dstAddrReg,
10917
TR::Register *lengthReg,
10918
TR::CodeGenerator *cg)
10919
{
10920
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
10921
10922
TR::RegisterDependencyConditions * deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(9, 9, cg);
10923
TR::LabelSymbol * doneLabel = generateLabelSymbol(cg);
10924
TR::LabelSymbol * callLabel = generateLabelSymbol(cg);
10925
TR::LabelSymbol * OKLabel = generateLabelSymbol(cg);
10926
TR::Linkage * linkage = cg->getLinkage(node->getSymbol()->castToMethodSymbol()->getLinkageConvention());
10927
TR::SystemLinkage *sysLink = (TR::SystemLinkage *) cg->getLinkage(TR_System);
10928
10929
TR::RealRegister *sspRegReal = sysLink->getStackPointerRealRegister();
10930
TR::Register *sspReg;
10931
10932
TR::Compilation *comp = cg->comp();
10933
10934
if (sspRegReal->getState() == TR::RealRegister::Locked)
10935
{
10936
sspReg = sspRegReal;
10937
}
10938
else
10939
{
10940
sspReg = cg->allocateRegister();
10941
}
10942
10943
TR::Register *helperReg = cg->allocateRegister();
10944
int32_t offset = sysLink->getOffsetToFirstParm();
10945
int32_t ptrSize = (int32_t)TR::Compiler->om.sizeofReferenceAddress();
10946
10947
// Set the following parms in C parm area
10948
// 1) VM Thread
10949
// 2) srcObj
10950
// 3) dstObj
10951
// 4) srcAddr
10952
// 5) dstAddr
10953
// 6) num of slots
10954
// 7) VM referenceArrayCopy func desc
10955
TR::Register *metaReg = cg->getMethodMetaDataRealRegister();
10956
10957
if (sspRegReal->getState() != TR::RealRegister::Locked)
10958
{
10959
deps->addPreCondition(sspReg, TR::RealRegister::GPR4);
10960
deps->addPostCondition(sspReg, TR::RealRegister::GPR4);
10961
}
10962
if (cg->supportsJITFreeSystemStackPointer())
10963
{
10964
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, sspReg,
10965
generateS390MemoryReference(metaReg, (int32_t)(fej9->thisThreadGetSystemSPOffset()), cg));
10966
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, helperReg, 0);
10967
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, helperReg,
10968
generateS390MemoryReference(metaReg, (int32_t)(fej9->thisThreadGetSystemSPOffset()), cg));
10969
}
10970
10971
// Ready parameter 5: count reg
10972
TR::Register *countReg = cg->allocateRegister();
10973
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, countReg, lengthReg);
10974
generateRSInstruction(cg, TR::InstOpCode::SRL, node, countReg, trailingZeroes(TR::Compiler->om.sizeofReferenceField()));
10975
10976
// Ready parameter 6: helper reg
10977
intptr_t *funcdescrptr = (intptr_t*) fej9->getReferenceArrayCopyHelperAddress();
10978
if (comp->compileRelocatableCode() || comp->isOutOfProcessCompilation())
10979
{
10980
generateRegLitRefInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, helperReg, (intptr_t)funcdescrptr, TR_ArrayCopyHelper, NULL, NULL, NULL);
10981
}
10982
else
10983
{
10984
genLoadAddressConstant(cg, node, (long) funcdescrptr, helperReg);
10985
}
10986
10987
// Store 7 parameters
10988
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, metaReg,
10989
generateS390MemoryReference(sspReg, offset+0*ptrSize, cg));
10990
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, srcObjReg,
10991
generateS390MemoryReference(sspReg, offset+1*ptrSize, cg));
10992
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, dstObjReg,
10993
generateS390MemoryReference(sspReg, offset+2*ptrSize, cg));
10994
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, srcAddrReg,
10995
generateS390MemoryReference(sspReg, offset+3*ptrSize, cg));
10996
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, dstAddrReg,
10997
generateS390MemoryReference(sspReg, offset+4*ptrSize, cg));
10998
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, countReg,
10999
generateS390MemoryReference(sspReg, offset+5*ptrSize, cg));
11000
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, helperReg,
11001
generateS390MemoryReference(sspReg, offset+6*ptrSize, cg));
11002
11003
cg->stopUsingRegister(countReg);
11004
cg->stopUsingRegister(helperReg);
11005
11006
TR::Register *rcReg = cg->allocateRegister();
11007
TR::Register *raReg = cg->allocateRegister();
11008
TR::Register *tmpReg = cg->allocateRegister();
11009
TR::Register *R2SaveReg = cg->allocateRegister();
11010
11011
TR::SymbolReference* helperCallSymRef = cg->symRefTab()->findOrCreateRuntimeHelper(TR_S390referenceArrayCopyHelper);
11012
TR::Snippet * helperCallSnippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, callLabel,
11013
helperCallSymRef, doneLabel);
11014
cg->addSnippet(helperCallSnippet);
11015
11016
// The snippet kill r14 and may kill r15, the rc is in r2
11017
deps->addPostCondition(rcReg, linkage->getIntegerReturnRegister());
11018
deps->addPostCondition(raReg, linkage->getReturnAddressRegister());
11019
deps->addPostCondition(tmpReg, linkage->getEntryPointRegister());
11020
11021
TR::Instruction *gcPoint =
11022
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, callLabel);
11023
gcPoint->setNeedsGCMap(0xFFFFFFFF);
11024
11025
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
11026
11027
if (cg->supportsJITFreeSystemStackPointer())
11028
{
11029
generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, sspReg,
11030
generateS390MemoryReference(metaReg, (int32_t)(fej9->thisThreadGetSystemSPOffset()), cg));
11031
}
11032
11033
if (sspRegReal->getState() != TR::RealRegister::Locked)
11034
{
11035
cg->stopUsingRegister(sspReg);
11036
}
11037
11038
generateRIInstruction(cg, TR::InstOpCode::getCmpHalfWordImmOpCode(), node, rcReg, 65535);
11039
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, OKLabel);
11040
11041
// raise exceptions
11042
TR::SymbolReference *throwSymRef = comp->getSymRefTab()->findOrCreateArrayStoreExceptionSymbolRef(comp->getJittedMethodSymbol());
11043
TR::LabelSymbol *exceptionSnippetLabel = cg->lookUpSnippet(TR::Snippet::IsHelperCall, throwSymRef);
11044
if (exceptionSnippetLabel == NULL)
11045
{
11046
exceptionSnippetLabel = generateLabelSymbol(cg);
11047
cg->addSnippet(new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, exceptionSnippetLabel, throwSymRef));
11048
}
11049
11050
gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, exceptionSnippetLabel);
11051
gcPoint->setNeedsGCMap(0xFFFFFFFF);
11052
11053
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, OKLabel, deps);
11054
11055
cg->stopUsingRegister(raReg);
11056
cg->stopUsingRegister(tmpReg);
11057
cg->stopUsingRegister(rcReg);
11058
cg->stopUsingRegister(R2SaveReg);
11059
11060
return;
11061
}
11062
11063
void
11064
J9::Z::TreeEvaluator::restoreGPR7(TR::Node *node, TR::CodeGenerator *cg)
11065
{
11066
TR::MemoryReference * tempMR = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, tempSlot), cg);
11067
TR::Register * tempReg = cg->allocateRegister();
11068
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, cg->machine()->getRealRegister(TR::RealRegister::GPR7), tempMR);
11069
}
11070
11071
/**
 * \brief Generates the write barrier (batch-store helper call) or the card mark that follows a
 *        reference array copy, depending on the write barrier type reported by the object model.
 *
 * \param node       The node the generated instructions are associated with.
 * \param srcReg     Register holding the source array object.
 * \param dstReg     Register holding the destination array object.
 * \param srcNonNull Not consulted by this routine.
 * \param cg         The code generator.
 */
void J9::Z::TreeEvaluator::genWrtbarForArrayCopy(TR::Node *node, TR::Register *srcReg, TR::Register *dstReg, bool srcNonNull, TR::CodeGenerator *cg)
   {
   TR::RegisterDependencyConditions * conditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 4, cg);
   TR::Compilation * comp = cg->comp();

   auto gcMode = TR::Compiler->om.writeBarrierType();
   bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_always);
   // Do not do card marking when gcMode is gc_modron_wrtbar_cardmark_and_oldcheck - we go through helper, which performs CM, so it is redundant.
   bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_incremental);
   TR::LabelSymbol * doneLabel = generateLabelSymbol(cg);

   if (doWrtBar)
      {
      TR::Register * tempReg = cg->allocateRegister();
      TR::Register * tempReg2 = cg->allocateRegister();
      TR::SymbolReference * wbref = comp->getSymRefTab()->findOrCreateWriteBarrierBatchStoreSymbolRef(comp->getMethodSymbol());

      // It's possible to have srcReg and dstReg point to same array.
      // If so, we need to copy before calling helper.
      const bool sameArrayRegister = (srcReg == dstReg);
      TR::Register * dstObjReg;
      if (sameArrayRegister)
         {
         dstObjReg = cg->allocateRegister();
         generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, dstObjReg, dstReg);
         }
      else
         {
         dstObjReg = dstReg;
         }

      conditions->addPostCondition(tempReg, cg->getReturnAddressRegister());
      conditions->addPostCondition(tempReg2, cg->getEntryPointRegister());
      conditions->addPostCondition(dstObjReg, TR::RealRegister::GPR1);

      /*** Start of VMnonNullSrcWrtBarEvaluator ***********************/
      // 83613: If this condition changes, please verify that the inline CM
      // conditions are still correct.  Currently, we don't perform inline CM
      // for old&CM objects, since this wrtbarEvaluator will call the helper, which
      // also performs CM.

      if (gcMode != gc_modron_wrtbar_always)
         {
         // Inline range check ahead of the helper call:
         //    temp1Reg = dstObjReg - vmThread->heapBaseForBarrierRange0
         //    compare-logical temp1Reg with vmThread->heapSizeForBarrierRange0
         //    branch-on-high to doneLabel, which skips the helper call
         bool is64Bit = comp->target().is64Bit();
         TR::Register * temp1Reg = cg->allocateRegister();

         conditions->addPostCondition(temp1Reg, TR::RealRegister::AssignAny);

         TR::MemoryReference * offset = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
         TR::MemoryReference * size = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
         generateRRInstruction(cg, is64Bit ? TR::InstOpCode::LGR : TR::InstOpCode::LR, node, temp1Reg, dstObjReg);
         generateRXInstruction(cg, is64Bit ? TR::InstOpCode::SG : TR::InstOpCode::S, node, temp1Reg, offset);
         generateRXInstruction(cg, is64Bit ? TR::InstOpCode::CLG : TR::InstOpCode::CL, node, temp1Reg, size);
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, doneLabel);
         cg->stopUsingRegister(temp1Reg);
         // if not match, callout to the helper
         }

      generateDirectCall(cg, node, false, wbref, conditions);
      /*** End Of *****************************************************/
      cg->stopUsingRegister(tempReg);
      cg->stopUsingRegister(tempReg2);
      if (sameArrayRegister)
         {
         cg->stopUsingRegister(dstObjReg);
         }
      }
   else if (doCrdMrk)
      {
      if (!comp->getOptions()->realTimeGC())
         {
         TR::Register * temp1Reg = cg->allocateRegister();
         conditions->addPostCondition(temp1Reg, TR::RealRegister::AssignAny);
         conditions->addPostCondition(dstReg, TR::RealRegister::AssignAny);
         VMCardCheckEvaluator(node, dstReg, temp1Reg, conditions, cg, false, doneLabel);
         cg->stopUsingRegister(temp1Reg);
         }
      else
         {
         TR_ASSERT(0, "genWrtbarForArrayCopy card marking not supported for RT");
         }
      }

   // The merge label carries the dependency conditions collected on whichever path was taken.
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, conditions);
   }
11165
11166
/**
 * \brief Generates an inline compare-and-swap for a call node and, when the swapped value is an
 *        object reference, the read barrier that precedes it and the write barrier or card mark
 *        that follows it.
 *
 * The children of \p node are read as: 0 = receiver (never evaluated), 1 = object, 2 = offset,
 * 3 = expected (old) value, 4 = new value.
 *
 * \param node  The call node being inlined; its register is set to the result register.
 * \param cg    The code generator.
 * \param casOp The compare-and-swap mnemonic to emit.
 * \param isObj True when the swapped value is an object reference; enables barrier generation.
 *
 * \return A register that is loaded with 0 before the compare-and-swap and with 1 when the
 *         branch-on-not-equal that follows it falls through.
 */
TR::Register*
J9::Z::TreeEvaluator::VMinlineCompareAndSwap(TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj)
   {
   TR::Register *scratchReg = NULL;
   TR::Register *objReg, *oldVReg, *newVReg;
   TR::Register *resultReg = cg->allocateRegister();
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::MemoryReference* casMemRef = NULL;

   TR::Compilation * comp = cg->comp();

   TR::Node* thisNode   = node->getChild(0);
   TR::Node* objNode    = node->getChild(1);
   TR::Node* offsetNode = node->getChild(2);
   TR::Node* oldVNode   = node->getChild(3);
   TR::Node* newVNode   = node->getChild(4);

   // The card mark write barrier helper expects the source register to be a decompressed reference. As such if the
   // value we are storing (last argument) has been lowered we must extract the decompressed reference from the
   // compression sequence.
   bool isValueCompressedReference = false;

   TR::Node* decompressedValueNode = newVNode;

   if (isObj && comp->useCompressedPointers() && decompressedValueNode->getOpCodeValue() == TR::l2i)
      {
      // Pattern match the sequence:
      //
      //  <node>
      //    <thisNode>
      //    <objNode>
      //    <offsetNode>
      //    <oldVNode>
      //    l2i
      //      lushr
      //        a2l
      //          <decompressedValueNode>
      //        iconst

      if (decompressedValueNode->getOpCode().isConversion())
         {
         decompressedValueNode = decompressedValueNode->getFirstChild();
         }

      if (decompressedValueNode->getOpCode().isRightShift())
         {
         decompressedValueNode = decompressedValueNode->getFirstChild();
         }

      isValueCompressedReference = true;

      while ((decompressedValueNode->getNumChildren() > 0) && (decompressedValueNode->getOpCodeValue() != TR::a2l))
         {
         decompressedValueNode = decompressedValueNode->getFirstChild();
         }

      if (decompressedValueNode->getOpCodeValue() == TR::a2l)
         {
         decompressedValueNode = decompressedValueNode->getFirstChild();
         }

      // Artificially bump the reference count on the value so that different registers are allocated for the
      // compressed and decompressed values. This is done so that the card mark write barrier helper uses the
      // decompressed value. The matching decrement is at the end of this routine.
      decompressedValueNode->incReferenceCount();
      }

   // Eval old and new vals
   //
   objReg  = cg->evaluate(objNode);
   oldVReg = cg->gprClobberEvaluate(oldVNode);  //  CS  oldReg, newReg, OFF(objReg)
   newVReg = cg->evaluate(newVNode);            //    oldReg is clobbered

   // Register handed to the write barrier as the stored value. It starts out as the value that is
   // swapped in and is replaced by the decompressed reference when a compression sequence was matched.
   TR::Register* wrtbarValueReg = newVReg;

   if (isValueCompressedReference)
      {
      wrtbarValueReg = cg->evaluate(decompressedValueNode);
      }

   bool needsDup = false;

   if (objReg == newVReg)
      {
      // Make a copy of the register - reg deps later on expect them in different registers.
      newVReg = cg->allocateCollectedReferenceRegister();
      generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, newVReg, objReg);
      if (!isValueCompressedReference)
         {
         wrtbarValueReg = newVReg;
         }

      needsDup = true;
      }

   generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x0);

   //  We can run into trouble when the offset value gets too big, or it may
   //  simply not be known at compile time.
   //
   if (offsetNode->getOpCode().isLoadConst() && offsetNode->getRegister()==NULL)
      {
      // We know at compile time
      intptr_t offsetValue = offsetNode->getLongInt();
      if (offsetValue>=0 && offsetValue<MAXDISP)
         {
         casMemRef = generateS390MemoryReference(objReg, offsetValue, cg);
         }
      //  ADD Golden Eagle support here if we ever see this path take (unlikely)
      }

   //  We couldn't figure out how to get the offset into the DISP field of the CAS inst
   //  So use an explicit local ADD
   //
   if (casMemRef == NULL)  // Not setup, hence we need a reg
      {
      scratchReg = cg->gprClobberEvaluate(offsetNode);

      generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, scratchReg,objReg);
      casMemRef = generateS390MemoryReference(scratchReg, 0, cg);
      }

   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none && isObj)
      {
      TR::Register* tempReadBarrier = cg->allocateRegister();
      if (comp->target().cpu.supportsFeature(OMR_FEATURE_S390_GUARDED_STORAGE))
         {
         auto guardedLoadMnemonic = comp->useCompressedPointers() ? TR::InstOpCode::LLGFSG : TR::InstOpCode::LGG;

         // Compare-And-Swap on object reference, while primarily is a store operation, it is also an implicit read (it
         // reads the existing value to be compared with a provided compare value, before the store itself), hence needs
         // a read barrier
         generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);
         generateRXInstruction(cg, guardedLoadMnemonic, node, tempReadBarrier, generateS390MemoryReference(*casMemRef, 0, cg));
         }
      else
         {
         TR::TreeEvaluator::generateSoftwareReadBarrier(node, cg, tempReadBarrier, generateS390MemoryReference(*casMemRef, 0, cg));
         }
      cg->stopUsingRegister(tempReadBarrier);
      }

   // Compare and swap
   //
   generateRSInstruction(cg, casOp, node, oldVReg, newVReg, casMemRef);

   //  Setup return: resultReg keeps the 0 loaded above when the branch is taken, otherwise it becomes 1.
   //
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, doneLabel);

   generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, resultReg, 0x1);

   TR::RegisterDependencyConditions* cond = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 1, cg);
   cond->addPostCondition(resultReg, TR::RealRegister::AssignAny);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, cond);

   // Do wrtbar for Objects
   //
   auto gcMode = TR::Compiler->om.writeBarrierType();
   bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||
                    gcMode == gc_modron_wrtbar_always);
   bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||
                    gcMode == gc_modron_wrtbar_cardmark_incremental);

   if (isObj && (doWrtBar || doCrdMrk))
      {
      TR::LabelSymbol *doneLabelWrtBar = generateLabelSymbol(cg);
      TR::Register *epReg = cg->allocateRegister();
      TR::Register *raReg = cg->allocateRegister();
      TR::RegisterDependencyConditions* condWrtBar = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
      condWrtBar->addPostCondition(objReg, TR::RealRegister::GPR1);
      if (wrtbarValueReg != newVReg)
         {
         condWrtBar->addPostCondition(newVReg, TR::RealRegister::AssignAny); //defect 92001
         }
      if (wrtbarValueReg != objReg)  // add this because I got conflicting dependencies on GPR1 and GPR2!
         {
         condWrtBar->addPostCondition(wrtbarValueReg, TR::RealRegister::GPR2); //defect 92001
         }
      condWrtBar->addPostCondition(epReg, cg->getEntryPointRegister());
      condWrtBar->addPostCondition(raReg, cg->getReturnAddressRegister());
      // Cardmarking is not inlined for gencon. Consider doing so when perf issue arises.
      if (doWrtBar)
         {
         TR::SymbolReference *wbRef;

         // gcMode was read just above; it is reused here rather than queried a second time.
         if (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_oldcheck)
            wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef(comp->getMethodSymbol());
         else
            wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef(comp->getMethodSymbol());
         VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, wrtbarValueReg, epReg, raReg, doneLabelWrtBar, wbRef, condWrtBar, cg, false);
         }

      else if (doCrdMrk)
         {
         // Both boolean arguments are passed as false.
         // NOTE(review): an earlier comment here described the first flag as `true` ("copy of objReg just
         // happened, it's safe to clobber tempReg") and the second as "don't do compile time check for heap
         // obj"; the first does not match the value passed - confirm against VMCardCheckEvaluator's signature.
         VMCardCheckEvaluator(node, objReg, epReg, condWrtBar, cg, false, doneLabelWrtBar, false);
         }

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabelWrtBar, condWrtBar);

      cg->stopUsingRegister(epReg);
      cg->stopUsingRegister(raReg);
      }

   // Value is not used, and not eval'd to avoid the extra reg
   //  So recursively decrement to compensate
   //
   cg->recursivelyDecReferenceCount(thisNode);

   cg->decReferenceCount(objNode);
   cg->decReferenceCount(offsetNode);
   cg->decReferenceCount(oldVNode);
   cg->decReferenceCount(newVNode);

   cg->stopUsingRegister(oldVReg);

   if (needsDup)
      {
      cg->stopUsingRegister(newVReg);
      }
   if (scratchReg)
      {
      cg->stopUsingRegister(scratchReg);
      }

   // Undo the artificial reference count bump made while matching the compression sequence.
   if (isValueCompressedReference)
      {
      cg->decReferenceCount(decompressedValueNode);
      }

   node->setRegister(resultReg);
   return resultReg;
   }
11395
11396
11397
/////////////////////////////////////////////////////////////////////////////////
11398
// getTOCOffset()
11399
// return codertTOC offset from vmThread (R13)
11400
////////////////////////////////////////////////////////////////////////////////
11401
int
11402
getTOCOffset()
11403
{
11404
return (offsetof(J9VMThread, codertTOC));
11405
}
11406
11407
/**
 * \brief Emits, when needed, a NILL that applies the object model's VFT-field mask to \p reg.
 *
 * \param node   The node the generated instruction is associated with.
 * \param reg    The register holding the value to be masked.
 * \param cg     The code generator.
 * \param preced The instruction after which the mask instruction is to be placed.
 *
 * \return The generated NILL instruction, or \p preced unchanged when no instruction is emitted
 *         (the mask clears no bits, or it clears bits above the low 16, which also asserts).
 */
TR::Instruction *
J9::Z::TreeEvaluator::generateVFTMaskInstruction(TR::Node *node, TR::Register *reg, TR::CodeGenerator *cg, TR::Instruction *preced)
   {
   const uintptr_t mask = TR::Compiler->om.maskOfObjectVftField();
   const uintptr_t clearedBits = ~mask;

   if (clearedBits == 0)
      {
      // no mask instruction required
      return preced;
      }

   if (clearedBits <= 0xffff)
      {
      // Every bit that must be cleared sits in the low 16 bits, so one NILL is enough.
      return generateRIInstruction(cg, TR::InstOpCode::NILL, node, reg, mask, preced);
      }

   TR_ASSERT(0, "Can't mask out flag bits beyond the low 16 from the VFT pointer");
   return preced;
   }
11427
11428
// This routine generates RION and RIOFF guarded by VMThread->jitCurrentRIFlags
11429
// based on test for bit: J9_JIT_TOGGLE_RI_IN_COMPILED_CODE
11430
TR::Instruction *
11431
J9::Z::TreeEvaluator::generateRuntimeInstrumentationOnOffSequence(TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic op, TR::Node *node, TR::Instruction *preced, bool postRA)
11432
{
11433
TR::Compilation *comp = cg->comp();
11434
TR_ASSERT(op == TR::InstOpCode::RION || op == TR::InstOpCode::RIOFF, "Unexpected Runtime Instrumentation OpCode");
11435
11436
#ifdef TR_HOST_S390
11437
TR::LabelSymbol * OOLStartLabel = generateLabelSymbol(cg);
11438
TR::LabelSymbol * OOLReturnLabel = generateLabelSymbol(cg);
11439
TR_Debug * debugObj = cg->getDebug();
11440
11441
// Test the last byte of vmThread->jitCurrentRIFlags
11442
TR_ASSERT(0 != (J9_JIT_TOGGLE_RI_IN_COMPILED_CODE & 0xFF), "Cannot use TM to test for J9_JIT_TOGGLE_RI_IN_COMPILED_CODE");
11443
TR::MemoryReference *vmThreadMemRef = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), offsetof(J9VMThread, jitCurrentRIFlags) + sizeof(((J9VMThread *)0)->jitCurrentRIFlags) - 1, cg);
11444
preced = generateSIInstruction(cg, TR::InstOpCode::TM, node, vmThreadMemRef, J9_JIT_TOGGLE_RI_IN_COMPILED_CODE, preced);
11445
preced = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, OOLStartLabel, preced);
11446
11447
if (debugObj)
11448
if (op == TR::InstOpCode::RION)
11449
debugObj->addInstructionComment(preced, "-->OOL RION");
11450
else
11451
debugObj->addInstructionComment(preced, "-->OOL RIOFF");
11452
11453
11454
TR_S390OutOfLineCodeSection *RIOnOffOOL = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(OOLStartLabel, OOLReturnLabel, cg);
11455
cg->getS390OutOfLineCodeSectionList().push_front(RIOnOffOOL);
11456
RIOnOffOOL->swapInstructionListsWithCompilation();
11457
11458
TR::Instruction * cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, OOLStartLabel);
11459
if (debugObj)
11460
debugObj->addInstructionComment(cursor, "OOL RION/OFF seq");
11461
11462
// Generate the RION/RIOFF instruction.
11463
cursor = generateRuntimeInstrumentationInstruction(cg, op, node, NULL, cursor);
11464
11465
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, OOLReturnLabel, cursor);
11466
11467
RIOnOffOOL->swapInstructionListsWithCompilation();
11468
11469
preced = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, OOLReturnLabel, preced);
11470
11471
// OOL's are appended to the instruction stream during RA. If this is
11472
// emitted postRA, we have to attach it ourselves.
11473
if (postRA)
11474
{
11475
TR::Instruction *appendInstruction = cg->getAppendInstruction();
11476
appendInstruction->setNext(RIOnOffOOL->getFirstInstruction());
11477
RIOnOffOOL->getFirstInstruction()->setPrev(appendInstruction);
11478
cg->setAppendInstruction(RIOnOffOOL->getAppendInstruction());
11479
}
11480
#endif /* TR_HOST_S390 */
11481
return preced;
11482
}
11483
11484
11485
#if defined(TR_HOST_S390) && defined(J9ZOS390)
11486
// pseudo-call to asm function
11487
extern "C" void _getSTCKLSOOffset(int32_t* offsetArray); /* 390 asm stub */
11488
#endif
11489
11490
/**
 * \brief Evaluates the node's first child and emits an SQEBR on the result.
 *
 * \param node The node being evaluated; its first child supplies the operand.
 * \param cg   The code generator.
 *
 * \return The register set on \p node, which receives the SQEBR result.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineSinglePrecisionSQRT(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *operandNode = node->getFirstChild();
   TR::Register *sourceReg = cg->evaluate(operandNode);

   // Write the result over the operand's register when clobbering it is permitted; otherwise
   // take a fresh floating point register for the result.
   TR::Register *resultReg = cg->canClobberNodesRegister(operandNode)
      ? sourceReg
      : cg->allocateRegister(TR_FPR);

   generateRRInstruction(cg, TR::InstOpCode::SQEBR, node, resultReg, sourceReg);

   node->setRegister(resultReg);
   cg->decReferenceCount(operandNode);

   return node->getRegister();
   }
11510
11511
/**
 * \brief Stores the TOD clock with STCKF into a reusable temp slot, reloads it into a register,
 *        subtracts the TOD value for January 1970 (and, in z/OS builds targeting z/OS, the LSO
 *        value), then shifts the result right by one bit.
 *
 * \param cg   The code generator.
 * \param node The node being evaluated. When it has exactly one child, that child is evaluated
 *             and its register is used as the literal base for the constant subtraction.
 *
 * \return The register holding the adjusted time value; it is also set on \p node.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineCurrentTimeMaxPrecision(TR::CodeGenerator* cg, TR::Node* node)
   {
   // STCKF is an S instruction and requires a 64-bit memory reference
   TR::SymbolReference* clockSlot = cg->allocateReusableTempSlot();

   generateSInstruction(cg, TR::InstOpCode::STCKF, node, generateS390MemoryReference(node, clockSlot, cg));

   // Dynamic literal pool could have assigned us a literal base register
   TR::Register* litBaseReg = NULL;
   if (node->getNumChildren() == 1)
      {
      litBaseReg = cg->evaluate(node->getFirstChild());
      }

   TR::Register* timeReg = cg->allocateRegister();

#if defined(TR_HOST_S390) && defined(J9ZOS390)
   int32_t lsoOffsets[3];
   _getSTCKLSOOffset(lsoOffsets);

   TR::Register* cvtReg = cg->allocateRegister();

   // z/OS requires time correction to account for leap seconds. The number of leap seconds is stored in the LSO
   // field of the MVS data area.
   if (cg->comp()->target().isZOS())
      {
      // Load FFCVT(R0)
      generateRXInstruction(cg, TR::InstOpCode::LLGT, node, cvtReg, generateS390MemoryReference(lsoOffsets[0], cg));

      // Load CVTEXT2 - CVT
      generateRXInstruction(cg, TR::InstOpCode::LLGT, node, cvtReg, generateS390MemoryReference(cvtReg, lsoOffsets[1], cg));
      }
#endif

   // Bring the stored clock value back into a register
   generateRXInstruction(cg, TR::InstOpCode::LG, node, timeReg, generateS390MemoryReference(node, clockSlot, cg));

   // TOD clock value corresponding to January 1970; subtracted to rebase the reading
   int64_t todEpoch1970 = 0x7D91048BCA000000LL;
   generateRegLitRefInstruction(cg, TR::InstOpCode::SLG, node, timeReg, todEpoch1970, NULL, NULL, litBaseReg);

#if defined(TR_HOST_S390) && defined(J9ZOS390)
   if (cg->comp()->target().isZOS())
      {
      // Subtract the LSO offset
      generateRXInstruction(cg, TR::InstOpCode::SLG, node, timeReg, generateS390MemoryReference(cvtReg, lsoOffsets[2],cg));
      }

   cg->stopUsingRegister(cvtReg);
#endif

   // Get current time in terms of 1/2048 of micro-seconds
   generateRSInstruction(cg, TR::InstOpCode::SRLG, node, timeReg, timeReg, 1);

   cg->freeReusableTempSlot();

   // The literal base child was evaluated above, so release the reference taken on it
   if (litBaseReg != NULL)
      {
      cg->decReferenceCount(node->getFirstChild());
      }

   node->setRegister(timeReg);

   return timeReg;
   }
11571
11572
TR::Register*
11573
J9::Z::TreeEvaluator::inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray)
11574
{
11575
TR::Compilation *comp = cg->comp();
11576
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
11577
TR::Node *valueChild = node->getFirstChild();
11578
TR::Node *deltaChild = NULL;
11579
TR::Register *valueReg = cg->evaluate(valueChild);
11580
TR::Register *deltaReg = NULL;
11581
TR::Register *resultReg = NULL;
11582
11583
int32_t delta = 0;
11584
int32_t numDeps = 4;
11585
11586
bool isAddOp = true;
11587
bool isGetAndOp = true;
11588
bool isLong = false;
11589
bool isArgConstant = false;
11590
11591
TR::RecognizedMethod currentMethod = method->getRecognizedMethod();
11592
11593
// Gather information about the method
11594
//
11595
switch (currentMethod)
11596
{
11597
case TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet:
11598
case TR::java_util_concurrent_atomic_AtomicInteger_getAndSet:
11599
case TR::java_util_concurrent_atomic_AtomicLong_getAndSet:
11600
case TR::java_util_concurrent_atomic_AtomicReference_getAndSet:
11601
case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndSet:
11602
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndSet:
11603
case TR::java_util_concurrent_atomic_AtomicReferenceArray_getAndSet:
11604
{
11605
isAddOp = false;
11606
break;
11607
}
11608
case TR::java_util_concurrent_atomic_AtomicInteger_addAndGet:
11609
case TR::java_util_concurrent_atomic_AtomicIntegerArray_addAndGet:
11610
{
11611
isGetAndOp = false;
11612
}
11613
case TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd:
11614
case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndAdd:
11615
{
11616
break;
11617
}
11618
case TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet:
11619
case TR::java_util_concurrent_atomic_AtomicIntegerArray_incrementAndGet:
11620
{
11621
isGetAndOp = false;
11622
}
11623
case TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement:
11624
case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndIncrement:
11625
{
11626
delta = (int32_t)1;
11627
isArgConstant = true;
11628
resultReg = cg->allocateRegister();
11629
break;
11630
}
11631
case TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet:
11632
case TR::java_util_concurrent_atomic_AtomicIntegerArray_decrementAndGet:
11633
{
11634
isGetAndOp = false;
11635
}
11636
case TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement:
11637
case TR::java_util_concurrent_atomic_AtomicIntegerArray_getAndDecrement:
11638
{
11639
delta = (int32_t)-1;
11640
isArgConstant = true;
11641
resultReg = cg->allocateRegister();
11642
break;
11643
}
11644
case TR::java_util_concurrent_atomic_AtomicLong_addAndGet:
11645
case TR::java_util_concurrent_atomic_AtomicLongArray_addAndGet:
11646
{
11647
isGetAndOp = false;
11648
}
11649
case TR::java_util_concurrent_atomic_AtomicLong_getAndAdd:
11650
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndAdd:
11651
{
11652
isLong = true;
11653
break;
11654
}
11655
case TR::java_util_concurrent_atomic_AtomicLong_incrementAndGet:
11656
case TR::java_util_concurrent_atomic_AtomicLongArray_incrementAndGet:
11657
{
11658
isGetAndOp = false;
11659
}
11660
case TR::java_util_concurrent_atomic_AtomicLong_getAndIncrement:
11661
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndIncrement:
11662
{
11663
isLong = true;
11664
delta = (int64_t)1;
11665
break;
11666
}
11667
case TR::java_util_concurrent_atomic_AtomicLong_decrementAndGet:
11668
case TR::java_util_concurrent_atomic_AtomicLongArray_decrementAndGet:
11669
{
11670
isGetAndOp = false;
11671
}
11672
case TR::java_util_concurrent_atomic_AtomicLong_getAndDecrement:
11673
case TR::java_util_concurrent_atomic_AtomicLongArray_getAndDecrement:
11674
{
11675
isLong = true;
11676
delta = (int64_t)-1;
11677
break;
11678
}
11679
}
11680
11681
//Determine the offset of the value field
11682
//
11683
int32_t shiftAmount = 0;
11684
TR::Node *indexChild = NULL;
11685
TR::Register *indexRegister = NULL;
11686
TR::Register *fieldOffsetReg = NULL;
11687
int32_t fieldOffset;
11688
11689
if (!isArray)
11690
{
11691
TR_OpaqueClassBlock * bdClass;
11692
char *className, *fieldSig;
11693
int32_t classNameLen, fieldSigLen;
11694
11695
fieldSigLen = 1;
11696
11697
switch (currentMethod)
11698
{
11699
case TR::java_util_concurrent_atomic_AtomicBoolean_getAndSet:
11700
className = "Ljava/util/concurrent/atomic/AtomicBoolean;";
11701
classNameLen = 43;
11702
fieldSig = "I"; // not a typo, the field is int
11703
break;
11704
case TR::java_util_concurrent_atomic_AtomicInteger_getAndSet:
11705
case TR::java_util_concurrent_atomic_AtomicInteger_addAndGet:
11706
case TR::java_util_concurrent_atomic_AtomicInteger_getAndAdd:
11707
case TR::java_util_concurrent_atomic_AtomicInteger_incrementAndGet:
11708
case TR::java_util_concurrent_atomic_AtomicInteger_getAndIncrement:
11709
case TR::java_util_concurrent_atomic_AtomicInteger_decrementAndGet:
11710
case TR::java_util_concurrent_atomic_AtomicInteger_getAndDecrement:
11711
className = "Ljava/util/concurrent/atomic/AtomicInteger;";
11712
classNameLen = 43;
11713
fieldSig = "I";
11714
break;
11715
case TR::java_util_concurrent_atomic_AtomicLong_getAndSet:
11716
case TR::java_util_concurrent_atomic_AtomicLong_addAndGet:
11717
case TR::java_util_concurrent_atomic_AtomicLong_getAndAdd:
11718
case TR::java_util_concurrent_atomic_AtomicLong_incrementAndGet:
11719
case TR::java_util_concurrent_atomic_AtomicLong_getAndIncrement:
11720
case TR::java_util_concurrent_atomic_AtomicLong_decrementAndGet:
11721
case TR::java_util_concurrent_atomic_AtomicLong_getAndDecrement:
11722
className = "Ljava/util/concurrent/atomic/AtomicLong;";
11723
classNameLen = 40;
11724
fieldSig = "J";
11725
break;
11726
case TR::java_util_concurrent_atomic_AtomicReference_getAndSet:
11727
className = "Ljava/util/concurrent/atomic/AtomicReference;";
11728
classNameLen = 45;
11729
fieldSig = "Ljava/lang/Object;";
11730
fieldSigLen = 18;
11731
break;
11732
default:
11733
TR_ASSERT( 0, "Unknown atomic operation method\n");
11734
return NULL;
11735
}
11736
11737
TR_ResolvedMethod *owningMethod = node->getSymbolReference()->getOwningMethod(comp);
11738
TR_OpaqueClassBlock *containingClass = fej9->getClassFromSignature(className, classNameLen, owningMethod, true);
11739
fieldOffset = fej9->getInstanceFieldOffset(containingClass, "value", 5, fieldSig, fieldSigLen)
11740
+ fej9->getObjectHeaderSizeInBytes(); // size of a J9 object header
11741
}
11742
else
11743
{
11744
if (isArray)
11745
{
11746
indexChild = node->getChild(1);
11747
indexRegister = cg->evaluate(indexChild);
11748
fieldOffset = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
11749
if (size == 4)
11750
shiftAmount = 2;
11751
else if (size == 8)
11752
shiftAmount = 3;
11753
11754
fieldOffsetReg = cg->allocateRegister();
11755
generateRSInstruction(cg, TR::InstOpCode::SLL, node, fieldOffsetReg, indexRegister, shiftAmount);
11756
}
11757
}
11758
11759
// Exploit z196 interlocked-update instructions
11760
if (comp->target().cpu.isAtLeast(OMR_PROCESSOR_S390_Z196))
11761
{
11762
if (isAddOp) //getAndAdd or andAndGet
11763
{
11764
if (node->getNumChildren() > 1)
11765
{
11766
// 2nd operand needs to be in a register
11767
deltaChild = node->getSecondChild();
11768
deltaReg = cg->evaluate(deltaChild);
11769
cg->decReferenceCount(deltaChild);
11770
}
11771
else
11772
{
11773
// no 2nd child = Atomic.increment or decrement, delta should be +/- 1
11774
deltaReg = cg->allocateRegister();
11775
if (!isLong)
11776
{
11777
generateRIInstruction(cg, TR::InstOpCode::LHI, node, deltaReg, delta);
11778
}
11779
else
11780
{
11781
generateRIInstruction(cg, TR::InstOpCode::LGHI, node, deltaReg, delta);
11782
}
11783
}
11784
11785
// Load And Add: LAA R1,R2,Mem
11786
// R1 = Mem; Mem = Mem + R2;
11787
// IMPORTANT: LAAG throws hardware exception if Mem is not double word aligned
11788
// Class AtomicLong currently has its value field d.word aligned
11789
if (!resultReg)
11790
resultReg = cg->allocateRegister();
11791
11792
if (!isLong)
11793
{
11794
if (fieldOffsetReg)
11795
generateRSInstruction(cg, TR::InstOpCode::LAA, node, resultReg, deltaReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffsetReg, fieldOffset, cg));
11796
else
11797
generateRSInstruction(cg, TR::InstOpCode::LAA, node, resultReg, deltaReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffset, cg));
11798
}
11799
else
11800
{
11801
if (fieldOffsetReg)
11802
generateRSInstruction(cg, TR::InstOpCode::LAAG, node, resultReg, deltaReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffsetReg, fieldOffset, cg));
11803
else
11804
generateRSInstruction(cg, TR::InstOpCode::LAAG, node, resultReg, deltaReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffset, cg));
11805
}
11806
if (!isGetAndOp)
11807
{
11808
// for addAndGet, the result needs to be recomputed. LAA loaded the original value into resultReg.
11809
if (!isLong)
11810
generateRRInstruction(cg, TR::InstOpCode::AR, node, resultReg, deltaReg);
11811
else
11812
generateRRInstruction(cg, TR::InstOpCode::AGR, node, resultReg, deltaReg);
11813
}
11814
11815
cg->stopUsingRegister(deltaReg);
11816
cg->decReferenceCount(valueChild);
11817
cg->stopUsingRegister(valueReg);
11818
11819
node->setRegister(resultReg);
11820
return resultReg;
11821
}
11822
}
11823
11824
if (node->getNumChildren() > 1)
11825
{
11826
deltaChild = node->getSecondChild();
11827
11828
//Determine if the delta is a constant.
11829
//
11830
if (deltaChild->getOpCode().isLoadConst() && !deltaChild->getRegister())
11831
{
11832
delta = (int32_t)(deltaChild->getInt());
11833
isArgConstant = true;
11834
resultReg = cg->allocateRegister();
11835
}
11836
else if (isAddOp)
11837
{
11838
deltaReg = cg->evaluate(deltaChild);
11839
resultReg = cg->allocateRegister();
11840
}
11841
else
11842
{
11843
resultReg = cg->evaluate(deltaChild);
11844
}
11845
}
11846
11847
TR::RegisterDependencyConditions * dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, numDeps, cg);
11848
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
11849
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
11850
11851
11852
// If this is a getAndSet of a constant, load the constant outside the loop.
11853
//
11854
if (!isAddOp && isArgConstant)
11855
generateLoad32BitConstant(cg, node, delta, resultReg, true);
11856
11857
// Get the existing value
11858
//
11859
TR::Register *tempReg = cg->allocateRegister();
11860
if (fieldOffsetReg)
11861
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, tempReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffsetReg, fieldOffset, cg));
11862
else
11863
generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, tempReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffset, cg));
11864
11865
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
11866
loopLabel->setStartInternalControlFlow();
11867
11868
// Perform the addition operation, if necessary
11869
//
11870
if (isAddOp)
11871
{
11872
generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(),node, resultReg, tempReg);
11873
if(isArgConstant)
11874
{
11875
generateS390ImmOp(cg, TR::InstOpCode::getAddOpCode(), node, resultReg, resultReg, (int32_t) delta, dependencies, NULL);
11876
}
11877
else
11878
{
11879
generateRRInstruction(cg, TR::InstOpCode::getAddRegOpCode(), node, resultReg ,deltaReg);
11880
}
11881
}
11882
11883
// Compare and swap!
11884
//
11885
if (fieldOffsetReg)
11886
generateRSInstruction(cg, TR::InstOpCode::CS, node, tempReg, resultReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffsetReg, fieldOffset, cg));
11887
else
11888
generateRSInstruction(cg, TR::InstOpCode::CS, node, tempReg, resultReg, new (cg->trHeapMemory()) TR::MemoryReference(valueReg, fieldOffset, cg));
11889
11890
// Branch if the compare and swap failed and try again.
11891
//
11892
generateS390BranchInstruction(cg, TR::InstOpCode::BRC,TR::InstOpCode::COND_BL, node, loopLabel);
11893
11894
dependencies->addPostCondition(valueReg, TR::RealRegister::AssignAny);
11895
dependencies->addPostCondition(tempReg, TR::RealRegister::AssignAny);
11896
11897
if (resultReg)
11898
dependencies->addPostCondition(resultReg, TR::RealRegister::AssignAny);
11899
if (deltaReg)
11900
dependencies->addPostCondition(deltaReg, TR::RealRegister::AssignAny);
11901
11902
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
11903
cFlowRegionEnd->setEndInternalControlFlow();
11904
11905
if (deltaChild != NULL)
11906
cg->decReferenceCount(deltaChild);
11907
if (deltaReg)
11908
cg->stopUsingRegister(deltaReg);
11909
11910
cg->decReferenceCount(valueChild);
11911
cg->stopUsingRegister(valueReg);
11912
11913
if (isGetAndOp)
11914
{
11915
// For Get And Op, the return value be stored in the temp register
11916
//
11917
if(resultReg)
11918
cg->stopUsingRegister(resultReg);
11919
node->setRegister(tempReg);
11920
return tempReg;
11921
}
11922
else
11923
{
11924
// For Op And Get, the return value will be stored in the result register
11925
//
11926
cg->stopUsingRegister(tempReg);
11927
node->setRegister(resultReg);
11928
return resultReg;
11929
}
11930
}
11931
11932
/**
 * Combines two evaluated children into one register.
 *
 * The high child is clobber-evaluated, its register is shifted left by 32
 * bits with SLLG, and the evaluated low child is then copied into the same
 * register with LR.
 *
 * @param node     Node the generated instructions are attributed to.
 * @param cg       Code generator used to evaluate the children and emit instructions.
 * @param highNode Child whose clobber-evaluated register becomes the result.
 * @param lowNode  Child that is evaluated and copied into the result with LR.
 * @return The clobber-evaluated register of highNode after both instructions.
 */
static TR::Register *
evaluateTwo32BitLoadsInA64BitRegister(
      TR::Node *node,
      TR::CodeGenerator *cg,
      TR::Node * highNode,
      TR::Node *lowNode)
   {
   TR::Register *packedReg = cg->gprClobberEvaluate(highNode);
   generateRSInstruction(cg, TR::InstOpCode::SLLG, node, packedReg, packedReg, 32);

   // The low child is evaluated only after the shift has been emitted.
   TR::Register *lowReg = cg->evaluate(lowNode);
   generateRRInstruction(cg, TR::InstOpCode::LR, node, packedReg, lowReg);

   return packedReg;
   }
}
11945
11946
//TODO: CS clobbers first arg, and padLow ,refFirst
11947
static TR::RegisterPair *
11948
evaluateTwo32BitLoadsInAConsecutiveEvenOddPair(
11949
TR::Node *node,
11950
TR::CodeGenerator *cg,
11951
TR::Node * highNode,
11952
TR::Node *lowNode,
11953
TR::RegisterDependencyConditions * dependencies,
11954
bool isRefFirst,
11955
bool isClobberEval)
11956
{
11957
TR::Register * evenReg = (isClobberEval || (!isRefFirst))? cg->gprClobberEvaluate(highNode) : cg->evaluate(highNode);
11958
TR::Register * oddReg = (isClobberEval || (isRefFirst))? cg->gprClobberEvaluate(lowNode) : cg->evaluate(lowNode);
11959
TR::Register * padReg = isRefFirst ? oddReg : evenReg;
11960
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, padReg, padReg, 32);
11961
11962
TR::RegisterPair * newRegisterPair = cg->allocateConsecutiveRegisterPair(oddReg, evenReg);
11963
dependencies->addPostCondition(evenReg, TR::RealRegister::LegalEvenOfPair);
11964
dependencies->addPostCondition(oddReg, TR::RealRegister::LegalOddOfPair);
11965
dependencies->addPostCondition(newRegisterPair, TR::RealRegister::EvenOddPair);
11966
TR_ASSERT( newRegisterPair->getHighOrder() == evenReg, "evenReg is not high order\n");
11967
return newRegisterPair;
11968
}
11969
11970
/**
 * Inlines a recognized AtomicIntegerFieldUpdater add-style method
 * (getAndAdd / getAndIncrement / getAndDecrement and the matching
 * addAndGet / incrementAndGet / decrementAndGet variants).
 *
 * Shape of the emitted code:
 *  - a scratch register is zeroed and then conditionally overwritten (LOCR)
 *    with real register GPR5 after each of three checks: the updater's
 *    cclass field compared against zero, the target object tested for null,
 *    and the target object's class compared against the updater's tclass field;
 *  - if the scratch register tests non-zero, control branches to an
 *    out-of-line section that performs the original call;
 *  - otherwise the updater's offset field is loaded and a LAA is issued on
 *    (object + offset); for the ...AndGet variants the delta is added to the
 *    loaded value afterwards.
 *
 * NOTE(review): the checks are only observable if GPR5 holds a non-zero value
 * when the LOCR executes -- confirm against the linkage conventions.
 *
 * @param node   Call node. Child 0 is evaluated as the updater, child 1 as the
 *               target object, and child 2 (only for the variants with an
 *               explicit delta) as the delta.
 * @param cg     Code generator used to evaluate children and emit instructions.
 * @param method Method symbol whose recognized method selects the variant.
 * @return The register returned by the out-of-line call, which is also the
 *         target register of the inline LAA.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   // Properties of the recognized variant.
   bool returnsOldValue = true;    // false for the ...AndGet variants
   bool deltaIsImplicit = false;   // true for increment/decrement (no delta child)
   int32_t implicitDelta = 1;      // +1 for increment, -1 for decrement

   switch (method->getRecognizedMethod())
      {
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndDecrement:
         implicitDelta = -1;
         deltaIsImplicit = true;
         break;
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndIncrement:
         deltaIsImplicit = true;
         break;
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_getAndAdd:
         break;
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_decrementAndGet:
         implicitDelta = -1;
         deltaIsImplicit = true;
         returnsOldValue = false;
         break;
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_incrementAndGet:
         deltaIsImplicit = true;
         returnsOldValue = false;
         break;
      case TR::java_util_concurrent_atomic_AtomicIntegerFieldUpdater_addAndGet:
         returnsOldValue = false;
         break;
      default:
         break;
      }

   // Displacements of the updater implementation's offset / cclass / tclass
   // fields, each including the object header size.
   char *updaterImplName = "java/util/concurrent/atomic/AtomicIntegerFieldUpdater$AtomicIntegerFieldUpdaterImpl";
   int32_t updaterImplNameLen = 83;

   TR_ResolvedMethod *owner = node->getSymbolReference()->getOwningMethod(comp);
   TR_OpaqueClassBlock *updaterImplClass = fej9->getClassFromSignature(updaterImplName, updaterImplNameLen, owner, true);
   int32_t offsetFieldDisp = fej9->getInstanceFieldOffset(updaterImplClass, "offset", 6, "J", 1)
                           + fej9->getObjectHeaderSizeInBytes();
   int32_t cclassFieldDisp = fej9->getInstanceFieldOffset(updaterImplClass, "cclass", 6, "Ljava/lang/Class;", 17)
                           + fej9->getObjectHeaderSizeInBytes();
   int32_t tclassFieldDisp = fej9->getInstanceFieldOffset(updaterImplClass, "tclass", 6, "Ljava/lang/Class;", 17)
                           + fej9->getObjectHeaderSizeInBytes();

   TR::Register *updaterReg     = cg->evaluate(node->getFirstChild());
   TR::Register *targetObjReg   = cg->evaluate(node->getSecondChild());
   TR::Register *flagReg        = cg->allocateRegister();
   TR::Register *gpr5           = cg->machine()->getRealRegister(TR::RealRegister::GPR5);
   TR::Register *valueOffsetReg = cg->allocateRegister();
   TR::Register *tclassReg      = cg->allocateRegister();
   TR::Register *objClassReg    = cg->allocateRegister();

   TR::LabelSymbol *mergeLabel    = generateLabelSymbol(cg);
   TR::LabelSymbol *slowPathLabel = generateLabelSymbol(cg);

   // Materialize the delta: either the explicit third child or the implicit +/-1.
   TR::Register *deltaReg = NULL;
   if (!deltaIsImplicit)
      {
      deltaReg = cg->evaluate(node->getChild(2));
      }
   else
      {
      deltaReg = cg->allocateRegister();
      generateRIInstruction(cg, TR::InstOpCode::LHI, node, deltaReg, implicitDelta);
      }

   const bool target64 = comp->target().is64Bit();
   const bool fullWidthRefs = target64 && !comp->useCompressedPointers();

   // Check 1: compare zero against the updater's cclass field.
   generateRRInstruction(cg, fullWidthRefs ? TR::InstOpCode::XGR : TR::InstOpCode::XR, node, flagReg, flagReg);
   TR::MemoryReference *cclassRef = generateS390MemoryReference(updaterReg, cclassFieldDisp, cg);
   generateRXInstruction(cg, fullWidthRefs ? TR::InstOpCode::CG : TR::InstOpCode::C, node, flagReg, cclassRef);
   generateRRFInstruction(cg, TR::InstOpCode::LOCR, node, flagReg, gpr5, getMaskForBranchCondition(TR::InstOpCode::COND_BNER), true);

   // Check 2: test the target object for null.
   generateRRInstruction(cg, target64 ? TR::InstOpCode::LTGR : TR::InstOpCode::LTR, node, targetObjReg, targetObjReg);
   generateRRFInstruction(cg, TR::InstOpCode::LOCR, node, flagReg, gpr5, getMaskForBranchCondition(TR::InstOpCode::COND_BER), true);

   // Check 3: the target object's class against the updater's tclass field.
   TR::MemoryReference *vftRef = generateS390MemoryReference(targetObjReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
   TR::TreeEvaluator::genLoadForObjectHeadersMasked(cg, node, objClassReg, vftRef, NULL);

   if (comp->useCompressedPointers())
      {
      // Inlined getClass(): follow the class pointer to its java/lang/Class.
      generateRXInstruction(cg, TR::InstOpCode::LG, node, objClassReg,
                            generateS390MemoryReference(objClassReg, fej9->getOffsetOfJavaLangClassFromClassField(), cg));

      // Load the 32-bit tclass field and shift it by the compressed reference shift, if any.
      generateRXInstruction(cg, TR::InstOpCode::LLGF, node, tclassReg,
                            generateS390MemoryReference(updaterReg, tclassFieldDisp, cg));
      int32_t refShift = TR::Compiler->om.compressedReferenceShift();
      if (refShift != 0)
         {
         generateRSInstruction(cg, TR::InstOpCode::SLLG, node, tclassReg, tclassReg, refShift);
         }
      }
   else
      {
      // Inlined getClass(): follow the class pointer to its java/lang/Class.
      generateRXInstruction(cg, target64 ? TR::InstOpCode::LG : TR::InstOpCode::L, node, objClassReg,
                            generateS390MemoryReference(objClassReg, fej9->getOffsetOfJavaLangClassFromClassField(), cg));

      // Load the full-width tclass field.
      generateRXInstruction(cg, target64 ? TR::InstOpCode::LG : TR::InstOpCode::L, node, tclassReg,
                            generateS390MemoryReference(updaterReg, tclassFieldDisp, cg));
      }
   generateRRInstruction(cg, target64 ? TR::InstOpCode::CGR : TR::InstOpCode::CR, node, objClassReg, tclassReg);
   generateRRFInstruction(cg, TR::InstOpCode::LOCR, node, flagReg, gpr5, getMaskForBranchCondition(TR::InstOpCode::COND_BNER), true);

   // Any check that fired sends us to the out-of-line call of the original method.
   generateRRInstruction(cg, TR::InstOpCode::LTR, node, flagReg, flagReg);
   generateS390BranchInstruction (cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, slowPathLabel);

   // Out-of-line section: slowPathLabel, the original call, branch back to mergeLabel.
   TR_S390OutOfLineCodeSection *oolCall = new (cg->trHeapMemory()) TR_S390OutOfLineCodeSection(slowPathLabel, mergeLabel, cg);
   cg->getS390OutOfLineCodeSectionList().push_front(oolCall);
   oolCall->swapInstructionListsWithCompilation();

   TR::Instruction *marker = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, slowPathLabel);
   if (cg->getDebug())
      cg->getDebug()->addInstructionComment(marker, "Denotes start of OOL AtomicFieldUpdater");

   // The original call; this also decrements the node reference counts.
   TR::Register *resultReg = TR::TreeEvaluator::performCall(node, false, cg);

   marker = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, mergeLabel);
   if (cg->getDebug())
      cg->getDebug()->addInstructionComment(marker, "Denotes end of OOL AtomicFieldUpdater");

   oolCall->swapInstructionListsWithCompilation();

   // Inline fast path: fetch the value's offset from the updater, then Load-And-Add.
   generateRXInstruction(cg, TR::InstOpCode::LG, node, valueOffsetReg,
                         generateS390MemoryReference(updaterReg, offsetFieldDisp, cg));
   generateRSInstruction(cg, TR::InstOpCode::LAA, node, resultReg, deltaReg,
                         new (cg->trHeapMemory()) TR::MemoryReference(targetObjReg, valueOffsetReg, 0, cg));

   // The ...AndGet variants return the updated value, so add the delta to what LAA loaded.
   if (!returnsOldValue)
      {
      generateRRInstruction(cg, TR::InstOpCode::AR, node, resultReg, deltaReg);
      }

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, mergeLabel);

   cg->stopUsingRegister(flagReg);
   cg->stopUsingRegister(deltaReg);
   cg->stopUsingRegister(valueOffsetReg);
   cg->stopUsingRegister(tclassReg);
   cg->stopUsingRegister(objClassReg);

   return resultReg;
   }
12116
12117
/**
 * Evaluates the first child and emits a label instruction whose register
 * dependencies name the child's register as both a pre and a post
 * condition (assignable to any real register). No other instruction is
 * generated here.
 *
 * @param node Node whose first child is the value to evaluate.
 * @param cg   Code generator used to evaluate the child and emit the label.
 * @return Always NULL.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineKeepAlive(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *valueNode = node->getFirstChild();
   TR::Register *valueReg = cg->evaluate(valueNode);

   // One pre and one post condition, both on the evaluated register.
   TR::RegisterDependencyConditions *deps =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, 1, cg);
   deps->addPreCondition(valueReg, TR::RealRegister::AssignAny);
   deps->addPostCondition(valueReg, TR::RealRegister::AssignAny);

   TR::LabelSymbol *anchorLabel = generateLabelSymbol(cg);
   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, anchorLabel, deps);

   cg->decReferenceCount(valueNode);
   return NULL;
   }
12130
12131
/**
12132
* Helper routine to generate a write barrier sequence for the Transactional Memory inlined sequences.
12133
*/
12134
static void
12135
genWrtBarForTM(
12136
TR::Node *node,
12137
TR::CodeGenerator *cg,
12138
TR::Register * objReg,
12139
TR::Register * srcReg,
12140
TR::Register * resultReg,
12141
bool checkResultRegForTMSuccess)
12142
{
12143
TR::Compilation *comp = cg->comp();
12144
auto gcMode = TR::Compiler->om.writeBarrierType();
12145
bool doWrtBar = (gcMode == gc_modron_wrtbar_oldcheck ||
12146
gcMode == gc_modron_wrtbar_cardmark_and_oldcheck ||
12147
gcMode == gc_modron_wrtbar_always);
12148
bool doCrdMrk = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_cardmark_incremental);
12149
12150
if (doWrtBar || doCrdMrk)
12151
{
12152
TR::LabelSymbol *doneLabelWrtBar = generateLabelSymbol(cg);
12153
TR::Register *epReg = cg->allocateRegister();
12154
TR::Register *raReg = cg->allocateRegister();
12155
12156
TR::RegisterDependencyConditions* condWrtBar = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
12157
12158
condWrtBar->addPostCondition(objReg, TR::RealRegister::GPR1);
12159
condWrtBar->addPostCondition(srcReg, TR::RealRegister::GPR2);
12160
condWrtBar->addPostCondition(epReg, cg->getEntryPointRegister());
12161
condWrtBar->addPostCondition(raReg, cg->getReturnAddressRegister());
12162
12163
// tmOffer returns 0 if transaction succeeds, tmPoll returns a non-Null object pointer if the transaction succeeds
12164
// we skip the wrtbar if TM failed
12165
if (checkResultRegForTMSuccess)
12166
{
12167
// the resultReg is not in the reg deps for tmOffer, add it for internal control flow
12168
condWrtBar->addPostCondition(resultReg, TR::RealRegister::AssignAny);
12169
generateRRInstruction(cg, TR::InstOpCode::LTR, node, resultReg, resultReg);
12170
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNE, node, doneLabelWrtBar);
12171
}
12172
else
12173
{
12174
generateRRInstruction(cg, comp->target().is64Bit() ? TR::InstOpCode::LTGR : TR::InstOpCode::LTR, node, resultReg, resultReg);
12175
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabelWrtBar);
12176
}
12177
12178
if (doWrtBar)
12179
{
12180
TR::SymbolReference *wbRef;
12181
12182
if (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck || gcMode == gc_modron_wrtbar_oldcheck)
12183
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef(comp->getMethodSymbol());
12184
else
12185
wbRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef(comp->getMethodSymbol());
12186
12187
VMnonNullSrcWrtBarCardCheckEvaluator(node, objReg, srcReg, epReg, raReg, doneLabelWrtBar,
12188
wbRef, condWrtBar, cg, false);
12189
}
12190
else if (doCrdMrk)
12191
{
12192
VMCardCheckEvaluator(node, objReg, epReg, condWrtBar, cg, false, doneLabelWrtBar, false);
12193
// true #1 -> copy of objReg just happened, it's safe to clobber tempReg
12194
// false #2 -> Don't do compile time check for heap obj
12195
}
12196
12197
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabelWrtBar, condWrtBar);
12198
12199
cg->stopUsingRegister(epReg);
12200
cg->stopUsingRegister(raReg);
12201
}
12202
}
12203
12204
/**
 * Inlines ConcurrentLinkedQueue's TM-based offer using a hardware transaction.
 *
 * The result register is preset to 1. Inside the transaction the sequence
 * loads this.tail into p, then p.next into q; if q is non-null it advances
 * p = q once and reloads p.next. If a null next is found, the new node is
 * stored into both p.next and this.tail and the result register is cleared
 * to 0; otherwise the transaction ends with the result still 1. After the
 * transaction, write barriers are generated for (p, newNode) and
 * (this, newNode); they are skipped at run time when the result is non-zero.
 *
 * The transactional sequence is not emitted at all when TR_DisableTMOffer is
 * set or when either ConcurrentLinkedQueue class cannot be found.
 *
 * Environment switches read here: TR_DisableTMOffer, TR_DebugTM,
 * TR_UseNonConstrainedTM (TBEGIN with a failure branch instead of TBEGINC)
 * and TR_DisableNIAI (suppress the NIAI instructions).
 *
 * @param node Call node; child 0 is evaluated as the queue, child 1 as the new node.
 * @param cg   Code generator used to evaluate children and emit instructions.
 * @return The result register (also set on node): 0 when the insert was performed.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineConcurrentLinkedQueueTMOffer(TR::Node *node, TR::CodeGenerator *cg)
   {
   int32_t tailDisp = 0;
   int32_t nextDisp = 0;

   // Allocation / evaluation order: result, queue, p, q, new node.
   TR::Register *resultReg  = cg->allocateRegister();
   TR::Register *queueReg   = cg->evaluate(node->getFirstChild());
   TR::Register *pReg       = cg->allocateCollectedReferenceRegister();
   TR::Register *qReg       = cg->allocateCollectedReferenceRegister();
   TR::Register *newNodeReg = cg->evaluate(node->getSecondChild());

   TR::LabelSymbol *insertLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel   = generateLabelSymbol(cg);
   TR::LabelSymbol *failLabel   = generateLabelSymbol(cg);

   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   TR::RegisterDependencyConditions *tmDeps =
      new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
   tmDeps->addPostCondition(resultReg, TR::RealRegister::AssignAny);
   tmDeps->addPostCondition(queueReg, TR::RealRegister::AssignAny);
   tmDeps->addPostCondition(pReg, TR::RealRegister::AssignAny);
   tmDeps->addPostCondition(qReg, TR::RealRegister::AssignAny);
   tmDeps->addPostCondition(newNodeReg, TR::RealRegister::AssignAny);

   const bool compressedRefs = comp->useCompressedPointers();
   const int32_t refShift = TR::Compiler->om.compressedReferenceShift();

   static char *disableEnv = feGetEnv("TR_DisableTMOffer");
   bool tmDisabled = (disableEnv != NULL);

   // Resolve the field displacements; without both classes the TM path is disabled.
   TR_OpaqueClassBlock *nodeClass =
      fej9->getClassFromSignature("Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49, comp->getCurrentMethod(), true);
   TR_OpaqueClassBlock *queueClass =
      fej9->getClassFromSignature("Ljava/util/concurrent/ConcurrentLinkedQueue;", 44, comp->getCurrentMethod(), true);

   if (!(nodeClass && queueClass))
      {
      tmDisabled = true;
      }
   else
      {
      nextDisp = fej9->getObjectHeaderSizeInBytes() + fej9->getInstanceFieldOffset(nodeClass, "next", 4, "Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49);
      tailDisp = fej9->getObjectHeaderSizeInBytes() + fej9->getInstanceFieldOffset(queueClass, "tail", 4, "Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49);
      }

   // Preset the result to 1; it is cleared only after a successful insert.
   generateRIInstruction(cg, TR::InstOpCode::LHI, node, resultReg, 1);

   static char *debugTM = feGetEnv("TR_DebugTM");
   if (debugTM)
      {
      if (!tmDisabled)
         printf ("\nTM: use TM CLQ.Offer in %s (%s)", comp->signature(), comp->getHotnessName(comp->getMethodHotness()));
      else
         printf ("\nTM: disabling TM CLQ.Offer in %s (%s)", comp->signature(), comp->getHotnessName(comp->getMethodHotness()));
      fflush(stdout);
      }

   static char *useNonConstrainedTM = feGetEnv("TR_UseNonConstrainedTM");
   static char *disableNIAI = feGetEnv("TR_DisableNIAI");

   // the Transaction Diagnostic Block (TDB) is a memory location for the OS to write state info in the event of an abort
   TR::MemoryReference *tdbRef = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), fej9->thisThreadGetTDBOffset(), cg);

   if (!tmDisabled)
      {
      if (!useNonConstrainedTM)
         {
         // No TDB for constrained transactions. Immediate field reflects TBEGINC can't filter interrupts
         generateSILInstruction(cg, TR::InstOpCode::TBEGINC, node, generateS390MemoryReference(0, cg), 0xFF00);
         }
      else
         {
         // immediate field described in TR::TreeEvaluator::tstartEvaluator
         generateSILInstruction(cg, TR::InstOpCode::TBEGIN, node, tdbRef, 0xFF02);
         generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, failLabel);
         }

      // p = this.tail
      if (!disableNIAI)
         generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);

      if (!compressedRefs)
         {
         generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, pReg, generateS390MemoryReference(queueReg, tailDisp, cg));
         }
      else
         {
         generateRXInstruction(cg, TR::InstOpCode::LLGF, node, pReg, generateS390MemoryReference(queueReg, tailDisp, cg));
         if (refShift != 0)
            generateRSInstruction(cg, TR::InstOpCode::SLLG, node, pReg, pReg, refShift);
         }

      // q = p.next (load-and-test); insert if it is null
      if (!disableNIAI)
         generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);

      if (!compressedRefs)
         {
         generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, qReg, generateS390MemoryReference(pReg, nextDisp, cg));
         }
      else
         {
         generateRXInstruction(cg, TR::InstOpCode::LT, node, qReg, generateS390MemoryReference(pReg, nextDisp, cg));
         generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, qReg, qReg);
         if (refShift != 0)
            generateRSInstruction(cg, TR::InstOpCode::SLLG, node, qReg, qReg, refShift);
         }

      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, insertLabel);

      // p = q, then one more attempt: q = p.next
      generateRRInstruction(cg, TR::InstOpCode::getLoadRegOpCode(), node, pReg, qReg);

      if (!disableNIAI)
         generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);

      if (!compressedRefs)
         {
         generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, qReg, generateS390MemoryReference(pReg, nextDisp, cg));
         }
      else
         {
         generateRXInstruction(cg, TR::InstOpCode::LT, node, qReg, generateS390MemoryReference(pReg, nextDisp, cg));
         generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, qReg, qReg);
         if (refShift != 0)
            generateRSInstruction(cg, TR::InstOpCode::SLLG, node, qReg, qReg, refShift);
         }

      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, insertLabel);
      // Still no null next: give up with the result left at 1.
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneLabel);

      // Insert: p.next = newNode; this.tail = newNode
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, insertLabel);

      if (!compressedRefs)
         {
         generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, newNodeReg, generateS390MemoryReference(pReg, nextDisp, cg));
         generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, newNodeReg, generateS390MemoryReference(queueReg, tailDisp, cg));
         }
      else if (refShift != 0)
         {
         // q is reused to hold the new node shifted right by the compressed reference shift
         generateRSInstruction(cg, TR::InstOpCode::SRLG, node, qReg, newNodeReg, refShift);
         generateRXInstruction(cg, TR::InstOpCode::ST, node, qReg, generateS390MemoryReference(pReg, nextDisp, cg));
         generateRXInstruction(cg, TR::InstOpCode::ST, node, qReg, generateS390MemoryReference(queueReg, tailDisp, cg));
         }
      else
         {
         generateRXInstruction(cg, TR::InstOpCode::ST, node, newNodeReg, generateS390MemoryReference(pReg, nextDisp, cg));
         generateRXInstruction(cg, TR::InstOpCode::ST, node, newNodeReg, generateS390MemoryReference(queueReg, tailDisp, cg));
         }

      // Insert performed: result = 0
      generateRRInstruction(cg, TR::InstOpCode::XR, node, resultReg, resultReg);

      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, tmDeps);

      generateSInstruction(cg, TR::InstOpCode::TEND, node, generateS390MemoryReference(cg->machine()->getRealRegister(TR::RealRegister::GPR0),0,cg));
      }

   if (useNonConstrainedTM || tmDisabled)
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, failLabel, tmDeps);

   // Barriers for the two stores; each checks the result register and is skipped when it is non-zero.
   genWrtBarForTM(node, cg, pReg, newNodeReg, resultReg, true);
   genWrtBarForTM(node, cg, queueReg, newNodeReg, resultReg, true);

   cg->decReferenceCount(node->getFirstChild());
   cg->decReferenceCount(node->getSecondChild());
   cg->stopUsingRegister(pReg);
   cg->stopUsingRegister(qReg);

   node->setRegister(resultReg);
   return resultReg;
   }
12389
12390
TR::Register*
12391
J9::Z::TreeEvaluator::inlineConcurrentLinkedQueueTMPoll(TR::Node *node, TR::CodeGenerator *cg)
12392
{
12393
int32_t offsetHead = 0;
12394
int32_t offsetNext = 0;
12395
int32_t offsetItem = 0;
12396
TR_OpaqueClassBlock * classBlock1 = NULL;
12397
TR_OpaqueClassBlock * classBlock2 = NULL;
12398
12399
TR::Register * rE = cg->allocateCollectedReferenceRegister();
12400
TR::Register * rP = cg->allocateCollectedReferenceRegister();
12401
TR::Register * rQ = cg->allocateCollectedReferenceRegister();
12402
TR::Register * rThis = cg->evaluate(node->getFirstChild());
12403
TR::Register * rTmp = NULL;
12404
TR::Instruction * cursor = NULL;
12405
TR::LabelSymbol * doneLabel = generateLabelSymbol(cg);
12406
TR::LabelSymbol * failLabel = generateLabelSymbol(cg);
12407
12408
TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 5, cg);
12409
deps->addPostCondition(rE, TR::RealRegister::AssignAny);
12410
deps->addPostCondition(rP, TR::RealRegister::AssignAny);
12411
deps->addPostCondition(rQ, TR::RealRegister::AssignAny);
12412
deps->addPostCondition(rThis, TR::RealRegister::AssignAny);
12413
12414
TR::Compilation *comp = cg->comp();
12415
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
12416
12417
bool usesCompressedrefs = comp->useCompressedPointers();
12418
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
12419
12420
if (usesCompressedrefs && shiftAmount !=0)
12421
{
12422
rTmp = cg->allocateRegister();
12423
deps->addPostCondition(rTmp, TR::RealRegister::AssignAny);
12424
}
12425
12426
static char * disableTMPollenv = feGetEnv("TR_DisableTMPoll");
12427
bool disableTMPoll = disableTMPollenv;
12428
12429
classBlock1 = fej9->getClassFromSignature("Ljava/util/concurrent/ConcurrentLinkedQueue;", 44, comp->getCurrentMethod(), true);
12430
classBlock2 = fej9->getClassFromSignature("Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49, comp->getCurrentMethod(), true);
12431
12432
if (classBlock1 && classBlock2)
12433
{
12434
offsetHead = fej9->getObjectHeaderSizeInBytes() + fej9->getInstanceFieldOffset(classBlock1, "head", 4, "Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49);
12435
offsetNext = fej9->getObjectHeaderSizeInBytes() + fej9->getInstanceFieldOffset(classBlock2, "next", 4, "Ljava/util/concurrent/ConcurrentLinkedQueue$Node;", 49);
12436
offsetItem = fej9->getObjectHeaderSizeInBytes() + fej9->getInstanceFieldOffset(classBlock2, "item", 4, "Ljava/lang/Object;", 18);
12437
}
12438
else
12439
disableTMPoll = true;
12440
12441
cursor = generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, rE, rE);
12442
12443
static char * debugTM= feGetEnv("TR_DebugTM");
12444
12445
if (debugTM)
12446
{
12447
if (disableTMPoll)
12448
{
12449
printf ("\nTM: disabling TM CLQ.Poll in %s (%s)", comp->signature(), comp->getHotnessName(comp->getMethodHotness()));
12450
fflush(stdout);
12451
}
12452
else
12453
{
12454
printf ("\nTM: use TM CLQ.Poll in %s (%s)", comp->signature(), comp->getHotnessName(comp->getMethodHotness()));
12455
fflush(stdout);
12456
}
12457
}
12458
12459
static char * useNonConstrainedTM = feGetEnv("TR_UseNonConstrainedTM");
12460
static char * disableNIAI = feGetEnv("TR_DisableNIAI");
12461
12462
// the Transaction Diagnostic Block (TDB) is a memory location for the OS to write state info in the event of an abort
12463
TR::MemoryReference* TDBmemRef = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), fej9->thisThreadGetTDBOffset(), cg);
12464
12465
if (!disableTMPoll)
12466
{
12467
if (useNonConstrainedTM)
12468
{
12469
// immediate field described in TR::TreeEvaluator::tstartEvaluator
12470
cursor = generateSILInstruction(cg, TR::InstOpCode::TBEGIN, node, TDBmemRef, 0xFF02);
12471
12472
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, failLabel);
12473
}
12474
else
12475
{
12476
// No TDB for constrained transactions. Immediate field reflects TBEGINC can't filter interrupts
12477
cursor = generateSILInstruction(cg, TR::InstOpCode::TBEGINC, node, generateS390MemoryReference(0, cg), 0xFF00);
12478
}
12479
12480
if (!disableNIAI)
12481
cursor = generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);
12482
12483
if (usesCompressedrefs)
12484
{
12485
cursor = generateRXInstruction(cg, TR::InstOpCode::LLGF, node, rP, generateS390MemoryReference(rThis, offsetHead, cg));
12486
12487
if (shiftAmount != 0)
12488
{
12489
cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, rP, rP, shiftAmount);
12490
}
12491
}
12492
else
12493
{
12494
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, rP, generateS390MemoryReference(rThis, offsetHead, cg));
12495
}
12496
12497
if (!disableNIAI)
12498
cursor = generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);
12499
12500
if (usesCompressedrefs)
12501
{
12502
cursor = generateRXInstruction(cg, TR::InstOpCode::LLGF, node, rE, generateS390MemoryReference(rP, offsetItem, cg));
12503
12504
if (shiftAmount != 0)
12505
{
12506
cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, rE, rE, shiftAmount);
12507
}
12508
}
12509
else
12510
{
12511
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadOpCode(), node, rE, generateS390MemoryReference(rP, offsetItem, cg));
12512
}
12513
12514
if (!disableNIAI)
12515
cursor = generateS390IEInstruction(cg, TR::InstOpCode::NIAI, 1, 0, node);
12516
12517
if (usesCompressedrefs)
12518
{
12519
cursor = generateRXInstruction(cg, TR::InstOpCode::LT, node, rQ, generateS390MemoryReference(rP, offsetNext, cg));
12520
cursor = generateSILInstruction(cg, TR::InstOpCode::MVHI, node, generateS390MemoryReference(rP, offsetItem, cg), 0);
12521
}
12522
else
12523
{
12524
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, rQ, generateS390MemoryReference(rP, offsetNext, cg));
12525
cursor = generateSILInstruction(cg, TR::InstOpCode::getMoveHalfWordImmOpCode(), node, generateS390MemoryReference(rP, offsetItem, cg), 0);
12526
}
12527
12528
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, doneLabel);
12529
12530
if (usesCompressedrefs)
12531
{
12532
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, rQ, generateS390MemoryReference(rThis, offsetHead, cg));
12533
if (shiftAmount != 0)
12534
{
12535
cursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, rTmp, rP, shiftAmount);
12536
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, rTmp, generateS390MemoryReference(rP, offsetNext, cg));
12537
}
12538
else
12539
{
12540
cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, rP, generateS390MemoryReference(rP, offsetNext, cg));
12541
}
12542
}
12543
else
12544
{
12545
cursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, rQ, generateS390MemoryReference(rThis, offsetHead, cg));
12546
cursor = generateRXInstruction(cg, TR::InstOpCode::getStoreOpCode(), node, rP, generateS390MemoryReference(rP, offsetNext, cg));
12547
}
12548
12549
if (useNonConstrainedTM)
12550
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
12551
else
12552
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel, deps);
12553
12554
cursor = generateSInstruction(cg, TR::InstOpCode::TEND, node, generateS390MemoryReference(cg->machine()->getRealRegister(TR::RealRegister::GPR0),0,cg));
12555
}
12556
12557
if (useNonConstrainedTM || disableTMPoll)
12558
cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, failLabel, deps);
12559
12560
if (usesCompressedrefs)
12561
{
12562
generateRRInstruction(cg, TR::InstOpCode::LLGFR, node, rQ, rQ);
12563
12564
if (shiftAmount != 0)
12565
{
12566
cursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, rQ, rQ, shiftAmount);
12567
}
12568
}
12569
12570
genWrtBarForTM(node, cg, rThis, rQ, rQ, false);
12571
// we don't need wrtbar for P, it is dead (or has NULL)
12572
12573
cg->decReferenceCount(node->getFirstChild());
12574
cg->stopUsingRegister(rP);
12575
cg->stopUsingRegister(rQ);
12576
12577
if (usesCompressedrefs && shiftAmount != 0)
12578
{
12579
cg->stopUsingRegister(rTmp);
12580
}
12581
12582
node->setRegister(rE);
12583
12584
return rE;
12585
}
12586
12587
void
12588
VMgenerateCatchBlockBBStartPrologue(
12589
TR::Node *node,
12590
TR::Instruction *fenceInstruction,
12591
TR::CodeGenerator *cg)
12592
{
12593
TR::Compilation *comp = cg->comp();
12594
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
12595
12596
TR::Block *block = node->getBlock();
12597
12598
// Encourage recompilation
12599
if (fej9->shouldPerformEDO(block, comp))
12600
{
12601
TR::Register * biAddrReg = cg->allocateRegister();
12602
12603
// Load address of counter into biAddrReg
12604
genLoadAddressConstant(cg, node, (uintptr_t) comp->getRecompilationInfo()->getCounterAddress(), biAddrReg);
12605
12606
// Counter is 32-bit, so only use 32-bit opcodes
12607
TR::MemoryReference * recompMR = generateS390MemoryReference(biAddrReg, 0, cg);
12608
generateSIInstruction(cg, TR::InstOpCode::ASI, node, recompMR, -1);
12609
recompMR->stopUsingMemRefRegister(cg);
12610
12611
// Check counter and induce recompilation if counter = 0
12612
TR::LabelSymbol * cFlowRegionStart = generateLabelSymbol(cg);
12613
TR::LabelSymbol * snippetLabel = generateLabelSymbol(cg);
12614
TR::LabelSymbol * restartLabel = generateLabelSymbol(cg);
12615
12616
snippetLabel->setEndInternalControlFlow();
12617
12618
TR::Register * tempReg1 = cg->allocateRegister();
12619
TR::Register * tempReg2 = cg->allocateRegister();
12620
12621
TR::RegisterDependencyConditions * dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 2, cg);
12622
dependencies->addPostCondition(tempReg1, cg->getEntryPointRegister());
12623
dependencies->addPostCondition(tempReg2, cg->getReturnAddressRegister());
12624
// Branch to induceRecompilation helper routine if counter is 0 - based on condition code of the preceding adds.
12625
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
12626
cFlowRegionStart->setStartInternalControlFlow();
12627
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, snippetLabel);
12628
12629
TR::Snippet * snippet = new (cg->trHeapMemory()) TR::S390ForceRecompilationSnippet(cg, node, restartLabel, snippetLabel);
12630
cg->addSnippet(snippet);
12631
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, restartLabel, dependencies);
12632
12633
cg->stopUsingRegister(tempReg1);
12634
cg->stopUsingRegister(tempReg2);
12635
12636
cg->stopUsingRegister(biAddrReg);
12637
}
12638
}
12639
12640
float
12641
J9::Z::TreeEvaluator::interpreterProfilingInstanceOfOrCheckCastTopProb(TR::CodeGenerator * cg, TR::Node * node)
12642
{
12643
TR::Compilation *comp = cg->comp();
12644
TR_ByteCodeInfo bcInfo = node->getByteCodeInfo();
12645
TR_ValueProfileInfoManager * valueProfileInfo = TR_ValueProfileInfoManager::get(comp);
12646
12647
if (!valueProfileInfo)
12648
return 0;
12649
12650
TR_AddressInfo *valueInfo = static_cast<TR_AddressInfo*>(valueProfileInfo->getValueInfo(bcInfo, comp, AddressInfo, TR_ValueProfileInfoManager::justInterpreterProfileInfo));
12651
if (!valueInfo || valueInfo->getNumProfiledValues()==0)
12652
{
12653
return 0;
12654
}
12655
12656
TR_OpaqueClassBlock *topValue = (TR_OpaqueClassBlock *) valueInfo->getTopValue();
12657
if (!topValue)
12658
{
12659
return 0;
12660
}
12661
12662
if (valueInfo->getTopProbability() < TR::Options::getMinProfiledCheckcastFrequency())
12663
return 0;
12664
12665
if (comp->getPersistentInfo()->isObsoleteClass(topValue, cg->fe()))
12666
{
12667
return 0;
12668
}
12669
12670
return valueInfo->getTopProbability();
12671
}
12672
12673
/**
12674
* countDigitsEvaluator - count the number of decimal digits of an integer/long binary
12675
* value (excluding the negative sign). The original counting digits Java loop is
12676
* reduced to this IL node by idiom recognition.
12677
*/
12678
TR::Register *
12679
J9::Z::TreeEvaluator::countDigitsEvaluator(TR::Node * node, TR::CodeGenerator * cg)
12680
{
12681
// Idiom recognition will reduce the appropriate loop into the following
12682
// form:
12683
// TR::countDigits
12684
// inputValue // either int or long
12685
// digits10LookupTable
12686
//
12687
// Original loop:
12688
// do { count ++; } while((l /= 10) != 0);
12689
//
12690
// Since the maximum number of decimal digits for an int is 10, and a long is 19,
12691
// we can perform binary search comparing the input value with pre-computed digits.
12692
12693
12694
TR::Node * inputNode = node->getChild(0);
12695
TR::Register * inputReg = cg->gprClobberEvaluate(inputNode);
12696
TR::Register * workReg = cg->evaluate(node->getChild(1));
12697
TR::Register * countReg = cg->allocateRegister();
12698
12699
TR_ASSERT( inputNode->getDataType() == TR::Int64 || inputNode->getDataType() == TR::Int32, "child of TR::countDigits must be of integer type");
12700
12701
bool isLong = (inputNode->getDataType() == TR::Int64);
12702
TR_ASSERT( !isLong || cg->comp()->target().is64Bit(), "CountDigitEvaluator requires 64-bit support for longs");
12703
12704
TR::RegisterDependencyConditions * dependencies;
12705
dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 3, cg);
12706
dependencies->addPostCondition(inputReg, TR::RealRegister::AssignAny);
12707
dependencies->addPostCondition(workReg, TR::RealRegister::AssignAny);
12708
dependencies->addPostCondition(countReg, TR::RealRegister::AssignAny);
12709
12710
TR::MemoryReference * work[18];
12711
TR::LabelSymbol * label[18];
12712
TR::LabelSymbol * cFlowRegionEnd = generateLabelSymbol(cg);
12713
12714
// Get the negative input value (2's complement) - We treat all numbers as
12715
// negative to simplify the absolute comparison, and take advance of the
12716
// CC trick in countsDigitHelper.
12717
12718
// If the input is a 32-bit value on 64-bit architecture, we cannot simply use TR::InstOpCode::LNGR because the input may not be sign-extended.
12719
// If you want to use TR::InstOpCode::LNGR for a 32-bit value on 64-bit architecture, you'll need to additionally generate TR::InstOpCode::LGFR for the input.
12720
generateRRInstruction(cg, !isLong ? TR::InstOpCode::LNR : TR::InstOpCode::LNGR, node, inputReg, inputReg);
12721
12722
TR::LabelSymbol * cFloWRegionStart = generateLabelSymbol(cg);
12723
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFloWRegionStart);
12724
cFloWRegionStart->setStartInternalControlFlow();
12725
12726
if (isLong)
12727
{
12728
for (int32_t i = 0; i < 18; i++)
12729
{
12730
work[i] = generateS390MemoryReference(workReg, i*8, cg);
12731
label[i] = generateLabelSymbol(cg);
12732
}
12733
12734
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[7]);
12735
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[11]);
12736
12737
// LABEL 3
12738
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[3]);
12739
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[5]);
12740
12741
// LABEL 1
12742
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[1]);
12743
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[2]);
12744
12745
countDigitsHelper(node, cg, 0, work[0], inputReg, countReg, cFlowRegionEnd, isLong); // 0 and 1
12746
12747
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[2]); // LABEL 2
12748
countDigitsHelper(node, cg, 2, work[2], inputReg, countReg, cFlowRegionEnd, isLong); // 2 and 3
12749
12750
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[5]); // LABEL 5
12751
12752
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[5]);
12753
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[6]);
12754
12755
countDigitsHelper(node, cg, 4, work[4], inputReg, countReg, cFlowRegionEnd, isLong); // 4 and 5
12756
12757
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[6]); // LABEL 6
12758
countDigitsHelper(node, cg, 6, work[6], inputReg, countReg, cFlowRegionEnd, isLong); // 6 and 7
12759
12760
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[11]); // LABEL 11
12761
12762
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[11]);
12763
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[14]);
12764
12765
// LABEL 9
12766
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[9]);
12767
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[10]);
12768
12769
countDigitsHelper(node, cg, 8, work[8], inputReg, countReg, cFlowRegionEnd, isLong); // 8 and 9
12770
12771
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[10]); // LABEL 10
12772
countDigitsHelper(node, cg, 10, work[10], inputReg, countReg, cFlowRegionEnd, isLong); // 10 and 11
12773
12774
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[14]); // LABEL 14
12775
12776
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[14]);
12777
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[16]);
12778
12779
// LABEL 12
12780
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[12]); // 12
12781
generateRIInstruction(cg, TR::InstOpCode::getLoadHalfWordImmOpCode(), node, countReg, 12+1);
12782
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, cFlowRegionEnd);
12783
12784
// LABEL 13
12785
countDigitsHelper(node, cg, 13, work[13], inputReg, countReg, cFlowRegionEnd, isLong); // 13 and 14
12786
12787
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[16]); // LABEL 16
12788
12789
generateRXInstruction(cg, TR::InstOpCode::CG, node, inputReg, work[16]);
12790
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[17]);
12791
// LABEL 15
12792
countDigitsHelper(node, cg, 15, work[15], inputReg, countReg, cFlowRegionEnd, isLong); // 15 and 16
12793
12794
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[17]); // LABEL 17
12795
countDigitsHelper(node, cg, 17, work[17], inputReg, countReg, cFlowRegionEnd, isLong); // 17 and 18
12796
12797
for (int32_t i = 0; i < 18; i++)
12798
{
12799
work[i]->stopUsingMemRefRegister(cg);
12800
}
12801
}
12802
else
12803
{
12804
for (int32_t i = 0; i < 9; i++)
12805
{
12806
work[i] = generateS390MemoryReference(workReg, i*8+4, cg); // lower 32-bit
12807
label[i] = generateLabelSymbol(cg);
12808
}
12809
12810
// We already generate the label instruction, why would we generate it again?
12811
//generateS390LabelInstruction(cg, TR::InstOpCode::label, node, startLabel);
12812
12813
generateRXInstruction(cg, TR::InstOpCode::C, node, inputReg, work[3]);
12814
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[5]);
12815
12816
// LABEL 1
12817
generateRXInstruction(cg, TR::InstOpCode::C, node, inputReg, work[1]);
12818
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[2]);
12819
12820
countDigitsHelper(node, cg, 0, work[0], inputReg, countReg, cFlowRegionEnd, isLong); // 0 and 1
12821
12822
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[2]); // LABEL 2
12823
countDigitsHelper(node, cg, 2, work[2], inputReg, countReg, cFlowRegionEnd, isLong); // 2 and 3
12824
12825
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[5]); // LABEL 5
12826
12827
generateRXInstruction(cg, TR::InstOpCode::C, node, inputReg, work[5]);
12828
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[7]);
12829
12830
countDigitsHelper(node, cg, 4, work[4], inputReg, countReg, cFlowRegionEnd, isLong); // 4 and 5
12831
12832
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[7]); // LABEL 7
12833
12834
generateRXInstruction(cg, TR::InstOpCode::C, node, inputReg, work[7]);
12835
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BNH, node, label[8]);
12836
12837
countDigitsHelper(node, cg, 6, work[6], inputReg, countReg, cFlowRegionEnd, isLong); // 6 and 7
12838
12839
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, label[8]); // LABEL 8
12840
countDigitsHelper(node, cg, 8, work[8], inputReg, countReg, cFlowRegionEnd, isLong); // 8 and 9
12841
12842
12843
for (int32_t i = 0; i < 9; i++)
12844
{
12845
work[i]->stopUsingMemRefRegister(cg);
12846
}
12847
}
12848
12849
cg->stopUsingRegister(inputReg);
12850
cg->stopUsingRegister(workReg);
12851
12852
// End
12853
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
12854
cFlowRegionEnd->setEndInternalControlFlow();
12855
12856
node->setRegister(countReg);
12857
12858
cg->decReferenceCount(inputNode);
12859
cg->decReferenceCount(node->getChild(1));
12860
return countReg;
12861
}
12862
12863
/**
12864
* countDigitsHelper emits code to determine whether the given input value has
12865
* memRefIndex or memRefIndex+1 digits.
12866
*/
12867
void
12868
J9::Z::TreeEvaluator::countDigitsHelper(TR::Node * node, TR::CodeGenerator * cg,
12869
int32_t memRefIndex, TR::MemoryReference * memRef,
12870
TR::Register* inputReg, TR::Register* countReg,
12871
TR::LabelSymbol *doneLabel, bool isLong)
12872
{
12873
// Compare input value with the binary memRefIndex value. The instruction
12874
// sets CC1 if input <= [memRefIndex], which is also the borrow CC. Since
12875
// the numbers are all negative, the equivalent comparison is set if
12876
// inputValue > [memRefIndex].
12877
generateRXInstruction(cg, (isLong)?TR::InstOpCode::CG:TR::InstOpCode::C, node, inputReg, memRef); \
12878
12879
// Clear countRegister and set it to 1 if inputValue > [memRefIndex].
12880
generateRRInstruction(cg, TR::InstOpCode::getSubtractWithBorrowOpCode(), node, countReg, countReg);
12881
generateRRInstruction(cg, TR::InstOpCode::getLoadComplementOpCode(), node, countReg, countReg);
12882
12883
// Calculate final count of digits by adding to memRefIndex + 1. The +1 is
12884
// required as our memRefIndex starts with index 0, but digit counts starts with 1.
12885
generateRIInstruction(cg, TR::InstOpCode::getAddHalfWordImmOpCode(), node, countReg, memRefIndex+1);
12886
12887
// CountReg has the number of digits. Jump to done label.
12888
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, doneLabel);
12889
12890
}
12891
12892
12893
/**
12894
* tstartEvaluator: begin a transaction
12895
*/
12896
TR::Register *
12897
J9::Z::TreeEvaluator::tstartEvaluator(TR::Node * node, TR::CodeGenerator * cg)
12898
{
12899
// [0x00000000803797c8] ( 0) tstart
12900
// [0x0000000080379738] ( 1) branch --> block 28 BBStart at [0x0000000080378bc8]
12901
// [0x00000000803f15f8] ( 1) GlRegDeps
12902
// ( 3) ==>aRegLoad at [0x00000000803f1568] (in &GPR_0048)
12903
// ( 2) ==>aRegLoad at [0x00000000803f15b0] (in &GPR_0049)
12904
// [0x0000000080379780] ( 1) branch --> block 29 BBStart at [0x0000000080378ed8]
12905
// [0x00000000803f1640] ( 1) GlRegDeps
12906
// ( 3) ==>aRegLoad at [0x00000000803f1568] (in &GPR_0048)
12907
// ( 2) ==>aRegLoad at [0x00000000803f15b0] (in &GPR_0049)
12908
// [0x00000000803796f0] ( 1) aload #422[0x000000008035e4b0] Auto[<temp slot 2 holds monitoredObject syncMethod>] <flags:"0x4" (X!=0 )/>
12909
// [0x00000000803f1688] ( 1) GlRegDeps
12910
// ( 3) ==>aRegLoad at [0x00000000803f1568] (in &GPR_0048)
12911
12912
12913
// TBEGIN 0(R0),0xFF00
12914
// BRNEZ OOL TM ; CC0 = success
12915
// ------ OOL TM ----
12916
// BRH Block_Transient_Handler ; CC2 = transient failure
12917
// POST deps (persistent path)
12918
// BRC Block_Persistent_Handler ; CC1,CC3 = persistent failure
12919
// Post deps (transient path)
12920
// BRC mainline ; we need this brc for OOL mechanism, though it's never taken
12921
// -----------------------
12922
// LT Rlw, lockword (obj)
12923
// BEQ Label Start
12924
// TEND
12925
// BRC Block_Transient_Handler
12926
// Label Start
12927
// POST Deps
12928
12929
TR::Compilation *comp = cg->comp();
12930
TR_J9VMBase *fej9 = static_cast<TR_J9VMBase*>(cg->fe());
12931
TR::Instruction * cursor = NULL;
12932
12933
TR::Node * brPersistentNode = node->getFirstChild();
12934
TR::Node * brTransientNode = node->getSecondChild();
12935
TR::Node * fallThrough = node->getThirdChild();
12936
TR::Node * objNode = node->getChild(3);
12937
TR::Node * GRAChild = NULL;
12938
12939
TR::LabelSymbol * labelPersistentFailure = brPersistentNode->getBranchDestination()->getNode()->getLabel();
12940
TR::LabelSymbol * labelTransientFailure = brTransientNode->getBranchDestination()->getNode()->getLabel();
12941
TR::LabelSymbol * startLabel = fallThrough->getBranchDestination()->getNode()->getLabel();
12942
12943
TR::Register * objReg = cg->evaluate(objNode);
12944
TR::Register * monitorReg = cg->allocateRegister();
12945
12946
TR::RegisterDependencyConditions *deps = NULL;
12947
TR::RegisterDependencyConditions *depsPersistent = NULL;
12948
TR::RegisterDependencyConditions *depsTransient = NULL;
12949
12950
// GRA
12951
if (fallThrough->getNumChildren() !=0)
12952
{
12953
GRAChild = fallThrough->getFirstChild();
12954
cg->evaluate(GRAChild);
12955
deps = generateRegisterDependencyConditions(cg, GRAChild, 0);
12956
cg->decReferenceCount(GRAChild);
12957
}
12958
12959
if (brPersistentNode->getNumChildren() != 0)
12960
{
12961
GRAChild = brPersistentNode->getFirstChild();
12962
cg->evaluate(GRAChild);
12963
depsPersistent = generateRegisterDependencyConditions(cg, GRAChild, 0);
12964
cg->decReferenceCount(GRAChild);
12965
}
12966
12967
if (brTransientNode->getNumChildren() != 0)
12968
{
12969
GRAChild = brTransientNode->getFirstChild();
12970
cg->evaluate(GRAChild);
12971
depsTransient = generateRegisterDependencyConditions(cg, GRAChild, 0);
12972
cg->decReferenceCount(GRAChild);
12973
}
12974
12975
// the Transaction Diagnostic Block (TDB) is a memory location for the OS to write state info in the event of an abort
12976
TR::MemoryReference* TDBmemRef = generateS390MemoryReference(cg->getMethodMetaDataRealRegister(), fej9->thisThreadGetTDBOffset(), cg);
12977
12978
static char * debugTM = feGetEnv("debugTM");
12979
12980
if (debugTM)
12981
{
12982
// artificially set CC to transientFailure, objReg is always > 0
12983
cursor = generateRRInstruction(cg, comp->target().is64Bit() ? TR::InstOpCode::LTGR : TR::InstOpCode::LTR, node, objReg, objReg);
12984
}
12985
else
12986
{
12987
/// Immediate field of TBEGIN:
12988
/// bits 0-7: FF - General Register Save Mask used to tell the hardware which pairs of registers need to be rolled back.
12989
/// always set to FF here because GRA will later decide which registers we actually need to roll back.
12990
/// bits 8-11: 0 - not used by hardware, always zero.
12991
/// bit 12: 0 - Allow access register modification
12992
/// bit 13: 0 - Allow floating-point operation
12993
/// bits 14-15: 2 - Program-Interruption-Filtering Control
12994
/// PIFC bits needs to be set to 2, to allow 0C4 and 0C7 interrupts to resume, instead of being thrown.
12995
/// Since all interrupts cause aborts, the PSW is rolled back to TBEGIN on interrupts. The 0C7 interrupts
12996
/// are generated by trap instructions for Java exception handling. The 0C4 interrupts are used by z/OS LE to
12997
/// detect guarded page exceptions which are used to trigger XPLINK stack growth. In both cases, either the
12998
/// LE or JIT signal handler need the PSW of the actual instruction that generated the interrupt, not the
12999
/// rolled back PSW pointing to TBEGIN. Without filtering these interrupts, the program will crash. Filtering
13000
/// the interrupts allows us to resume execution following the abort and go to slow path so the exceptions
13001
/// can be properly caught and handled.
13002
13003
cursor = generateSILInstruction(cg, TR::InstOpCode::TBEGIN, node, TDBmemRef, 0xFF02);
13004
}
13005
13006
if (labelTransientFailure == labelPersistentFailure)
13007
{
13008
if (depsPersistent != depsTransient) //only possible to be equal if they are NULL (i.e. non existent)
13009
{
13010
TR_ASSERT( depsPersistent && depsTransient, "regdeps wrong in tstart evaluator");
13011
uint32_t i = depsPersistent->getNumPostConditions();
13012
uint32_t j = depsTransient->getNumPostConditions();
13013
TR_ASSERT( i == j, "regdep postcondition number not the same");
13014
depsPersistent->getPostConditions()->getRegisterDependency(i);
13015
i = depsPersistent->getNumPreConditions();
13016
j = depsTransient->getNumPreConditions();
13017
TR_ASSERT( i == j, "regdep precondition number not the same");
13018
}
13019
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, labelTransientFailure, depsPersistent);
13020
}
13021
else
13022
{
13023
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, labelTransientFailure, depsTransient);
13024
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK7, node, labelPersistentFailure, depsPersistent);
13025
}
13026
13027
13028
int32_t lwOffset = cg->fej9()->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
13029
13030
if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())
13031
cursor = generateRXInstruction(cg, TR::InstOpCode::LT, node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg), cursor);
13032
else
13033
cursor = generateRXInstruction(cg, TR::InstOpCode::getLoadTestOpCode(), node, monitorReg, generateS390MemoryReference(objReg, lwOffset, cg),cursor);
13034
13035
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BE, node, startLabel, deps, cursor);
13036
13037
TR::MemoryReference * tempMR1 = generateS390MemoryReference(cg->machine()->getRealRegister(TR::RealRegister::GPR0),0,cg);
13038
13039
// use TEND + BRC instead of TABORT for better performance
13040
cursor = generateSInstruction(cg, TR::InstOpCode::TEND, node, tempMR1, cursor);
13041
13042
cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, labelTransientFailure, depsTransient, cursor);
13043
13044
cg->stopUsingRegister(monitorReg);
13045
cg->decReferenceCount(objNode);
13046
cg->decReferenceCount(brPersistentNode);
13047
cg->decReferenceCount(brTransientNode);
13048
cg->decReferenceCount(fallThrough);
13049
13050
return NULL;
13051
}
13052
13053
/**
13054
* tfinishEvaluator: end a transaction
13055
*/
13056
TR::Register *
13057
J9::Z::TreeEvaluator::tfinishEvaluator(TR::Node * node, TR::CodeGenerator * cg)
13058
{
13059
TR::MemoryReference * tempMR1 = generateS390MemoryReference(cg->machine()->getRealRegister(TR::RealRegister::GPR0),0,cg);
13060
TR::Instruction * cursor = generateSInstruction(cg, TR::InstOpCode::TEND, node, tempMR1);
13061
13062
return NULL;
13063
}
13064
13065
/**
13066
* tabortEvaluator: abort a transaction
13067
*/
13068
TR::Register *
13069
J9::Z::TreeEvaluator::tabortEvaluator(TR::Node * node, TR::CodeGenerator * cg)
13070
{
13071
TR::Instruction *cursor;
13072
TR::LabelSymbol * labelDone = generateLabelSymbol(cg);
13073
TR::Register *codeReg = cg->allocateRegister();
13074
generateRIInstruction(cg, cg->comp()->target().is64Bit() ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI, node, codeReg, 0);
13075
//Get the nesting depth
13076
cursor = generateRREInstruction(cg, TR::InstOpCode::ETND, node, codeReg, codeReg);
13077
13078
generateRIInstruction(cg, TR::InstOpCode::CHI, node, codeReg, 0);
13079
//branch on zero to done label
13080
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_MASK8, node, labelDone);
13081
generateRIInstruction(cg, cg->comp()->target().is64Bit() ? TR::InstOpCode::LGHI : TR::InstOpCode::LHI, node, codeReg, 0x100);
13082
TR::MemoryReference *codeMR = generateS390MemoryReference(codeReg, 0, cg);
13083
cursor = generateSInstruction(cg, TR::InstOpCode::TABORT, node, codeMR);
13084
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, labelDone);
13085
cg->stopUsingRegister(codeReg);
13086
return NULL;
13087
}
13088
13089
/**
13090
* \details
13091
* Resolved and unresolved reference field load get two slightly different sequences.
13092
*
13093
* Resolved reference fields load sequence for -XnocompressedRefs:
13094
* \verbatim
13095
*
13096
* Label: startICF
13097
* LG R_obj, Ref_field_MemRef
13098
*
13099
* // range check with implicit CS cycle check
13100
* CLG R_obj, EvacuateBase(R_vmthread)
13101
* BRC COND_BL, doneLabel
13102
* CLG R_obj, EvacuateTop(R_vmthread)
13103
* BRC COND_BH, doneLabel
13104
*
13105
* LAY R_addr, Ref_field_MemRef
13106
* BRC helper_call_snippet
13107
*
13108
* Label: jitReadBarrier return label
13109
* // reload evacuated reference
13110
* LG R_obj, 0(R_addr)
13111
*
13112
* doneLabel: endICF
13113
* \endverbatim
13114
*
13115
*
13116
* Unresolved reference fields load sequence for -XnocompressedRefs:
13117
* \verbatim
13118
*
13119
* Label: startICF
13120
* LAY R_addr, Ref_field_MemRef
13121
* LG R_obj, 0(R_addr)
13122
*
13123
* // range check with implicit CS cycle check
13124
* CLG R_obj, EvacuateBase(R_vmthread)
13125
* BRC COND_BL, doneLabel
13126
* CLG R_obj, EvacuateTop(R_vmthread)
13127
* BRC COND_BH, doneLabel
13128
*
13129
* BRC helper_call_snippet
13130
*
13131
* Label: jitReadBarrier return label
13132
* // reload evacuated reference
13133
* LG R_obj, 0(R_addr)
13134
*
13135
* doneLabel: endICF
13136
* \endverbatim
13137
*
13138
* If compressed pointer is enabled, the LG instructions above are replaced by LLGF+SLLG.
13139
*/
13140
TR::Register *
13141
J9::Z::TreeEvaluator::generateSoftwareReadBarrier(TR::Node* node,
13142
TR::CodeGenerator* cg,
13143
TR::Register* resultReg,
13144
TR::MemoryReference* loadMemRef,
13145
TR::RegisterDependencyConditions* deps,
13146
bool produceUnshiftedValue)
13147
{
13148
TR::Compilation* comp = cg->comp();
13149
TR::Register* fieldAddrReg = cg->allocateRegister();
13150
TR::RealRegister* raReg = cg->machine()->getRealRegister(cg->getReturnAddressRegister());
13151
bool isCompressedRef = comp->useCompressedPointers();
13152
13153
if (!isCompressedRef)
13154
{
13155
TR::TreeEvaluator::checkAndSetMemRefDataSnippetRelocationType(node, cg, loadMemRef);
13156
}
13157
13158
const bool fieldUnresolved = node->getSymbolReference()->isUnresolved();
13159
if (comp->getOption(TR_TraceCG))
13160
{
13161
traceMsg(comp, "SoftwareReadBarrier: symbol is %s. Compr shift %d. RA reg: %s Entry reg %s\n",
13162
fieldUnresolved ? "unresolved" : "resolved",
13163
TR::Compiler->om.compressedReferenceShift(),
13164
raReg->getRegisterName(comp),
13165
cg->getEntryPointRealRegister()->getRegisterName(comp));
13166
}
13167
13168
bool notInsideICF = (deps == NULL);
13169
if (notInsideICF)
13170
{
13171
deps = generateRegisterDependencyConditions(0, 6, cg);
13172
TR::LabelSymbol* startICFLabel = generateLabelSymbol(cg);
13173
13174
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, startICFLabel);
13175
startICFLabel->setStartInternalControlFlow();
13176
}
13177
13178
TR::Register* dummyRegForRA = cg->allocateRegister();
13179
TR::Register* dummyRegForEntry = cg->allocateRegister();
13180
dummyRegForRA->setPlaceholderReg();
13181
dummyRegForEntry->setPlaceholderReg();
13182
13183
deps->addPostCondition(resultReg, TR::RealRegister::AssignAny);
13184
deps->addPostCondition(fieldAddrReg, comp->target().isLinux() ? TR::RealRegister::GPR3 : TR::RealRegister::GPR2);
13185
deps->addPostCondition(dummyRegForRA, cg->getReturnAddressRegister());
13186
deps->addPostCondition(dummyRegForEntry, cg->getEntryPointRegister());
13187
13188
cg->stopUsingRegister(dummyRegForRA);
13189
cg->stopUsingRegister(dummyRegForEntry);
13190
13191
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
13192
bool shouldShift = (shiftAmount != 0) && !produceUnshiftedValue;
13193
TR::InstOpCode::Mnemonic loadOpCode = isCompressedRef ? TR::InstOpCode::LLGF: TR::InstOpCode::LG;
13194
13195
if (fieldUnresolved)
13196
{
13197
generateRXInstruction(cg, TR::InstOpCode::LA, node, fieldAddrReg, loadMemRef);
13198
generateRXInstruction(cg, loadOpCode, node, resultReg, generateS390MemoryReference(fieldAddrReg, 0, cg));
13199
}
13200
else
13201
{
13202
generateRXInstruction(cg, loadOpCode, node, resultReg, loadMemRef);
13203
}
13204
13205
deps->addAssignAnyPostCondOnMemRef(loadMemRef);
13206
13207
TR::Register* vmReg = cg->getLinkage()->getMethodMetaDataRealRegister();
13208
13209
TR::MemoryReference* baseMemRef = generateS390MemoryReference(vmReg, TR::Compiler->vm.thisThreadGetEvacuateBaseAddressOffset(comp), cg);
13210
TR::MemoryReference* topMemRef = generateS390MemoryReference(vmReg, TR::Compiler->vm.thisThreadGetEvacuateTopAddressOffset(comp), cg);
13211
13212
// Range check with implicit software CS status check.
13213
TR::LabelSymbol* doneLabel = generateLabelSymbol(cg);
13214
generateRXInstruction(cg, comp->useCompressedPointers() ? TR::InstOpCode::CL : TR::InstOpCode::CLG, node, resultReg, baseMemRef);
13215
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BL, node, doneLabel);
13216
generateRXInstruction(cg, comp->useCompressedPointers() ? TR::InstOpCode::CL : TR::InstOpCode::CLG, node, resultReg, topMemRef);
13217
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BH, node, doneLabel);
13218
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "readBar/helperCall"), 1, TR::DebugCounter::Cheap);
13219
if (!fieldUnresolved)
13220
{
13221
generateRXInstruction(cg, TR::InstOpCode::LA, node, fieldAddrReg, generateS390MemoryReference(*loadMemRef, 0, cg));
13222
}
13223
13224
TR::LabelSymbol* callLabel = generateLabelSymbol(cg);
13225
TR::LabelSymbol* callEndLabel = generateLabelSymbol(cg);
13226
TR::Instruction *gcPoint = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, callLabel);
13227
gcPoint->setNeedsGCMap(0);
13228
auto readBarHelperSnippet = new (cg->trHeapMemory()) TR::S390HelperCallSnippet(cg, node, callLabel,
13229
cg->symRefTab()->findOrCreateRuntimeHelper(TR_softwareReadBarrier),
13230
callEndLabel);
13231
cg->addSnippet(readBarHelperSnippet);
13232
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, callEndLabel);
13233
13234
// Reload the object after helper call.
13235
generateRXInstruction(cg, loadOpCode, node, resultReg, generateS390MemoryReference(fieldAddrReg, 0, cg));
13236
TR::Instruction* cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);
13237
if (notInsideICF)
13238
{
13239
cursor->setDependencyConditions(deps);
13240
doneLabel->setEndInternalControlFlow();
13241
}
13242
13243
// produce decompressed value in the end
13244
if (shouldShift)
13245
{
13246
generateRSInstruction(cg, TR::InstOpCode::SLLG, node, resultReg, resultReg, shiftAmount);
13247
}
13248
13249
cg->generateDebugCounter(TR::DebugCounter::debugCounterName(comp, "readBar/total"), 1, TR::DebugCounter::Cheap);
13250
cg->stopUsingRegister(fieldAddrReg);
13251
13252
return resultReg;
13253
}
13254
13255
/**
 * \brief
 *    Evaluates an arraycopy node by dispatching to the matching arraycopy evaluator.
 *
 * \param node
 *    The arraycopy node to evaluate.
 *
 * \param cg
 *    The code generator used to emit instructions.
 *
 * \return
 *    Always NULL; the result of the delegated evaluator is not propagated.
 */
TR::Register *
J9::Z::TreeEvaluator::arraycopyEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   const bool copiesReferences = node->isReferenceArrayCopy();

   if (!copiesReferences)
      {
      // Anything that is not a reference arraycopy is handed to the OMR connector implementation.
      OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);
      return NULL;
      }

   // Reference arraycopies are handled by the dedicated reference arraycopy evaluator.
   TR::TreeEvaluator::referenceArraycopyEvaluator(node, cg);
   return NULL;
   }
13268
13269
/**
 * \brief
 *    Evaluates a reference arraycopy node.
 *
 *    The node's children are, in order: source object, destination object, source address,
 *    destination address and length. When the node is flagged as not requiring an array store
 *    check, the copy is emitted through primitiveArraycopyEvaluator followed by
 *    genWrtbarForArrayCopy; otherwise it is emitted through genArrayCopyWithArrayStoreCHK.
 *
 * \param node
 *    The reference arraycopy node to evaluate.
 *
 * \param cg
 *    The code generator used to emit instructions.
 *
 * \return
 *    Always NULL.
 */
TR::Register *
J9::Z::TreeEvaluator::referenceArraycopyEvaluator(TR::Node * node, TR::CodeGenerator * cg)
   {
   TR::Node* srcObjNode  = node->getChild(0);
   TR::Node* dstObjNode  = node->getChild(1);
   TR::Node* srcAddrNode = node->getChild(2);
   TR::Node* dstAddrNode = node->getChild(3);
   TR::Node* lengthNode  = node->getChild(4);

   // The two object references are needed on both paths, so evaluate them up front.
   TR::Register* srcObjReg = cg->evaluate(srcObjNode);
   TR::Register* dstObjReg = cg->evaluate(dstObjNode);

   if (node->chkNoArrayStoreCheckArrayCopy())
      {
      TR_ASSERT_FATAL(node->getArrayCopyElementType() == TR::Address, "Reference arraycopy element type should be TR::Address but was '%s'", node->getArrayCopyElementType().toString());

      // No store check needed: emit the copy, then the write barrier for the copy.
      // The address/length children are passed on to primitiveArraycopyEvaluator and are not
      // evaluated or dereferenced here.
      primitiveArraycopyEvaluator(node, cg, srcAddrNode, dstAddrNode, lengthNode);
      genWrtbarForArrayCopy(node, srcObjReg, dstObjReg, srcAddrNode->isNonNull(), cg);
      }
   else
      {
      // A store check is required: evaluate the remaining children here and emit the checked copy.
      TR::Register* srcAddrReg = cg->evaluate(srcAddrNode);
      TR::Register* dstAddrReg = cg->evaluate(dstAddrNode);
      TR::Register* lengthReg  = cg->evaluate(lengthNode);

      genArrayCopyWithArrayStoreCHK(node, srcObjReg, dstObjReg, srcAddrReg, dstAddrReg, lengthReg, cg);

      cg->decReferenceCount(srcAddrNode);
      cg->decReferenceCount(dstAddrNode);
      cg->decReferenceCount(lengthNode);
      }

   cg->decReferenceCount(srcObjNode);
   cg->decReferenceCount(dstObjNode);
   return NULL;
   }
13304
13305
void
13306
J9::Z::TreeEvaluator::forwardArrayCopySequenceGenerator(TR::Node *node, TR::CodeGenerator *cg,
13307
TR::Register *byteSrcReg, TR::Register *byteDstReg,
13308
TR::Register *byteLenReg, TR::Node *byteLenNode,
13309
TR_S390ScratchRegisterManager *srm, TR::LabelSymbol *mergeLabel)
13310
{
13311
bool mustGenerateOOLGuardedLoadPath = TR::Compiler->om.readBarrierType() != gc_modron_readbar_none &&
13312
node->getArrayCopyElementType() == TR::Address;
13313
if (mustGenerateOOLGuardedLoadPath)
13314
{
13315
// It might be possible that we have constant byte length load and it is forward array copy.
13316
// In this case if we need to do guarded Load then need to evaluate byteLenNode.
13317
if (byteLenReg == NULL)
13318
byteLenReg = cg->gprClobberEvaluate(byteLenNode);
13319
TR::TreeEvaluator::genGuardedLoadOOL(node, cg, byteSrcReg, byteDstReg, byteLenReg, mergeLabel, srm, true);
13320
}
13321
13322
OMR::TreeEvaluatorConnector::forwardArrayCopySequenceGenerator(node, cg, byteSrcReg, byteDstReg, byteLenReg, byteLenNode, srm, mergeLabel);
13323
}
13324
13325
/**
 * \brief
 *    Emits the backward arraycopy sequence, first emitting the guarded-load path
 *    (via genGuardedLoadOOL) when a read barrier is in use and the elements are addresses.
 *
 * \param node         The arraycopy node.
 * \param cg           The code generator used to emit instructions.
 * \param byteSrcReg   Register holding the source address.
 * \param byteDstReg   Register holding the destination address.
 * \param byteLenReg   Register holding the length.
 * \param byteLenNode  The length node.
 * \param srm          Scratch register manager.
 * \param mergeLabel   Label passed through to the guarded-load path and to the OMR sequence generator.
 *
 * \return
 *    The register dependency conditions returned by the OMR backward sequence generator.
 */
TR::RegisterDependencyConditions *
J9::Z::TreeEvaluator::backwardArrayCopySequenceGenerator(TR::Node *node, TR::CodeGenerator *cg,
                                                         TR::Register *byteSrcReg, TR::Register *byteDstReg,
                                                         TR::Register *byteLenReg, TR::Node *byteLenNode,
                                                         TR_S390ScratchRegisterManager *srm, TR::LabelSymbol *mergeLabel)
   {
   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none
       && node->getArrayCopyElementType() == TR::Address)
      {
      // The final argument is false here, whereas the forward generator passes true.
      TR::TreeEvaluator::genGuardedLoadOOL(node, cg, byteSrcReg, byteDstReg, byteLenReg, mergeLabel, srm, false);
      }

   TR::RegisterDependencyConditions *omrDeps =
      OMR::TreeEvaluatorConnector::backwardArrayCopySequenceGenerator(node, cg, byteSrcReg, byteDstReg, byteLenReg, byteLenNode, srm, mergeLabel);
   return omrDeps;
   }
13340
13341
void
13342
J9::Z::TreeEvaluator::generateLoadAndStoreForArrayCopy(TR::Node *node, TR::CodeGenerator *cg,
13343
TR::MemoryReference *srcMemRef, TR::MemoryReference *dstMemRef,
13344
TR_S390ScratchRegisterManager *srm,
13345
TR::DataType elenmentType, bool needsGuardedLoad,
13346
TR::RegisterDependencyConditions* deps)
13347
13348
{
13349
TR::Compilation *comp = cg->comp();
13350
13351
if ((node->getArrayCopyElementType() == TR::Address)
13352
&& needsGuardedLoad
13353
&& (!comp->target().cpu.supportsFeature(OMR_FEATURE_S390_GUARDED_STORAGE)))
13354
{
13355
TR::Register* resultReg = srm->findOrCreateScratchRegister();
13356
TR::TreeEvaluator::generateSoftwareReadBarrier(node, cg, resultReg, srcMemRef, deps, true);
13357
TR::InstOpCode::Mnemonic storeOp = TR::InstOpCode::ST;
13358
if (comp->target().is64Bit() && !comp->useCompressedPointers())
13359
{
13360
storeOp = TR::InstOpCode::STG;
13361
}
13362
13363
generateRXInstruction(cg, storeOp, node, resultReg, dstMemRef);
13364
srm->reclaimScratchRegister(resultReg);
13365
}
13366
else
13367
{
13368
OMR::TreeEvaluatorConnector::generateLoadAndStoreForArrayCopy(node, cg, srcMemRef, dstMemRef, srm, elenmentType, needsGuardedLoad, deps);
13369
}
13370
}
13371
13372
/**
 * \brief
 *    Emits an inlined vector sequence that writes the decimal characters of an int or long
 *    into a byte array, one byte per character (Latin-1 / compressed strings).
 *
 *    The node's children are, in order: the input value, the string size, and the destination
 *    byte array. The sequence is only generated when the calling method (the inlined call site's
 *    method, or the method being compiled) is recognized as Integer.toString or Long.toString.
 *
 *    Generated code outline:
 *      - input == 0  : store '0' (48) and exit.
 *      - input  < 0  : store '-' (45), advance the buffer pointer by one, shrink the size by one.
 *      - convert the value to packed decimal (VCVD/VCVDG), unpack to one digit per byte
 *        (VUPKZL/VUPKZH), clear the zone bits, add 48 to every byte, and store the rightmost
 *        bytes with VSTRLR (plus VSTRL for the low 16 digits when a long has more than 16 digits).
 *
 * \param node
 *    The call node being inlined.
 *
 * \param cg
 *    The code generator used to emit instructions.
 *
 * \return
 *    NULL if the caller is not a recognized toString method (nothing is generated); otherwise
 *    the result of node->setRegister() on a register that is loaded with 0 on every path.
 */
TR::Register*
J9::Z::TreeEvaluator::inlineIntegerToCharsForLatin1Strings(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();

   TR_ResolvedMethod *candidateToStringMethod = NULL;
   if (node->getInlinedSiteIndex() != -1)
      {
      candidateToStringMethod = comp->getInlinedResolvedMethod(node->getInlinedSiteIndex());
      }
   else
      {
      candidateToStringMethod = comp->getCurrentMethod();
      }
   // If method caller of Integer.stringSize or Long.stringSize is not Integer.toString(I) or Long.toString(J), then we don't inline
   if (candidateToStringMethod->getRecognizedMethod() != TR::java_lang_Long_toString &&
         candidateToStringMethod->getRecognizedMethod() != TR::java_lang_Integer_toString)
      {
      return NULL;
      }

   if (comp->getOption(TR_TraceCG))
      {
      traceMsg(comp, "inlineIntegerToCharsForLatin1Strings (compressed strings)\n");
      }
   TR::Node *inputValueNode = node->getChild(0);
   TR::Node *stringSizeNode = node->getChild(1);
   TR::Node *byteArrayNode = node->getChild(2);

   // stringSizeReg and byteArrayReg are modified by the generated sequence, hence the clobber evaluates.
   TR::Register *inputValueReg = cg->evaluate(inputValueNode);
   TR::Register *stringSizeReg = cg->gprClobberEvaluate(stringSizeNode, true);
   TR::Register *byteArrayReg = cg->gprClobberEvaluate(byteArrayNode, true);

   bool inputIs64Bit = inputValueNode->getDataType() == TR::Int64;

   // Points at the first array element. Every store below uses a copy of this reference, so
   // advancing byteArrayReg moves the position that subsequent stores write to.
   TR::MemoryReference *destinationArrayMemRef = generateS390MemoryReference(byteArrayReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);

   TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
   cFlowRegionStart->setStartInternalControlFlow();
   TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
   cFlowRegionEnd->setEndInternalControlFlow();

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);

   // If input is 0, then do the work in GPRs and exit.
   // TODO: Measure performance of [1,9] here vs vanilla Java code to see what's faster. If vanilla java, then we should bail from here accordingly.
   // (See https://github.ibm.com/runtimes/openj9/pull/385#discussion_r5004355 for discussion)
   TR::Register *numCharsRemainingReg = cg->allocateRegister(); // this is also the index of the position of the first char after we have populated the buffer
   TR::LabelSymbol *nonZeroInputLabel = generateLabelSymbol(cg);
   generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BNE, nonZeroInputLabel, false);
   generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 48);
   generateRILInstruction(cg, TR::InstOpCode::IILF, node, numCharsRemainingReg, 0);
   generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, nonZeroInputLabel);

   TR::LabelSymbol *handleDigitsLabel = generateLabelSymbol(cg);
   // First handle negative sign if needed. Then proceed to handleDigitsLabel to process the digits.
   // For a negative input: drop the sign from the digit count, store '-' (45) and step past it.
   generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BNL, handleDigitsLabel, false);
   generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
   generateSIInstruction(cg, TR::InstOpCode::MVI, node, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 45);
   generateRILInstruction(cg, TR::InstOpCode::getAddImmOpCode(), node, byteArrayReg, 1);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleDigitsLabel);
   TR::Register *intToPDReg = cg->allocateRegister(TR_VRF);
   // Load all digits into packed decimal format.
   generateVRIiInstruction(cg, inputIs64Bit ? TR::InstOpCode::VCVDG : TR::InstOpCode::VCVD, node, intToPDReg, inputValueReg, inputIs64Bit ? 19 : 10, 0x1);
   TR::Register *maskReg = cg->allocateRegister(TR_VRF);
   TR::Register *zonedDecimalReg1 = cg->allocateRegister(TR_VRF);
   TR::Register *zonedDecimalReg2 = NULL; // only allocated on the 64-bit path

   TR::RegisterDependencyConditions *dependencies = NULL;

   if (inputIs64Bit)
      {
      // If the long input value is greater than 16 digits in length, then we need two vector registers to do the conversion. So jump to lengthGreaterThan16Label
      // to handle that case.
      TR::LabelSymbol *lengthGreaterThan16Label = generateLabelSymbol(cg);
      generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, stringSizeReg, 16, TR::InstOpCode::COND_BH, lengthGreaterThan16Label, false);
      // This instruction unpacks the packed decimal in intToPDReg to zoned decimal format. It will do this for the rightmost 16 digits.
      // It populates the higher 4 bits of each byte with the "zone" bits and the bottom 4 bits with each digit from the packed decimal sequence.
      generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalReg1, intToPDReg, 0 /*M3*/);
      // Now we zero out the zone bits because we don't need them.
      generateVRIbInstruction(cg, TR::InstOpCode::VGM, node, maskReg, 4, 7, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0, 0, 0);
      // Each byte now holds a single decimal digit. Add 48 to each byte to convert each digit to ASCII.
      generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, maskReg, 48, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0);
      // For the purposes of this evaluator, stringSizeReg contains the length of the resulting string. Ex if input is 2147483647, stringSizeReg will be 10.
      // When storing using VSTRLR, the index register specifying the first byte to store is 0 based. Meanwhile
      // stringSizeReg is 1 based. So we must first subtract 1 from stringSizeReg so the calculation is done correctly by the instruction.
      // ex. if we specify 10 in VSTRLR, the instruction will do 15-10=5 to figure out that it needs to store bytes 5 to 15 instead of 6 to 15.
      generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
      // The memory reference should already be pointing to where the most significant digit is to be stored. So we just have to create the VSTRLR instruction now.
      generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalReg1, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));
      // This path branches directly to cFlowRegionEnd and therefore bypasses the load of the return value
      // that is emitted just before the end label. Load the 0 here as well so the register returned by this
      // evaluator is defined on every path (the zero-input path above does the same).
      generateRILInstruction(cg, TR::InstOpCode::IILF, node, numCharsRemainingReg, 0);
      generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);

      // If we end up here, then there are more than 16 digits in the input value. This instruction sequence is similar to the one above, except that
      // we handle the 1 to 3 of the most significant digits in a separate register. We then store the value in this register before storing the remainder of the digits.
      generateS390LabelInstruction(cg, TR::InstOpCode::label, node, lengthGreaterThan16Label);
      zonedDecimalReg2 = cg->allocateRegister(TR_VRF); // holds the most significant digits. can be anywhere from 1-3 digits.
      generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalReg1, intToPDReg, 0 /*M3*/);
      generateVRRkInstruction(cg, TR::InstOpCode::VUPKZH, node, zonedDecimalReg2, intToPDReg, 0 /*M3*/);
      // Now we zero out the zone bits because we don't need them.
      generateVRIbInstruction(cg, TR::InstOpCode::VGM, node, maskReg, 4, 7, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0, 0, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalReg2, zonedDecimalReg2, maskReg, 0, 0, 0);
      // Now add 48 to each byte.
      generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, maskReg, 48, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalReg2, zonedDecimalReg2, maskReg, 0);
      // Now calculate how many digits are in the top half of the zoned decimal (i.e. zonedDecimalReg2) --> (stringSizeReg - 16) - 1 = stringSizeReg - 17
      generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 17);
      // The memory reference should already be pointing to where the most significant digit is to be stored. So we just have to create the VSTRLR instruction now.
      generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalReg2, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));
      // Increment byteArrayReg by stringSizeReg+1 to move buffer pointer forward so we can write remaining bytes.
      generateRILInstruction(cg, TR::InstOpCode::AFI, node, stringSizeReg, 1);
      generateRRInstruction(cg, TR::InstOpCode::getAddRegWidenOpCode(), node, byteArrayReg, stringSizeReg);
      // Store all 16 low-order digits (VSTRL with an immediate index of 15).
      generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalReg1, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 15);

      // One more post condition than the 32-bit path, for zonedDecimalReg2.
      dependencies = generateRegisterDependencyConditions(0, 8, cg);
      dependencies->addPostCondition(zonedDecimalReg2, TR::RealRegister::AssignAny);
      }
   else
      {
      // This instruction unpacks the packed decimal in intToPDReg to zoned decimal format. It will do this for the rightmost 16 digits.
      // It populates the higher 4 bits of each byte with the "zone" bits and the bottom 4 bits with each digit from the packed decimal sequence.
      generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalReg1, intToPDReg, 0 /*M3*/);
      // Now we zero out the zone bits because we don't need them.
      generateVRIbInstruction(cg, TR::InstOpCode::VGM, node, maskReg, 4, 7, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0, 0, 0);
      // Each byte now holds a single decimal digit. Add 48 to each byte to convert each digit to ASCII.
      generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, maskReg, 48, 0);
      generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalReg1, zonedDecimalReg1, maskReg, 0);

      // For the purposes of this evaluator, stringSizeReg contains the length of the resulting string. Ex if input is 2147483647, stringSizeReg will be 10.
      // When storing using VSTRLR, the index register specifying the first byte to store is 0 based. Meanwhile
      // stringSizeReg is 1 based. So we must first subtract 1 from stringSizeReg so the calculation is done correctly by the instruction.
      // ex. if we specify 10 in VSTRLR, the instruction will do 15-10=5 to figure out that it needs to store bytes 5 to 15 instead of 6 to 15.
      generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
      // The memory reference should already be pointing to where the most significant digit is to be stored. So we just have to create the VSTRLR instruction now.
      generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalReg1, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));

      dependencies = generateRegisterDependencyConditions(0, 7, cg);
      }

   dependencies->addPostCondition(inputValueReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(byteArrayReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(stringSizeReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(numCharsRemainingReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(intToPDReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(maskReg, TR::RealRegister::AssignAny);
   dependencies->addPostCondition(zonedDecimalReg1, TR::RealRegister::AssignAny);

   // For the purposes of inlining Integer.toString and Long.toString, the return value of the getChars API will always be 0. So we load it here manually.
   // This load is reached only by the paths that fall through to the end label; paths that branch to it load the value themselves.
   generateRILInstruction(cg, TR::InstOpCode::IILF, node, numCharsRemainingReg, 0);

   generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);

   cg->decReferenceCount(inputValueNode);
   cg->decReferenceCount(stringSizeNode);
   cg->decReferenceCount(byteArrayNode);

   cg->stopUsingRegister(intToPDReg);
   cg->stopUsingRegister(maskReg);
   cg->stopUsingRegister(zonedDecimalReg1);

   cg->stopUsingRegister(byteArrayReg);
   cg->stopUsingRegister(stringSizeReg);
   // zonedDecimalReg2 only exists for 64-bit inputs; do not hand a NULL register to the code generator.
   if (zonedDecimalReg2 != NULL)
      {
      cg->stopUsingRegister(zonedDecimalReg2);
      }

   return node->setRegister(numCharsRemainingReg);
   }
13545
13546
TR::Register*
13547
J9::Z::TreeEvaluator::inlineIntegerToCharsForUTF16Strings(TR::Node *node, TR::CodeGenerator *cg)
13548
{
13549
TR::Compilation *comp = cg->comp();
13550
13551
TR_ResolvedMethod *candidateToStringMethod = NULL;
13552
if (node->getInlinedSiteIndex() != -1)
13553
{
13554
candidateToStringMethod = comp->getInlinedResolvedMethod(node->getInlinedSiteIndex());
13555
}
13556
else
13557
{
13558
candidateToStringMethod = comp->getCurrentMethod();
13559
}
13560
// If method caller of Integer.stringSize or Long.stringSize is not Integer.toString(I) or Long.toString(J), then we don't inline
13561
if (candidateToStringMethod->getRecognizedMethod() != TR::java_lang_Long_toString &&
13562
candidateToStringMethod->getRecognizedMethod() != TR::java_lang_Integer_toString)
13563
{
13564
return NULL;
13565
}
13566
13567
if (comp->getOption(TR_TraceCG))
13568
{
13569
traceMsg(comp, "inlineIntegerToCharsForUTF16Strings (decompressed strings)\n");
13570
}
13571
TR::Node *inputValueNode = node->getChild(0);
13572
TR::Node *stringSizeNode = node->getChild(1);
13573
TR::Node *byteArrayNode = node->getChild(2);
13574
13575
TR::Register *inputValueReg = cg->evaluate(inputValueNode);
13576
TR::Register *stringSizeReg = cg->gprClobberEvaluate(stringSizeNode, true);
13577
TR::Register *byteArrayReg = cg->gprClobberEvaluate(byteArrayNode, true);
13578
13579
bool inputIs64Bit = inputValueNode->getDataType() == TR::Int64;
13580
13581
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
13582
cFlowRegionStart->setStartInternalControlFlow();
13583
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
13584
cFlowRegionEnd->setEndInternalControlFlow();
13585
13586
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
13587
13588
// If input is 0, just do the work in GPRs and exit.
13589
TR::Register *numCharsRemainingReg = cg->allocateRegister(); // this is also the index of the position of the first char after we have populated the buffer
13590
TR::LabelSymbol *nonZeroInputLabel = generateLabelSymbol(cg);
13591
generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BNE, nonZeroInputLabel, false);
13592
TR::MemoryReference *destinationArrayMemRef = generateS390MemoryReference(byteArrayReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
13593
generateSILInstruction(cg, TR::InstOpCode::MVHHI, node, destinationArrayMemRef, 48);
13594
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
13595
13596
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, nonZeroInputLabel);
13597
13598
TR::LabelSymbol *handleDigitsLabel = generateLabelSymbol(cg);
13599
// Handle negative sign first.
13600
generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BNL, handleDigitsLabel, false);
13601
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
13602
generateSILInstruction(cg, TR::InstOpCode::MVHHI, node, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 45);
13603
generateRILInstruction(cg, TR::InstOpCode::getAddImmOpCode(), node, byteArrayReg, 2);
13604
13605
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, handleDigitsLabel);
13606
13607
TR::Register *intToPDReg = cg->allocateRegister(TR_VRF);
13608
// Load all digits into packed decimal format.
13609
generateVRIiInstruction(cg, inputIs64Bit ? TR::InstOpCode::VCVDG : TR::InstOpCode::VCVD, node, intToPDReg, inputValueReg, inputIs64Bit ? 19 : 10, 0x1);
13610
TR::Register *maskReg = cg->allocateRegister(TR_VRF);
13611
TR::Register *asciiOffset = cg->allocateRegister(TR_VRF);
13612
generateVRIbInstruction(cg, TR::InstOpCode::VGM, node, maskReg, 4, 7, 0);
13613
generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, asciiOffset, 48, 0);
13614
TR::Register *zonedDecimalRegLower = cg->allocateRegister(TR_VRF);
13615
13616
TR::LabelSymbol *moreThan9DigitsLabel = generateLabelSymbol(cg);
13617
// Depending on the length of the resulting string, we will need different amounts of vector registers to do the conversion. We test for that
13618
// here and then branch to a handcrafted routine for each scenario. This creates some redundancy in the code generated (hence increasing footprint),
13619
// however it reduces checks/branches during runtime preventing bottlenecks.
13620
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, stringSizeReg, 8, TR::InstOpCode::COND_BH, moreThan9DigitsLabel, false);
13621
13622
// In this scenario we only need one vector register to do the conversion as there are less than 9 digits to process
13623
// If we are here, then 0 =< stringSizeReg <= 8
13624
// Unpack packed decimal into zoned decimal. This should take a maximum of 8 bytes.
13625
generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalRegLower, intToPDReg, 0 /*M3*/);
13626
// Remove the zone bits
13627
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalRegLower, zonedDecimalRegLower, maskReg, 0, 0, 0);
13628
// Now the rightmost 10 bytes should hold the entire integer we care about. So let's add 48 to each byte.
13629
generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalRegLower, zonedDecimalRegLower, asciiOffset, 0);
13630
13631
// For the purposes of this evaluator, stringSizeReg contains the length of the resulting string. Ex if input is 2147483647, stringSizeReg will be 10.
13632
// When storing using VSTRL, the index register specifying the first byte to store is 0 based. Meanwhile
13633
// stringSizeReg is 1 based. So we must first subtract 1 from stringSizeReg so the calculation is done correctly by the instruction.
13634
// ex. if we specify 10 in VSTRL, the instruction will do 15-10=5 to figure out that it needs to store bytes 5 to 15 instead of 6 to 15.
13635
// Since each character is 2 bytes in length, we must first multiply by 2.
13636
generateRSInstruction(cg, TR::InstOpCode::SLA, node, stringSizeReg, 1);
13637
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
13638
// Finally, unpack the data in zonedDecimalReg1 using VUPL. The result should take no more than 16 bytes.
13639
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, zonedDecimalRegLower, zonedDecimalRegLower, 0, 0, 0);
13640
// The memory reference should already be pointing to where the most significant digit is to be stored. So we just have to create the VSTRL instruction now.
13641
generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalRegLower, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));
13642
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
13643
13644
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, moreThan9DigitsLabel);
13645
TR::LabelSymbol *moreThan16DigitsLabel = generateLabelSymbol(cg);
13646
generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, stringSizeReg, 16, TR::InstOpCode::COND_BH, moreThan16DigitsLabel, false);
13647
// Need two intermediate vector registers to process input values that are greater than 8 digits and less than 17.
13648
// For Integers between the length of 8 and 16, we must do as above but also use VUPLH to load the higher order digits. Note that the result
13649
// of converting packed decimal to zoned decimal will fit in 16 bytes, so we only need to use VUPKZL still.
13650
generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalRegLower, intToPDReg, 0 /*M3*/);
13651
// Remove the zone bits
13652
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalRegLower, zonedDecimalRegLower, maskReg, 0, 0, 0);
13653
// Now the rightmost 16 bytes should hold the entire intege we care about. So let's add 48 to each byte.
13654
generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalRegLower, zonedDecimalRegLower, asciiOffset, 0);
13655
// We know zonedDecimalRegLower will be full when we unpack. So we store all bytes in it. But we don't know if
13656
// zonedDecimalRegLowerUpperHalf will be full. So we must calculate "stringSize-8" to figure out how many extra digits remain.
13657
// ex if stringSize = 10, then 10-8 = 2 digits in upper half. 15 - 2*2-1 = 12 --> 12,13,14,15 are the bytes stored to memory.
13658
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 8);
13659
generateRSInstruction(cg, TR::InstOpCode::SLA, node, stringSizeReg, 1);
13660
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
13661
// now stringSize will have position of byte in upper half.
13662
// Finally, unpack the higher 8 bytes in zonedDecimalReg1 using VUPLH.
13663
TR::Register *zonedDecimalRegLowerUpperHalf = cg->allocateRegister(TR_VRF);
13664
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, zonedDecimalRegLowerUpperHalf, zonedDecimalRegLower, 0, 0, 0);
13665
// And unpack the lower 8 bytes using VUPLL
13666
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, zonedDecimalRegLower, zonedDecimalRegLower, 0, 0, 0);
13667
// Store the higher half first as it holds the most significant digits.
13668
generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalRegLowerUpperHalf, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));
13669
13670
// Advance the memoryReference pointer then store the bottom half
13671
generateRILInstruction(cg, TR::InstOpCode::AFI, node, stringSizeReg, 1);
13672
generateRRInstruction(cg, TR::InstOpCode::getAddRegWidenOpCode(), node, byteArrayReg, stringSizeReg);
13673
generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalRegLower, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 15);
13674
generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd);
13675
13676
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, moreThan16DigitsLabel);
13677
// In this scenario we have between 17 and 19 digits. The logic is similar to before, except we use VUPKZH to unpack the most significant digits (could be anywhere from 1 to 3 digits).
13678
// We first unpack into lower and upper zoned decimal halves.
13679
generateVRRkInstruction(cg, TR::InstOpCode::VUPKZL, node, zonedDecimalRegLower, intToPDReg, 0 /*M3*/);
13680
TR::Register *zonedDecimalRegHigher = cg->allocateRegister(TR_VRF);
13681
generateVRRkInstruction(cg, TR::InstOpCode::VUPKZH, node, zonedDecimalRegHigher, intToPDReg, 0 /*M3*/);
13682
// Now remove the zone bits in both
13683
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalRegLower, zonedDecimalRegLower, maskReg, 0, 0, 0);
13684
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, zonedDecimalRegHigher, zonedDecimalRegHigher, maskReg, 0, 0, 0);
13685
// And add 0x30 to all
13686
generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalRegLower, zonedDecimalRegLower, asciiOffset, 0);
13687
generateVRRcInstruction(cg, TR::InstOpCode::VA, node, zonedDecimalRegHigher, zonedDecimalRegHigher, asciiOffset, 0);
13688
// Now unpack the higher half --> i.e. process the most significant digits (anywhere from 1 to 3 digits)
13689
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, zonedDecimalRegHigher, zonedDecimalRegHigher, 0, 0, 0);
13690
// Calculate how many digits we need to store in this higher half.
13691
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 16);
13692
generateRSInstruction(cg, TR::InstOpCode::SLA, node, stringSizeReg, 1);
13693
generateRILInstruction(cg, TR::InstOpCode::SLFI, node, stringSizeReg, 1);
13694
// Store that many bytes from this register
13695
generateVRSdInstruction(cg, TR::InstOpCode::VSTRLR, node, stringSizeReg, zonedDecimalRegHigher, generateS390MemoryReference(*destinationArrayMemRef, 0, cg));
13696
// Advance buffer pointer
13697
generateRILInstruction(cg, TR::InstOpCode::AFI, node, stringSizeReg, 1);
13698
generateRRInstruction(cg, TR::InstOpCode::getAddRegWidenOpCode(), node, byteArrayReg, stringSizeReg);
13699
// unpack zonedDecimalRegLower into upper and lower halves --> i.e. we now process the least significant 16 digits.
13700
generateVRRaInstruction(cg, TR::InstOpCode::VUPLH, node, zonedDecimalRegLowerUpperHalf, zonedDecimalRegLower, 0, 0, 0);
13701
generateVRRaInstruction(cg, TR::InstOpCode::VUPLL, node, zonedDecimalRegLower, zonedDecimalRegLower, 0, 0, 0);
13702
generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalRegLowerUpperHalf, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 15);
13703
// Advance Pointer then store again.
13704
generateRILInstruction(cg, TR::InstOpCode::getAddImmOpCode(), node, byteArrayReg, 16);
13705
generateVSIInstruction(cg, TR::InstOpCode::VSTRL, node, zonedDecimalRegLower, generateS390MemoryReference(*destinationArrayMemRef, 0, cg), 15);
13706
13707
TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0, 10, cg);
13708
dependencies->addPostCondition(inputValueReg, TR::RealRegister::AssignAny);
13709
dependencies->addPostCondition(byteArrayReg, TR::RealRegister::AssignAny);
13710
dependencies->addPostCondition(stringSizeReg, TR::RealRegister::AssignAny);
13711
dependencies->addPostCondition(numCharsRemainingReg, TR::RealRegister::AssignAny);
13712
dependencies->addPostCondition(intToPDReg, TR::RealRegister::AssignAny);
13713
dependencies->addPostCondition(maskReg, TR::RealRegister::AssignAny);
13714
dependencies->addPostCondition(asciiOffset, TR::RealRegister::AssignAny);
13715
dependencies->addPostCondition(zonedDecimalRegLower, TR::RealRegister::AssignAny);
13716
dependencies->addPostCondition(zonedDecimalRegLowerUpperHalf, TR::RealRegister::AssignAny);
13717
dependencies->addPostCondition(zonedDecimalRegHigher, TR::RealRegister::AssignAny);
13718
13719
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
13720
13721
// For the purposes of inlining Integer.toString and Long.toString, the return value of the getChars API will always be 0. So we load it here manually.
13722
generateRILInstruction(cg, TR::InstOpCode::IILF, node, numCharsRemainingReg, 0);
13723
13724
cg->decReferenceCount(inputValueNode);
13725
cg->decReferenceCount(stringSizeNode);
13726
cg->decReferenceCount(byteArrayNode);
13727
13728
cg->stopUsingRegister(intToPDReg);
13729
cg->stopUsingRegister(maskReg);
13730
cg->stopUsingRegister(asciiOffset);
13731
cg->stopUsingRegister(zonedDecimalRegLower);
13732
13733
cg->stopUsingRegister(byteArrayReg);
13734
cg->stopUsingRegister(stringSizeReg);
13735
13736
cg->stopUsingRegister(zonedDecimalRegLowerUpperHalf);
13737
cg->stopUsingRegister(zonedDecimalRegHigher);
13738
13739
return node->setRegister(numCharsRemainingReg);
13740
}
13741
13742
/*
13743
* This method inlines calls to Integer.stringSize and Long.stringSize using the VCLZDP instruction on zNext
13744
*/
13745
TR::Register*
13746
J9::Z::TreeEvaluator::inlineIntegerStringSize(TR::Node* node, TR::CodeGenerator* cg)
13747
{
13748
TR::Compilation *comp = cg->comp();
13749
static const bool disableIntegerStringSizeBranch = feGetEnv("TR_disableStringSizeBranch") != NULL;
13750
TR::Node *inputValueNode = node->getChild(0);
13751
bool inputIs64Bit = inputValueNode->getDataType() == TR::Int64;
13752
TR::Register *inputValueReg = cg->evaluate(inputValueNode);
13753
13754
TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg);
13755
cFlowRegionStart->setStartInternalControlFlow();
13756
TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg);
13757
cFlowRegionEnd->setEndInternalControlFlow();
13758
TR::LabelSymbol *inputValueZeroLabel = generateLabelSymbol(cg);
13759
TR::LabelSymbol *countNumDigitsLabel = generateLabelSymbol(cg);
13760
13761
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart);
13762
TR::Register *lengthReg = cg->allocateRegister();
13763
// If value is 0, we branch to end as string is "0"
13764
generateRIInstruction(cg, TR::InstOpCode::LHI, node, lengthReg, 1);
13765
generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false);
13766
13767
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, countNumDigitsLabel);
13768
TR::Register *intToPDReg = cg->allocateRegister(TR_VRF);
13769
TR::Register *maxNumDigitsReg = cg->allocateRegister(TR_VRF);
13770
TR::Register *lengthVectorReg = cg->allocateRegister(TR_VRF);
13771
TR::Register *signBitConstant = disableIntegerStringSizeBranch ? cg->allocateRegister(TR_VRF) : NULL;
13772
if (disableIntegerStringSizeBranch)
13773
{
13774
generateVRIaInstruction(cg, TR::InstOpCode::VLEIB, node, signBitConstant, 1, 15);
13775
}
13776
generateVRIaInstruction(cg, TR::InstOpCode::VLEIB, node, maxNumDigitsReg, 31, 7);
13777
generateVRIiInstruction(cg, inputIs64Bit ? TR::InstOpCode::VCVDG : TR::InstOpCode::VCVD, node, intToPDReg, inputValueReg, inputIs64Bit ? 19 : 10, 0x1);
13778
TR::Register *leadingZerosReg = cg->allocateRegister(TR_VRF);
13779
generateVRRkInstruction(cg, TR::InstOpCode::VCLZDP, node, leadingZerosReg, intToPDReg, 0 /*M3*/);
13780
// Now subtract to get length of string
13781
generateVRRcInstruction(cg, TR::InstOpCode::VS, node, lengthVectorReg, maxNumDigitsReg, leadingZerosReg, 0);
13782
13783
if (disableIntegerStringSizeBranch)
13784
{
13785
generateVRRcInstruction(cg, TR::InstOpCode::VN, node, intToPDReg, intToPDReg, signBitConstant, 0, 0, 0);
13786
generateVRRcInstruction(cg, TR::InstOpCode::VA, node, lengthVectorReg, lengthVectorReg, intToPDReg, 0);
13787
}
13788
else
13789
{
13790
generateVRScInstruction(cg, TR::InstOpCode::VLGV, node, lengthReg, lengthVectorReg, generateS390MemoryReference(7, cg), 0);
13791
// If value is greater than 0, we branch to end. Otherwise we add 1 to lengthReg to account for '-' sign.
13792
generateS390CompareAndBranchInstruction(cg, inputIs64Bit ? TR::InstOpCode::CG : TR::InstOpCode::C, node, inputValueReg, 0, TR::InstOpCode::COND_BNL, cFlowRegionEnd, false);
13793
generateRILInstruction(cg, TR::InstOpCode::AFI, node, lengthReg, 1);
13794
}
13795
13796
TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0, disableIntegerStringSizeBranch ? 7 : 6, cg);
13797
dependencies->addPostCondition(inputValueReg, TR::RealRegister::AssignAny);
13798
dependencies->addPostCondition(intToPDReg, TR::RealRegister::AssignAny);
13799
dependencies->addPostCondition(leadingZerosReg, TR::RealRegister::AssignAny);
13800
dependencies->addPostCondition(lengthReg, TR::RealRegister::AssignAny);
13801
dependencies->addPostCondition(maxNumDigitsReg, TR::RealRegister::AssignAny);
13802
dependencies->addPostCondition(lengthVectorReg, TR::RealRegister::AssignAny);
13803
if (disableIntegerStringSizeBranch)
13804
{
13805
dependencies->addPostCondition(signBitConstant, TR::RealRegister::AssignAny);
13806
}
13807
13808
generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies);
13809
13810
cg->decReferenceCount(inputValueNode);
13811
cg->stopUsingRegister(intToPDReg);
13812
cg->stopUsingRegister(leadingZerosReg);
13813
cg->stopUsingRegister(maxNumDigitsReg);
13814
cg->stopUsingRegister(lengthVectorReg);
13815
if (disableIntegerStringSizeBranch)
13816
{
13817
cg->stopUsingRegister(signBitConstant);
13818
}
13819
13820
return node->setRegister(lengthReg);
13821
}
13822
13823