Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/x/codegen/J9TreeEvaluator.cpp
6004 views
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include <assert.h>
24
#include <limits.h>
25
#include <math.h>
26
#include <stdint.h>
27
#include "j9.h"
28
#include "j9cfg.h"
29
#include "j9consts.h"
30
#include "j9port.h"
31
#include "locknursery.h"
32
#include "thrdsup.h"
33
#include "thrtypes.h"
34
#include "codegen/AheadOfTimeCompile.hpp"
35
#include "codegen/CodeGenerator.hpp"
36
#include "codegen/Instruction.hpp"
37
#include "codegen/Machine.hpp"
38
#include "codegen/Linkage.hpp"
39
#include "codegen/Linkage_inlines.hpp"
40
#include "codegen/LiveRegister.hpp"
41
#include "codegen/Relocation.hpp"
42
#include "codegen/Register.hpp"
43
#include "codegen/RegisterPair.hpp"
44
#include "codegen/ScratchRegisterManager.hpp"
45
#include "codegen/Snippet.hpp"
46
#include "codegen/TreeEvaluator.hpp"
47
#include "codegen/UnresolvedDataSnippet.hpp"
48
#include "compile/CompilationTypes.hpp"
49
#include "compile/ResolvedMethod.hpp"
50
#include "compile/VirtualGuard.hpp"
51
#include "control/Recompilation.hpp"
52
#include "control/RecompilationInfo.hpp"
53
#include "env/CompilerEnv.hpp"
54
#include "env/CHTable.hpp"
55
#include "env/IO.hpp"
56
#include "env/j9method.h"
57
#include "env/jittypes.h"
58
#include "env/PersistentCHTable.hpp"
59
#include "env/VMJ9.h"
60
#include "il/Block.hpp"
61
#include "il/DataTypes.hpp"
62
#include "il/Node.hpp"
63
#include "il/Node_inlines.hpp"
64
#include "il/TreeTop.hpp"
65
#include "il/TreeTop_inlines.hpp"
66
#include "infra/SimpleRegex.hpp"
67
#include "OMR/Bytes.hpp"
68
#include "x/codegen/AllocPrefetchSnippet.hpp"
69
#include "x/codegen/CheckFailureSnippet.hpp"
70
#include "x/codegen/CompareAnalyser.hpp"
71
#include "x/codegen/ForceRecompilationSnippet.hpp"
72
#include "x/codegen/FPTreeEvaluator.hpp"
73
#include "x/codegen/J9X86Instruction.hpp"
74
#include "x/codegen/MonitorSnippet.hpp"
75
#include "x/codegen/OutlinedInstructions.hpp"
76
#include "x/codegen/HelperCallSnippet.hpp"
77
#include "x/codegen/X86Evaluator.hpp"
78
#include "env/CompilerEnv.hpp"
79
#include "runtime/J9Runtime.hpp"
80
#include "codegen/J9WatchedStaticFieldSnippet.hpp"
81
#include "codegen/X86FPConversionSnippet.hpp"
82
83
#ifdef TR_TARGET_64BIT
84
#include "codegen/AMD64PrivateLinkage.hpp"
85
#endif
86
87
#ifdef TR_TARGET_32BIT
88
#include "codegen/IA32PrivateLinkage.hpp"
89
#endif
90
91
#ifdef LINUX
92
#include <time.h>
93
94
#endif
95
96
// Number of polymorphic inline cache (PIC) slots generated per dispatch site.
// NOTE(review): the consuming dispatch code is outside this chunk — confirm there.
#define NUM_PICS 3

// Minimum number of words for zero-initialization via REP TR::InstOpCode::STOSD
//
#define MIN_REPSTOSD_WORDS 64
// Runtime-tunable threshold; 0 until initialized elsewhere (not in this chunk),
// presumably from MIN_REPSTOSD_WORDS or an environment override — TODO confirm.
static int32_t minRepstosdWords = 0;

// Maximum number of words per loop iteration for loop zero-initialization.
//
#define MAX_ZERO_INIT_WORDS_PER_ITERATION 4
// Runtime-tunable copy; 0 until initialized elsewhere (not in this chunk),
// presumably from MAX_ZERO_INIT_WORDS_PER_ITERATION — TODO confirm.
static int32_t maxZeroInitWordsPerIteration = 0;

// Forward declarations of helpers defined later in this file.
static bool getNodeIs64Bit(TR::Node *node, TR::CodeGenerator *cg);
static TR::Register *intOrLongClobberEvaluate(TR::Node *node, bool nodeIs64Bit, TR::CodeGenerator *cg);
110
111
// Returns floor(log2(n)), i.e. the bit position of the highest set bit of n.
// Scans downward from the most significant bit.
//
// Could use leadingZeroes, except we can't call it from here.
// Callers are expected to pass n != 0; for n == 0 the counter wraps around
// (same as the historical behavior).
static uint32_t logBase2(uintptr_t n)
   {
   uint32_t position = 8 * sizeof(n) - 1;

   for (uintptr_t probe = ((uintptr_t)1) << position;
        probe != 0 && (probe & n) == 0;
        probe >>= 1)
      {
      --position;
      }

   return position;
   }
124
125
// ----------------------------------------------------------------------------
// Loads the J9Class pointer of `object` into `j9class` from the object's VFT
// field, masking off any non-class flag bits stored there.  For opcodes that
// imply a null check (read/write barriers, monent/monexit when value-type or
// value-based monitor checks are enabled, checkcastAndNULLCHK) the class load
// itself serves as the implicit exception point.
inline void generateLoadJ9Class(TR::Node* node, TR::Register* j9class, TR::Register* object, TR::CodeGenerator* cg)
   {
   bool needsNULLCHK = false;
   TR::ILOpCodes opValue = node->getOpCodeValue();

   if (node->getOpCode().isReadBar() || node->getOpCode().isWrtBar())
      needsNULLCHK = true;
   else
      {
      switch (opValue)
         {
         case TR::monent:
         case TR::monexit:
            TR_ASSERT_FATAL(TR::Compiler->om.areValueTypesEnabled() || TR::Compiler->om.areValueBasedMonitorChecksEnabled(),
               "monent and monexit are expected for generateLoadJ9Class only when value type or when value based monitor check is enabled");
            // Intentional fall-through: monent/monexit also require the null check.
         case TR::checkcastAndNULLCHK:
            needsNULLCHK = true;
            break;
         case TR::icall: // TR_checkAssignable
            return; // j9class register already holds j9class
         case TR::checkcast:
         case TR::instanceof:
            break;
         default:
            TR_ASSERT_FATAL(false, "Unexpected opCode for generateLoadJ9Class %s.", node->getOpCode().getName());
            break;
         }
      }

   // Full-width (8-byte) class loads are only needed on 64-bit without
   // compressed object headers; otherwise a 4-byte load suffices.
   auto use64BitClasses = cg->comp()->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();
   auto instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, j9class, generateX86MemoryReference(object, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
   if (needsNULLCHK)
      {
      // The VFT load faults on a null object; mark it as the implicit
      // exception point and give it a GC map.
      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);
      if (opValue == TR::checkcastAndNULLCHK)
         instr->setNode(cg->comp()->findNullChkInfo(node));
      }

   // Strip flag bits from the loaded VFT value if the object model stores any
   // (mask of all ones means no bits to clear, so skip the AND entirely).
   auto mask = TR::Compiler->om.maskOfObjectVftField();
   if (~mask != 0)
      {
      generateRegImmInstruction(~mask <= 127 ? TR::InstOpCode::ANDRegImms(use64BitClasses) : TR::InstOpCode::ANDRegImm4(use64BitClasses), node, j9class, mask, cg);
      }
   }
172
173
// Generates the out-of-line (OOL) instruction sequence that accesses an array
// element through the spine of a discontiguous (arraylet-based) array.  The
// OOL path is entered at arrayletRefLabel and jumps back to restartLabel on
// completion.
//
// If needsBoundCheck is set, the path first verifies the array really is
// discontiguous (contiguous size field == 0; otherwise the access is a
// genuine AIOB) and then bound checks the index against the discontiguous
// array length, with check-failure snippets for both tests.
//
// The element access itself is a load, a store, or an element-address (LEA)
// computation depending on loadOrStoreOrArrayElementNode.  Exactly one of
// indexReg (variable index) or indexValue (constant index) selects the
// element; valueReg, when non-NULL, supplies the value for stores.
//
// Returns the TR_OutlinedInstructions region that was created and registered
// with the code generator.
static TR_OutlinedInstructions *generateArrayletReference(
   TR::Node *node,
   TR::Node *loadOrStoreOrArrayElementNode,
   TR::Instruction *checkInstruction,
   TR::LabelSymbol *arrayletRefLabel,
   TR::LabelSymbol *restartLabel,
   TR::Register *baseArrayReg,
   TR::Register *loadOrStoreReg,
   TR::Register *indexReg,
   int32_t indexValue,
   TR::Register *valueReg,
   bool needsBoundCheck,
   TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());

   TR::Register *scratchReg = cg->allocateRegister();

   TR_OutlinedInstructions *arrayletRef = new (cg->trHeapMemory()) TR_OutlinedInstructions(arrayletRefLabel, cg);
   arrayletRef->setRestartLabel(restartLabel);

   if (needsBoundCheck)
      {
      // The current block is required for exception handling and anchoring
      // the GC map.
      //
      arrayletRef->setBlock(cg->getCurrentEvaluationBlock());
      arrayletRef->setCallNode(node);
      }

   cg->getOutlinedInstructionsList().push_front(arrayletRef);

   // Redirect instruction emission into the OOL region until the matching
   // swap near the end of this function.
   arrayletRef->swapInstructionListsWithCompilation();

   generateLabelInstruction(NULL, TR::InstOpCode::label, arrayletRefLabel, cg)->setNode(node);

   // TODO: REMOVE THIS!
   //
   // This merely indicates that this OOL sequence should be assigned with the non-linear
   // assigner, and should go away when the non-linear assigner handles all OOL sequences.
   //
   arrayletRefLabel->setNonLinear();

   // Debug hook: break into the debugger whenever the arraylet path runs.
   static char *forceArrayletInt = feGetEnv("TR_forceArrayletInt");
   if (forceArrayletInt)
      {
      generateInstruction(TR::InstOpCode::INT3, node, cg);
      }

   // -----------------------------------------------------------------------------------
   // Track all virtual register use within the arraylet path. This info will be used
   // to adjust the virtual register use counts within the mainline path for more precise
   // register assignment.
   // -----------------------------------------------------------------------------------

   cg->startRecordingRegisterUsage();

   if (needsBoundCheck)
      {
      // -------------------------------------------------------------------------
      // Check if the base array has a spine. If not, this is a real AIOB.
      // -------------------------------------------------------------------------

      TR::MemoryReference *arraySizeMR =
         generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);

      // A discontiguous array has a zero contiguous-size field.
      generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, arraySizeMR, 0, cg);

      TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);

      checkInstruction = generateLabelInstruction(TR::InstOpCode::JNE4, node, boundCheckFailureLabel, cg);

      cg->addSnippet(
         new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(
            cg, node->getSymbolReference(),
            boundCheckFailureLabel,
            checkInstruction,
            false
            ));

      // -------------------------------------------------------------------------
      // The array has a spine. Do a bound check on its true length.
      // -------------------------------------------------------------------------

      arraySizeMR = generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg);

      if (!indexReg)
         {
         // Use the short immediate form when the constant index fits in int8.
         TR::InstOpCode::Mnemonic op = (indexValue >= -128 && indexValue <= 127) ? TR::InstOpCode::CMP4MemImms : TR::InstOpCode::CMP4MemImm4;
         generateMemImmInstruction(op, node, arraySizeMR, indexValue, cg);
         }
      else
         {
         generateMemRegInstruction(TR::InstOpCode::CMP4MemReg, node, arraySizeMR, indexReg, cg);
         }

      // JBE: unsigned compare length <= index catches both index too large
      // and negative index (as a large unsigned value).
      boundCheckFailureLabel = generateLabelSymbol(cg);
      checkInstruction = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);

      cg->addSnippet(
         new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(
            cg, node->getSymbolReference(),
            boundCheckFailureLabel,
            checkInstruction,
            false
            ));
      }

   // -------------------------------------------------------------------------
   // Determine if a load needs to be decompressed.
   // -------------------------------------------------------------------------

   // NOTE(review): seenCompressionSequence is computed in the loop below but
   // never consulted afterwards in this function.
   bool seenCompressionSequence = false;
   bool loadNeedsDecompression = false;

   if (loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::l2a ||
       (((loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::aload ||
          loadOrStoreOrArrayElementNode->getOpCodeValue() == TR::aRegLoad) &&
         node->isSpineCheckWithArrayElementChild()) &&
        comp->target().is64Bit() && comp->useCompressedPointers()))
      loadNeedsDecompression = true;

   // Walk down through conversion/compression-sequence wrappers to the node
   // that actually performs the load, store, or address computation.
   TR::Node *actualLoadOrStoreOrArrayElementNode = loadOrStoreOrArrayElementNode;
   while ((loadNeedsDecompression && actualLoadOrStoreOrArrayElementNode->getOpCode().isConversion()) ||
          actualLoadOrStoreOrArrayElementNode->containsCompressionSequence())
      {
      if (actualLoadOrStoreOrArrayElementNode->containsCompressionSequence())
         seenCompressionSequence = true;

      actualLoadOrStoreOrArrayElementNode = actualLoadOrStoreOrArrayElementNode->getFirstChild();
      }

   // -------------------------------------------------------------------------
   // Do the load, store, or array address calculation
   // -------------------------------------------------------------------------

   TR::DataType dt = actualLoadOrStoreOrArrayElementNode->getDataType();
   int32_t elementSize;

   if (dt == TR::Address)
      {
      elementSize = TR::Compiler->om.sizeofReferenceField();
      }
   else
      {
      elementSize = TR::Symbol::convertTypeToSize(dt);
      }

   // Spine slots hold arraylet pointers: full 8 bytes only on uncompressed
   // 64-bit; 4 bytes otherwise.
   int32_t spinePointerSize = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 8 : 4;
   int32_t arrayHeaderSize = TR::Compiler->om.discontiguousArrayHeaderSizeInBytes();
   int32_t arrayletMask = fej9->getArrayletMask(elementSize);

   TR::MemoryReference *spineMR;

   // Load the arraylet from the spine.
   //
   if (indexReg)
      {
      // Sign-extend the index (on 64-bit) and shift it down to a spine slot
      // index, then address spine[slot] past the array header.
      TR::InstOpCode::Mnemonic op = comp->target().is64Bit() ? TR::InstOpCode::MOVSXReg8Reg4 : TR::InstOpCode::MOVRegReg();
      generateRegRegInstruction(op, node, scratchReg, indexReg, cg);

      int32_t spineShift = fej9->getArraySpineShift(elementSize);
      generateRegImmInstruction(TR::InstOpCode::SARRegImm1(), node, scratchReg, spineShift, cg);

      spineMR =
         generateX86MemoryReference(
            baseArrayReg,
            scratchReg,
            TR::MemoryReference::convertMultiplierToStride(spinePointerSize),
            arrayHeaderSize,
            cg);
      }
   else
      {
      // Constant index: the spine displacement folds to a compile-time value.
      int32_t spineIndex = fej9->getArrayletLeafIndex(indexValue, elementSize);
      int32_t spineDisp32 = (spineIndex * spinePointerSize) + arrayHeaderSize;

      spineMR = generateX86MemoryReference(baseArrayReg, spineDisp32, cg);
      }

   TR::InstOpCode::Mnemonic op = (spinePointerSize == 8) ? TR::InstOpCode::L8RegMem : TR::InstOpCode::L4RegMem;
   generateRegMemInstruction(op, node, scratchReg, spineMR, cg);

   // Decompress the arraylet pointer from the spine.
   int32_t shiftOffset = 0;

   if (comp->target().is64Bit() && comp->useCompressedPointers())
      {
      shiftOffset = TR::Compiler->om.compressedReferenceShiftOffset();
      if (shiftOffset > 0)
         {
         generateRegImmInstruction(TR::InstOpCode::SHL8RegImm1, node, scratchReg, shiftOffset, cg);
         }
      }

   TR::MemoryReference *arrayletMR;

   // Calculate the offset with the arraylet for the index.
   //
   if (indexReg)
      {
      // The low bits of the index (masked by arrayletMask) select the
      // element within the arraylet leaf.
      TR::Register *scratchReg2 = cg->allocateRegister();

      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg2, indexReg, cg);
      generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, scratchReg2, arrayletMask, cg);
      arrayletMR = generateX86MemoryReference(
         scratchReg,
         scratchReg2,
         TR::MemoryReference::convertMultiplierToStride(elementSize),
         cg);

      cg->stopUsingRegister(scratchReg2);
      }
   else
      {
      int32_t arrayletIndex = ((TR_J9VMBase *)fej9)->getLeafElementIndex(indexValue, elementSize);
      arrayletMR = generateX86MemoryReference(scratchReg, arrayletIndex*elementSize, cg);
      }

   cg->stopUsingRegister(scratchReg);

   if (!actualLoadOrStoreOrArrayElementNode->getOpCode().isStore())
      {
      TR::InstOpCode::Mnemonic op;

      // On 32-bit, a long load uses two 4-byte loads via a register pair.
      TR::MemoryReference *highArrayletMR = NULL;
      TR::Register *highRegister = NULL;

      // If we're not loading an array shadow then this must be an effective
      // address computation on the array element (for a write barrier).
      //
      if ((!actualLoadOrStoreOrArrayElementNode->getOpCode().hasSymbolReference() ||
           !actualLoadOrStoreOrArrayElementNode->getSymbolReference()->getSymbol()->isArrayShadowSymbol()) &&
          !node->isSpineCheckWithArrayElementChild())
         {
         op = TR::InstOpCode::LEARegMem();
         }
      else
         {
         switch (dt)
            {
            case TR::Int8:   op = TR::InstOpCode::L1RegMem; break;
            case TR::Int16:  op = TR::InstOpCode::L2RegMem; break;
            case TR::Int32:  op = TR::InstOpCode::L4RegMem; break;
            case TR::Int64:
               if (comp->target().is64Bit())
                  op = TR::InstOpCode::L8RegMem;
               else
                  {
                  TR_ASSERT(loadOrStoreReg->getRegisterPair(), "expecting a register pair");

                  op = TR::InstOpCode::L4RegMem;
                  highArrayletMR = generateX86MemoryReference(*arrayletMR, 4, cg);
                  highRegister = loadOrStoreReg->getHighOrder();
                  loadOrStoreReg = loadOrStoreReg->getLowOrder();
                  }
               break;

            case TR::Float:  op = TR::InstOpCode::MOVSSRegMem; break;
            case TR::Double: op = TR::InstOpCode::MOVSDRegMem; break;

            case TR::Address:
               if (comp->target().is32Bit() || comp->useCompressedPointers())
                  op = TR::InstOpCode::L4RegMem;
               else
                  op = TR::InstOpCode::L8RegMem;
               break;

            default:
               TR_ASSERT(0, "unsupported array element load type");
               op = TR::InstOpCode::bad;
            }
         }

      generateRegMemInstruction(op, node, loadOrStoreReg, arrayletMR, cg);

      if (highArrayletMR)
         {
         generateRegMemInstruction(op, node, highRegister, highArrayletMR, cg);
         }

      // Decompress the loaded address if necessary.
      //
      if (loadNeedsDecompression)
         {
         if (comp->target().is64Bit() && comp->useCompressedPointers())
            {
            if (shiftOffset > 0)
               {
               generateRegImmInstruction(TR::InstOpCode::SHL8RegImm1, node, loadOrStoreReg, shiftOffset, cg);
               }
            }
         }
      }
   else
      {
      if (dt != TR::Address)
         {
         // movE [S + S2], value
         //
         TR::InstOpCode::Mnemonic op;
         bool needStore = true;

         switch (dt)
            {
            case TR::Int8:  op = valueReg ? TR::InstOpCode::S1MemReg : TR::InstOpCode::S1MemImm1; break;
            case TR::Int16: op = valueReg ? TR::InstOpCode::S2MemReg : TR::InstOpCode::S2MemImm2; break;
            case TR::Int32: op = valueReg ? TR::InstOpCode::S4MemReg : TR::InstOpCode::S4MemImm4; break;
            case TR::Int64:
               if (comp->target().is64Bit())
                  {
                  // The range of the immediate must be verified before this function to
                  // fall within a signed 32-bit integer.
                  //
                  op = valueReg ? TR::InstOpCode::S8MemReg : TR::InstOpCode::S8MemImm4;
                  }
               else
                  {
                  // 32-bit long store: two 4-byte stores (low dword first),
                  // from either a register pair or a long constant.
                  if (valueReg)
                     {
                     TR_ASSERT(valueReg->getRegisterPair(), "value must be a register pair");
                     generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, arrayletMR, valueReg->getLowOrder(), cg);
                     generateMemRegInstruction(TR::InstOpCode::S4MemReg, node,
                                               generateX86MemoryReference(*arrayletMR, 4, cg),
                                               valueReg->getHighOrder(), cg);
                     }
                  else
                     {
                     TR::Node *valueChild = actualLoadOrStoreOrArrayElementNode->getSecondChild();
                     TR_ASSERT(valueChild->getOpCode().isLoadConst(), "expecting a long constant child");

                     generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayletMR, valueChild->getLongIntLow(), cg);
                     generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
                                               generateX86MemoryReference(*arrayletMR, 4, cg),
                                               valueChild->getLongIntHigh(), cg);
                     }

                  needStore = false;
                  }
               break;

            case TR::Float:  op = TR::InstOpCode::MOVSSMemReg; break;
            case TR::Double: op = TR::InstOpCode::MOVSDMemReg; break;

            default:
               TR_ASSERT(0, "unsupported array element store type");
               op = TR::InstOpCode::bad;
            }

         if (needStore)
            {
            if (valueReg)
               generateMemRegInstruction(op, node, arrayletMR, valueReg, cg);
            else
               {
               int32_t value = actualLoadOrStoreOrArrayElementNode->getSecondChild()->getInt();
               generateMemImmInstruction(op, node, arrayletMR, value, cg);
               }
            }
         }
      else
         {
         // lea S, [S+S2]
         TR_ASSERT(0, "OOL reference stores not supported yet");
         }
      }

   generateLabelInstruction(TR::InstOpCode::JMP4, node, restartLabel, cg);

   // -----------------------------------------------------------------------------------
   // Stop tracking virtual register usage.
   // -----------------------------------------------------------------------------------

   arrayletRef->setOutlinedPathRegisterUsageList(cg->stopRecordingRegisterUsage());

   // Restore instruction emission to the mainline path.
   arrayletRef->swapInstructionListsWithCompilation();

   return arrayletRef;
   }
553
554
// Emits a PREFETCHT0 for the field a lookahead predicts will be loaded soon
// after an object-header access.  Only active when TR_EnableSoftwarePrefetch
// is set in the environment, the method is compiled at scorching, and the
// target CPU is Intel Core 2.  Returns the prefetch instruction, or NULL when
// nothing was generated.
static TR::Instruction *generatePrefetchAfterHeaderAccess(TR::Node *node,
                                                          TR::Register *objectReg,
                                                          TR::CodeGenerator *cg)
   {
   TR::Compilation *compilation = cg->comp();
   TR::Instruction *prefetchInstr = NULL;

   static const char *enableSoftwarePrefetch = feGetEnv("TR_EnableSoftwarePrefetch");

   // Sanity check that the cached processor info agrees with the target CPU
   // query (relocatable/portable/out-of-process compiles are exempt).
   TR_ASSERT_FATAL(compilation->compileRelocatableCode() || compilation->isOutOfProcessCompilation() || compilation->compilePortableCode() || compilation->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2) == cg->getX86ProcessorInfo().isIntelCore2(), "isIntelCore2() failed\n");

   bool prefetchEligible = enableSoftwarePrefetch != NULL
      && compilation->getMethodHotness() >= scorching
      && compilation->target().cpu.is(OMR_PROCESSOR_X86_INTELCORE2);

   if (prefetchEligible)
      {
      int32_t lookaheadOffset = 0;

      // Only prefetch fields that live beyond the first 32 bytes of the
      // object; nearer fields share the header's cache line anyway.
      if (TR::TreeEvaluator::loadLookaheadAfterHeaderAccess(node, lookaheadOffset, cg) && lookaheadOffset > 32)
         {
         prefetchInstr = generateMemInstruction(TR::InstOpCode::PREFETCHT0, node, generateX86MemoryReference(objectReg, lookaheadOffset, cg), cg);
         }
      }

   return prefetchInstr;
   }
577
578
// 32-bit float/double convert to long
//
// Double sources take a fast SSE path: CVTTSD2SI plus sign-extension, with an
// outlined helper call (TR_IA32double2LongSSE) when the truncated result is
// the integer-indefinite value 0x80000000 (out of int32 range).  Float
// sources use an x87 sequence that truncates to a 64-bit temporary and falls
// back to `helperSymRef` via a conversion snippet for indefinite results.
// Returns a register pair (low, high) holding the 64-bit result.
TR::Register *J9::X86::TreeEvaluator::fpConvertToLong(TR::Node *node, TR::SymbolReference *helperSymRef, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_ASSERT_FATAL(comp->target().is32Bit(), "AMD64 doesn't use this logic");

   TR::Node *child = node->getFirstChild();

   if (child->getOpCode().isDouble())
      {
      TR::RegisterDependencyConditions *deps;

      TR::Register *doubleReg = cg->evaluate(child);
      TR::Register *lowReg = cg->allocateRegister(TR_GPR);
      TR::Register *highReg = cg->allocateRegister(TR_GPR);
      TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);

      deps = generateRegisterDependencyConditions((uint8_t) 0, 3, cg);
      deps->addPostCondition(lowReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(highReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(doubleReg, TR::RealRegister::NoReg, cg);
      deps->stopAddingConditions();

      TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);   // exit routine label
      TR::LabelSymbol *CallLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);      // label where long (64-bit) conversion will start
      TR::LabelSymbol *StartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);

      StartLabel->setStartInternalControlFlow();
      reStartLabel->setEndInternalControlFlow();

      // Attempt to convert a double in an XMM register to an integer using CVTTSD2SI.
      // If the conversion succeeds, put the integer in lowReg and sign-extend it to highReg.
      // If the conversion fails (the double is too large), call the helper.
      // (CVTTSD2SI returns 0x80000000 — the integer-indefinite value — on
      // overflow/NaN, which is what the comparison below detects.)
      generateRegRegInstruction(TR::InstOpCode::CVTTSD2SIReg4Reg, node, lowReg, doubleReg, cg);
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lowReg, 0x80000000, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, StartLabel, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, CallLabel, cg);

      // Sign-extend lowReg into highReg (arithmetic shift by 31).
      generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, highReg ,lowReg, cg);
      generateRegImmInstruction(TR::InstOpCode::SAR4RegImm1, node, highReg , 31, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, deps, cg);

      // Outlined slow path: recreate the node as an lcall to the
      // double-to-long SSE helper.
      TR::Register *targetRegister = cg->allocateRegisterPair(lowReg, highReg);
      TR::SymbolReference *d2l = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_IA32double2LongSSE);
      d2l->getSymbol()->getMethodSymbol()->setLinkage(TR_Helper);
      TR::Node::recreate(node, TR::lcall);
      node->setSymbolReference(d2l);
      TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::lcall, targetRegister, CallLabel, reStartLabel, cg);
      cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);

      cg->decReferenceCount(child);
      node->setRegister(targetRegister);

      return targetRegister;
      }
   else
      {
      TR::Register *accReg = NULL;   // NOTE(review): never assigned non-NULL in this path
      TR::Register *lowReg = cg->allocateRegister(TR_GPR);
      TR::Register *highReg = cg->allocateRegister(TR_GPR);
      TR::Register *floatReg = cg->evaluate(child);

      TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
      TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
      TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);

      startLabel->setStartInternalControlFlow();
      reStartLabel->setEndInternalControlFlow();

      generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

      // These instructions must be set appropriately prior to the creation
      // of the snippet near the end of this method. Also see warnings below.
      //
      TR::X86RegMemInstruction *loadHighInstr;   // loads the high dword of the converted long
      TR::X86RegMemInstruction *loadLowInstr;    // loads the low dword of the converted long

      // Spill the float to memory and push it onto the x87 stack.
      TR::MemoryReference *tempMR = cg->machine()->getDummyLocalMR(TR::Float);
      generateMemRegInstruction(TR::InstOpCode::MOVSSMemReg, node, tempMR, floatReg, cg);
      generateMemInstruction(TR::InstOpCode::FLDMem, node, generateX86MemoryReference(*tempMR, 0, cg), cg);

      // Keep a duplicate on the x87 stack; the snippet path consumes it and
      // the fast path pops it with FSTPST0 below.
      generateInstruction(TR::InstOpCode::FLDDUP, node, cg);

      // For slow conversion only, change the rounding mode on the FPU via its control word register.
      //
      TR::MemoryReference *convertedLongMR = (cg->machine())->getDummyLocalMR(TR::Int64);

      if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_SSE3))
         {
         // FISTTP truncates regardless of the rounding mode; no FPCW dance.
         generateMemInstruction(TR::InstOpCode::FLSTTPMem, node, convertedLongMR, cg);
         }
      else
         {
         // Temporarily switch the FPU control word to round-to-zero, store
         // the long, then restore round-to-nearest.
         int16_t fpcw = comp->getJittedMethodSymbol()->usesSinglePrecisionMode() ?
                           SINGLE_PRECISION_ROUND_TO_ZERO : DOUBLE_PRECISION_ROUND_TO_ZERO;
         generateMemInstruction(TR::InstOpCode::LDCWMem, node, generateX86MemoryReference(cg->findOrCreate2ByteConstant(node, fpcw), cg), cg);
         generateMemInstruction(TR::InstOpCode::FLSTPMem, node, convertedLongMR, cg);

         fpcw = comp->getJittedMethodSymbol()->usesSinglePrecisionMode() ?
                   SINGLE_PRECISION_ROUND_TO_NEAREST : DOUBLE_PRECISION_ROUND_TO_NEAREST;

         generateMemInstruction(TR::InstOpCode::LDCWMem, node, generateX86MemoryReference(cg->findOrCreate2ByteConstant(node, fpcw), cg), cg);
         }

      // WARNING:
      //
      // The following load instructions are dissected in the snippet to determine the target registers.
      // If they or their format is changed, you may need to change the snippet also.
      //
      loadHighInstr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highReg,
                                                generateX86MemoryReference(*convertedLongMR, 4, cg), cg);

      loadLowInstr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowReg,
                                               generateX86MemoryReference(*convertedLongMR, 0, cg), cg);

      // Jump to the snippet if the converted value is an indefinite integer; otherwise continue.
      // (Indefinite is 0x8000000000000000: high == INT_MIN and low == 0.)
      //
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, highReg, INT_MIN, cg);
      generateLabelInstruction(TR::InstOpCode::JNE4, node, reStartLabel, cg);
      generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, lowReg, lowReg, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);

      // Create the conversion snippet.
      //
      cg->addSnippet( new (cg->trHeapMemory()) TR::X86FPConvertToLongSnippet(reStartLabel,
                                                                            snippetLabel,
                                                                            helperSymRef,
                                                                            node,
                                                                            loadHighInstr,
                                                                            loadLowInstr,
                                                                            cg) );

      TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, accReg ? 3 : 2, cg);

      // Make sure the high and low long registers are assigned to something.
      //
      if (accReg)
         {
         deps->addPostCondition(accReg, TR::RealRegister::eax, cg);
         }

      deps->addPostCondition(lowReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(highReg, TR::RealRegister::NoReg, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, deps, cg);

      cg->decReferenceCount(child);
      // Pop the duplicated value off the x87 stack.
      generateInstruction(TR::InstOpCode::FSTPST0, node, cg);

      TR::Register *targetRegister = cg->allocateRegisterPair(lowReg, highReg);
      node->setRegister(targetRegister);
      return targetRegister;
      }
   }
735
736
// On AMD64, all four [fd]2[il] conversions are handled here
// On IA32, both [fd]2i conversions are handled here
//
// Performs the truncating conversion with CVTTSS2SI/CVTTSD2SI, then detects
// the integer-indefinite result (INT_MIN / LLONG_MIN) that those instructions
// produce for NaN or out-of-range inputs.  An outlined path fixes up the
// result to Java semantics: 0 for NaN, MAX for positive overflow, MIN for
// negative overflow.  Returns the GPR holding the converted value.
TR::Register *J9::X86::TreeEvaluator::f2iEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   bool doubleSource;
   bool longTarget;
   TR::InstOpCode::Mnemonic cvttOpCode;

   // Select the truncating-convert opcode from the IL opcode.
   switch (node->getOpCodeValue())
      {
      case TR::f2i:
         cvttOpCode   = TR::InstOpCode::CVTTSS2SIReg4Reg;
         doubleSource = false;
         longTarget   = false;
         break;
      case TR::f2l:
         cvttOpCode   = TR::InstOpCode::CVTTSS2SIReg8Reg;
         doubleSource = false;
         longTarget   = true;
         break;
      case TR::d2i:
         cvttOpCode   = TR::InstOpCode::CVTTSD2SIReg4Reg;
         doubleSource = true;
         longTarget   = false;
         break;
      case TR::d2l:
         cvttOpCode   = TR::InstOpCode::CVTTSD2SIReg8Reg;
         doubleSource = true;
         longTarget   = true;
         break;
      default:
         TR_ASSERT_FATAL(0, "Unknown opcode value in f2iEvaluator");
         break;
      }
   // Long targets are only handled here on 64-bit (IA32 goes through fpConvertToLong).
   TR_ASSERT_FATAL(cg->comp()->target().is64Bit() || !longTarget, "Incorrect opcode value in f2iEvaluator");

   TR::Node *child = node->getFirstChild();
   TR::Register *sourceRegister = NULL;
   TR::Register *targetRegister = cg->allocateRegister(TR_GPR);
   TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *endLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *exceptionLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);

   sourceRegister = cg->evaluate(child);
   generateRegRegInstruction(cvttOpCode, node, targetRegister, sourceRegister, cg);

   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   if (longTarget)
      {
      TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "We should only get here on AMD64");
      // We can't compare with 0x8000000000000000.
      // Instead, rotate left 1 bit and compare with 0x0000000000000001.
      generateRegInstruction(TR::InstOpCode::ROL8Reg1, node, targetRegister, cg);
      generateRegImmInstruction(TR::InstOpCode::CMP8RegImms, node, targetRegister, 1, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, exceptionLabel, cg);
      }
   else
      {
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, targetRegister, INT_MIN, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, exceptionLabel, cg);
      }

   //TODO: (omr issue #4969): Remove once support for spills in OOL paths is added
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)2, cg);
   deps->addPostCondition(targetRegister, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);

   {
   TR_OutlinedInstructionsGenerator og(exceptionLabel, node, cg);
   // at this point, target is set to -INF and there can only be THREE possible results: -INF, +INF, NaN
   // compare source with ZERO
   generateRegMemInstruction(doubleSource ? TR::InstOpCode::UCOMISDRegMem : TR::InstOpCode::UCOMISSRegMem,
                             node,
                             sourceRegister,
                             generateX86MemoryReference(doubleSource ? cg->findOrCreate8ByteConstant(node, 0) : cg->findOrCreate4ByteConstant(node, 0), cg),
                             cg);
   // load max int if source is positive, note that for long case, LLONG_MAX << 1 is loaded as it will be shifted right
   // NOTE(review): LLONG_MAX << 1 overflows a signed long long, which is UB
   // in standard C++; consider (uint64_t)LLONG_MAX << 1 — confirm intended
   // bit pattern 0xFFFFFFFFFFFFFFFE before changing.
   generateRegMemInstruction(TR::InstOpCode::CMOVARegMem(longTarget),
                             node,
                             targetRegister,
                             generateX86MemoryReference(longTarget ? cg->findOrCreate8ByteConstant(node, LLONG_MAX << 1) : cg->findOrCreate4ByteConstant(node, INT_MAX), cg),
                             cg);
   // load zero if source is NaN (UCOMIS* sets PF for unordered compares)
   generateRegMemInstruction(TR::InstOpCode::CMOVPRegMem(longTarget),
                             node,
                             targetRegister,
                             generateX86MemoryReference(longTarget ? cg->findOrCreate8ByteConstant(node, 0) : cg->findOrCreate4ByteConstant(node, 0), cg),
                             cg);

   generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
   og.endOutlinedInstructionSequence();
   }

   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
   if (longTarget)
      {
      // Undo the ROL done above on the mainline path (and complete the
      // shifted-MAX fixup from the OOL path).
      generateRegInstruction(TR::InstOpCode::ROR8Reg1, node, targetRegister, cg);
      }

   node->setRegister(targetRegister);
   cg->decReferenceCount(child);
   return targetRegister;
   }
843
844
// f2l on 32-bit x86: converts a float to a signed 64-bit integer by
// calling out to the IA32 float-to-long runtime helper (64-bit targets
// handle this inline via f2iEvaluator instead).
TR::Register *J9::X86::TreeEvaluator::f2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT_FATAL(cg->comp()->target().is32Bit(), "AMD64 uses f2iEvaluator for this");

   TR::SymbolReference *helperSymRef = cg->symRefTab()->findOrCreateRuntimeHelper(TR_IA32floatToLong);
   return TR::TreeEvaluator::fpConvertToLong(node, helperSymRef, cg);
   }
849
850
// d2l on 32-bit x86: converts a double to a signed 64-bit integer by
// calling out to the IA32 double-to-long runtime helper (64-bit targets
// handle this inline via f2iEvaluator instead).
TR::Register *J9::X86::TreeEvaluator::d2lEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT_FATAL(cg->comp()->target().is32Bit(), "AMD64 uses f2iEvaluator for this");

   TR::SymbolReference *helperSymRef = cg->symRefTab()->findOrCreateRuntimeHelper(TR_IA32doubleToLong);
   return TR::TreeEvaluator::fpConvertToLong(node, helperSymRef, cg);
   }
856
857
/*
 * J9 X86 specific tree evaluator table overrides
 *
 * Replaces selected entries of the code generator's evaluator table with
 * the Java-aware J9 implementations.  Later assignments override earlier
 * ones, so the 32-bit-only overrides must stay at the end of the function.
 */
extern void TEMPORARY_initJ9X86TreeEvaluatorTable(TR::CodeGenerator *cg)
   {
   TR_TreeEvaluatorFunctionPointer *tet = cg->getTreeEvaluatorTable();

   // Checked float/double -> int/long conversions (f2iEvaluator handles all
   // variants; the 32-bit long cases are overridden below).
   tet[TR::f2i] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::f2iu] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::f2l] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::f2lu] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::d2i] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::d2iu] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::d2l] = TR::TreeEvaluator::f2iEvaluator;
   tet[TR::d2lu] = TR::TreeEvaluator::f2iEvaluator;

   // Object monitors and async checkpoints
   tet[TR::monent] = TR::TreeEvaluator::monentEvaluator;
   tet[TR::monexit] = TR::TreeEvaluator::monexitEvaluator;
   tet[TR::monexitfence] = TR::TreeEvaluator::monexitfenceEvaluator;
   tet[TR::asynccheck] = TR::TreeEvaluator::asynccheckEvaluator;

   // Type tests and casts share a single evaluator
   tet[TR::instanceof] = TR::TreeEvaluator::checkcastinstanceofEvaluator;
   tet[TR::checkcast] = TR::TreeEvaluator::checkcastinstanceofEvaluator;
   tet[TR::checkcastAndNULLCHK] = TR::TreeEvaluator::checkcastinstanceofEvaluator;

   // Object and array allocation
   tet[TR::New] = TR::TreeEvaluator::newEvaluator;
   tet[TR::newarray] = TR::TreeEvaluator::newEvaluator;
   tet[TR::anewarray] = TR::TreeEvaluator::newEvaluator;
   tet[TR::variableNew] = TR::TreeEvaluator::newEvaluator;
   tet[TR::variableNewArray] = TR::TreeEvaluator::newEvaluator;
   tet[TR::multianewarray] = TR::TreeEvaluator::multianewArrayEvaluator;
   tet[TR::arraylength] = TR::TreeEvaluator::arraylengthEvaluator;

   tet[TR::lookup] = TR::TreeEvaluator::lookupEvaluator;
   tet[TR::exceptionRangeFence] = TR::TreeEvaluator::exceptionRangeFenceEvaluator;

   // Runtime checks
   tet[TR::NULLCHK] = TR::TreeEvaluator::NULLCHKEvaluator;
   tet[TR::ZEROCHK] = TR::TreeEvaluator::ZEROCHKEvaluator;
   tet[TR::ResolveCHK] = TR::TreeEvaluator::resolveCHKEvaluator;
   tet[TR::ResolveAndNULLCHK] = TR::TreeEvaluator::resolveAndNULLCHKEvaluator;
   tet[TR::DIVCHK] = TR::TreeEvaluator::DIVCHKEvaluator;
   tet[TR::BNDCHK] = TR::TreeEvaluator::BNDCHKEvaluator;
   tet[TR::ArrayCopyBNDCHK] = TR::TreeEvaluator::ArrayCopyBNDCHKEvaluator;
   tet[TR::BNDCHKwithSpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
   tet[TR::SpineCHK] = TR::TreeEvaluator::BNDCHKwithSpineCHKEvaluator;
   tet[TR::ArrayStoreCHK] = TR::TreeEvaluator::ArrayStoreCHKEvaluator;
   tet[TR::ArrayCHK] = TR::TreeEvaluator::ArrayCHKEvaluator;

   // Method entry/exit hooks and memory fences
   tet[TR::MethodEnterHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
   tet[TR::MethodExitHook] = TR::TreeEvaluator::conditionalHelperEvaluator;
   tet[TR::allocationFence] = TR::TreeEvaluator::NOPEvaluator;
   tet[TR::loadFence] = TR::TreeEvaluator::barrierFenceEvaluator;
   tet[TR::storeFence] = TR::TreeEvaluator::barrierFenceEvaluator;
   tet[TR::fullFence] = TR::TreeEvaluator::barrierFenceEvaluator;

   // Bit-manipulation intrinsics
   tet[TR::ihbit] = TR::TreeEvaluator::integerHighestOneBit;
   tet[TR::ilbit] = TR::TreeEvaluator::integerLowestOneBit;
   tet[TR::inolz] = TR::TreeEvaluator::integerNumberOfLeadingZeros;
   tet[TR::inotz] = TR::TreeEvaluator::integerNumberOfTrailingZeros;
   tet[TR::ipopcnt] = TR::TreeEvaluator::integerBitCount;
   tet[TR::lhbit] = TR::TreeEvaluator::longHighestOneBit;
   tet[TR::llbit] = TR::TreeEvaluator::longLowestOneBit;
   tet[TR::lnolz] = TR::TreeEvaluator::longNumberOfLeadingZeros;
   tet[TR::lnotz] = TR::TreeEvaluator::longNumberOfTrailingZeros;
   tet[TR::lpopcnt] = TR::TreeEvaluator::longBitCount;

   // Transactional-memory opcodes
   tet[TR::tstart] = TR::TreeEvaluator::tstartEvaluator;
   tet[TR::tfinish] = TR::TreeEvaluator::tfinishEvaluator;
   tet[TR::tabort] = TR::TreeEvaluator::tabortEvaluator;

#if defined(TR_TARGET_32BIT)
   // 32-bit overrides: long conversions and 64-bit div/rem need
   // register-pair/helper-based implementations on IA32.
   tet[TR::f2l] = TR::TreeEvaluator::f2lEvaluator;
   tet[TR::f2lu] = TR::TreeEvaluator::f2lEvaluator;
   tet[TR::d2l] = TR::TreeEvaluator::d2lEvaluator;
   tet[TR::d2lu] = TR::TreeEvaluator::d2lEvaluator;
   tet[TR::ldiv] = TR::TreeEvaluator::integerPairDivEvaluator;
   tet[TR::lrem] = TR::TreeEvaluator::integerPairRemEvaluator;
#endif
   }
928
929
930
// Emits the monitor-lookup-cache sequence shared by VMmonentEvaluator and
// VMmonexitEvaluator.  The caller has already compared the object's lockword
// offset; this code is reached on the "no inline lockword" outcome (the JLE
// branches below consume that condition).
//
// When TR_EnableMonitorCacheLookup is set, the object address is hashed into
// the per-thread objectMonitorLookupCache; a hit that maps back to this
// object yields the monitor inline, otherwise control transfers to
// snippetLabel (slow-path helper).  Without the option, the JLE goes
// straight to the snippet.
//
// monent                               true for VMmonentEvaluator, false for VMmonexitEvaluator
// monitorLookupCacheLabel              entry of the cache-lookup sequence
// fallThruFromMonitorLookupCacheLabel  merge point after a successful lookup
// snippetLabel                         slow-path target on cache miss/mismatch
// numDeps                              incremented when lookupOffsetReg is allocated here
// lwOffset                             forced to 0 for monent when the cache path is generated
// objectClassReg                       scratch; on a cache hit it ends up holding
//                                      (cached monitor + alternateLockword offset) - object
//                                      address, presumably so the caller can address the
//                                      lockword as [objectReg + objectClassReg] -- confirm
//                                      against the callers
// lookupOffsetReg                      out-param: scratch register allocated on the cache path
// vmThreadReg                          J9VMThread register
// objectReg                            the object being locked/unlocked
static void generateCommonLockNurseryCodes(TR::Node *node,
                   TR::CodeGenerator *cg,
                   bool monent, //true for VMmonentEvaluator, false for VMmonexitEvaluator
                   TR::LabelSymbol *monitorLookupCacheLabel,
                   TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel,
                   TR::LabelSymbol *snippetLabel,
                   uint32_t &numDeps,
                   int &lwOffset,
                   TR::Register *objectClassReg,
                   TR::Register *&lookupOffsetReg,
                   TR::Register *vmThreadReg,
                   TR::Register *objectReg
                   )
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   if (comp->getOption(TR_EnableMonitorCacheLookup))
      {
      if (monent) lwOffset = 0;
      generateLabelInstruction(TR::InstOpCode::JLE4, node, monitorLookupCacheLabel, cg);
      generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruFromMonitorLookupCacheLabel, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, monitorLookupCacheLabel, cg);

      lookupOffsetReg = cg->allocateRegister();
      numDeps++;

      int32_t offsetOfMonitorLookupCache = offsetof(J9VMThread, objectMonitorLookupCache);

      //generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(vmThreadReg, offsetOfMonitorLookupCache, cg), cg);
      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, lookupOffsetReg, objectReg, cg);

      // Hash the object address: drop the alignment bits, mask to the cache
      // size, then scale to a reference-field index into the cache array.
      generateRegImmInstruction(TR::InstOpCode::SARRegImm1(comp->target().is64Bit()), node, lookupOffsetReg, trailingZeroes(TR::Compiler->om.getObjectAlignmentInBytes()), cg);

      J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;
      generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, lookupOffsetReg, J9VMTHREAD_OBJECT_MONITOR_CACHE_SIZE - 1, cg);
      generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, lookupOffsetReg, trailingZeroes(TR::Compiler->om.sizeofReferenceField()), cg);
      // Load the cached J9ObjectMonitor* from vmThread->objectMonitorLookupCache[index]
      generateRegMemInstruction((comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::L4RegMem : TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(vmThreadReg, lookupOffsetReg, 0, offsetOfMonitorLookupCache, cg), cg);

      // Empty cache slot -> slow path
      generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, objectClassReg, objectClassReg, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);

      // Follow the cached entry to its underlying monitor and verify that its
      // userData still refers to this object; otherwise it is a stale/foreign
      // entry and we take the slow path.
      int32_t offsetOfMonitor = offsetof(J9ObjectMonitor, monitor);
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lookupOffsetReg, generateX86MemoryReference(objectClassReg, offsetOfMonitor, cg), cg);

      int32_t offsetOfUserData = offsetof(J9ThreadAbstractMonitor, userData);
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lookupOffsetReg, generateX86MemoryReference(lookupOffsetReg, offsetOfUserData, cg), cg);

      generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, lookupOffsetReg, objectReg, cg);
      generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);

      // Cache hit: leave objectClassReg holding the alternate-lockword address
      // rebased against objectReg (add the field offset, subtract the object).
      int32_t offsetOfAlternateLockWord = offsetof(J9ObjectMonitor, alternateLockword);
      //generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lookupOffsetReg, generateX86MemoryReference(objectClassReg, offsetOfAlternateLockWord, cg), cg);
      generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, objectClassReg, offsetOfAlternateLockWord, cg);
      //generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, objectClassReg, lookupOffsetReg, cg);
      generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, objectClassReg, objectReg, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, fallThruFromMonitorLookupCacheLabel, cg);
      }
   else
      generateLabelInstruction(TR::InstOpCode::JLE4, node, snippetLabel, cg);
   }
992
993
#ifdef TR_TARGET_32BIT
994
// IA32 conditional helper: generates an inline compare from the test child
// and a conditional branch to an out-of-line helper-call snippet, merging
// back at reStartLabel.  Returns NULL (no result register).
TR::Register *J9::X86::I386::TreeEvaluator::conditionalHelperEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // used by asynccheck, methodEnterhook, and methodExitHook

   // Decrement the reference count on the constant placeholder parameter to
   // the MethodEnterHook call. An evaluation isn't necessary because the
   // constant value isn't used here.
   //
   if (node->getOpCodeValue() == TR::MethodEnterHook)
      {
      if (node->getSecondChild()->getOpCode().isCall() &&
          node->getSecondChild()->getNumChildren() > 1)
         {
         cg->decReferenceCount(node->getSecondChild()->getFirstChild());
         }
      }

   // The child contains an inline test.
   //
   TR::Node *testNode = node->getFirstChild();
   TR::Node *secondChild = testNode->getSecondChild();
   if (secondChild->getOpCode().isLoadConst() &&
       secondChild->getRegister() == NULL)
      {
      // Constant comparand: compare memory directly against an immediate,
      // using the sign-extended imm8 form when the value fits in [-128,127].
      int32_t value = secondChild->getInt();
      TR::Node *firstChild = testNode->getFirstChild();
      TR::InstOpCode::Mnemonic opCode;
      if (value >= -128 && value <= 127)
         opCode = TR::InstOpCode::CMP4MemImms;
      else
         opCode = TR::InstOpCode::CMP4MemImm4;
      TR::MemoryReference * memRef = generateX86MemoryReference(firstChild, cg);
      generateMemImmInstruction(opCode, node, memRef, value, cg);
      memRef->decNodeReferenceCounts(cg);
      cg->decReferenceCount(secondChild);
      }
   else
      {
      // General case: let the compare analyser pick the operand forms.
      TR_X86CompareAnalyser temp(cg);
      temp.integerCompareAnalyser(testNode, TR::InstOpCode::CMP4RegReg, TR::InstOpCode::CMP4RegMem, TR::InstOpCode::CMP4MemReg);
      }

   // Internal control flow: startLabel .. reStartLabel, with the helper call
   // placed in a snippet reached when the inline test succeeds.
   TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   startLabel->setStartInternalControlFlow();
   reStartLabel->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
   generateLabelInstruction(testNode->getOpCodeValue() == TR::icmpeq ? TR::InstOpCode::JE4 : TR::InstOpCode::JNE4, node, snippetLabel, cg);

   // Two-child form carries the helper call as the second child; otherwise
   // the helper is identified by the node's own symbol reference.
   TR::Snippet *snippet;
   if (node->getNumChildren() == 2)
      snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, reStartLabel, snippetLabel, node->getSecondChild());
   else
      snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, node, reStartLabel, snippetLabel, node->getSymbolReference());

   cg->addSnippet(snippet);

   generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, cg);
   cg->decReferenceCount(testNode);
   return NULL;
   }
1056
#endif
1057
1058
#ifdef TR_TARGET_64BIT
1059
// AMD64 conditional helper: generates an inline compare from the test child
// and either (a) an inline, jumped-around call for method enter/exit hooks,
// or (b) a branch to an out-of-line helper-call snippet.  Returns NULL.
TR::Register *J9::X86::AMD64::TreeEvaluator::conditionalHelperEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // TODO:AMD64: Try to common this with the IA32 version

   // used by asynccheck, methodEnterHook, and methodExitHook

   // The trees for TR::MethodEnterHook are expected to look like one of the following only:
   //
   // (1) Static Method
   //
   //    TR::MethodEnterHook
   //       icmpne
   //          iload eventFlags (VM Thread)
   //          iconst 0
   //       vcall (jitReportMethodEnter)
   //          aconst (RAM method)
   //
   // (2) Virtual Method
   //
   //    TR::MethodEnterHook
   //       icmpne
   //          iload eventFlags (VM Thread)
   //          iconst 0
   //       vcall (jitReportMethodEnter)
   //          aload (receiver parameter)
   //          aconst (RAM method)
   //
   //
   // The tree for TR::MethodExitHook is expected to look like the following:
   //
   //    TR::MethodExitHook
   //       icmpne
   //          iload (MethodExitHook table entry)
   //          iconst 0
   //       vcall (jitReportMethodExit)
   //          aconst (RAM method)
   //

   // The child contains an inline test.
   //
   TR::Node *testNode = node->getFirstChild();
   TR::Node *secondChild = testNode->getSecondChild();
   bool testIs64Bit = TR::TreeEvaluator::getNodeIs64Bit(secondChild, cg);
   bool testIsEQ = testNode->getOpCodeValue() == TR::icmpeq || testNode->getOpCodeValue() == TR::lcmpeq;

   TR::Register *thisReg = NULL;
   TR::Register *ramMethodReg = NULL;

   // The receiver and RAM method parameters must be evaluated outside of the internal control flow region if it is commoned,
   // and their registers added to the post dependency condition on the merge label.
   //
   // The reference counts will be decremented when the call node is evaluated.
   //
   if (node->getOpCodeValue() == TR::MethodEnterHook || node->getOpCodeValue() == TR::MethodExitHook)
      {
      TR::Node *callNode = node->getSecondChild();

      if (callNode->getNumChildren() > 1)
         {
         // Virtual shape: receiver first, RAM method second.
         if (callNode->getFirstChild()->getReferenceCount() > 1)
            thisReg = cg->evaluate(callNode->getFirstChild());

         if (callNode->getSecondChild()->getReferenceCount() > 1)
            ramMethodReg = cg->evaluate(callNode->getSecondChild());
         }
      else
         {
         // Static shape: RAM method is the only child.
         if (callNode->getFirstChild()->getReferenceCount() > 1)
            ramMethodReg = cg->evaluate(callNode->getFirstChild());
         }
      }

   if (secondChild->getOpCode().isLoadConst() &&
       secondChild->getRegister() == NULL &&
       (!testIs64Bit || IS_32BIT_SIGNED(secondChild->getLongInt())))
      {
      // Try to compare memory directly with immediate
      //
      TR::MemoryReference * memRef = generateX86MemoryReference(testNode->getFirstChild(), cg);
      TR::InstOpCode::Mnemonic op;

      if (testIs64Bit)
         {
         int64_t value = secondChild->getLongInt();
         op = IS_8BIT_SIGNED(value) ? TR::InstOpCode::CMP8MemImms : TR::InstOpCode::CMP8MemImm4;
         generateMemImmInstruction(op, node, memRef, value, cg);
         }
      else
         {
         int32_t value = secondChild->getInt();
         op = IS_8BIT_SIGNED(value) ? TR::InstOpCode::CMP4MemImms : TR::InstOpCode::CMP4MemImm4;
         generateMemImmInstruction(op, node, memRef, value, cg);
         }

      memRef->decNodeReferenceCounts(cg);
      cg->decReferenceCount(secondChild);
      }
   else
      {
      // General case: let the compare analyser pick the operand forms.
      TR_X86CompareAnalyser temp(cg);
      temp.integerCompareAnalyser(testNode, TR::InstOpCode::CMPRegReg(testIs64Bit), TR::InstOpCode::CMPRegMem(testIs64Bit), TR::InstOpCode::CMPMemReg(testIs64Bit));
      }

   TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *reStartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *snippetLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   startLabel->setStartInternalControlFlow();
   reStartLabel->setEndInternalControlFlow();

   TR::Instruction *startInstruction = generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   if (node->getOpCodeValue() == TR::MethodEnterHook || node->getOpCodeValue() == TR::MethodExitHook)
      {
      TR::Node *callNode = node->getSecondChild();

      // Generate an inverted jump around the call.  This is necessary because we want to do the call inline rather
      // than through the snippet.
      //
      generateLabelInstruction(testIsEQ ? TR::InstOpCode::JNE4 : TR::InstOpCode::JE4, node, reStartLabel, cg);
      TR::TreeEvaluator::performCall(callNode, false, false, cg);

      // Collect postconditions from the internal control flow region and put
      // them on the restart label to prevent spills in the internal control
      // flow region.
      // TODO:AMD64: This would be a useful general facility to have.
      //
      TR::Machine *machine = cg->machine();
      TR::RegisterDependencyConditions *postConditions = new (cg->trHeapMemory()) TR::RegisterDependencyConditions((uint8_t)0, TR::RealRegister::NumRegisters, cg->trMemory());
      if (thisReg)
         postConditions->addPostCondition(thisReg, TR::RealRegister::NoReg, cg);

      if (ramMethodReg)
         postConditions->addPostCondition(ramMethodReg, TR::RealRegister::NoReg, cg);

      // Walk back from the current append point to the start of the region,
      // merging every instruction's post-dependencies (ignoring assocreg).
      for (TR::Instruction *cursor = cg->getAppendInstruction(); cursor != startInstruction; cursor = cursor->getPrev())
         {
         TR::RegisterDependencyConditions *cursorDeps = cursor->getDependencyConditions();
         if (cursorDeps && cursor->getOpCodeValue() != TR::InstOpCode::assocreg)
            {
            if (debug("traceConditionalHelperEvaluator"))
               {
               diagnostic("conditionalHelperEvaluator: Adding deps from " POINTER_PRINTF_FORMAT "\n", cursor);
               }
            for (int32_t i = 0; i < cursorDeps->getNumPostConditions(); i++)
               {
               TR::RegisterDependency *cursorPostCondition = cursorDeps->getPostConditions()->getRegisterDependency(i);
               postConditions->unionPostCondition(cursorPostCondition->getRegister(), cursorPostCondition->getRealRegister(), cg);
               if (debug("traceConditionalHelperEvaluator"))
                  {
                  TR_Debug *debug = cg->getDebug();
                  diagnostic("conditionalHelperEvaluator: [%s : %s]\n", debug->getName(cursorPostCondition->getRegister()), debug->getName(machine->getRealRegister(cursorPostCondition->getRealRegister())));
                  }
               }
            }
         }
      postConditions->stopAddingPostConditions();

      generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, postConditions, cg);
      }
   else
      {
      // Non-hook (asynccheck) path: branch to a helper-call snippet on the
      // tested condition and merge back at reStartLabel.
      generateLabelInstruction(testIsEQ? TR::InstOpCode::JE4 : TR::InstOpCode::JNE4, node, snippetLabel, cg);

      TR::Snippet *snippet;
      if (node->getNumChildren() == 2)
         snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, reStartLabel, snippetLabel, node->getSecondChild());
      else
         snippet = new (cg->trHeapMemory()) TR::X86HelperCallSnippet(cg, node, reStartLabel, snippetLabel, node->getSymbolReference());

      cg->addSnippet(snippet);
      generateLabelInstruction(TR::InstOpCode::label, node, reStartLabel, cg);
      }

   cg->decReferenceCount(testNode);
   return NULL;
   }
1235
#endif
1236
1237
// Loads an object reference from memory under the Concurrent Scavenger read
// barrier.  Materializes the load address, performs the raw load (marked as
// the implicit exception point), then applies the configured barrier:
//   - readbar_always: unconditionally call the software read-barrier helper
//     (address is passed via vmThread->floatTemp1) and reload.
//   - readbar_range_check: only call the helper when the loaded reference
//     falls inside the evacuate region [evacuateBase, evacuateTop]; the
//     helper call and reload are placed out of line.
// Returns the register holding the (possibly updated) object reference.
TR::Register* J9::X86::TreeEvaluator::performHeapLoadWithReadBarrier(TR::Node* node, TR::CodeGenerator* cg)
   {
#ifndef OMR_GC_CONCURRENT_SCAVENGER
   TR_ASSERT_FATAL(0, "Concurrent Scavenger not supported.");
   return NULL;
#else
   TR::Compilation *comp = cg->comp();
   bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();

   // Materialize the effective address of the load so it can be handed to the
   // read-barrier helper (which may update the referenced slot).
   TR::MemoryReference* sourceMR = generateX86MemoryReference(node, cg);
   TR::Register* address = TR::TreeEvaluator::loadMemory(node, sourceMR, TR_RematerializableLoadEffectiveAddress, false, cg);
   address->setMemRef(sourceMR);
   sourceMR->decNodeReferenceCounts(cg);

   TR::Register* object = cg->allocateRegister();
   TR::Instruction* load = generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);
   cg->setImplicitExceptionPoint(load);

   switch (TR::Compiler->om.readBarrierType())
      {
      case gc_modron_readbar_none:
         TR_ASSERT(false, "This path should only be reached when a read barrier is required.");
         break;
      case gc_modron_readbar_always:
         // Pass the slot address to the helper via floatTemp1, then reload the
         // (possibly forwarded) reference.
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), address, cg);
         generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);
         break;
      case gc_modron_readbar_range_check:
         {
         TR::LabelSymbol* begLabel = generateLabelSymbol(cg);
         TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
         TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);
         begLabel->setStartInternalControlFlow();
         endLabel->setEndInternalControlFlow();

         TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)2, 2, cg);
         deps->addPreCondition(object, TR::RealRegister::NoReg, cg);
         deps->addPreCondition(address, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(object, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(address, TR::RealRegister::NoReg, cg);

         generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);
         // Fast path: object below the evacuate base needs no barrier.
         generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, object, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateBaseAddressOffset(), cg), cg);
         generateLabelInstruction(TR::InstOpCode::JAE4, node, rdbarLabel, cg);
         {
         // Out-of-line: confirm the object is at or below the evacuate top,
         // then invoke the software read barrier and reload.
         TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);
         generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, object, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), cg);
         generateLabelInstruction(TR::InstOpCode::JA4, node, endLabel, cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), address, cg);
         generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);
         generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, object, generateX86MemoryReference(address, 0, cg), cg);
         generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
         og.endOutlinedInstructionSequence();
         }
         generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
         }
         break;
      default:
         TR_ASSERT(false, "Unsupported Read Barrier Type.");
         break;
      }
   cg->stopUsingRegister(address);
   return object;
#endif
   }
1303
1304
// Should only be called for pure TR::awrtbar and TR::awrtbari nodes.
//
// Identifies the destination (owning object) and source children according
// to the node shape, then delegates the store plus GC write barrier to
// VMwrtbarWithStoreEvaluator.  Returns NULL (no result register).
TR::Register *J9::X86::TreeEvaluator::writeBarrierEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::MemoryReference *storeMR = generateX86MemoryReference(node, cg);
   TR::Node *destOwningObject;
   TR::Node *sourceObject;
   TR::Compilation *comp = cg->comp();
   bool usingCompressedPointers = false;
   bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);

   if (node->getOpCodeValue() == TR::awrtbari)
      {
      // Indirect form: child 2 is the owning object, child 1 the value.
      destOwningObject = node->getChild(2);
      sourceObject = node->getSecondChild();
      // Compressed refs: the stored child is the compressed (non-address)
      // value; dig down to the uncompressed reference under the a2l node.
      if (comp->useCompressedPointers() &&
          (node->getSymbolReference()->getSymbol()->getDataType() == TR::Address) &&
          (node->getSecondChild()->getDataType() != TR::Address))
         {
         usingCompressedPointers = true;

         if (useShiftedOffsets)
            {
            while ((sourceObject->getNumChildren() > 0) && (sourceObject->getOpCodeValue() != TR::a2l))
               sourceObject = sourceObject->getFirstChild();
            if (sourceObject->getOpCodeValue() == TR::a2l)
               sourceObject = sourceObject->getFirstChild();
            // this is required so that different registers are
            // allocated for the actual store and translated values
            sourceObject->incReferenceCount();
            }
         }
      }
   else
      {
      // Direct form: child 2 is the owning object, child 1 the value.
      TR_ASSERT((node->getOpCodeValue() == TR::awrtbar), "expecting a TR::wrtbar");
      destOwningObject = node->getSecondChild();
      sourceObject = node->getFirstChild();
      }

   TR_X86ScratchRegisterManager *scratchRegisterManager =
      cg->generateScratchRegisterManager(comp->target().is64Bit() ? 15 : 7);

   TR::TreeEvaluator::VMwrtbarWithStoreEvaluator(
      node,
      storeMR,
      scratchRegisterManager,
      destOwningObject,
      sourceObject,
      (node->getOpCodeValue() == TR::awrtbari) ? true : false,
      cg,
      false);

   if (comp->useAnchors() && (node->getOpCodeValue() == TR::awrtbari))
      node->setStoreAlreadyEvaluated(true);

   if (usingCompressedPointers)
      cg->decReferenceCount(node->getSecondChild());

   return NULL;
   }
1365
1366
1367
// monent: any rematerialisable statics still live must be discarded before
// the monitor is taken; then delegate to the VM-specific monitor-enter
// evaluator.
TR::Register *J9::X86::TreeEvaluator::monentEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   bool discardStatics = cg->enableRematerialisation() && cg->supportsStaticMemoryRematerialization();
   if (discardStatics)
      {
      TR::TreeEvaluator::removeLiveDiscardableStatics(cg);
      }

   return TR::TreeEvaluator::VMmonentEvaluator(node, cg);
   }
1375
1376
// monexit: any rematerialisable statics still live must be discarded before
// the monitor is released; then delegate to the VM-specific monitor-exit
// evaluator.
TR::Register *J9::X86::TreeEvaluator::monexitEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   bool discardStatics = cg->enableRematerialisation() && cg->supportsStaticMemoryRematerialization();
   if (discardStatics)
      {
      TR::TreeEvaluator::removeLiveDiscardableStatics(cg);
      }

   return TR::TreeEvaluator::VMmonexitEvaluator(node, cg);
   }
1384
1385
// Generates the async-message check: compare the per-thread check slot
// against the special async-check constant and, on a match, call the async
// helper from an out-of-line sequence before falling through.  Returns NULL.
TR::Register *J9::X86::TreeEvaluator::asynccheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Generate the test and branch for async message processing.
   //
   TR::Node *compareNode = node->getFirstChild();
   TR::Node *secondChild = compareNode->getSecondChild();
   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
   TR::Compilation *comp = cg->comp();

   if (comp->getOption(TR_RTGCMapCheck))
      {
      TR::TreeEvaluator::asyncGCMapCheckPatching(node, cg, snippetLabel);
      }
   else
      {
      TR_ASSERT_FATAL(secondChild->getOpCode().isLoadConst(), "unrecognized asynccheck test: special async check value is not a constant");

      TR::MemoryReference *mr = generateX86MemoryReference(compareNode->getFirstChild(), cg);
      if ((secondChild->getRegister() != NULL) ||
          (comp->target().is64Bit() && !IS_32BIT_SIGNED(secondChild->getLongInt())))
         {
         // Constant doesn't fit an immediate form (or is already in a
         // register): compare memory against the register.
         TR::Register *valueReg = cg->evaluate(secondChild);
         generateCheckAsyncMessagesInstruction(node, TR::InstOpCode::CMPMemReg(), mr, valueReg, cg);
         }
      else
         {
         // Use the sign-extended imm8 form when the constant fits; the full
         // imm8 range is [-128, 127] (previously 127 was needlessly excluded,
         // forcing the 4-byte immediate encoding for that value).
         int32_t value = secondChild->getInt();
         TR::InstOpCode::Mnemonic op = (value >= -128 && value <= 127) ? TR::InstOpCode::CMPMemImms() : TR::InstOpCode::CMPMemImm4();
         generateCheckAsyncMessagesInstruction(node, op, mr, value, cg);
         }

      mr->decNodeReferenceCounts(cg);
      cg->decReferenceCount(secondChild);
      }

   TR::LabelSymbol *startControlFlowLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *endControlFlowLabel = generateLabelSymbol(cg);

   bool testIsEqual = compareNode->getOpCodeValue() == TR::icmpeq || compareNode->getOpCodeValue() == TR::lcmpeq;

   TR_ASSERT(testIsEqual, "unrecognized asynccheck test: test is not equal");

   startControlFlowLabel->setStartInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startControlFlowLabel, cg);

   generateLabelInstruction(testIsEqual ? TR::InstOpCode::JE4 : TR::InstOpCode::JNE4, node, snippetLabel, cg);

   {
   // Out-of-line: call the async-check helper (with GC map) and rejoin.
   TR_OutlinedInstructionsGenerator og(snippetLabel, node, cg);
   generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)node->getSymbolReference()->getMethodAddress(), node->getSymbolReference(), cg)->setNeedsGCMap(0xFF00FFFF);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, endControlFlowLabel, cg);
   og.endOutlinedInstructionSequence();
   }

   endControlFlowLabel->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, endControlFlowLabel, cg);

   cg->decReferenceCount(compareNode);

   return NULL;
   }
1448
1449
// Handles newObject, newArray, anewArray
1450
//
1451
TR::Register *J9::X86::TreeEvaluator::newEvaluator(TR::Node *node, TR::CodeGenerator *cg)
1452
{
1453
TR::Compilation *comp = cg->comp();
1454
TR::Register *targetRegister = NULL;
1455
1456
if (TR::TreeEvaluator::requireHelperCallValueTypeAllocation(node, cg))
1457
{
1458
TR_OpaqueClassBlock *classInfo;
1459
bool spillFPRegs = comp->canAllocateInlineOnStack(node, classInfo) <= 0;
1460
return TR::TreeEvaluator::performHelperCall(node, NULL, TR::acall, spillFPRegs, cg);
1461
}
1462
1463
targetRegister = TR::TreeEvaluator::VMnewEvaluator(node, cg);
1464
if (!targetRegister)
1465
{
1466
// Inline object allocation wasn't generated, just generate a call to the helper.
1467
// If we know that the class is fully initialized, we don't have to spill
1468
// the FP registers.
1469
//
1470
TR_OpaqueClassBlock *classInfo;
1471
bool spillFPRegs = (comp->canAllocateInlineOnStack(node, classInfo) <= 0);
1472
targetRegister = TR::TreeEvaluator::performHelperCall(node, NULL, TR::acall, spillFPRegs, cg);
1473
}
1474
else if (cg->canEmitBreakOnDFSet())
1475
{
1476
// Check DF flag after inline new
1477
generateBreakOnDFSet(cg);
1478
}
1479
1480
return targetRegister;
1481
}
1482
1483
TR::Register *J9::X86::TreeEvaluator::multianewArrayEvaluator(TR::Node *node, TR::CodeGenerator *cg)
1484
{
1485
TR::Node *firstChild = node->getFirstChild();
1486
TR::Node *secondChild = node->getSecondChild();
1487
TR::Node *thirdChild = node->getThirdChild();
1488
1489
// 2-dimensional MultiANewArray
1490
TR::Compilation *comp = cg->comp();
1491
TR_ASSERT_FATAL(comp->target().is64Bit(), "multianewArrayEvaluator is only supported on 64-bit JVMs!");
1492
TR_J9VMBase *fej9 = static_cast<TR_J9VMBase *>(comp->fe());
1493
1494
TR::Register *dimsPtrReg = NULL;
1495
TR::Register *dimReg = NULL;
1496
TR::Register *classReg = NULL;
1497
TR::Register *firstDimLenReg = NULL;
1498
TR::Register *secondDimLenReg = NULL;
1499
TR::Register *targetReg = NULL;
1500
TR::Register *temp1Reg = NULL;
1501
TR::Register *temp2Reg = NULL;
1502
TR::Register *temp3Reg = NULL;
1503
TR::Register *componentClassReg = NULL;
1504
1505
TR::Register *vmThreadReg = cg->getVMThreadRegister();
1506
targetReg = cg->allocateRegister();
1507
firstDimLenReg = cg->allocateRegister();
1508
secondDimLenReg = cg->allocateRegister();
1509
temp1Reg = cg->allocateRegister();
1510
temp2Reg = cg->allocateRegister();
1511
temp3Reg = cg->allocateRegister();
1512
componentClassReg = cg->allocateRegister();
1513
1514
TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
1515
TR::LabelSymbol *fallThru = generateLabelSymbol(cg);
1516
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
1517
TR::LabelSymbol *nonZeroFirstDimLabel = generateLabelSymbol(cg);
1518
startLabel->setStartInternalControlFlow();
1519
fallThru->setEndInternalControlFlow();
1520
1521
TR::LabelSymbol *oolFailLabel = generateLabelSymbol(cg);
1522
TR::LabelSymbol *oolJumpPoint = generateLabelSymbol(cg);
1523
1524
generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
1525
1526
// Generate the heap allocation, and the snippet that will handle heap overflow.
1527
TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::acall, targetReg, oolFailLabel, fallThru, cg);
1528
cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
1529
1530
dimReg = cg->evaluate(secondChild);
1531
1532
dimsPtrReg = cg->evaluate(firstChild);
1533
1534
classReg = cg->evaluate(thirdChild);
1535
1536
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, secondDimLenReg,
1537
generateX86MemoryReference(dimsPtrReg, 0, cg), cg);
1538
// Load the 32-bit length value as a 64-bit value so that the top half of the register
1539
// can be zeroed out. This will allow us to treat the value as 64-bit when performing
1540
// calculations later on.
1541
generateRegMemInstruction(TR::InstOpCode::MOVSXReg8Mem4, node, firstDimLenReg,
1542
generateX86MemoryReference(dimsPtrReg, 4, cg), cg);
1543
1544
generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, secondDimLenReg, 0, cg);
1545
1546
generateLabelInstruction(TR::InstOpCode::JNE4, node, oolJumpPoint, cg);
1547
// Second Dim length is 0
1548
1549
generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, firstDimLenReg, 0, cg);
1550
generateLabelInstruction(TR::InstOpCode::JNE4, node, nonZeroFirstDimLabel, cg);
1551
1552
// First Dim zero, only allocate 1 zero-length object array
1553
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, targetReg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), cg);
1554
1555
// Take into account alignment requirements for the size of the zero-length array header
1556
int32_t zeroArraySizeAligned = OMR::align(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), TR::Compiler->om.getObjectAlignmentInBytes());
1557
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, temp1Reg, generateX86MemoryReference(targetReg, zeroArraySizeAligned, cg), cg);
1558
1559
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, temp1Reg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);
1560
generateLabelInstruction(TR::InstOpCode::JA4, node, oolJumpPoint, cg);
1561
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), temp1Reg, cg);
1562
1563
// Init class
1564
bool use64BitClasses = comp->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();
1565
generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg), classReg, cg);
1566
1567
// Init size and '0' fields to 0
1568
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg), 0, cg);
1569
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), 0, cg);
1570
1571
generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);
1572
1573
//First dim length not 0
1574
generateLabelInstruction(TR::InstOpCode::label, node, nonZeroFirstDimLabel, cg);
1575
1576
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, componentClassReg,
1577
generateX86MemoryReference(classReg, offsetof(J9ArrayClass, componentType), cg), cg);
1578
1579
int32_t elementSize = TR::Compiler->om.sizeofReferenceField();
1580
1581
uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();
1582
uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;
1583
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, firstDimLenReg, static_cast<int32_t>(maxObjectSizeInElements), cg);
1584
1585
// Must be an unsigned comparison on sizes.
1586
generateLabelInstruction(TR::InstOpCode::JAE4, node, oolJumpPoint, cg);
1587
1588
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp1Reg, firstDimLenReg, cg);
1589
1590
int32_t elementSizeAligned = OMR::align(elementSize, TR::Compiler->om.getObjectAlignmentInBytes());
1591
int32_t alignmentCompensation = (elementSize == elementSizeAligned) ? 0 : elementSizeAligned - 1;
1592
1593
TR_ASSERT_FATAL(elementSize <= 8, "multianewArrayEvaluator - elementSize cannot be greater than 8!");
1594
generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, temp1Reg, TR::MemoryReference::convertMultiplierToStride(elementSize), cg);
1595
generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes()+alignmentCompensation, cg);
1596
1597
if (alignmentCompensation != 0)
1598
{
1599
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, temp1Reg, -elementSizeAligned, cg);
1600
}
1601
1602
TR_ASSERT_FATAL(zeroArraySizeAligned >= 0 && zeroArraySizeAligned <= 127, "discontiguousArrayHeaderSizeInBytes cannot be > 127 for IMulRegRegImms instruction");
1603
generateRegRegImmInstruction(TR::InstOpCode::IMULRegRegImm4(), node, temp2Reg, firstDimLenReg, zeroArraySizeAligned, cg);
1604
1605
// temp2Reg = temp2Reg + temp1Reg
1606
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, temp1Reg, cg);
1607
1608
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, targetReg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), cg);
1609
// temp2Reg = temp2Reg + J9VMThread->heapAlloc
1610
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, targetReg, cg);
1611
1612
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, temp2Reg, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);
1613
generateLabelInstruction(TR::InstOpCode::JA4, node, oolJumpPoint, cg);
1614
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg), temp2Reg, cg);
1615
1616
//init 1st dim array class field
1617
generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(targetReg, TR::Compiler->om.offsetOfObjectVftField(), cg), classReg, cg);
1618
// Init 1st dim array size field
1619
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg), firstDimLenReg, cg);
1620
1621
// temp2 point to end of 1st dim array i.e. start of 2nd dim
1622
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp2Reg, targetReg, cg);
1623
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, temp2Reg, temp1Reg, cg);
1624
// temp1 points to 1st dim array past header
1625
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, temp1Reg, generateX86MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);
1626
1627
//loop start
1628
generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);
1629
// Init 2nd dim element's class
1630
generateMemRegInstruction(TR::InstOpCode::SMemReg(use64BitClasses), node, generateX86MemoryReference(temp2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), componentClassReg, cg);
1631
// Init 2nd dim element's size and '0' fields to 0
1632
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(temp2Reg, fej9->getOffsetOfContiguousArraySizeField(), cg), 0, cg);
1633
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), 0, cg);
1634
// Store 2nd dim element into 1st dim array slot, compress temp2 if needed
1635
if (comp->target().is64Bit() && comp->useCompressedPointers())
1636
{
1637
int32_t shiftAmount = TR::Compiler->om.compressedReferenceShift();
1638
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, temp3Reg, temp2Reg, cg);
1639
if (shiftAmount != 0)
1640
{
1641
generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, temp3Reg, shiftAmount, cg);
1642
}
1643
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(temp1Reg, 0, cg), temp3Reg, cg);
1644
}
1645
else
1646
{
1647
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(temp1Reg, 0, cg), temp2Reg, cg);
1648
}
1649
1650
// Advance cursors temp1 and temp2
1651
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, temp2Reg, zeroArraySizeAligned, cg);
1652
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, temp1Reg, elementSize, cg);
1653
1654
generateRegInstruction(TR::InstOpCode::DEC4Reg, node, firstDimLenReg, cg);
1655
generateLabelInstruction(TR::InstOpCode::JA4, node, loopLabel, cg);
1656
generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);
1657
1658
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 13, cg);
1659
1660
deps->addPostCondition(dimsPtrReg, TR::RealRegister::NoReg, cg);
1661
deps->addPostCondition(dimReg, TR::RealRegister::NoReg, cg);
1662
deps->addPostCondition(classReg, TR::RealRegister::NoReg, cg);
1663
1664
deps->addPostCondition(firstDimLenReg, TR::RealRegister::NoReg, cg);
1665
deps->addPostCondition(secondDimLenReg, TR::RealRegister::NoReg, cg);
1666
deps->addPostCondition(temp1Reg, TR::RealRegister::NoReg, cg);
1667
deps->addPostCondition(temp2Reg, TR::RealRegister::NoReg, cg);
1668
deps->addPostCondition(temp3Reg, TR::RealRegister::NoReg, cg);
1669
deps->addPostCondition(componentClassReg, TR::RealRegister::NoReg, cg);
1670
1671
deps->addPostCondition(targetReg, TR::RealRegister::eax, cg);
1672
deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
1673
1674
TR::Node *callNode = outlinedHelperCall->getCallNode();
1675
TR::Register *reg;
1676
1677
if (callNode->getFirstChild() == node->getFirstChild())
1678
{
1679
reg = callNode->getFirstChild()->getRegister();
1680
if (reg)
1681
deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
1682
}
1683
1684
if (callNode->getSecondChild() == node->getSecondChild())
1685
{
1686
reg = callNode->getSecondChild()->getRegister();
1687
if (reg)
1688
deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
1689
}
1690
1691
if (callNode->getThirdChild() == node->getThirdChild())
1692
{
1693
reg = callNode->getThirdChild()->getRegister();
1694
if (reg)
1695
deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
1696
}
1697
1698
deps->stopAddingConditions();
1699
1700
generateLabelInstruction(TR::InstOpCode::label, node, oolJumpPoint, cg);
1701
generateLabelInstruction(TR::InstOpCode::JMP4, node, oolFailLabel, cg);
1702
1703
generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);
1704
1705
// Copy the newly allocated object into a collected reference register now that it is a valid object.
1706
//
1707
TR::Register *targetReg2 = cg->allocateCollectedReferenceRegister();
1708
TR::RegisterDependencyConditions *deps2 = generateRegisterDependencyConditions(0, 1, cg);
1709
deps2->addPostCondition(targetReg2, TR::RealRegister::eax, cg);
1710
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg2, targetReg, deps2, cg);
1711
cg->stopUsingRegister(targetReg);
1712
targetReg = targetReg2;
1713
1714
cg->stopUsingRegister(firstDimLenReg);
1715
cg->stopUsingRegister(secondDimLenReg);
1716
cg->stopUsingRegister(temp1Reg);
1717
cg->stopUsingRegister(temp2Reg);
1718
cg->stopUsingRegister(temp3Reg);
1719
cg->stopUsingRegister(componentClassReg);
1720
1721
// Decrement use counts on the children
1722
//
1723
cg->decReferenceCount(node->getFirstChild());
1724
cg->decReferenceCount(node->getSecondChild());
1725
cg->decReferenceCount(node->getThirdChild());
1726
1727
node->setRegister(targetReg);
1728
return targetReg;
1729
}
/**
 * \brief Evaluate a TR::arraycopy node.
 *
 * Non-reference arraycopies are delegated to the common OMR evaluator.  For
 * reference arraycopies this evaluator either calls the jitReferenceArrayCopy
 * helper (when an array-store check is still required) or emits an inline
 * REP MOVS copy with forward/backward overlap handling, followed by a batch
 * write barrier on the destination array.
 *
 * \param node the arraycopy node; children are
 *             (0) src object, (1) dst object, (2) src element address,
 *             (3) dst element address, (4) copy size in bytes
 * \param cg   the code generator
 * \return NULL — arraycopy produces no result register
 */
TR::Register *J9::X86::TreeEvaluator::arraycopyEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   if (cg->canEmitBreakOnDFSet())
      generateBreakOnDFSet(cg);

   TR::Compilation *comp = cg->comp();

   // Only reference arraycopies need the J9-specific handling (store checks,
   // read/write barriers); everything else goes to the common OMR evaluator.
   if (!node->isReferenceArrayCopy())
      {
      return OMR::TreeEvaluatorConnector::arraycopyEvaluator(node, cg);
      }

   auto srcObjReg = cg->evaluate(node->getChild(0));
   auto dstObjReg = cg->evaluate(node->getChild(1));
   auto srcReg = cg->evaluate(node->getChild(2));
   auto dstReg = cg->evaluate(node->getChild(3));
   auto sizeReg = cg->evaluate(node->getChild(4));

   // On 64-bit, a 32-bit size child must be zero-extended before it can be
   // used as a 64-bit byte count.
   if (comp->target().is64Bit() && !TR::TreeEvaluator::getNodeIs64Bit(node->getChild(4), cg))
      {
      generateRegRegInstruction(TR::InstOpCode::MOVZXReg8Reg4, node, sizeReg, sizeReg, cg);
      }

   if (!node->isNoArrayStoreCheckArrayCopy())
      {
      // Nothing to optimize, simply call jitReferenceArrayCopy helper
      auto deps = generateRegisterDependencyConditions((uint8_t)3, 3, cg);
      deps->addPreCondition(srcReg, TR::RealRegister::esi, cg);
      deps->addPreCondition(dstReg, TR::RealRegister::edi, cg);
      deps->addPreCondition(sizeReg, TR::RealRegister::ecx, cg);
      deps->addPostCondition(srcReg, TR::RealRegister::esi, cg);
      deps->addPostCondition(dstReg, TR::RealRegister::edi, cg);
      deps->addPostCondition(sizeReg, TR::RealRegister::ecx, cg);

      // The helper picks up the src/dst array objects from the vmThread's
      // floatTemp1/floatTemp2 slots rather than from registers.
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), srcObjReg, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), dstObjReg, cg);
      generateHelperCallInstruction(node, TR_referenceArrayCopy, deps, cg)->setNeedsGCMap(0xFF00FFFF);

      auto snippetLabel = generateLabelSymbol(cg);
      auto instr = generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg); // ReferenceArrayCopy set ZF when succeed.
      auto snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, cg->symRefTab()->findOrCreateRuntimeHelper(TR_arrayStoreException),
                                                                        snippetLabel, instr, false);
      cg->addSnippet(snippet);
      }
   else
      {
      // Inline copy path.  use64BitClasses selects 8- vs 4-byte element moves
      // (REPMOVSQ vs REPMOVSD) below.
      bool use64BitClasses = comp->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();

      // REP MOVS has fixed operands: RSI = source, RDI = destination, RCX = count.
      auto RSI = cg->allocateRegister();
      auto RDI = cg->allocateRegister();
      auto RCX = cg->allocateRegister();

      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RSI, srcReg, cg);
      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RDI, dstReg, cg);
      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, RCX, sizeReg, cg);

      auto deps = generateRegisterDependencyConditions((uint8_t)5, 5, cg);
      deps->addPreCondition(RSI, TR::RealRegister::esi, cg);
      deps->addPreCondition(RDI, TR::RealRegister::edi, cg);
      deps->addPreCondition(RCX, TR::RealRegister::ecx, cg);
      deps->addPreCondition(srcObjReg, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(dstObjReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(RSI, TR::RealRegister::esi, cg);
      deps->addPostCondition(RDI, TR::RealRegister::edi, cg);
      deps->addPostCondition(RCX, TR::RealRegister::ecx, cg);
      deps->addPostCondition(srcObjReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(dstObjReg, TR::RealRegister::NoReg, cg);

      auto begLabel = generateLabelSymbol(cg);
      auto endLabel = generateLabelSymbol(cg);
      begLabel->setStartInternalControlFlow();
      endLabel->setEndInternalControlFlow();

      generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);

      if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
         {
         // NOTE(review): this declaration shadows the outer use64BitClasses and
         // uses a different predicate (useCompressedPointers vs
         // generateCompressedObjectHeaders); here it only selects the operand
         // width of the CMPMemImms below — confirm this is intentional.
         bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();

         TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);
         // EvacuateTopAddress == 0 means Concurrent Scavenge is inactive
         generateMemImmInstruction(TR::InstOpCode::CMPMemImms(use64BitClasses), node, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), 0, cg);
         generateLabelInstruction(TR::InstOpCode::JNE4, node, rdbarLabel, cg);

         // Out-of-line: when concurrent scavenge is active, fall back to the
         // jitReferenceArrayCopy helper (which performs the necessary read
         // barriers) instead of the inline copy.
         TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), srcObjReg, cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), dstObjReg, cg);
         generateHelperCallInstruction(node, TR_referenceArrayCopy, NULL, cg)->setNeedsGCMap(0xFF00FFFF);
         generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
         og.endOutlinedInstructionSequence();
         }
      if (!node->isForwardArrayCopy())
         {
         // Overlap test: if (dst - src) < size (unsigned) the ranges overlap
         // with dst above src, so copy backward to avoid clobbering the source.
         TR::LabelSymbol* backwardLabel = generateLabelSymbol(cg);

         generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, RDI, RSI, cg); // dst = dst - src
         generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, RDI, RCX, cg); // cmp dst, size
         generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RDI, generateX86MemoryReference(RDI, RSI, 0, cg), cg); // dst = dst + src (LEA does not disturb the flags from the CMP)
         generateLabelInstruction(TR::InstOpCode::JB4, node, backwardLabel, cg); // jb, skip backward copy setup

         // Out-of-line backward copy: point RSI/RDI at the last element, set
         // the direction flag, REP MOVS downward, then clear the flag again.
         TR_OutlinedInstructionsGenerator og(backwardLabel, node, cg);
         generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RSI, generateX86MemoryReference(RSI, RCX, 0, -TR::Compiler->om.sizeofReferenceField(), cg), cg);
         generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, RDI, generateX86MemoryReference(RDI, RCX, 0, -TR::Compiler->om.sizeofReferenceField(), cg), cg);
         generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, RCX, use64BitClasses ? 3 : 2, cg); // byte count -> element count
         generateInstruction(TR::InstOpCode::STD, node, cg);
         generateInstruction(use64BitClasses ? TR::InstOpCode::REPMOVSQ : TR::InstOpCode::REPMOVSD, node, cg);
         generateInstruction(TR::InstOpCode::CLD, node, cg);
         generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
         og.endOutlinedInstructionSequence();
         }
      // Mainline forward copy.
      generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, RCX, use64BitClasses ? 3 : 2, cg); // byte count -> element count
      generateInstruction(use64BitClasses ? TR::InstOpCode::REPMOVSQ : TR::InstOpCode::REPMOVSD, node, cg);
      generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);

      cg->stopUsingRegister(RSI);
      cg->stopUsingRegister(RDI);
      cg->stopUsingRegister(RCX);

      // The inline path stored references without individual barriers, so emit
      // a batch write barrier for the destination array.
      TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(node, node->getChild(1), NULL, NULL, cg->generateScratchRegisterManager(), cg);
      }

   for (int32_t i = 0; i < node->getNumChildren(); i++)
      {
      cg->decReferenceCount(node->getChild(i));
      }
   return NULL;
   }
/**
 * \brief Evaluate a TR::arraylength node.
 *
 * Emits a branchless length fetch:
 *    MOV   R, [B + contiguousSize]
 *    TEST  R, R
 *    CMOVE R, [B + discontiguousSize]
 * i.e. the contiguous size field is read first and, when it is zero, the
 * discontiguous size field is conditionally loaded in its place.
 *
 * \param node the arraylength node; first child is the array object
 * \param cg   the code generator
 * \return the register holding the 32-bit array length
 */
TR::Register *J9::X86::TreeEvaluator::arraylengthEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());

   TR::Node *arrayNode = node->getFirstChild();
   TR::Register *baseReg = cg->evaluate(arrayNode);
   TR::Register *resultReg = cg->allocateRegister();

   // Speculatively load the contiguous size field.
   generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, resultReg,
      generateX86MemoryReference(baseReg, fej9->getOffsetOfContiguousArraySizeField(), cg), cg);

   // If it was zero, replace it with the discontiguous size field (CMOVE).
   generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, resultReg, resultReg, cg);
   generateRegMemInstruction(TR::InstOpCode::CMOVE4RegMem, node, resultReg,
      generateX86MemoryReference(baseReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), cg);

   cg->decReferenceCount(arrayNode);
   node->setRegister(resultReg);
   return resultReg;
   }
/**
 * \brief Evaluate an exception-range fence node.
 *
 * Emits a fence pseudo-instruction at the current point in the instruction
 * stream; no machine code result is produced.
 *
 * \param node the fence node
 * \param cg   the code generator
 * \return NULL — a fence produces no result register
 */
TR::Register *J9::X86::TreeEvaluator::exceptionRangeFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   generateFenceInstruction(TR::InstOpCode::fence, node, node, cg);
   return NULL;
   }
/**
 * \brief Common evaluator for NULLCHK and ResolveAndNULLCHK nodes.
 *
 * Chooses one of three strategies:
 *   1. no check at all when the null-check reference is a TR::loadaddr;
 *   2. an implicit check, when the child's own memory access is guaranteed to
 *      fault on a null base because the accessed offset is below
 *      cg->getNumberBytesReadInaccessible() / getNumberBytesWriteInaccessible();
 *   3. an explicit compare/test against null that branches to an out-of-line
 *      check-failure snippet (with resolution first, for the 32-bit
 *      ResolveAndNULLCHK path).
 *
 * Also folds implicit null checks into an immediately following BNDCHK /
 * SpineCHK where the pattern allows it.
 *
 * \param node           the NULLCHK/ResolveAndNULLCHK node
 * \param needResolution true for ResolveAndNULLCHK
 * \param cg             the code generator
 * \return NULL — check nodes produce no result register
 */
TR::Register *J9::X86::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(
      TR::Node *node,
      bool needResolution,
      TR::CodeGenerator *cg)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   static bool disableBranchlessPassThroughNULLCHK = feGetEnv("TR_disableBranchlessPassThroughNULLCHK") != NULL;
   // NOTE:
   //
   // If no code is generated for the null check, just evaluate the
   // child and decrement its use count UNLESS the child is a pass-through node
   // in which case some kind of explicit test or indirect load must be generated
   // to force the null check at this point.
   //
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *reference = NULL;
   TR::Compilation *comp = cg->comp();

   bool usingCompressedPointers = false;

   if (comp->useCompressedPointers() &&
         firstChild->getOpCodeValue() == TR::l2a)
      {
      // pattern match the sequence under the l2a
      // NULLCHK NULLCHK <- node
      // aloadi f l2a
      // aload O ladd
      // lshl
      // i2l
      // iloadi/irdbari f <- firstChild
      // aload O <- reference
      // iconst shftKonst
      // lconst HB
      //
      usingCompressedPointers = true;

      // Walk down to the raw indirect load / read barrier and take its base
      // object as the reference to null-check.
      TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);
      TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);
      while (firstChild->getOpCodeValue() != loadOp && firstChild->getOpCodeValue() != rdbarOp)
         firstChild = firstChild->getFirstChild();
      reference = firstChild->getFirstChild();
      }
   else
      reference = node->getNullCheckReference();

   TR::ILOpCode &opCode = firstChild->getOpCode();

   // Skip the NULLCHK for TR::loadaddr nodes.
   //
   if (reference->getOpCodeValue() == TR::loadaddr)
      {
      if (usingCompressedPointers)
         firstChild = node->getFirstChild();
      cg->evaluate(firstChild);
      cg->decReferenceCount(firstChild);
      return NULL;
      }

   bool needExplicitCheck  = true;
   bool needLateEvaluation = true;

   // Add the explicit check after this instruction
   //
   TR::Instruction *appendTo = 0;

   if (opCode.isLoadVar() || (comp->target().is64Bit() && opCode.getOpCodeValue() == TR::l2i))
      {
      // Loads: the check can be implicit if the loaded offset falls inside the
      // read-protected region at the base of the address space.
      TR::SymbolReference *symRef = NULL;

      if (opCode.getOpCodeValue() == TR::l2i)
         {
         symRef = firstChild->getFirstChild()->getSymbolReference();
         }
      else
         symRef = firstChild->getSymbolReference();

      if (symRef &&
          (symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesReadInaccessible()))
         {
         needExplicitCheck = false;

         // If the child is an arraylength which has been reduced to an iiload,
         // and is only going to be used immediately in a bound check then combine the checks.
         //
         TR::TreeTop *nextTreeTop = cg->getCurrentEvaluationTreeTop()->getNextTreeTop();
         if (firstChild->getReferenceCount() == 2 && nextTreeTop)
            {
            TR::Node *nextTopNode = nextTreeTop->getNode();

            if (nextTopNode)
               {
               if (nextTopNode->getOpCode().isBndCheck() || nextTopNode->getOpCode().isSpineCheck())
                  {
                  bool doIt = false;

                  if (nextTopNode->getOpCodeValue() == TR::SpineCHK)
                     {
                     // Implicit NULLCHKs and SpineCHKs can be merged if the base array
                     // is the same.
                     //
                     if (firstChild->getOpCode().isIndirect() && firstChild->getOpCode().isLoadVar())
                        {
                        if (nextTopNode->getChild(1) == firstChild->getFirstChild())
                           doIt = true;
                        }
                     }
                  else
                     {
                     // BNDCHKwithSpineCHK keeps the array length in child 2;
                     // plain bound checks keep it in child 0.
                     int32_t arrayLengthChildNum = (nextTopNode->getOpCodeValue() == TR::BNDCHKwithSpineCHK) ? 2 : 0;

                     if (nextTopNode->getChild(arrayLengthChildNum) == firstChild)
                        doIt = true;
                     }

                  if (doIt &&
                      performTransformation(comp,
                         "\nMerging NULLCHK [" POINTER_PRINTF_FORMAT "] and BNDCHK/SpineCHK [" POINTER_PRINTF_FORMAT "] of load child [" POINTER_PRINTF_FORMAT "]\n",
                         node, nextTopNode, firstChild))
                     {
                     needLateEvaluation = false;
                     nextTopNode->setHasFoldedImplicitNULLCHK(true);
                     }
                  }
               else if (nextTopNode->getOpCode().isIf() &&
                        nextTopNode->isNonoverriddenGuard() &&
                        nextTopNode->getFirstChild() == firstChild)
                  {
                  needLateEvaluation = false;
                  needExplicitCheck = true;
                  reference->incReferenceCount(); // will be decremented again later
                  }
               }
            }
         }
      else if (firstChild->getReferenceCount() == 1 && !firstChild->getSymbolReference()->isUnresolved())
         {
         // If the child is only used here, we don't need to evaluate it
         // since all we need is the grandchild which will be evaluated by
         // the generation of the explicit check below.
         //
         needLateEvaluation = false;

         // at this point, firstChild is the raw iiload (created by lowerTrees) and
         // reference is the aload of the object. node->getFirstChild is the
         // l2a sequence; as a result, firstChild's refCount will always be 1
         // and node->getFirstChild's refCount will be at least 2 (one under the nullchk
         // and the other under the translate treetop)
         //
         if (usingCompressedPointers && node->getFirstChild()->getReferenceCount() >= 2)
            needLateEvaluation = true;
         }
      }
   else if (opCode.isStore())
      {
      // Stores: implicit check is possible if the stored-to offset lies in the
      // write-protected region.
      TR::SymbolReference *symRef = firstChild->getSymbolReference();
      if (symRef &&
          symRef->getSymbol()->getOffset() + symRef->getOffset() < cg->getNumberBytesWriteInaccessible())
         {
         needExplicitCheck = false;
         }
      }
   else if (opCode.isCall()     &&
            opCode.isIndirect() &&
            cg->getNumberBytesReadInaccessible() > TR::Compiler->om.offsetOfObjectVftField())
      {
      // Indirect calls load the VFT from the receiver, which faults on null.
      needExplicitCheck = false;
      }
   else if (opCode.getOpCodeValue() == TR::monent ||
            opCode.getOpCodeValue() == TR::monexit)
      {
      // The child may generate inline code that provides an implicit null check
      // but we won't know until the child is evaluated.
      //
      reference->incReferenceCount(); // will be decremented again later
      needLateEvaluation = false;
      cg->evaluate(reference);
      appendTo = cg->getAppendInstruction();
      cg->evaluate(firstChild);

      // TODO: this shouldn't be getOffsetOfContiguousArraySizeField
      //
      if (cg->getImplicitExceptionPoint() &&
          cg->getNumberBytesReadInaccessible() > fej9->getOffsetOfContiguousArraySizeField())
         {
         needExplicitCheck = false;
         cg->decReferenceCount(reference);
         }
      }
   else if (!disableBranchlessPassThroughNULLCHK && opCode.getOpCodeValue() == TR::PassThrough
            && !needResolution && cg->getHasResumableTrapHandler())
      {
      // Branchless variant for PassThrough: a 1-byte TEST against [ref] (or
      // [ref << shift] for compressed references) traps on null via the
      // resumable trap handler instead of an explicit compare-and-branch.
      TR::Register *refRegister = cg->evaluate(firstChild);
      needLateEvaluation = false;

      if (refRegister)
         {
         if (!appendTo)
            appendTo = cg->getAppendInstruction();
         if (cg->getNumberBytesReadInaccessible() > 0)
            {
            needExplicitCheck = false;
            TR::MemoryReference *memRef = NULL;
            if (TR::Compiler->om.compressedReferenceShift() > 0
                && firstChild->getType() == TR::Address
                && firstChild->getOpCode().hasSymbolReference()
                && firstChild->getSymbol()->isCollectedReference())
               {
               memRef = generateX86MemoryReference(NULL, refRegister, TR::Compiler->om.compressedReferenceShift(), 0, cg);
               }
            else
               {
               memRef = generateX86MemoryReference(refRegister, 0, cg);
               }
            appendTo = generateMemImmInstruction(appendTo, TR::InstOpCode::TEST1MemImm1, memRef, 0, cg);
            cg->setImplicitExceptionPoint(appendTo);
            }
         }
      }

   // Generate the code for the null check.
   //
   if (needExplicitCheck)
      {
      // TODO - If a resolve check is needed as well, the resolve must be done
      // before the null check, so that exceptions are handled in the correct
      // order.
      //
      ///// if (needResolution)
      ///// {
      ///// ...
      ///// }

      // Avoid loading the grandchild into a register if it is not going to be used again.
      //
      if (opCode.getOpCodeValue() == TR::PassThrough &&
          reference->getOpCode().isLoadVar() &&
          reference->getRegister() == NULL &&
          reference->getReferenceCount() == 1)
         {
         // Compare the reference directly in memory against null.
         TR::MemoryReference *tempMR = generateX86MemoryReference(reference, cg);

         if (!appendTo)
            appendTo = cg->getAppendInstruction();

         TR::InstOpCode::Mnemonic op = TR::InstOpCode::CMPMemImms();
         appendTo = generateMemImmInstruction(appendTo, op, tempMR, NULLVALUE, cg);
         tempMR->decNodeReferenceCounts(cg);
         needLateEvaluation = false;
         }
      else
         {
         // Reference already (or soon to be) in a register: TEST reg,reg sets
         // ZF when the reference is null.
         TR::Register *targetRegister = cg->evaluate(reference);

         if (!appendTo)
            appendTo = cg->getAppendInstruction();

         appendTo = generateRegRegInstruction(appendTo, TR::InstOpCode::TESTRegReg(), targetRegister, targetRegister, cg);
         }

      TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
      appendTo = generateLabelInstruction(appendTo, TR::InstOpCode::JE4, snippetLabel, cg);
      //the _node field should point to the current node
      appendTo->setNode(node);
      appendTo->setLiveLocals(cg->getLiveLocals());

      TR::Snippet *snippet;
      if (opCode.isCall() || !needResolution || comp->target().is64Bit()) //TODO:AMD64: Implement the "withresolve" version
         {
         snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),
                                                                      snippetLabel, appendTo);
         }
      else
         {
         // 32-bit ResolveAndNULLCHK: pick the resolver helper that matches the
         // kind of symbol being resolved.
         TR_RuntimeHelper resolverCall;
         TR::Machine *machine = cg->machine();
         TR::Symbol * firstChildSym = firstChild->getSymbolReference()->getSymbol();

         if (firstChildSym->isShadow())
            {
            resolverCall = opCode.isStore() ?
               TR_X86interpreterUnresolvedFieldSetterGlue : TR_X86interpreterUnresolvedFieldGlue;
            }
         else if (firstChildSym->isClassObject())
            {
            resolverCall = firstChildSym->addressIsCPIndexOfStatic() ?
               TR_X86interpreterUnresolvedClassFromStaticFieldGlue : TR_X86interpreterUnresolvedClassGlue;
            }
         else if (firstChildSym->isConstString())
            {
            resolverCall = TR_X86interpreterUnresolvedStringGlue;
            }
         else if (firstChildSym->isConstMethodType())
            {
            resolverCall = TR_interpreterUnresolvedMethodTypeGlue;
            }
         else if (firstChildSym->isConstMethodHandle())
            {
            resolverCall = TR_interpreterUnresolvedMethodHandleGlue;
            }
         else if (firstChildSym->isCallSiteTableEntry())
            {
            resolverCall = TR_interpreterUnresolvedCallSiteTableEntryGlue;
            }
         else if (firstChildSym->isMethodTypeTableEntry())
            {
            resolverCall = TR_interpreterUnresolvedMethodTypeTableEntryGlue;
            }
         else
            {
            resolverCall = opCode.isStore() ?
               TR_X86interpreterUnresolvedStaticFieldSetterGlue : TR_X86interpreterUnresolvedStaticFieldGlue;
            }

         snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippetWithResolve(cg, node->getSymbolReference(),
                                                                                 firstChild->getSymbolReference(),
                                                                                 resolverCall,
                                                                                 snippetLabel,
                                                                                 appendTo);

         ((TR::X86CheckFailureSnippetWithResolve *)(snippet))->setNumLiveX87Registers(machine->fpGetNumberOfLiveFPRs());
         ((TR::X86CheckFailureSnippetWithResolve *)(snippet))->setHasLiveXMMRs();
         }

      cg->addSnippet(snippet);
      }

   // If we need to evaluate the child, do so. Otherwise, if we have
   // evaluated the reference node, then decrement its use count.
   // The use count of the child is decremented when we are done
   // evaluating the NULLCHK.
   //
   if (needLateEvaluation)
      {
      cg->evaluate(node->getFirstChild());
      }
   else if (needExplicitCheck)
      {
      cg->decReferenceCount(reference);
      }

   if (comp->useCompressedPointers())
      cg->decReferenceCount(node->getFirstChild());
   else
      cg->decReferenceCount(firstChild);

   // If an explicit check has not been generated for the null check, there is
   // an instruction that will cause a hardware trap if the exception is to be
   // taken. If this method may catch the exception, a GC stack map must be
   // created for this instruction. All registers are valid at this GC point
   // TODO - if the method may not catch the exception we still need to note
   // that the GC point exists, since maps before this point and after it cannot
   // be merged.
   //
   if (!needExplicitCheck)
      {
      TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
      if (faultingInstruction)
         {
         faultingInstruction->setNeedsGCMap(0xFF00FFFF);
         faultingInstruction->setNode(node);
         }
      }

   // The check proved the reference non-null; propagate that fact down the
   // compressed-pointer decompression sequence so later evaluators can use it.
   TR::Node *n = NULL;
   if (comp->useCompressedPointers() &&
       reference->getOpCodeValue() == TR::l2a)
      {
      reference->setIsNonNull(true);
      n = reference->getFirstChild();
      TR::ILOpCodes loadOp = comp->il.opCodeForIndirectLoad(TR::Int32);
      TR::ILOpCodes rdbarOp = comp->il.opCodeForIndirectReadBarrier(TR::Int32);
      while (n->getOpCodeValue() != loadOp && n->getOpCodeValue() != rdbarOp)
         {
         n->setIsNonZero(true);
         n = n->getFirstChild();
         }
      n->setIsNonZero(true);
      }

   reference->setIsNonNull(true);

   return NULL;
   }
// Evaluate a plain NULLCHK node: delegate to the common helper with no
// symbol resolution required before the null test.
TR::Register *J9::X86::TreeEvaluator::NULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   const bool needsResolve = false;
   return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, needsResolve, cg);
   }
2279
2280
// Evaluate a ResolveAndNULLCHK node: delegate to the common helper, asking
// it to resolve the symbol reference before performing the null test.
TR::Register *J9::X86::TreeEvaluator::resolveAndNULLCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   const bool needsResolve = true;
   return TR::TreeEvaluator::evaluateNULLCHKWithPossibleResolve(node, needsResolve, cg);
   }
2284
2285
2286
// Generate explicit checks for division by zero and division
// overflow (i.e. 0x80000000 / 0xFFFFFFFF), if necessary.
//
// When the platform supports implicit divide checks (hardware trap), the
// division is simply evaluated and the faulting instruction is tagged with a
// GC map; otherwise an explicit divisor test and a check-failure snippet are
// emitted around the division.
//
TR::Register *J9::X86::TreeEvaluator::DIVCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   bool hasConversion;
   TR::Node *divisionNode = node->getFirstChild();
   TR::Compilation *comp = cg->comp();

   TR::ILOpCodes op = divisionNode->getOpCodeValue();

   // The divide may be wrapped in an unsigned widening/narrowing conversion;
   // look through it to find the actual division node.
   if (op == TR::iu2l ||
       op == TR::bu2i ||
       op == TR::bu2l ||
       op == TR::bu2s ||
       op == TR::su2i ||
       op == TR::su2l)
      {
      divisionNode = divisionNode->getFirstChild();
      hasConversion = true;
      }
   else
      hasConversion = false;

   bool use64BitRegisters = comp->target().is64Bit() && divisionNode->getOpCode().isLong();
   bool useRegisterPairs = comp->target().is32Bit() && divisionNode->getOpCode().isLong();

   // Not all targets support implicit division checks, so we generate explicit
   // tests and snippets to jump to.
   //
   bool platformNeedsExplicitCheck = !cg->enableImplicitDivideCheck();

   // Only do this for TR::ldiv/TR::lrem and TR::idiv/TR::irem by non-constant
   // divisors, or by a constant of zero.
   // Other constant divisors are optimized in signedIntegerDivOrRemAnalyser,
   // and do not cause hardware exceptions.
   //
   bool operationNeedsCheck = (divisionNode->getOpCode().isInt() &&
      (!divisionNode->getSecondChild()->getOpCode().isLoadConst() || divisionNode->getSecondChild()->getInt() == 0));
   if (use64BitRegisters)
      {
      operationNeedsCheck = operationNeedsCheck |
         ((!divisionNode->getSecondChild()->getOpCode().isLoadConst() || divisionNode->getSecondChild()->getLongInt() == 0));
      }
   else
      {
      // 32-bit long division always needs the explicit check (register pairs).
      operationNeedsCheck = operationNeedsCheck | useRegisterPairs;
      }

   if (platformNeedsExplicitCheck && operationNeedsCheck)
      {
      // Evaluate both operands up front; the dividend register is not used
      // directly here, but evaluation must happen before internal control flow.
      TR::Register *dividendReg = cg->evaluate(divisionNode->getFirstChild());
      TR::Register *divisorReg = cg->evaluate(divisionNode->getSecondChild());

      TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *divisionLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *divideByZeroSnippetLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);

      // These instructions are dissected in the divide check snippet to determine
      // the source registers. If they or their format are changed, you may need to
      // change the snippet(s) also.
      //
      TR::X86RegRegInstruction *lowDivisorTestInstr;
      TR::X86RegRegInstruction *highDivisorTestInstr;

      startLabel->setStartInternalControlFlow();
      restartLabel->setEndInternalControlFlow();

      generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

      if (useRegisterPairs)
         {
         // 32-bit long divisor: OR the low and high halves into a temp so a
         // single TEST detects a 64-bit zero divisor.
         TR::Register *tempReg = cg->allocateRegister(TR_GPR);
         lowDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, tempReg, divisorReg->getLowOrder(), cg);
         highDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::OR4RegReg, node, tempReg, divisorReg->getHighOrder(), cg);
         generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, tempReg, tempReg, cg);
         cg->stopUsingRegister(tempReg);
         }
      else
         lowDivisorTestInstr = generateRegRegInstruction(TR::InstOpCode::TESTRegReg(use64BitRegisters), node, divisorReg, divisorReg, cg);

      // Divisor == 0: jump to the check-failure snippet.
      generateLabelInstruction(TR::InstOpCode::JE4, node, divideByZeroSnippetLabel, cg);

      cg->addSnippet(new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),
                                                                        divideByZeroSnippetLabel,
                                                                        cg->getAppendInstruction()));

      generateLabelInstruction(TR::InstOpCode::label, node, divisionLabel, cg);

      TR::Register *resultRegister = cg->evaluate(divisionNode);

      if (!hasConversion)
         cg->decReferenceCount(divisionNode);

      // We need to make sure that any spilling occurs only after restartLabel,
      // otherwise the divide check snippet may store into the wrong register.
      //
      TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t) 0, 2, cg);
      TR::Register *scratchRegister;

      if (useRegisterPairs)
         {
         deps->addPostCondition(resultRegister->getLowOrder(), TR::RealRegister::eax, cg);
         deps->addPostCondition(resultRegister->getHighOrder(), TR::RealRegister::edx, cg);
         }
      else switch(divisionNode->getOpCodeValue())
         {
         // x86 IDIV leaves the quotient in eax and the remainder in edx; pin
         // the result to the appropriate register and reserve the other half
         // of the pair with a scratch register so it isn't live across the divide.
         case TR::idiv:
         case TR::ldiv:
            deps->addPostCondition(resultRegister, TR::RealRegister::eax, cg);
            scratchRegister = cg->allocateRegister(TR_GPR);
            deps->addPostCondition(scratchRegister, TR::RealRegister::edx, cg);
            cg->stopUsingRegister(scratchRegister);
            break;

         case TR::irem:
         case TR::lrem:
            deps->addPostCondition(resultRegister, TR::RealRegister::edx, cg);
            scratchRegister = cg->allocateRegister(TR_GPR);
            deps->addPostCondition(scratchRegister, TR::RealRegister::eax, cg);
            cg->stopUsingRegister(scratchRegister);
            break;

         default:
            TR_ASSERT(0, "bad division opcode for DIVCHK\n");
         }

      generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, deps, cg);

      if (hasConversion)
         {
         // Evaluate the conversion wrapper that we looked through earlier.
         cg->evaluate(node->getFirstChild());
         cg->decReferenceCount(node->getFirstChild());
         }
      }
   else
      {
      cg->evaluate(node->getFirstChild());
      cg->decReferenceCount(node->getFirstChild());

      // There may be an instruction that will cause a hardware trap if an exception
      // is to be taken.
      // If this method may catch the exception, a GC stack map must be created for
      // this instruction. All registers are valid at this GC point
      //
      // TODO: if the method may not catch the exception we still need to note
      // that the GC point exists, since maps before this point and after it cannot
      // be merged.
      //
      TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
      if (faultingInstruction)
         faultingInstruction->setNeedsGCMap(0xFF00FFFF);
      }

   return NULL;
   }
2443
2444
2445
// Report whether the opcode produces an integer/address value that fits in a
// single general-purpose register on this target: any integral or address
// type on 64-bit targets, but only types of at most 4 bytes on 32-bit targets.
static bool isInteger(TR::ILOpCode &op, TR::CodeGenerator *cg)
   {
   const bool is64BitTarget = cg->comp()->target().is64Bit();
   return op.isIntegerOrAddress() && (is64BitTarget || op.getSize() <= 4);
   }
2452
2453
2454
// Map a boolean-compare IL opcode to the x86 conditional-branch mnemonic that
// jumps when the compare is true (or, with opposite=true, when it is false).
static TR::InstOpCode::Mnemonic branchOpCodeForCompare(TR::ILOpCode &op, bool opposite=false)
   {
   // Build a 4-bit table key:
   //   bit 0 = true-if-less, bit 1 = true-if-greater,
   //   bit 2 = true-if-equal, bit 3 = unsigned compare.
   int32_t key = 0;
   if (op.isCompareTrueIfLess())
      key |= 1;
   if (op.isCompareTrueIfGreater())
      key |= 2;
   if (op.isCompareTrueIfEqual())
      key |= 4;
   if (op.isUnsignedCompare())
      key |= 8;

   // Flipping the three relational bits selects the complementary condition;
   // the signedness bit is preserved.
   if (opposite)
      key ^= 7;

   static const TR::InstOpCode::Mnemonic opTable[] =
      {
      TR::InstOpCode::bad, TR::InstOpCode::JL4, TR::InstOpCode::JG4, TR::InstOpCode::JNE4,
      TR::InstOpCode::JE4, TR::InstOpCode::JLE4, TR::InstOpCode::JGE4, TR::InstOpCode::bad,
      TR::InstOpCode::bad, TR::InstOpCode::JB4, TR::InstOpCode::JA4, TR::InstOpCode::JNE4,
      TR::InstOpCode::JE4, TR::InstOpCode::JBE4, TR::InstOpCode::JAE4, TR::InstOpCode::bad,
      };
   return opTable[key];
   }
2478
2479
2480
// Evaluate a ZEROCHK node: if the first child evaluates to zero/false, branch
// to an outlined helper call built from the node's remaining children.
//
TR::Register *J9::X86::TreeEvaluator::ZEROCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // NOTE: ZEROCHK is intended to be general and straightforward. If you're
   // thinking of adding special code for specific situations in here, consider
   // whether you want to add your own CHK opcode instead. If you feel the
   // need for special handling here, you may also want special handling in the
   // optimizer, in which case a separate opcode may be more suitable.
   //
   // On the other hand, if the improvements you're adding could benefit other
   // users of ZEROCHK, please go ahead and add them!
   //
   // If in doubt, discuss your design with your team lead.

   TR::LabelSymbol *slowPathLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *restartLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   slowPathLabel->setStartInternalControlFlow();
   restartLabel->setEndInternalControlFlow();
   TR::Compilation *comp = cg->comp();

   // Temporarily hide the first child so it doesn't appear in the outlined call
   //
   node->rotateChildren(node->getNumChildren()-1, 0);
   node->setNumChildren(node->getNumChildren()-1);

   // Outlined instructions for check failure
   // Note: we don't pass the restartLabel in here because we don't want a
   // restart branch.
   //
   TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, slowPathLabel, NULL, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   cg->generateDebugCounter(
      outlinedHelperCall->getFirstInstruction(),
      TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
      1, TR::DebugCounter::Cheap);

   // Restore the first child
   //
   node->setNumChildren(node->getNumChildren()+1);
   node->rotateChildren(0, node->getNumChildren()-1);

   // Children other than the first are only for the outlined path; we don't need them here
   //
   for (int32_t i = 1; i < node->getNumChildren(); i++)
      cg->recursivelyDecReferenceCount(node->getChild(i));

   // In-line instructions for the check
   //
   // Fast path: if the checked value is itself an integer compare, emit the
   // compare directly and branch on the opposite condition instead of
   // materializing a 0/1 result and testing it.
   //
   TR::Node *valueToCheck = node->getFirstChild();
   if ( valueToCheck->getOpCode().isBooleanCompare()
      && isInteger(valueToCheck->getChild(0)->getOpCode(), cg)
      && isInteger(valueToCheck->getChild(1)->getOpCode(), cg)
      && performTransformation(comp, "O^O CODEGEN Optimizing ZEROCHK+%s %s\n", valueToCheck->getOpCode().getName(), valueToCheck->getName(cg->getDebug())))
      {
      if (valueToCheck->getOpCode().isCompareForOrder())
         {
         TR::TreeEvaluator::compareIntegersForOrder(valueToCheck, cg);
         }
      else
         {
         TR_ASSERT(valueToCheck->getOpCode().isCompareForEquality(), "Compare opcode must either be compare for order or for equality");
         TR::TreeEvaluator::compareIntegersForEquality(valueToCheck, cg);
         }
      // Branch to the slow path when the compare is FALSE (opposite condition).
      generateLabelInstruction(branchOpCodeForCompare(valueToCheck->getOpCode(), true), node, slowPathLabel, cg);
      }
   else
      {
      // General case: evaluate the value and branch to the slow path if zero.
      TR::Register *value = cg->evaluate(node->getFirstChild());
      generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, value, value, cg);
      cg->decReferenceCount(node->getFirstChild());
      generateLabelInstruction(TR::InstOpCode::JE4, node, slowPathLabel, cg);
      }
   generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, cg);

   return NULL;
   }
2555
2556
2557
// Determine whether the eflags are already set as if the node's two children
// had just been compared with CMP4RegReg, by scanning backwards from the
// current append point. Returns true when a matching compare is found before
// any label (possible branch target) or any flag-clobbering instruction.
// If the matching compare had the operands reversed relative to the node,
// *jumpOnOppositeCondition is set so the caller branches on the inverse
// condition.
//
bool isConditionCodeSetForCompare(TR::Node *node, bool *jumpOnOppositeCondition)
   {
   TR::Compilation *comp = TR::comp();
   // Disable. Need to re-think how we handle overflow cases.
   //
   static char *disableNoCompareEFlags = feGetEnv("TR_disableNoCompareEFlags");
   if (disableNoCompareEFlags)
      return false;

   // See if there is a previous instruction that has set the condition flags
   // properly for this node's register
   //
   TR::Register *firstChildReg = node->getFirstChild()->getRegister();
   TR::Register *secondChildReg = node->getSecondChild()->getRegister();

   // Both children must already be in registers for a prior compare to match.
   if (!firstChildReg || !secondChildReg)
      return false;

   // Find the last instruction that either
   // 1) sets the appropriate condition flags, or
   // 2) modifies the register to be tested
   // (and that hopefully does both)
   //
   TR::Instruction *prevInstr;
   for (prevInstr = comp->cg()->getAppendInstruction();
        prevInstr;
        prevInstr = prevInstr->getPrev())
      {
      if (prevInstr->getOpCodeValue() == TR::InstOpCode::CMP4RegReg)
         {
         TR::Register *prevInstrTargetRegister = prevInstr->getTargetRegister();
         TR::Register *prevInstrSourceRegister = prevInstr->getSourceRegister();

         // Accept the compare if it used the same two registers, in either order.
         if (prevInstrTargetRegister && prevInstrSourceRegister &&
             (((prevInstrSourceRegister == firstChildReg) && (prevInstrTargetRegister == secondChildReg)) ||
              ((prevInstrSourceRegister == secondChildReg) && (prevInstrTargetRegister == firstChildReg))))
            {
            if (performTransformation(comp, "O^O SKIP BOUND CHECK COMPARISON at node %p\n", node))
               {
               // Operands were reversed: the caller must invert the branch.
               if (prevInstrTargetRegister == secondChildReg)
                  *jumpOnOppositeCondition = true;
               return true;
               }
            else
               return false;
            }
         }

      if (prevInstr->getOpCodeValue() == TR::InstOpCode::label)
         {
         // This instruction is a possible branch target.
         return false;
         }

      if (prevInstr->getOpCode().modifiesSomeArithmeticFlags())
         {
         // This instruction overwrites the condition flags.
         return false;
         }
      }

   return false;
   }
2620
2621
2622
// Evaluate a BNDCHK node (first child = array bound, second child = index).
// Emits an unsigned compare and a branch to a check-failure snippet; the
// unsigned condition folds the negative-index case into the out-of-range case.
//
TR::Register *J9::X86::TreeEvaluator::BNDCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();

   // Perform a bound check.
   //
   // Value propagation or profile-directed optimization may have determined
   // that the array bound is a constant, and lowered TR::arraylength into an
   // iconst. In this case, make sure that the constant is the second child.
   //
   TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);
   TR::Instruction *instr;
   TR::Compilation *comp = cg->comp();

   bool skippedComparison = false;
   bool jumpOnOppositeCondition = false;
   if (firstChild->getOpCode().isLoadConst())
      {
      if (secondChild->getOpCode().isLoadConst() && firstChild->getInt() <= secondChild->getInt())
         {
         // Constant bound <= constant index: the check always fails, so
         // branch unconditionally to the failure snippet.
         instr = generateLabelInstruction(TR::InstOpCode::JMP4, node, boundCheckFailureLabel, cg);
         cg->decReferenceCount(firstChild);
         cg->decReferenceCount(secondChild);
         }
      else
         {
         if (!isConditionCodeSetForCompare(node, &jumpOnOppositeCondition))
            {
            // Compare with the constant bound as the second operand, then
            // restore the original child order.
            node->swapChildren();
            TR::TreeEvaluator::compareIntegersForOrder(node, cg);
            node->swapChildren();
            instr = generateLabelInstruction(TR::InstOpCode::JAE4, node, boundCheckFailureLabel, cg);
            }
         else
            skippedComparison = true;
         }
      }
   else
      {
      if (!isConditionCodeSetForCompare(node, &jumpOnOppositeCondition))
         {
         TR::TreeEvaluator::compareIntegersForOrder(node, cg);
         instr = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);
         }
      else
         skippedComparison = true;
      }

   if (skippedComparison)
      {
      // The flags are already set by a previous compare; just emit the branch,
      // inverted if the earlier compare had the operands reversed.
      if (jumpOnOppositeCondition)
         instr = generateLabelInstruction(TR::InstOpCode::JAE4, node, boundCheckFailureLabel, cg);
      else
         instr = generateLabelInstruction(TR::InstOpCode::JBE4, node, boundCheckFailureLabel, cg);

      cg->decReferenceCount(firstChild);
      cg->decReferenceCount(secondChild);
      }

   cg->addSnippet(new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),
                                                                     boundCheckFailureLabel,
                                                                     instr,
                                                                     false
                                                                     ));

   if (node->hasFoldedImplicitNULLCHK())
      {
      // A NULLCHK was folded into this check; the faulting instruction needs a
      // GC map and must report this node.
      TR::Instruction *faultingInstruction = cg->getImplicitExceptionPoint();
      if (comp->getOption(TR_TraceCG))
         {
         traceMsg(comp,"Node %p has foldedimplicitNULLCHK, and a faulting instruction of %p\n",node,faultingInstruction);
         }

      if (faultingInstruction)
         {
         faultingInstruction->setNeedsGCMap(0xFF00FFFF);
         faultingInstruction->setNode(node);
         }
      }

   // Both values are known non-negative once the check passes.
   firstChild->setIsNonNegative(true);
   secondChild->setIsNonNegative(true);

   return NULL;
   }
2708
2709
2710
// Evaluate an ArrayCopyBNDCHK node: throw (via the check-failure snippet)
// unless first child >= second child, using signed comparison.
//
TR::Register *J9::X86::TreeEvaluator::ArrayCopyBNDCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Check that first child >= second child
   //
   // If the first child is a constant and the second isn't, swap the children.
   //
   TR::Node *firstChild = node->getFirstChild();
   TR::Node *secondChild = node->getSecondChild();
   TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);
   TR::Instruction *instr;

   if (firstChild->getOpCode().isLoadConst())
      {
      if (secondChild->getOpCode().isLoadConst())
         {
         // Both operands constant: decide the outcome at compile time.
         if (firstChild->getInt() < secondChild->getInt())
            {
            // Check will always fail, just jump to failure snippet
            //
            instr = generateLabelInstruction(TR::InstOpCode::JMP4, node, boundCheckFailureLabel, cg);
            }
         else
            {
            // Check will always succeed, no need for an instruction
            //
            instr = NULL;
            }
         cg->decReferenceCount(firstChild);
         cg->decReferenceCount(secondChild);
         }
      else
         {
         // Compare with the constant as the second operand (swap, compare,
         // swap back); fail when second > first.
         node->swapChildren();
         TR::TreeEvaluator::compareIntegersForOrder(node, cg);
         node->swapChildren();
         instr = generateLabelInstruction(TR::InstOpCode::JG4, node, boundCheckFailureLabel, cg);
         }
      }
   else
      {
      // Fail when first < second.
      TR::TreeEvaluator::compareIntegersForOrder(node, cg);
      instr = generateLabelInstruction(TR::InstOpCode::JL4, node, boundCheckFailureLabel, cg);
      }

   // Only register the failure snippet when a branch to it was emitted.
   if (instr)
      cg->addSnippet(new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(),
                                                                        boundCheckFailureLabel,
                                                                        instr,
                                                                        false
                                                                        ));

   return NULL;
   }
2763
2764
2765
// Evaluate an ArrayStoreCHK node: perform the array store check for the
// reference store anchored under the check (firstChild), emitting the store,
// the GC write barrier when required, and either inline type checks or a
// speculatively NOPed guard that falls back to an outlined helper call.
//
// Fixes relative to the previous version: removed the unused local
// `flags16_t actions;` (and its orphaned flag-name comment block), dropped the
// unused `pachable` binding (the side-effecting call is kept), and corrected
// comment typos ("vacll" -> "call", "a arestart" -> "a restart").
//
TR::Register *J9::X86::TreeEvaluator::ArrayStoreCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   TR::Instruction *prevInstr = cg->getAppendInstruction();
   TR::LabelSymbol *startLabel,
                   *startOfWrtbarLabel,
                   *doNullStoreLabel,
                   *doneLabel;

   TR::Node *firstChild = node->getFirstChild();
   TR::Node *sourceChild = firstChild->getSecondChild();

   // NOTE(review): cached statically — assumes the realtime-GC setting cannot
   // differ between compilations in the same VM; confirm before changing.
   static bool isRealTimeGC = comp->getOptions()->realTimeGC();
   auto gcMode = TR::Compiler->om.writeBarrierType();

   bool isNonRTWriteBarrierRequired = (gcMode != gc_modron_wrtbar_none && !firstChild->skipWrtBar()) ? true : false;
   bool generateWriteBarrier = isRealTimeGC || isNonRTWriteBarrierRequired;
   // NOP the store check when VP recorded a class that has not been extended
   // and virtual-guard NOPing is enabled.
   bool nopASC = (node->getArrayStoreClassInNode() &&
                  comp->performVirtualGuardNOPing() &&
                  !fej9->classHasBeenExtended(node->getArrayStoreClassInNode())
                 ) ? true : false;

   doneLabel = generateLabelSymbol(cg);
   doneLabel->setEndInternalControlFlow();

   // Without a write barrier, both aliases collapse onto doneLabel.
   doNullStoreLabel = generateWriteBarrier ? generateLabelSymbol(cg) : doneLabel;
   startOfWrtbarLabel = generateWriteBarrier ? generateLabelSymbol(cg) : doNullStoreLabel;

   bool usingCompressedPointers = false;
   bool usingLowMemHeap = false;
   bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);

   if (comp->useCompressedPointers() && firstChild->getOpCode().isIndirect())
      {
      usingLowMemHeap = true;
      usingCompressedPointers = true;

      if (useShiftedOffsets)
         {
         // Walk down to the uncompressed source reference beneath the a2l.
         while ((sourceChild->getNumChildren() > 0) && (sourceChild->getOpCodeValue() != TR::a2l))
            sourceChild = sourceChild->getFirstChild();
         if (sourceChild->getOpCodeValue() == TR::a2l)
            sourceChild = sourceChild->getFirstChild();
         // this is required so that different registers are
         // allocated for the actual store and translated values
         sourceChild->incReferenceCount();
         }
      }

   // -------------------------------------------------------------------------
   //
   // Evaluate all of the children here to avoid issues with internal control
   // flow and outlined instructions.
   //
   // -------------------------------------------------------------------------

   TR::MemoryReference *tempMR = NULL;

   if (generateWriteBarrier)
      {
      tempMR = generateX86MemoryReference(firstChild, cg);
      }

   TR::Node *destinationChild = firstChild->getChild(2);
   TR::Register *destinationRegister = cg->evaluate(destinationChild);
   TR::Register *sourceRegister = cg->evaluate(sourceChild);

   TR_X86ScratchRegisterManager *scratchRegisterManager =
      cg->generateScratchRegisterManager(comp->target().is64Bit() ? 15 : 7);

   TR::Register *compressedRegister = NULL;
   if (usingCompressedPointers)
      {
      if (usingLowMemHeap && !useShiftedOffsets)
         compressedRegister = sourceRegister;
      else
         {
         // valid for useShiftedOffsets
         compressedRegister = cg->evaluate(firstChild->getSecondChild());
         if (!usingLowMemHeap)
            {
            // For a null source, store the (null) source itself rather than
            // the translated value.
            generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), firstChild, sourceRegister, sourceRegister, cg);
            generateRegRegInstruction(TR::InstOpCode::CMOVERegReg(), firstChild, compressedRegister, sourceRegister, cg);
            }
         }
      }

   // -------------------------------------------------------------------------
   //
   // If the source reference is NULL, the array store checks and the write
   // barrier can be bypassed. Generate the NULL store in an outlined sequence.
   // For realtime GC we must still do the barrier. If we are not generating
   // a write barrier then the store will happen inline.
   //
   // -------------------------------------------------------------------------

   startLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, sourceRegister, sourceRegister, cg);

   TR::LabelSymbol *nullTargetLabel =
      isRealTimeGC ? startOfWrtbarLabel : doNullStoreLabel;

   generateLabelInstruction(TR::InstOpCode::JE4, node, nullTargetLabel, cg);

   // -------------------------------------------------------------------------
   //
   // Generate up-front array store checks to avoid calling out to the helper.
   //
   // -------------------------------------------------------------------------

   TR::LabelSymbol *postASCLabel = NULL;
   if (nopASC)
      {
      // Speculatively NOP the array store check if VP is able to prove that the ASC
      // would always succeed given the current state of the class hierarchy.
      //
      TR::Node *helperCallNode = TR::Node::createWithSymRef(TR::call, 2, 2, sourceChild, destinationChild, node->getSymbolReference());
      helperCallNode->copyByteCodeInfo(node);

      TR::LabelSymbol *oolASCLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *restartLabel;

      if (generateWriteBarrier)
         {
         restartLabel = startOfWrtbarLabel;
         }
      else
         {
         restartLabel = postASCLabel = generateLabelSymbol(cg);
         }

      TR_OutlinedInstructions *outlinedASCHelperCall =
         new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL, oolASCLabel, restartLabel, cg);
      cg->getOutlinedInstructionsList().push_front(outlinedASCHelperCall);

      static char *alwaysDoOOLASCc = feGetEnv("TR_doOOLASC");
      static bool alwaysDoOOLASC = alwaysDoOOLASCc ? true : false;

      if (!alwaysDoOOLASC)
         {
         // Emit a patchable NOP guard; if the class is later extended the NOP
         // is patched into a jump to the outlined helper call.
         TR_VirtualGuard *virtualGuard = TR_VirtualGuard::createArrayStoreCheckGuard(comp, node, node->getArrayStoreClassInNode());
         generateVirtualGuardNOPInstruction(node, virtualGuard->addNOPSite(), NULL, oolASCLabel, cg);
         }
      else
         {
         generateLabelInstruction(TR::InstOpCode::JMP4, node, oolASCLabel, cg);
         }

      // Restore the reference counts of the children created for the temporary call node above.
      //
      sourceChild->decReferenceCount();
      destinationChild->decReferenceCount();
      }
   else
      {
      TR::TreeEvaluator::VMarrayStoreCHKEvaluator(
         node,
         sourceChild,
         destinationChild,
         scratchRegisterManager,
         startOfWrtbarLabel,
         prevInstr,
         cg);
      }

   // -------------------------------------------------------------------------
   //
   // Generate write barrier.
   //
   // -------------------------------------------------------------------------

   bool isSourceNonNull = sourceChild->isNonNull();

   if (generateWriteBarrier)
      {
      generateLabelInstruction(TR::InstOpCode::label, node, startOfWrtbarLabel, cg);

      if (!isRealTimeGC)
         {
         // HACK: set the nullness property on the source so that the write barrier
         // doesn't do the same test.
         //
         sourceChild->setIsNonNull(true);
         }

      TR::TreeEvaluator::VMwrtbarWithStoreEvaluator(
         node,
         tempMR,
         scratchRegisterManager,
         destinationChild,
         sourceChild,
         true,
         cg,
         true);
      }
   else if (postASCLabel)
      {
      // Lay down a restart label for OOL ASC if the write barrier was skipped
      //
      generateLabelInstruction(TR::InstOpCode::label, node, postASCLabel, cg);
      }

   // -------------------------------------------------------------------------
   //
   // Either do the bypassed NULL store out of line or the reference store
   // inline if the write barrier was omitted.
   //
   // -------------------------------------------------------------------------

   TR::MemoryReference *tempMR2 = NULL;

   TR::Instruction *dependencyAnchorInstruction = NULL;

   if (!isRealTimeGC)
      {
      if (generateWriteBarrier)
         {
         assert(isNonRTWriteBarrierRequired);
         assert(tempMR);

         // HACK: reset the nullness property on the source.
         //
         sourceChild->setIsNonNull(isSourceNonNull);

         // Perform the NULL store that was bypassed earlier by the write barrier.
         //
         TR_OutlinedInstructionsGenerator og(nullTargetLabel, node, cg);

         tempMR2 = generateX86MemoryReference(*tempMR, 0, cg);

         if (usingCompressedPointers)
            generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, tempMR2, compressedRegister, cg);
         else
            generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, tempMR2, sourceRegister, cg);

         generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);
         og.endOutlinedInstructionSequence();
         }
      else
         {
         // No write barrier emitted. Evaluate the store here.
         //
         assert(!isNonRTWriteBarrierRequired);
         assert(doneLabel == nullTargetLabel);

         // This is where the dependency condition will eventually go.
         //
         dependencyAnchorInstruction = cg->getAppendInstruction();

         tempMR = generateX86MemoryReference(firstChild, cg);

         TR::X86MemRegInstruction *storeInstr;

         if (usingCompressedPointers)
            storeInstr = generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, tempMR, compressedRegister, cg);
         else
            storeInstr = generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, tempMR, sourceRegister, cg);

         cg->setImplicitExceptionPoint(storeInstr);

         if (!usingLowMemHeap || useShiftedOffsets)
            cg->decReferenceCount(sourceChild);
         cg->decReferenceCount(destinationChild);
         tempMR->decNodeReferenceCounts(cg);
         }
      }

   // -------------------------------------------------------------------------
   //
   // Generate outermost register dependencies
   //
   // -------------------------------------------------------------------------

   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(12, 12, cg);
   deps->unionPostCondition(destinationRegister, TR::RealRegister::NoReg, cg);
   deps->unionPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);

   scratchRegisterManager->addScratchRegistersToDependencyList(deps);

   if (usingCompressedPointers && (!usingLowMemHeap || useShiftedOffsets))
      {
      deps->unionPostCondition(compressedRegister, TR::RealRegister::NoReg, cg);
      }

   if (generateWriteBarrier)
      {
      // Memory reference is not live in an internal control flow region.
      //
      if (tempMR->getBaseRegister() && tempMR->getBaseRegister() != destinationRegister)
         {
         deps->unionPostCondition(tempMR->getBaseRegister(), TR::RealRegister::NoReg, cg);
         }

      if (tempMR->getIndexRegister() && tempMR->getIndexRegister() != destinationRegister)
         {
         deps->unionPostCondition(tempMR->getIndexRegister(), TR::RealRegister::NoReg, cg);
         }

      if (comp->target().is64Bit())
         {
         TR::Register *addressRegister = tempMR->getAddressRegister();
         if (addressRegister && addressRegister != destinationRegister)
            {
            deps->unionPostCondition(addressRegister, TR::RealRegister::NoReg, cg);
            }
         }
      }

   if (tempMR2 && comp->target().is64Bit())
      {
      TR::Register *addressRegister = tempMR2->getAddressRegister();
      if (addressRegister && addressRegister != destinationRegister)
         deps->unionPostCondition(addressRegister, TR::RealRegister::NoReg, cg);
      }

   deps->unionPostCondition(
      cg->getVMThreadRegister(),
      (TR::RealRegister::RegNum)cg->getVMThreadRegister()->getAssociation(), cg);

   deps->stopAddingConditions();

   scratchRegisterManager->stopUsingRegisters();

   if (dependencyAnchorInstruction)
      {
      generateLabelInstruction(dependencyAnchorInstruction, TR::InstOpCode::label, doneLabel, deps, cg);
      }
   else
      {
      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);
      }

   if (usingCompressedPointers)
      {
      cg->decReferenceCount(firstChild->getSecondChild());
      cg->decReferenceCount(firstChild);
      }

   if (comp->useAnchors() && firstChild->getOpCode().isIndirect())
      firstChild->setStoreAlreadyEvaluated(true);

   return NULL;
   }
3119
3120
// Evaluate an ArrayCHK node by delegating to the common VM array-check helper.
TR::Register *J9::X86::TreeEvaluator::ArrayCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Register *result = TR::TreeEvaluator::VMarrayCheckEvaluator(node, cg);
   return result;
   }
3124
3125
3126
// Handles both BNDCHKwithSpineCHK and SpineCHK nodes.
//
// For BNDCHKwithSpineCHK the node carries four children:
//    child 0 : the anchored load or store of the array element
//    child 1 : the base array object
//    child 2 : the array length (may have been constant-folded to an iconst)
//    child 3 : the element index
// For SpineCHK only children 0-2 exist and child 2 is the index.
//
// The mainline path performs the bound check (and, for SpineCHK, a test of
// the contiguous-size field against zero to detect a discontiguous array);
// the failure label branches to an outlined arraylet access sequence built
// by generateArrayletReference(). Returns NULL: any result register is set
// on the load/store child itself.
//
TR::Register *J9::X86::TreeEvaluator::BNDCHKwithSpineCHKEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   bool needsBoundCheck = (node->getOpCodeValue() == TR::BNDCHKwithSpineCHK) ? true : false;

   TR::Node *loadOrStoreChild = node->getFirstChild();
   TR::Node *baseArrayChild = node->getSecondChild();
   TR::Node *arrayLengthChild;
   TR::Node *indexChild;

   if (needsBoundCheck)
      {
      arrayLengthChild = node->getChild(2);
      indexChild = node->getChild(3);
      }
   else
      {
      // Pure spine check: there is no length child.
      arrayLengthChild = NULL;
      indexChild = node->getChild(2);
      }

   // Perform a bound check.
   //
   // Value propagation or profile-directed optimization may have determined
   // that the array bound is a constant, and lowered TR::arraylength into an
   // iconst. In this case, make sure that the constant is the second child.
   //
   TR::InstOpCode::Mnemonic branchOpCode;

   // For primitive stores anchored under the check node, we must evaluate the source node
   // before the bound check branch so that its available to the snippet. We can make
   // an exception for constant values that could be folded directly into a immediate
   // store instruction.
   //
   if (loadOrStoreChild->getOpCode().isStore() && loadOrStoreChild->getReferenceCount() <= 1)
      {
      TR::Node *valueChild = loadOrStoreChild->getSecondChild();

      // Floats/doubles and 64-bit constants that do not fit in a signed
      // 32-bit immediate cannot be folded, so force evaluation here.
      if (!valueChild->getOpCode().isLoadConst() ||
          (valueChild->getOpCode().isLoadConst() &&
           ((valueChild->getDataType() == TR::Float) || (valueChild->getDataType() == TR::Double) ||
            (comp->target().is64Bit() && !IS_32BIT_SIGNED(valueChild->getLongInt())))))
         {
         cg->evaluate(valueChild);
         }
      }

   TR::Register *baseArrayReg = cg->evaluate(baseArrayChild);

   // Nodes that could escape to the outlined path must be evaluated up front.
   TR::TreeEvaluator::preEvaluateEscapingNodesForSpineCheck(node, cg);

   // Tracks the instruction that may raise an implicit NullPointerException
   // so a GC map can be attached below if the NULLCHK was folded in.
   TR::Instruction *faultingInstruction = NULL;

   TR::LabelSymbol *boundCheckFailureLabel = generateLabelSymbol(cg);
   TR::X86LabelInstruction *checkInstr = NULL;

   if (needsBoundCheck)
      {
      if (arrayLengthChild->getOpCode().isLoadConst())
         {
         if (indexChild->getOpCode().isLoadConst() && arrayLengthChild->getInt() <= indexChild->getInt())
            {
            // Create real check failure snippet if we can prove the
            // bound check will always fail.
            //
            branchOpCode = TR::InstOpCode::JMP4;
            cg->decReferenceCount(arrayLengthChild);
            cg->decReferenceCount(indexChild);
            }
         else
            {
            TR::DataType dt = loadOrStoreChild->getDataType();
            int32_t elementSize = (dt == TR::Address) ? TR::Compiler->om.sizeofReferenceField()
                                                      : TR::Symbol::convertTypeToSize(dt);

            if (TR::Compiler->om.isDiscontiguousArray(arrayLengthChild->getInt(), elementSize))
               {
               // Create real check failure snippet if we can prove the spine check
               // will always fail
               //
               branchOpCode = TR::InstOpCode::JMP4;
               cg->decReferenceCount(arrayLengthChild);
               if (!indexChild->getOpCode().isLoadConst())
                  {
                  cg->evaluate(indexChild);
                  }
               else
                  {
                  cg->decReferenceCount(indexChild);
                  }
               faultingInstruction = cg->getImplicitExceptionPoint();
               }
            else
               {
               // Check the bounds.
               //
               TR::TreeEvaluator::compareIntegersForOrder(node, indexChild, arrayLengthChild, cg);
               branchOpCode = TR::InstOpCode::JAE4;
               faultingInstruction = cg->getImplicitExceptionPoint();
               }
            }
         }
      else
         {
         // Check the bounds.
         //
         // Unsigned compare: a negative index appears as a huge unsigned
         // value and fails the same JBE test.
         TR::TreeEvaluator::compareIntegersForOrder(node, arrayLengthChild, indexChild, cg);
         branchOpCode = TR::InstOpCode::JBE4;
         faultingInstruction = cg->getImplicitExceptionPoint();
         }

      // Debug hook: force every access down the outlined arraylet path.
      static char *forceArraylet = feGetEnv("TR_forceArraylet");
      if (forceArraylet)
         {
         branchOpCode = TR::InstOpCode::JMP4;
         }

      checkInstr = generateLabelInstruction(branchOpCode, node, boundCheckFailureLabel, cg);
      }
   else
      {
      // -------------------------------------------------------------------------
      // Check if the base array has a spine. If so, process out of line.
      // -------------------------------------------------------------------------

      if (!indexChild->getOpCode().isLoadConst())
         {
         cg->evaluate(indexChild);
         }

      // A discontiguous array stores 0 in its contiguous-size field.
      TR::MemoryReference *arraySizeMR =
         generateX86MemoryReference(baseArrayReg, fej9->getOffsetOfContiguousArraySizeField(), cg);

      generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, arraySizeMR, 0, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, boundCheckFailureLabel, cg);
      }

   // -----------------------------------------------------------------------------------
   // Track all virtual register use within the mainline path. This info will be used
   // to adjust the virtual register use counts within the outlined path for more precise
   // register assignment.
   // -----------------------------------------------------------------------------------

   cg->startRecordingRegisterUsage();

   TR::Register *loadOrStoreReg = NULL;
   TR::Register *valueReg = NULL;

   int32_t indexValue;

   // For reference stores, only evaluate the array element address because the store cannot
   // happen here (it must be done via the array store check).
   //
   // For primitive stores, evaluate them now.
   //
   // For loads, evaluate them now.
   //
   if (loadOrStoreChild->getOpCode().isStore())
      {
      if (loadOrStoreChild->getReferenceCount() > 1)
         {
         TR_ASSERT(loadOrStoreChild->getOpCode().isWrtBar(), "Opcode must be wrtbar");
         loadOrStoreReg = cg->evaluate(loadOrStoreChild->getFirstChild());
         cg->decReferenceCount(loadOrStoreChild->getFirstChild());
         }
      else
         {
         // If the store is not commoned then it must be a primitive store.
         //
         loadOrStoreReg = cg->evaluate(loadOrStoreChild);
         valueReg = loadOrStoreChild->getSecondChild()->getRegister();

         if (!valueReg)
            {
            // If the immediate value was not evaluated then it must have been folded
            // into the instruction.
            //
            TR_ASSERT(loadOrStoreChild->getSecondChild()->getOpCode().isLoadConst(), "unevaluated, non-constant value child");
            TR_ASSERT(IS_32BIT_SIGNED(loadOrStoreChild->getSecondChild()->getInt()), "immediate value too wide for instruction");
            }
         }
      }
   else
      {
      loadOrStoreReg = cg->evaluate(loadOrStoreChild);
      }

   // -----------------------------------------------------------------------------------
   // Stop tracking virtual register usage.
   // -----------------------------------------------------------------------------------

   TR::list<OMR::RegisterUsage*> *mainlineRUL = cg->stopRecordingRegisterUsage();

   TR::Register *indexReg = indexChild->getRegister();

   // Index register must be in a register or a constant.
   //
   TR_ASSERT((indexReg || indexChild->getOpCode().isLoadConst()),
      "index child is not evaluated or constant: indexReg=%p, indexChild=%p", indexReg, indexChild);

   // indexValue == -1 signals "index is in indexReg" to generateArrayletReference.
   if (indexReg)
      {
      indexValue = -1;
      }
   else
      {
      indexValue = indexChild->getInt();
      }

   // TODO: don't always require the VM thread
   //
   TR::RegisterDependencyConditions *deps =
      generateRegisterDependencyConditions((uint8_t) 0, 1, cg);

   deps->addPostCondition(
      cg->getVMThreadRegister(),
      (TR::RealRegister::RegNum)cg->getVMThreadRegister()->getAssociation(), cg);

   deps->stopAddingConditions();

   // Merge point where the outlined arraylet path rejoins the mainline.
   TR::LabelSymbol *mergeLabel = generateLabelSymbol(cg);
   mergeLabel->setInternalControlFlowMerge();
   TR::X86LabelInstruction *restartInstr = generateLabelInstruction(TR::InstOpCode::label, node, mergeLabel, deps, cg);

   // Build the out-of-line arraylet (discontiguous array) access sequence.
   TR_OutlinedInstructions *arrayletOI =
      generateArrayletReference(
         node,
         loadOrStoreChild,
         checkInstr,
         boundCheckFailureLabel,
         mergeLabel,
         baseArrayReg,
         loadOrStoreReg,
         indexReg,
         indexValue,
         valueReg,
         needsBoundCheck,
         cg);

   arrayletOI->setMainlinePathRegisterUsageList(mainlineRUL);

   // If the NULLCHK was folded into this check, the faulting instruction
   // needs a GC map so the VM can walk the stack from the trap site.
   if (node->hasFoldedImplicitNULLCHK())
      {
      if (faultingInstruction)
         {
         faultingInstruction->setNeedsGCMap(0xFF00FFFF);
         faultingInstruction->setNode(node);
         }
      }

   if (arrayLengthChild)
      arrayLengthChild->setIsNonNegative(true);

   indexChild->setIsNonNegative(true);

   cg->decReferenceCount(loadOrStoreChild);
   cg->decReferenceCount(baseArrayChild);

   if (!needsBoundCheck)
      {
      // Spine checks must decrement the reference count on the index explicitly.
      //
      cg->decReferenceCount(indexChild);
      }

   return NULL;

   }
3397
3398
/*
 * This evaluator is used specifically to evaluate the following three nodes:
 *
 *    fullFence
 *    loadFence
 *    storeFence
 *
 * Since the Java specification for loadFence and storeFence is stronger
 * than the Intel specification, a full mfence instruction has to
 * be used for all three of them.
 *
 * Due to the performance penalty of mfence, a faster locked OR on RSP is used;
 * it achieves the same functionality but runs faster.
 */
3412
// Emits the memory barrier required by a fullFence/loadFence/storeFence node.
// A fullFence whose sync was proven removable degenerates to a bare label;
// otherwise either an explicit MFENCE (when TR_X86UseMFENCE is set) or the
// cheaper serializing idiom "lock or [rsp], 0" is emitted.
TR::Register *J9::X86::TreeEvaluator::barrierFenceEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::ILOpCodes fenceOp = node->getOpCodeValue();

   if (fenceOp == TR::fullFence && node->canOmitSync())
      {
      // The fence was proven unnecessary; leave only a label placeholder.
      generateLabelInstruction(TR::InstOpCode::label, node, generateLabelSymbol(cg), cg);
      }
   else if (cg->comp()->getOption(TR_X86UseMFENCE))
      {
      generateInstruction(TR::InstOpCode::MFENCE, node, cg);
      }
   else
      {
      // Locked OR of 0 into the top of stack: serializes like MFENCE but faster.
      TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
      TR::MemoryReference *fenceMR = generateX86MemoryReference(espReal, intptr_t(0), cg);

      fenceMR->setRequiresLockPrefix();
      generateMemImmInstruction(TR::InstOpCode::OR4MemImms, node, fenceMR, 0, cg);
      cg->stopUsingRegister(espReal);
      }
   return NULL;
   }
3434
3435
3436
// Evaluates a readbar node: dereferences a handle's forwarding pointer to
// obtain the real object. When the handle may be NULL and no implicit
// NULLCHK was folded into this node, the dereference is guarded by a
// branch-around so a NULL handle is returned unchanged. The clobberable
// handle register is reused as the result register.
TR::Register *J9::X86::TreeEvaluator::readbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "readbar node should have one child");
   TR::Node *handleNode = node->getChild(0);

   TR::Compilation *comp = cg->comp();

   // The NULL guard is only needed when neither a folded implicit NULLCHK
   // nor a proven-non-null flag covers this dereference.
   bool needBranchAroundForNULL = !node->hasFoldedImplicitNULLCHK() && !node->isNonNull();
   traceMsg(comp, "\nnode %p has folded implicit nullchk: %d\n", node, node->hasFoldedImplicitNULLCHK());
   traceMsg(comp, "node %p is nonnull: %d\n", node, node->isNonNull());
   traceMsg(comp, "node %p needs branchAround: %d\n", node, needBranchAroundForNULL);

   TR::LabelSymbol *startLabel=NULL;
   TR::LabelSymbol *doneLabel=NULL;
   if (needBranchAroundForNULL)
      {
      startLabel = generateLabelSymbol(cg);
      doneLabel = generateLabelSymbol(cg);

      generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
      startLabel->setStartInternalControlFlow();
      }

   // Clobber-evaluate so the load below can overwrite the handle in place.
   TR::Register *handleRegister = cg->intClobberEvaluate(handleNode);

   if (needBranchAroundForNULL)
      {
      // if handle is NULL, then just branch around the redirection
      generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, handleRegister, handleRegister, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, handleNode, doneLabel, cg);
      }

   // handle is not NULL or we're an implicit nullcheck, so go through forwarding pointer to get object
   TR::MemoryReference *handleMR = generateX86MemoryReference(handleRegister, node->getSymbolReference()->getOffset(), cg);
   TR::Instruction *forwardingInstr=generateRegMemInstruction(TR::InstOpCode::L4RegMem, handleNode, handleRegister, handleMR, cg);
   // This load is the instruction that may trap when the NULLCHK is implicit.
   cg->setImplicitExceptionPoint(forwardingInstr);

   if (needBranchAroundForNULL)
      {
      // Keep the handle register alive across the merge point.
      TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t) 0, 1, cg);
      deps->addPostCondition(handleRegister, TR::RealRegister::NoReg, cg);

      // and we're done
      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);

      doneLabel->setEndInternalControlFlow();
      }

   node->setRegister(handleRegister);
   cg->decReferenceCount(handleNode);

   return handleRegister;
   }
3489
3490
// Emits code computing Integer/Long.highestOneBit of 'reg': a value with only
// the highest set bit of the input kept (0 for input 0). Returns a freshly
// allocated register holding the result; the input register is not modified.
static
TR::Register * highestOneBit(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)
   {
   // xor r1, r1        ; r1 = 0
   // bsr r2, reg       ; r2 = index of highest set bit; ZF=1 if reg == 0
   // setne r1          ; r1 = (reg != 0) ? 1 : 0
   // shl r1, r2        ; r1 = 1 << index  (if reg == 0, r1 stays 0 so the
   //                   ;   shift amount - undefined after BSR on 0 - is moot)
   TR::Register *scratchReg = cg->allocateRegister();
   TR::Register *bsrReg = cg->allocateRegister();
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, scratchReg, scratchReg, cg);
   generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);
   generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, scratchReg, cg);
   // SHL-by-CL requires the shift count in ecx; pin bsrReg there.
   TR::RegisterDependencyConditions *shiftDependencies = generateRegisterDependencyConditions((uint8_t)1, 1, cg);
   shiftDependencies->addPreCondition(bsrReg, TR::RealRegister::ecx, cg);
   shiftDependencies->addPostCondition(bsrReg, TR::RealRegister::ecx, cg);
   shiftDependencies->stopAddingConditions();
   generateRegRegInstruction(TR::InstOpCode::SHLRegCL(is64Bit), node, scratchReg, bsrReg, shiftDependencies, cg);
   cg->stopUsingRegister(bsrReg);
   return scratchReg;
   }
3510
3511
// Evaluates an integer highestOneBit node by delegating to the shared
// highestOneBit instruction sequence.
TR::Register *J9::X86::TreeEvaluator::integerHighestOneBit(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   bool is64 = cg->comp()->target().is64Bit();
   TR::Register *result = highestOneBit(node, cg, operandReg, is64);
   cg->decReferenceCount(operand);
   node->setRegister(result);
   return result;
   }
3521
3522
// Evaluates a long highestOneBit node. On 64-bit targets this is a single
// call to the shared helper; on 32-bit targets the high and low words are
// handled separately and the low result is masked out whenever the high
// word is non-zero (the highest bit then lives in the high word).
TR::Register *J9::X86::TreeEvaluator::longHighestOneBit(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *child = node->getFirstChild();
   TR::Register *inputReg = cg->evaluate(child);
   TR::Register *resultReg = NULL;
   if (cg->comp()->target().is64Bit())
      {
      resultReg = highestOneBit(node, cg, inputReg, true);
      }
   else
      {
      //mask out low part result if high part is not 0
      //xor r1 r1
      //cmp inputHigh, 0
      //setne r1            ; r1 = (high != 0) ? 1 : 0
      //dec r1              ; r1 = (high != 0) ? 0 : 0xffffffff
      //and resultLow, r1   ; clear low result when high word had a bit
      //ret resultHigh:resultLow
      TR::Register *inputLow = inputReg->getLowOrder();
      TR::Register *inputHigh = inputReg->getHighOrder();
      TR::Register *maskReg = cg->allocateRegister();
      TR::Register *resultHigh = highestOneBit(node, cg, inputHigh, false);
      TR::Register *resultLow = highestOneBit(node, cg, inputLow, false);
      generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, inputHigh, 0, cg);
      generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);
      generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);
      generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, resultLow, maskReg, cg);
      resultReg = cg->allocateRegisterPair(resultLow, resultHigh);
      cg->stopUsingRegister(maskReg);
      }
   cg->decReferenceCount(child);
   node->setRegister(resultReg);
   return resultReg;
   }
3558
3559
static
3560
TR::Register *lowestOneBit(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)
3561
{
3562
TR::Register *resultReg = cg->allocateRegister();
3563
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(is64Bit), node, resultReg, reg, cg);
3564
generateRegInstruction(TR::InstOpCode::NEGReg(is64Bit), node, resultReg, cg);
3565
generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, resultReg, reg, cg);
3566
return resultReg;
3567
}
3568
3569
// Evaluates an integer lowestOneBit node via the shared x & -x sequence.
TR::Register *J9::X86::TreeEvaluator::integerLowestOneBit(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   bool is64 = cg->comp()->target().is64Bit();
   TR::Register *isolated = lowestOneBit(node, cg, operandReg, is64);
   node->setRegister(isolated);
   cg->decReferenceCount(operand);
   return isolated;
   }
3579
3580
// Evaluates a long lowestOneBit node. On 64-bit targets this is one call to
// the shared helper. On 32-bit targets the high word's contribution is
// masked to zero whenever the low word already produced a bit.
//
// NOTE: the SETNE below consumes the ZF left by the AND instruction emitted
// *inside* the first lowestOneBit() call - no flag-clobbering instruction
// may be inserted between that call and the SETNE.
TR::Register *J9::X86::TreeEvaluator::longLowestOneBit(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *child = node->getFirstChild();
   TR::Register *inputReg = cg->evaluate(child);
   TR::Register *resultReg = NULL;
   if (cg->comp()->target().is64Bit())
      {
      resultReg = lowestOneBit(node, cg, inputReg, true);
      }
   else
      {
      // mask out high part if low part is not 0
      // xor r1, r1
      // get low result          ; AND inside helper sets ZF
      // setne r1                ; r1 = (low result != 0) ? 1 : 0
      // dec r1                  ; r1 = (low result != 0) ? 0 : 0xffffffff
      // and r1, inputHigh       ; keep high word only if low word was 0
      // get high result
      // return resultHigh:resultLow
      TR::Register *inputHigh = inputReg->getHighOrder();
      TR::Register *inputLow = inputReg->getLowOrder();
      TR::Register *scratchReg = cg->allocateRegister();
      generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, scratchReg, scratchReg, cg);
      TR::Register *resultLow = lowestOneBit(node, cg, inputLow, false);
      generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, scratchReg, cg);
      generateRegInstruction(TR::InstOpCode::DEC4Reg, node, scratchReg, cg);
      generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, scratchReg, inputHigh, cg);
      TR::Register *resultHigh = lowestOneBit(node, cg, scratchReg, false);
      cg->stopUsingRegister(scratchReg);
      resultReg = cg->allocateRegisterPair(resultLow, resultHigh);
      }
   node->setRegister(resultReg);
   cg->decReferenceCount(child);
   return resultReg;
   }
3616
3617
3618
// Emits code computing the number of leading zero bits in 'reg'. 'is64Bit'
// selects the operand size of the emitted instructions; 'isLong' selects
// whether the bit width used in the final subtraction is 64 or 32.
// Returns a fresh register holding the count (bit width for input 0).
static
TR::Register *numberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit, bool isLong)
   {
   // xor r1, r1               ; r1 = 0
   // bsr r2, reg              ; r2 = index of highest set bit; ZF=1 if reg == 0
   // sete r1                  ; r1 = (reg == 0) ? 1 : 0
   // dec r1                   ; r1 = (reg == 0) ? 0 : all-ones
   // inc r2                   ; r2 = index + 1
   // and r2, r1               ; r2 = 0 when input was 0
   // mov r1, is64Bit? 64: 32
   // sub r1, r2               ; width - (index + 1) = leading zero count
   // ret r1
   TR::Register *maskReg = cg->allocateRegister();
   TR::Register *bsrReg = cg->allocateRegister();
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, maskReg, maskReg, cg);
   generateRegRegInstruction(TR::InstOpCode::BSRRegReg(is64Bit), node, bsrReg, reg, cg);
   generateRegInstruction(TR::InstOpCode::SETE1Reg, node, maskReg, cg);
   generateRegInstruction(TR::InstOpCode::DECReg(is64Bit), node, maskReg, cg);
   generateRegInstruction(TR::InstOpCode::INCReg(is64Bit), node, bsrReg, cg);
   generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, bsrReg, maskReg, cg);
   generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(is64Bit), node, maskReg, isLong ? 64 : 32, cg);
   generateRegRegInstruction(TR::InstOpCode::SUBRegReg(is64Bit), node, maskReg, bsrReg, cg);
   cg->stopUsingRegister(bsrReg);
   return maskReg;
   }
3643
3644
// Evaluates an integer numberOfLeadingZeros node using 32-bit operations.
TR::Register *J9::X86::TreeEvaluator::integerNumberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   TR::Register *lzReg = numberOfLeadingZeros(node, cg, operandReg, false, false);
   node->setRegister(lzReg);
   cg->decReferenceCount(operand);
   return lzReg;
   }
3654
3655
// Evaluates a long numberOfLeadingZeros node. On 64-bit targets this is a
// single 64-bit count. On 32-bit targets both words are counted separately,
// the low count is masked to zero unless the high word itself was zero
// (i.e. the high count was 32), and the two counts are summed.
TR::Register *J9::X86::TreeEvaluator::longNumberOfLeadingZeros(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node* child = node->getFirstChild();
   TR::Register* inputReg = cg->evaluate(child);
   TR::Register *resultReg = NULL;
   if (cg->comp()->target().is64Bit())
      {
      resultReg = numberOfLeadingZeros(node, cg, inputReg, true, true);
      }
   else
      {
      // keep low part if high part is 0
      // xor r1, r1
      // cmp inputHigh, 0
      // setne r1                 ; r1 = (high != 0) ? 1 : 0
      // dec r1                   ; r1 = (high != 0) ? 0 : all-ones
      // and resultLow, r1        ; discard low count if high word had a bit
      // add resultHigh, resultLow
      // return resultHigh
      TR::Register *inputHigh = inputReg->getHighOrder();
      TR::Register *inputLow = inputReg->getLowOrder();
      TR::Register *resultHigh = numberOfLeadingZeros(node, cg, inputHigh, false, false);
      TR::Register *resultLow = numberOfLeadingZeros(node, cg, inputLow, false, false);
      TR::Register *maskReg = cg->allocateRegister();
      generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, inputHigh, 0, cg);
      generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);
      generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);
      generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, resultLow, maskReg, cg);
      generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultHigh, resultLow, cg);
      cg->stopUsingRegister(resultLow);
      cg->stopUsingRegister(maskReg);
      resultReg = resultHigh;
      }
   node->setRegister(resultReg);
   cg->decReferenceCount(child);
   return resultReg;
   }
3694
3695
// Emits code computing the number of trailing zero bits in 'reg'. 'is64Bit'
// selects the emitted operand size; 'isLong' selects whether the zero-input
// answer is 64 or 32. Returns a fresh register holding the count.
static
TR::Register * numberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit, bool isLong)
   {
   // r1 is shift amount, r3 is the mask
   // xor r1, r1               ; r1 = 0
   // bsf r2, reg              ; r2 = index of lowest set bit; ZF=1 if reg == 0
   // sete r1                  ; r1 = (reg == 0) ? 1 : 0
   // mov r3, r1
   // dec r3                   ; r3 = (reg == 0) ? 0 : all-ones
   // shl r1, is64Bit ? 6 : 5  ; r1 = (reg == 0) ? bit width : 0
   // and r2, r3               ; discard BSF result (undefined) when reg == 0
   // add r2, r1               ; count, or bit width for zero input
   // return r2
   TR::Register *bsfReg = cg->allocateRegister();
   TR::Register *tempReg = cg->allocateRegister();
   TR::Register *maskReg = cg->allocateRegister();
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(is64Bit), node, tempReg, tempReg, cg);
   generateRegRegInstruction(TR::InstOpCode::BSFRegReg(is64Bit), node, bsfReg, reg, cg);
   generateRegInstruction(TR::InstOpCode::SETE1Reg, node, tempReg, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(is64Bit), node, maskReg, tempReg, cg);
   generateRegInstruction(TR::InstOpCode::DECReg(is64Bit), node, maskReg, cg);
   generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(is64Bit), node, tempReg, isLong ? 6 : 5, cg);
   generateRegRegInstruction(TR::InstOpCode::ANDRegReg(is64Bit), node, bsfReg, maskReg, cg);
   generateRegRegInstruction(TR::InstOpCode::ADDRegReg(is64Bit), node, bsfReg, tempReg, cg);
   cg->stopUsingRegister(tempReg);
   cg->stopUsingRegister(maskReg);
   return bsfReg;
   }
3723
3724
// Evaluates an integer numberOfTrailingZeros node via the shared BSF sequence.
TR::Register *J9::X86::TreeEvaluator::integerNumberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   bool is64 = cg->comp()->target().is64Bit();
   TR::Register *tzReg = numberOfTrailingZeros(node, cg, operandReg, is64, false);
   node->setRegister(tzReg);
   cg->decReferenceCount(operand);
   return tzReg;
   }
3734
3735
// Evaluates a long numberOfTrailingZeros node. On 64-bit targets this is a
// single 64-bit count. On 32-bit targets both words are counted; the high
// count contributes only when the low word was all zeros (low count == 32),
// and the contributions are summed into the low count register.
TR::Register *J9::X86::TreeEvaluator::longNumberOfTrailingZeros(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node * child = node->getFirstChild();
   TR::Register * inputReg = cg->evaluate(child);
   TR::Register * resultReg = NULL;
   if (cg->comp()->target().is64Bit())
      {
      resultReg = numberOfTrailingZeros(node, cg, inputReg, true, true);
      }
   else
      {
      // mask out result of high part if low part is not 32
      // xor r1, r1
      // cmp resultLow, 32
      // setne r1              ; r1 = (resultLow != 32) ? 1 : 0
      // dec r1                ; r1 = (resultLow != 32) ? 0 : all-ones
      // and r1, resultHigh    ; high count survives only when low word was 0
      // add resultLow, r1
      // return resultLow
      TR::Register *inputLow = inputReg->getLowOrder();
      TR::Register *inputHigh = inputReg->getHighOrder();
      TR::Register *maskReg = cg->allocateRegister();
      TR::Register *resultLow = numberOfTrailingZeros(node, cg, inputLow, false, false);
      TR::Register *resultHigh = numberOfTrailingZeros(node, cg, inputHigh, false, false);
      generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, maskReg, maskReg, cg);
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, resultLow, 32, cg);
      generateRegInstruction(TR::InstOpCode::SETNE1Reg, node, maskReg, cg);
      generateRegInstruction(TR::InstOpCode::DEC4Reg, node, maskReg, cg);
      generateRegRegInstruction(TR::InstOpCode::AND4RegReg, node, maskReg, resultHigh, cg);
      generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, resultLow, maskReg, cg);
      cg->stopUsingRegister(resultHigh);
      cg->stopUsingRegister(maskReg);
      resultReg = resultLow;
      }
   node->setRegister(resultReg);
   cg->decReferenceCount(child);
   return resultReg;
   }
3774
3775
static
3776
TR::Register *bitCount(TR::Node *node, TR::CodeGenerator *cg, TR::Register *reg, bool is64Bit)
3777
{
3778
TR::Register *bsfReg = cg->allocateRegister();
3779
generateRegRegInstruction(TR::InstOpCode::POPCNTRegReg(is64Bit), node, bsfReg, reg, cg);
3780
return bsfReg;
3781
}
3782
3783
// Evaluates an integer bitCount node with a single POPCNT.
TR::Register *J9::X86::TreeEvaluator::integerBitCount(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   bool is64 = cg->comp()->target().is64Bit();
   TR::Register *countReg = bitCount(node, cg, operandReg, is64);
   node->setRegister(countReg);
   cg->decReferenceCount(operand);
   return countReg;
   }
3793
3794
// Evaluates a long bitCount node. One 64-bit POPCNT on 64-bit targets;
// on 32-bit targets, popcount each half of the register pair and sum them.
TR::Register *J9::X86::TreeEvaluator::longBitCount(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT(node->getNumChildren() == 1, "Node has a wrong number of children (i.e. !=1 )! ");
   TR::Node *operand = node->getFirstChild();
   TR::Register *operandReg = cg->evaluate(operand);
   TR::Register *countReg = NULL;

   if (cg->comp()->target().is64Bit())
      {
      countReg = bitCount(node, cg, operandReg, true);
      }
   else
      {
      // Sum of the two 32-bit population counts.
      TR::Register *lowCount = bitCount(node, cg, operandReg->getLowOrder(), false);
      TR::Register *highCount = bitCount(node, cg, operandReg->getHighOrder(), false);
      generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, lowCount, highCount, cg);
      cg->stopUsingRegister(highCount);
      countReg = lowCount;
      }

   node->setRegister(countReg);
   cg->decReferenceCount(operand);
   return countReg;
   }
3819
3820
inline void generateInlinedCheckCastForDynamicCastClass(TR::Node* node, TR::CodeGenerator* cg)
3821
{
3822
TR::Compilation *comp = cg->comp();
3823
auto use64BitClasses = comp->target().is64Bit() &&
3824
(!TR::Compiler->om.generateCompressedObjectHeaders() ||
3825
(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));
3826
TR::Register *ObjReg = cg->evaluate(node->getFirstChild());
3827
TR::Register *castClassReg = cg->evaluate(node->getSecondChild());
3828
TR::Register *temp1Reg = cg->allocateRegister();
3829
TR::Register *temp2Reg = cg->allocateRegister();
3830
TR::Register *objClassReg = cg->allocateRegister();
3831
3832
bool isCheckCastAndNullCheck = (node->getOpCodeValue() == TR::checkcastAndNULLCHK);
3833
3834
TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
3835
TR::LabelSymbol *fallThruLabel = generateLabelSymbol(cg);
3836
TR::LabelSymbol *outlinedCallLabel = generateLabelSymbol(cg);
3837
TR::LabelSymbol *throwLabel = generateLabelSymbol(cg);
3838
TR::LabelSymbol *isClassLabel = generateLabelSymbol(cg);
3839
TR::LabelSymbol *iTableLoopLabel = generateLabelSymbol(cg);
3840
startLabel->setStartInternalControlFlow();
3841
fallThruLabel->setEndInternalControlFlow();
3842
3843
generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
3844
3845
TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, outlinedCallLabel, fallThruLabel, cg);
3846
cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
3847
3848
// objClassReg holds object class also serves as null check
3849
if (isCheckCastAndNullCheck)
3850
generateLoadJ9Class(node, objClassReg, ObjReg, cg);
3851
3852
// temp2Reg holds romClass of cast class, for testing array, interface class type
3853
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp2Reg, generateX86MemoryReference(castClassReg, offsetof(J9Class, romClass), cg), cg);
3854
3855
// If cast class is array, call out of line helper
3856
generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node,
3857
generateX86MemoryReference(temp2Reg, offsetof(J9ROMClass, modifiers), cg), J9AccClassArray, cg);
3858
generateLabelInstruction(TR::InstOpCode::JNE4, node, outlinedCallLabel, cg);
3859
3860
// objClassReg holds object class
3861
if (!isCheckCastAndNullCheck)
3862
{
3863
generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, ObjReg, ObjReg, cg);
3864
generateLabelInstruction(TR::InstOpCode::JE4, node, fallThruLabel, cg);
3865
generateLoadJ9Class(node, objClassReg, ObjReg, cg);
3866
}
3867
3868
// Object not array, inline checks
3869
// Check cast class is interface
3870
generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node,
3871
generateX86MemoryReference(temp2Reg, offsetof(J9ROMClass, modifiers), cg), J9AccInterface, cg);
3872
generateLabelInstruction(TR::InstOpCode::JE4, node, isClassLabel, cg);
3873
3874
// Obtain I-Table
3875
// temp1Reg holds head of J9Class->iTable of obj class
3876
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, iTable), cg), cg);
3877
// Loop through I-Table
3878
// temp1Reg holds iTable list element through the loop
3879
generateLabelInstruction(TR::InstOpCode::label, node, iTableLoopLabel, cg);
3880
generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, temp1Reg, temp1Reg, cg);
3881
generateLabelInstruction(TR::InstOpCode::JE4, node, throwLabel, cg);
3882
auto interfaceMR = generateX86MemoryReference(temp1Reg, offsetof(J9ITable, interfaceClass), cg);
3883
generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, interfaceMR, castClassReg, cg);
3884
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(temp1Reg, offsetof(J9ITable, next), cg), cg);
3885
generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLoopLabel, cg);
3886
3887
// Found from I-Table
3888
generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruLabel, cg);
3889
3890
// cast class is non-interface class
3891
generateLabelInstruction(TR::InstOpCode::label, node, isClassLabel, cg);
3892
// equality test
3893
generateRegRegInstruction(TR::InstOpCode::CMPRegReg(use64BitClasses), node, objClassReg, castClassReg, cg);
3894
generateLabelInstruction(TR::InstOpCode::JE4, node, fallThruLabel, cg);
3895
3896
// class not equal
3897
// temp2 holds cast class depth
3898
// class depth mask must be low 16 bits to safely load without the mask.
3899
static_assert(J9AccClassDepthMask == 0xffff, "J9_JAVA_CLASS_DEPTH_MASK must be 0xffff");
3900
generateRegMemInstruction(comp->target().is64Bit()? TR::InstOpCode::MOVZXReg8Mem2 : TR::InstOpCode::MOVZXReg4Mem2, node,
3901
temp2Reg, generateX86MemoryReference(castClassReg, offsetof(J9Class, classDepthAndFlags), cg), cg);
3902
3903
// cast class depth >= obj class depth, throw
3904
generateRegMemInstruction(TR::InstOpCode::CMP2RegMem, node, temp2Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, classDepthAndFlags), cg), cg);
3905
generateLabelInstruction(TR::InstOpCode::JAE4, node, throwLabel, cg);
3906
3907
// check obj class's super class array entry
3908
// temp1Reg holds superClasses array of obj class
3909
// An alternative sequences requiring one less register may be:
3910
// SHL temp2Reg, 3 for 64-bit or 2 for 32-bit
3911
// ADD temp2Reg, [temp3Reg, superclasses offset]
3912
// CMP classClassReg, [temp2Reg]
3913
// On 64 bit, the extra reg isn't likely to cause significant register pressure.
3914
// On 32 bit, it could put more register pressure due to limited number of regs.
3915
// Since 64-bit is more prevalent, we opt to optimize for 64bit in this case
3916
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, temp1Reg, generateX86MemoryReference(objClassReg, offsetof(J9Class, superclasses), cg), cg);
3917
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, castClassReg,
3918
generateX86MemoryReference(temp1Reg, temp2Reg, comp->target().is64Bit()?3:2, cg), cg);
3919
generateLabelInstruction(TR::InstOpCode::JNE4, node, throwLabel, cg);
3920
3921
// throw classCastException
3922
{
3923
TR_OutlinedInstructionsGenerator og(throwLabel, node, cg);
3924
generateRegInstruction(TR::InstOpCode::PUSHReg, node, objClassReg, cg);
3925
generateRegInstruction(TR::InstOpCode::PUSHReg, node, castClassReg, cg);
3926
auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);
3927
call->setNeedsGCMap(0xFF00FFFF);
3928
call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));
3929
og.endOutlinedInstructionSequence();
3930
}
3931
3932
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 8, cg);
3933
3934
deps->addPostCondition(ObjReg, TR::RealRegister::NoReg, cg);
3935
deps->addPostCondition(castClassReg, TR::RealRegister::NoReg, cg);
3936
deps->addPostCondition(temp1Reg, TR::RealRegister::NoReg, cg);
3937
deps->addPostCondition(temp2Reg, TR::RealRegister::NoReg, cg);
3938
deps->addPostCondition(objClassReg, TR::RealRegister::NoReg, cg);
3939
3940
TR::Node *callNode = outlinedHelperCall->getCallNode();
3941
TR::Register *reg;
3942
3943
if (callNode->getFirstChild() == node->getFirstChild())
3944
{
3945
reg = callNode->getFirstChild()->getRegister();
3946
if (reg)
3947
deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
3948
}
3949
3950
if (callNode->getSecondChild() == node->getSecondChild())
3951
{
3952
reg = callNode->getSecondChild()->getRegister();
3953
if (reg)
3954
deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
3955
}
3956
3957
deps->stopAddingConditions();
3958
3959
generateLabelInstruction(TR::InstOpCode::label, node, fallThruLabel, deps, cg);
3960
3961
cg->stopUsingRegister(temp1Reg);
3962
cg->stopUsingRegister(temp2Reg);
3963
cg->stopUsingRegister(objClassReg);
3964
3965
// Decrement use counts on the children
3966
//
3967
cg->decReferenceCount(node->getFirstChild());
3968
cg->decReferenceCount(node->getSecondChild());
3969
}
3970
3971
/**
 * @brief Generate an inlined checkcast/instanceof test against a compile-time
 *        known interface class.
 *
 * Fast path: compare the object's J9Class against a per-call-site cache slot
 * (seeded with the best profiled guess class when profiling info is available).
 * Slow path (out of line): walk the object's J9ITable chain looking for
 * \p clazz; on a hit the cache slot is refreshed with the object's class.
 *
 * Result convention: for instanceof (isCheckCast == false) success is reported
 * in CF (STC on success); for checkcast an iTable miss throws
 * ClassCastException via the TR_throwClassCastException helper.
 *
 * @param node        the checkcast/checkcastAndNULLCHK/instanceof/icall node
 * @param clazz       the compile-time known interface cast class
 * @param cg          the code generator
 * @param isCheckCast true for the checkcast flavours, false for instanceof
 */
inline void generateInlinedCheckCastOrInstanceOfForInterface(TR::Node* node, TR_OpaqueClassBlock* clazz, TR::CodeGenerator* cg, bool isCheckCast)
   {
   TR::Compilation *comp = cg->comp();
   TR_ASSERT(clazz && TR::Compiler->cls.isInterfaceClass(comp, clazz), "Not a compile-time known Interface.");

   // Class pointers need full 64-bit comparisons when headers are not
   // compressed, or under AOT with SVM (class address is a relocated value).
   auto use64BitClasses = comp->target().is64Bit() &&
                          (!TR::Compiler->om.generateCompressedObjectHeaders() ||
                           (comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));

   // When running 64 bit compressed refs, if clazz is an address above the 2G boundary then we can't use
   // a push 32bit immediate instruction to pass it on the stack to the jitThrowClassCastException helper
   // as the address gets sign extended. It needs to be stored in a temp register and then push the
   // register to the stack.
   auto highClass = (comp->target().is64Bit() && ((uintptr_t)clazz) > INT_MAX) ? true : false;

   auto j9class = cg->allocateRegister();
   // tmp is only needed to hold the 64-bit clazz constant for compares/pushes.
   auto tmp = (use64BitClasses || highClass) ? cg->allocateRegister() : NULL;

   auto deps = generateRegisterDependencyConditions((uint8_t)2, (uint8_t)2, cg);
   deps->addPreCondition(j9class, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(j9class, TR::RealRegister::NoReg, cg);
   if (tmp)
      {
      deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);
      }
   deps->stopAddingConditions();

   auto begLabel = generateLabelSymbol(cg);
   auto endLabel = generateLabelSymbol(cg);
   begLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   auto iTableLookUpPathLabel = generateLabelSymbol(cg);
   auto iTableLookUpFailLabel = generateLabelSymbol(cg);
   auto iTableLoopLabel = generateLabelSymbol(cg);

   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, j9class, node->getChild(0)->getRegister(), cg);
   generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);

   // Null test; skipped when the object is known non-null or the NULLCHK is
   // fused into the checkcast (checkcastAndNULLCHK dereferences unconditionally).
   if (!node->getChild(0)->isNonNull() && node->getOpCodeValue() != TR::checkcastAndNULLCHK)
      {
      // j9class contains the object at this point, reusing the register as object is no longer used after this point.
      generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, j9class, j9class, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, endLabel, cg);
      }

   // Load J9Class
   generateLoadJ9Class(node, j9class, j9class, cg);

   // Profiled call site cache: pick the last profiled receiver class that is
   // known to satisfy the cast as the initial cache content.
   uintptr_t guessClass = 0;
   if (!comp->compileRelocatableCode())
      {
      TR_OpaqueClassBlock* guessClassArray[NUM_PICS];
      auto num_PICs = TR::TreeEvaluator::interpreterProfilingInstanceOfOrCheckCastInfo(cg, node, guessClassArray);
      auto fej9 = static_cast<TR_J9VMBase *>(comp->fe());
      for (uint8_t i = 0; i < num_PICs; i++)
         {
         if (fej9->instanceOfOrCheckCastNoCacheUpdate((J9Class*)guessClassArray[i], (J9Class*)clazz))
            {
            guessClass = reinterpret_cast<uintptr_t>(guessClassArray[i]);
            }
         }
      }

   // Call site cache: a pointer-sized data snippet, tagged as a class address.
   auto cache = sizeof(J9Class*) == 4 ? cg->create4ByteData(node, (uint32_t)guessClass) : cg->create8ByteData(node, (uint64_t)guessClass);
   cache->setClassAddress(true);
   generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, j9class, generateX86MemoryReference(cache, cg), cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLookUpPathLabel, cg);

   // I-Table lookup (out-of-line cache-miss path)
   {
   TR_OutlinedInstructionsGenerator og(iTableLookUpPathLabel, node, cg);
   auto itable = j9class; // re-use the j9class register to perform itable lookup

   // Preserve the object's class on the stack; it is popped into the cache on
   // success, or back into j9class on failure (instanceof).
   generateRegInstruction(TR::InstOpCode::PUSHReg, node, j9class, cg);

   // Save VFP
   auto vfp = generateVFPSaveInstruction(node, cg);

   // Obtain I-Table
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, itable, generateX86MemoryReference(j9class, offsetof(J9Class, iTable), cg), cg);
   if (tmp)
      {
      generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tmp, (uintptr_t)clazz, cg, TR_ClassAddress);
      }

   // Loop through I-Table until the entry's interfaceClass matches clazz or
   // the chain (next) ends.
   generateLabelInstruction(TR::InstOpCode::label, node, iTableLoopLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, itable, itable, cg);
   generateLabelInstruction(TR::InstOpCode::JE4, node, iTableLookUpFailLabel, cg);
   auto interfaceMR = generateX86MemoryReference(itable, offsetof(J9ITable, interfaceClass), cg);
   if (tmp)
      {
      generateMemRegInstruction(TR::InstOpCode::CMP8MemReg, node, interfaceMR, tmp, cg);
      }
   else
      {
      generateMemImmSymInstruction(TR::InstOpCode::CMP4MemImm4, node, interfaceMR, (uintptr_t)clazz, node->getChild(1)->getSymbolReference(), cg);
      }
   // The load of 'next' does not alter flags, so the JNE below still tests the
   // CMP above.
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, itable, generateX86MemoryReference(itable, offsetof(J9ITable, next), cg), cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, iTableLoopLabel, cg);

   // Found from I-Table: refresh the call-site cache with the object's class.
   generateMemInstruction(TR::InstOpCode::POPMem, node, generateX86MemoryReference(cache, cg), cg); // j9class
   if (!isCheckCast)
      {
      generateInstruction(TR::InstOpCode::STC, node, cg);
      }
   generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);

   // Not found
   generateVFPRestoreInstruction(vfp, node, cg);
   generateLabelInstruction(TR::InstOpCode::label, node, iTableLookUpFailLabel, cg);
   if (isCheckCast)
      {
      // checkcast failure: push cast class (the object's class is already on
      // the stack from the PUSH above) and throw.
      if (tmp)
         {
         generateRegInstruction(TR::InstOpCode::PUSHReg, node, tmp, cg);
         }
      else
         {
         generateImmInstruction(TR::InstOpCode::PUSHImm4, node, (int32_t)(uintptr_t)clazz, cg);
         }
      auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);
      call->setNeedsGCMap(0xFF00FFFF);
      call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));
      }
   else
      {
      // instanceof failure: restore the saved class and fall through with CF
      // clear (TEST above cleared CF).
      generateRegInstruction(TR::InstOpCode::POPReg, node, j9class, cg);
      generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
      }

   og.endOutlinedInstructionSequence();
   }

   // Succeed (cache hit): report success in CF for instanceof.
   if (!isCheckCast)
      {
      generateInstruction(TR::InstOpCode::STC, node, cg);
      }
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);

   cg->stopUsingRegister(j9class);
   if (tmp)
      {
      cg->stopUsingRegister(tmp);
      }
   }
4124
4125
/**
 * @brief Generate an inlined checkcast/instanceof test against a compile-time
 *        known (non-interface, non-array) class.
 *
 * Sequence: optional null test, load the object's J9Class, optional equality
 * test against \p clazz, then (unless clazz is final) a superclass test:
 * compare the cast class's depth against the object class's depth and check
 * superclasses[depth] == clazz.
 *
 * Result convention: for instanceof (isCheckCast == false) success is
 * reported in CF (STC on success, CLC on failure); for checkcast a failed
 * test throws ClassCastException out of line.
 *
 * @param node        the checkcast/checkcastAndNULLCHK/instanceof/icall node
 * @param clazz       the compile-time known cast class
 * @param cg          the code generator
 * @param isCheckCast true for the checkcast flavours, false for instanceof
 */
inline void generateInlinedCheckCastOrInstanceOfForClass(TR::Node* node, TR_OpaqueClassBlock* clazz, TR::CodeGenerator* cg, bool isCheckCast)
   {
   TR::Compilation *comp = cg->comp();
   auto fej9 = (TR_J9VMBase*)(cg->fe());

   bool use64BitClasses = false;
   if (comp->target().is64Bit())
      {
      // When running 64 bit compressed refs, if clazz is an address above the 2G
      // boundary then we can't use a push 32bit immediate instruction to pass it
      // to the helper as the address gets sign extended. So we need to test for
      // this case and switch to the 64bit memory to memory encoding
      // that is used when running 64 bit non-compressed.
      auto highClass = ((uintptr_t)clazz) > INT_MAX;

      use64BitClasses = !TR::Compiler->om.generateCompressedObjectHeaders() ||
                        highClass ||
                        (comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager));
      }

   // 8-byte data snippet holding clazz for memory-operand compares/pushes.
   auto clazzData = use64BitClasses ? cg->create8ByteData(node, (uint64_t)(uintptr_t)clazz) : NULL;
   if (clazzData)
      {
      clazzData->setClassAddress(true);
      }

   auto j9class = cg->allocateRegister();
   auto tmp = cg->allocateRegister();

   // NOTE(review): unlike the interface path, stopAddingConditions() is never
   // called on deps here — presumably acceptable because exactly 2 pre / 2 post
   // conditions are added; confirm against the dependency-conditions contract.
   auto deps = generateRegisterDependencyConditions((uint8_t)2, (uint8_t)2, cg);
   deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(j9class, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(j9class, TR::RealRegister::NoReg, cg);

   auto begLabel = generateLabelSymbol(cg);
   auto endLabel = generateLabelSymbol(cg);
   begLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   // checkcast only needs a distinct fail label (success falls through);
   // instanceof only needs a distinct success label.
   auto successLabel = isCheckCast ? endLabel : generateLabelSymbol(cg);
   auto failLabel = isCheckCast ? generateLabelSymbol(cg) : endLabel;

   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, j9class, node->getChild(0)->getRegister(), cg);
   generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);

   // Null test
   if (!node->getChild(0)->isNonNull() && node->getOpCodeValue() != TR::checkcastAndNULLCHK)
      {
      // j9class contains the object at this point, reusing the register as object is no longer used after this point.
      generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, j9class, j9class, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, endLabel, cg);
      }

   // Load J9Class
   generateLoadJ9Class(node, j9class, j9class, cg);

   // Equality test
   if (!fej9->isAbstractClass(clazz) || node->getOpCodeValue() == TR::icall/*TR_checkAssignable*/)
      {
      // For instanceof and checkcast, LHS is obtained from an instance, which cannot be abstract or interface;
      // therefore, equality test can be safely skipped for instanceof and checkcast when RHS is abstract.
      // However, LHS for TR_checkAssignable may be abstract or interface as it may be an arbitrary class, and
      // hence equality test is always needed.
      if (use64BitClasses)
         {
         generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, j9class, generateX86MemoryReference(clazzData, cg), cg);
         }
      else
         {
         generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, j9class, (uintptr_t)clazz, cg);
         }
      // For a final class equality is the whole test; the JNE below consumes
      // these flags directly.
      if (!fej9->isClassFinal(clazz))
         {
         generateLabelInstruction(TR::InstOpCode::JE4, node, successLabel, cg);
         }
      }
   // at this point, ZF == 1 indicates success

   // Superclass test
   if (!fej9->isClassFinal(clazz))
      {
      auto depth = TR::Compiler->cls.classDepthOf(clazz);
      if (depth >= comp->getOptions()->_minimumSuperclassArraySize)
         {
         // class depth lives in the low 16 bits, so a 2-byte compare needs no mask.
         static_assert(J9AccClassDepthMask == 0xffff, "J9AccClassDepthMask must be 0xffff");
         auto depthMR = generateX86MemoryReference(j9class, offsetof(J9Class, classDepthAndFlags), cg);
         generateMemImmInstruction(TR::InstOpCode::CMP2MemImm2, node, depthMR, depth, cg);
         if (!isCheckCast)
            {
            // Need ensure CF is cleared before reaching to fail label
            auto outlineLabel = generateLabelSymbol(cg);
            generateLabelInstruction(TR::InstOpCode::JBE4, node, outlineLabel, cg);

            TR_OutlinedInstructionsGenerator og(outlineLabel, node, cg);
            generateInstruction(TR::InstOpCode::CLC, node, cg);
            generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);
            og.endOutlinedInstructionSequence();
            }
         else
            {
            generateLabelInstruction(TR::InstOpCode::JBE4, node, failLabel, cg);
            }
         }

      // Check obj class's superclasses[depth] entry against clazz.
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tmp, generateX86MemoryReference(j9class, offsetof(J9Class, superclasses), cg), cg);
      auto offset = depth * sizeof(J9Class*);
      TR_ASSERT(IS_32BIT_SIGNED(offset), "The offset to superclass is unreasonably large.");
      auto superclass = generateX86MemoryReference(tmp, offset, cg);
      if (use64BitClasses)
         {
         generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, tmp, superclass, cg);
         generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, tmp, generateX86MemoryReference(clazzData, cg), cg);
         }
      else
         {
         generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, superclass, (int32_t)(uintptr_t)clazz, cg);
         }
      }
   // at this point, ZF == 1 indicates success

   // Branch to success/fail path
   if (!isCheckCast)
      {
      // CLC does not change ZF, so the JNE still tests the compare above.
      generateInstruction(TR::InstOpCode::CLC, node, cg);
      }
   generateLabelInstruction(TR::InstOpCode::JNE4, node, failLabel, cg);

   // Set CF to report success
   if (!isCheckCast)
      {
      generateLabelInstruction(TR::InstOpCode::label, node, successLabel, cg);
      generateInstruction(TR::InstOpCode::STC, node, cg);
      }

   // Throw exception for CheckCast
   if (isCheckCast)
      {
      TR_OutlinedInstructionsGenerator og(failLabel, node, cg);

      generateRegInstruction(TR::InstOpCode::PUSHReg, node, j9class, cg);
      if (use64BitClasses)
         {
         generateMemInstruction(TR::InstOpCode::PUSHMem, node, generateX86MemoryReference(clazzData, cg), cg);
         }
      else
         {
         generateImmInstruction(TR::InstOpCode::PUSHImm4, node, (int32_t)(uintptr_t)clazz, cg);
         }
      auto call = generateHelperCallInstruction(node, TR_throwClassCastException, NULL, cg);
      call->setNeedsGCMap(0xFF00FFFF);
      call->setAdjustsFramePointerBy(-2*(int32_t)sizeof(J9Class*));

      og.endOutlinedInstructionSequence();
      }

   // Succeed
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);

   cg->stopUsingRegister(j9class);
   cg->stopUsingRegister(tmp);
   }
4287
4288
/**
 * @brief Evaluator for checkcast, checkcastAndNULLCHK, instanceof and the
 *        TR_checkAssignable icall.
 *
 * Chooses between three code shapes:
 *   - inlined dynamic checkcast when the cast class is not known at compile time,
 *   - inlined interface/class test when the cast class is known (and not an array),
 *   - a helper call otherwise (with an explicit memory touch for the fused
 *     NULLCHK case so the implicit exception still fires).
 *
 * @return the result register for instanceof/icall, NULL for checkcast
 */
TR::Register *J9::X86::TreeEvaluator::checkcastinstanceofEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Compilation *comp = cg->comp();

   const TR::ILOpCodes opValue = node->getOpCodeValue();
   const bool isCheckCast = (opValue == TR::checkcast || opValue == TR::checkcastAndNULLCHK);
   if (!isCheckCast && opValue != TR::instanceof && opValue != TR::icall /*TR_checkAssignable*/)
      {
      TR_ASSERT(false, "Incorrect Op Code %d.", opValue);
      }

   auto clazz = TR::TreeEvaluator::getCastClassAddress(node->getChild(1));

   // Inlining is only legal under AOT when the symbol validation manager is active.
   const bool relocationOK = !comp->compileRelocatableCode() || comp->getOption(TR_UseSymbolValidationManager);

   const bool inlineDynamicCast = isCheckCast &&
                                  !clazz &&
                                  !comp->getOption(TR_DisableInlineCheckCast) &&
                                  relocationOK;

   const bool inlineKnownClass = clazz &&
                                 !TR::Compiler->cls.isClassArray(comp, clazz) && // not yet optimized
                                 relocationOK &&
                                 !comp->getOption(TR_DisableInlineCheckCast) &&
                                 !comp->getOption(TR_DisableInlineInstanceOf);

   if (inlineDynamicCast)
      {
      generateInlinedCheckCastForDynamicCastClass(node, cg);
      }
   else if (inlineKnownClass)
      {
      cg->evaluate(node->getChild(0));

      if (TR::Compiler->cls.isInterfaceClass(comp, clazz))
         generateInlinedCheckCastOrInstanceOfForInterface(node, clazz, cg, isCheckCast);
      else
         generateInlinedCheckCastOrInstanceOfForClass(node, clazz, cg, isCheckCast);

      if (!isCheckCast)
         {
         // The inlined sequences report success in CF; materialize it as 0/1.
         auto result = cg->allocateRegister();
         generateRegInstruction(TR::InstOpCode::SETB1Reg, node, result, cg);
         generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg);
         node->setRegister(result);
         }

      cg->decReferenceCount(node->getChild(0));
      cg->recursivelyDecReferenceCount(node->getChild(1));
      }
   else
      {
      if (opValue == TR::checkcastAndNULLCHK)
         {
         auto object = cg->evaluate(node->getChild(0));
         // Just touch the memory in case this is a NULL pointer and we need to throw
         // the exception after the checkcast. If the checkcast was combined with nullpointer
         // there's nobody after the checkcast to throw the exception.
         auto instr = generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, generateX86MemoryReference(object, TR::Compiler->om.offsetOfObjectVftField(), cg), 0, cg);
         cg->setImplicitExceptionPoint(instr);
         instr->setNeedsGCMap(0xFF00FFFF);
         instr->setNode(comp->findNullChkInfo(node));
         }
      TR::TreeEvaluator::performHelperCall(node, NULL, isCheckCast ? TR::call : TR::icall, false, cg);
      }

   return node->getRegister();
   }
4353
4354
/**
 * @brief Check whether the method owning \p node appears to be part of the
 *        Java class libraries.
 *
 * Samples the owning method's signature and reports true when the signature
 * string begins with "java" or "sun".
 *
 * @param node the node whose owning method is examined
 * @param comp the current compilation
 * @return true if the signature starts with "java" or "sun"; false otherwise
 *         (including when no signature could be obtained)
 */
static bool comesFromClassLib(TR::Node *node, TR::Compilation *comp)
   {
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
   TR_OpaqueMethodBlock *mb = node->getOwningMethod();
   char buf[512];
   // Use sizeof(buf) rather than repeating the literal 512 so the declared
   // capacity and the capacity passed to sampleSignature cannot drift apart.
   const char *methodSig = fej9->sampleSignature(mb, buf, sizeof(buf), comp->trMemory());
   if (methodSig &&
       (strncmp(methodSig, "java", 4) == 0 ||
        strncmp(methodSig, "sun", 3) == 0))
      return true;
   return false;
   }
4366
4367
/**
 * @brief Build the memory reference addressing an object's lock word.
 *
 * NOTE(review): despite its name, objectClassReg is used as an index register
 * added to objectReg — presumably it holds a runtime-resolved lockword offset;
 * confirm against callers.
 *
 * @param objectClassReg register holding the lockword offset, or NULL when the
 *                       offset is known at compile time
 * @param objectReg      register holding the object
 * @param lwOffset       compile-time lockword offset (used when objectClassReg is NULL)
 * @param cg             the code generator
 * @return [objectReg + objectClassReg] when objectClassReg is non-NULL,
 *         otherwise [objectReg + lwOffset]
 */
static TR::MemoryReference *getMemoryReference(TR::Register *objectClassReg, TR::Register *objectReg, int32_t lwOffset, TR::CodeGenerator *cg)
   {
   return objectClassReg ? generateX86MemoryReference(objectReg, objectClassReg, 0, cg)
                         : generateX86MemoryReference(objectReg, lwOffset, cg);
   }
4374
4375
/**
 * @brief Emit a self-patching asynchronous GC-map check.
 *
 * Emits a patchable CALL to an out-of-line sequence. When the out-of-line
 * code runs and no async event is already pending, it signals the
 * GC-map-check async event on the current J9VMThread (sets stackOverflowMark
 * to -1 and ORs the event bit into asyncEventFlags) and then patches the
 * 5-byte CALL at the return address back into the ordinary
 * "cmp [vmThread + stackOverflowMark], -1" async-message check, so later
 * executions take the cheap inline path. The 64-bit path patches with a
 * plain 8-byte store; the 32-bit path uses LOCK CMPXCHG8B with the operand
 * registers pinned by dependencies (EDX:EAX = existing bytes, ECX:EBX = new
 * bytes). Either way control then continues to \p snippetLabel.
 *
 * @param node         the node the instructions are attributed to
 * @param cg           the code generator
 * @param snippetLabel label of the async-check snippet jumped to at the end
 */
void J9::X86::TreeEvaluator::asyncGCMapCheckPatching(TR::Node *node, TR::CodeGenerator *cg, TR::LabelSymbol *snippetLabel)
   {
   TR::MemoryReference *SOMmr = generateX86MemoryReference(node->getFirstChild()->getFirstChild(), cg);
   TR::Compilation *comp = cg->comp();

   if (cg->comp()->target().is64Bit())
      {
      //64 bit sequence
      //
      //Generate a call to the out-of-line patching sequence.
      //This sequence will convert the call back into an asynch message check cmp
      //
      TR::LabelSymbol *gcMapPatchingLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *asyncWithoutPatch = generateLabelSymbol(cg);

      //Start inline patching sequence
      //
      TR::Register *patchableAddrReg = cg->allocateRegister();
      TR::Register *patchValReg = cg->allocateRegister();
      TR::Register *tempReg = cg->allocateRegister();


      outlinedStartLabel->setStartInternalControlFlow();
      outlinedEndLabel->setEndInternalControlFlow();

      // The CALL must not straddle a patching boundary so the later 8-byte
      // store over it is atomic.
      //generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg);
      generatePatchableCodeAlignmentInstruction(TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions, generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);

      TR_OutlinedInstructionsGenerator og(gcMapPatchingLabel, node, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);
      //Load the address that we are going to patch and clean up the stack
      //
      generateRegInstruction(TR::InstOpCode::POPReg, node, patchableAddrReg, cg);

      //check if there is already an async even pending
      //
      generateMemImmInstruction(TR::InstOpCode::CMP8MemImm4, node, SOMmr, -1, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, asyncWithoutPatch, cg);

      //Signal the async event
      //
      static char *d = feGetEnv("TR_GCOnAsyncBREAK");
      if (d)
         generateInstruction(TR::InstOpCode::INT3, node, cg);

      generateMemImmInstruction(TR::InstOpCode::S8MemImm4, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, stackOverflowMark), cg), -1, cg);
      generateRegImmInstruction(TR::InstOpCode::MOV8RegImm4, node, tempReg, 1 << comp->getPersistentInfo()->getGCMapCheckEventHandle(), cg);
      generateMemRegInstruction(TR::InstOpCode::LOR8MemReg, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, asyncEventFlags),cg), tempReg, cg);

      //Populate the code we are going to patch in
      //
      //existing
      //000007ff`7d340578 e8f4170000       call    000007ff`7d341d71  <------
      //000007ff`7d34057d 0f84ee1e0000     je      000007ff`7d342471
      //*********
      //patching in
      //000007ff'7d34056f 48837d50ff       cmp     qword ptr [rbp+0x50], 0xffffffffffffffff <-----
      //000007ff`7d34057d 0f84ee1e0000     je      000007ff`7d342471

      //Load the original value
      //

      generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, patchValReg, generateX86MemoryReference(patchableAddrReg, -5, cg), cg);
      // NOTE(review): both masks below are 0x0, so the OR is a no-op and the
      // AND zeroes patchValReg entirely — presumably placeholders that are
      // binary-patched with the real cmp encoding elsewhere; confirm before
      // relying on these constants.
      generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, (uint64_t) 0x0, cg);
      generateRegRegInstruction(TR::InstOpCode::OR8RegReg, node, patchValReg, tempReg, cg);
      generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, (uint64_t) 0x0, cg);
      generateRegRegInstruction(TR::InstOpCode::AND8RegReg, node, patchValReg, tempReg , cg);

      TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 4, cg);
      deps->addPostCondition(patchableAddrReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(patchValReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
      deps->stopAddingConditions();

      // Store the new bytes over the 5-byte CALL (return address - 5).
      generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(patchableAddrReg, -5, cg), patchValReg, deps, cg);
      generateLabelInstruction(TR::InstOpCode::label, node, asyncWithoutPatch, cg);
      generateLabelInstruction(TR::InstOpCode::JMP4, node, snippetLabel, cg);

      cg->stopUsingRegister(patchableAddrReg);
      cg->stopUsingRegister(patchValReg);
      cg->stopUsingRegister(tempReg);
      generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, cg);

      og.endOutlinedInstructionSequence();
      }
   else
      {
      //32 bit sequence
      //

      //Generate a call to the out-of-line patching sequence.
      //This sequence will convert the call back into an asynch message check cmp
      //
      TR::LabelSymbol *gcMapPatchingLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *asyncWithoutPatch = generateLabelSymbol(cg);

      //Start inline patching sequence
      //
      TR::Register *patchableAddrReg = cg->allocateRegister();
      TR::Register *lowPatchValReg = cg->allocateRegister();
      TR::Register *highPatchValReg = cg->allocateRegister();
      TR::Register *lowExistingValReg = cg->allocateRegister();
      TR::Register *highExistingValReg = cg->allocateRegister();

      outlinedStartLabel->setStartInternalControlFlow();
      outlinedEndLabel->setEndInternalControlFlow();

      //generateBoundaryAvoidanceInstruction(TR::X86BoundaryAvoidanceInstruction::CALLImm4AtomicRegions, 8, 8,generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);
      TR::Instruction *callInst = generatePatchableCodeAlignmentInstruction(TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions, generateLabelInstruction(TR::InstOpCode::CALLImm4, node, gcMapPatchingLabel, cg), cg);
      TR::X86VFPSaveInstruction *vfpSaveInst = generateVFPSaveInstruction(callInst->getPrev(), cg);

      TR_OutlinedInstructionsGenerator og(gcMapPatchingLabel, node, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);
      //Load the address that we are going to patch and clean up the stack
      //
      generateRegInstruction(TR::InstOpCode::POPReg, node, patchableAddrReg, cg);


      //check if there is already an async even pending
      //
      generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, SOMmr, -1, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, asyncWithoutPatch, cg);

      //Signal the async event
      //
      generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, stackOverflowMark), cg), -1, cg);
      generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, lowPatchValReg, 1 << comp->getPersistentInfo()->getGCMapCheckEventHandle(), cg);
      generateMemRegInstruction(TR::InstOpCode::LOR4MemReg, node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, asyncEventFlags),cg), lowPatchValReg, cg);

      //Populate the registers we are going to use in the lock cmp xchg
      //

      static char *d = feGetEnv("TR_GCOnAsyncBREAK");
      if (d)
         generateInstruction(TR::InstOpCode::INT3, node, cg);

      //Populate the existing inline code
      //
      generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowExistingValReg, generateX86MemoryReference(patchableAddrReg, -5, cg), cg);
      generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highExistingValReg, generateX86MemoryReference(patchableAddrReg, -1, cg), cg);

      //Populate the code we are going to patch in
      //837d28ff        cmp     dword ptr [ebp+28h],0FFFFFFFFh <--- patching in
      //90              nop
      //*******************
      // call imm4      <---- patching over
      //
      generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, lowPatchValReg, (uint32_t) 0x287d8390, cg);
      generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, highPatchValReg, highExistingValReg, cg);
      generateRegImmInstruction(TR::InstOpCode::OR4RegImm4, node, highPatchValReg, (uint32_t) 0x000000ff, cg);

      // LCMPXCHG8B requires its operands in fixed registers:
      // EDX:EAX = expected (existing) bytes, ECX:EBX = replacement bytes.
      TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 6, cg);

      deps->addPostCondition(patchableAddrReg, TR::RealRegister::edi, cg);
      deps->addPostCondition(lowPatchValReg, TR::RealRegister::ebx, cg);
      deps->addPostCondition(highPatchValReg, TR::RealRegister::ecx, cg);
      deps->addPostCondition(lowExistingValReg, TR::RealRegister::eax, cg);
      deps->addPostCondition(highExistingValReg, TR::RealRegister::edx, cg);
      deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
      deps->stopAddingConditions();
      generateMemInstruction(TR::InstOpCode::LCMPXCHG8BMem, node, generateX86MemoryReference(patchableAddrReg, -5, cg), deps, cg);
      generateLabelInstruction(TR::InstOpCode::label, node, asyncWithoutPatch, cg);
      generateVFPRestoreInstruction(generateLabelInstruction(TR::InstOpCode::JMP4, node, snippetLabel, cg),vfpSaveInst,cg);

      cg->stopUsingRegister(patchableAddrReg);
      cg->stopUsingRegister(lowPatchValReg);
      cg->stopUsingRegister(highPatchValReg);
      cg->stopUsingRegister(lowExistingValReg);
      cg->stopUsingRegister(highExistingValReg);
      generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, cg);

      og.endOutlinedInstructionSequence();
      }
   }
4556
4557
// Emit the outlined fast path for a recursive monitor enter/exit.
// If the lock word, with the recursion-count bits masked off, still matches
// the current thread (ebp/vmThread), the recursion count is adjusted inline
// and control rejoins the mainline at snippetRestartLabel/fallThruLabel;
// otherwise control branches to the regular JIT monitor helper snippet.
//
//   fallThruLabel                     - mainline label to rejoin on success
//   jitMonitorEnterOrExitSnippetLabel - helper path taken when the masked
//                                       lock word does not match vmThread
//   inlineRecursiveSnippetLabel       - entry label of this outlined sequence
//   objectReg / lwOffset              - object register and lock-word offset
//   snippetRestartLabel               - restart label carrying the register deps
//   reservingLock                     - currently unused in this body
void J9::X86::TreeEvaluator::inlineRecursiveMonitor(TR::Node *node,
   TR::CodeGenerator *cg,
   TR::LabelSymbol *fallThruLabel,
   TR::LabelSymbol *jitMonitorEnterOrExitSnippetLabel,
   TR::LabelSymbol *inlineRecursiveSnippetLabel,
   TR::Register *objectReg,
   int lwOffset,
   TR::LabelSymbol *snippetRestartLabel,
   bool reservingLock)
   {
   //Code generated:
   // mov lockWordReg, [obj+lwOffset]
   // add lockWordReg, INC_DEC_VALUE/-INC_DEC_VALUE  ---> lock word with increased recursive count
   // mov lockWordMaskedReg, NON_INC_DEC_MASK
   // and lockWordMaskedReg, lockWordReg  ---> lock word masked out counter bits
   // cmp lockWordMaskedReg, ebp
   // jne jitMonitorEnterOrExitSnippetLabel
   // mov [obj+lwOffset], lockWordReg
   // jmp fallThruLabel

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);

   outlinedStartLabel->setStartInternalControlFlow();
   outlinedEndLabel->setEndInternalControlFlow();

   TR_OutlinedInstructionsGenerator og(inlineRecursiveSnippetLabel, node, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, outlinedStartLabel, cg);
   TR::Register *lockWordReg = cg->allocateRegister();
   TR::Register *lockWordMaskedReg = cg->allocateRegister();
   TR::Register *vmThreadReg = cg->getVMThreadRegister();
   // Use 64-bit forms only when the lock word itself is 64 bits wide.
   bool use64bitOp = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();
   // Monitor enter vs. exit decides whether the recursion count is
   // incremented or decremented below.
   bool isMonitorEnter = node->getSymbolReference() == cg->comp()->getSymRefTab()->findOrCreateMethodMonitorEntrySymbolRef(NULL)
      || node->getSymbolReference() == cg->comp()->getSymRefTab()->findOrCreateMonitorEntrySymbolRef(NULL);

   generateRegMemInstruction(TR::InstOpCode::LRegMem(use64bitOp), node, lockWordReg, generateX86MemoryReference(objectReg, lwOffset, cg), cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(use64bitOp), node, lockWordReg, isMonitorEnter? INC_DEC_VALUE: -INC_DEC_VALUE, cg);
   // Mask keeps everything except the recursion-count bits (and the
   // reservation bit) so the remainder can be compared against vmThread.
   generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(use64bitOp), node, lockWordMaskedReg, NON_INC_DEC_MASK - RES_BIT, cg);
   generateRegRegInstruction(TR::InstOpCode::ANDRegReg(use64bitOp), node, lockWordMaskedReg, lockWordReg, cg);
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(use64bitOp), node, lockWordMaskedReg, vmThreadReg, cg);

   // Not a recursive acquire/release by this thread: go to the helper.
   generateLabelInstruction(TR::InstOpCode::JNE4, node, jitMonitorEnterOrExitSnippetLabel, cg);
   // Store back the lock word with the adjusted recursion count.
   generateMemRegInstruction(TR::InstOpCode::SMemReg(use64bitOp), node, generateX86MemoryReference(objectReg, lwOffset, cg), lockWordReg, cg);

   TR::RegisterDependencyConditions *restartDeps = generateRegisterDependencyConditions((uint8_t)0, 4, cg);
   restartDeps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);
   restartDeps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);
   restartDeps->addPostCondition(lockWordMaskedReg, TR::RealRegister::NoReg, cg);
   restartDeps->addPostCondition(lockWordReg, TR::RealRegister::NoReg, cg);
   restartDeps->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::label, node, snippetRestartLabel, restartDeps, cg);

   generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThruLabel, cg);

   cg->stopUsingRegister(lockWordReg);
   cg->stopUsingRegister(lockWordMaskedReg);

   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 1, cg);
   deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);
   deps->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, deps, cg);

   og.endOutlinedInstructionSequence();
   }
4623
4624
void J9::X86::TreeEvaluator::transactionalMemoryJITMonitorEntry(TR::Node *node,
4625
TR::CodeGenerator *cg,
4626
TR::LabelSymbol *startLabel,
4627
TR::LabelSymbol *snippetLabel,
4628
TR::LabelSymbol *JITMonitorEnterSnippetLabel,
4629
TR::Register *objectReg,
4630
int lwOffset)
4631
4632
{
4633
TR::LabelSymbol *txJITMonitorEntryLabel = snippetLabel;
4634
TR::LabelSymbol *outlinedStartLabel = generateLabelSymbol(cg);
4635
TR::LabelSymbol *outlinedEndLabel = generateLabelSymbol(cg);
4636
4637
outlinedStartLabel->setStartInternalControlFlow();
4638
outlinedEndLabel->setEndInternalControlFlow();
4639
4640
TR_OutlinedInstructionsGenerator og(txJITMonitorEntryLabel, node, cg);
4641
4642
TR::Register *counterReg = cg->allocateRegister();
4643
generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, counterReg, 1024, cg);
4644
TR::LabelSymbol *spinLabel = outlinedStartLabel;
4645
generateLabelInstruction(TR::InstOpCode::label, node, spinLabel, cg);
4646
4647
generateInstruction(TR::InstOpCode::PAUSE, node, cg);
4648
generateRegInstruction(TR::InstOpCode::DEC4Reg, node, counterReg, cg); // might need to consider 32bits later
4649
generateLabelInstruction(TR::InstOpCode::JE4, node, JITMonitorEnterSnippetLabel, cg);
4650
TR::MemoryReference *objLockRef = generateX86MemoryReference(objectReg, lwOffset, cg);
4651
generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4, node, objLockRef, 0, cg);
4652
generateLabelInstruction(TR::InstOpCode::JNE4, node, spinLabel, cg);
4653
generateLabelInstruction(TR::InstOpCode::JMP4, node, startLabel, cg);
4654
4655
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, 1, cg);
4656
deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
4657
deps->stopAddingConditions();
4658
generateLabelInstruction(TR::InstOpCode::label, node, outlinedEndLabel, cg);
4659
4660
cg->stopUsingRegister(counterReg);
4661
4662
og.endOutlinedInstructionSequence();
4663
}
4664
4665
void
4666
J9::X86::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(
4667
TR::Node *node,
4668
int32_t classFlag,
4669
TR::LabelSymbol *snippetLabel,
4670
TR::CodeGenerator *cg)
4671
{
4672
TR::Register *objectReg = cg->evaluate(node->getFirstChild());
4673
TR::Register *j9classReg = cg->allocateRegister();
4674
generateLoadJ9Class(node, j9classReg, objectReg, cg);
4675
auto fej9 = (TR_J9VMBase *)(cg->fe());
4676
TR::MemoryReference *classFlagsMR = generateX86MemoryReference(j9classReg, (uintptr_t)(fej9->getOffsetOfClassFlags()), cg);
4677
4678
TR::InstOpCode::Mnemonic testOpCode;
4679
if ((uint32_t)classFlag <= USHRT_MAX)
4680
testOpCode = TR::InstOpCode::TEST2MemImm2;
4681
else
4682
testOpCode = TR::InstOpCode::TEST4MemImm4;
4683
4684
generateMemImmInstruction(testOpCode, node, classFlagsMR, classFlag, cg);
4685
generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
4686
}
4687
4688
// Evaluator for monitor-enter (monent). Generates the inline lock-acquire
// fast path (cmpxchg of vmThread into the object's lock word), with optional
// support for lock reservation, HLE transactional enter, recursive-enter
// outlined fast path, and the lock-nursery lookup when the lock-word offset
// is not known at compile time. Falls back to an outlined helper call when
// inlining is disabled or the fast path fails.
//
// Fixes relative to the previous version:
//  * TRACE_LOCK_RESERVATION block after fallThru: added the missing ';'
//    after the first generateRegMemInstruction(...) call, which made the
//    file uncompilable whenever TRACE_LOCK_RESERVATION was defined.
//  * Removed the unused local `bool dummy` in the reserving-locks setup.
TR::Register *
J9::X86::TreeEvaluator::VMmonentEvaluator(
   TR::Node *node,
   TR::CodeGenerator *cg)
   {
   // If there is a NULLCHK above this node it will be expecting us to set
   // up the excepting instruction. If we are not going to inline an
   // appropriate excepting instruction we must make sure to reset the
   // excepting instruction since our children may have set it.
   //
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   static const char *noInline = feGetEnv("TR_NoInlineMonitor");
   static const char *firstMonEnt = feGetEnv("TR_FirstMonEnt");
   static int32_t monEntCount = 0;
   bool reservingLock = false;
   bool normalLockPreservingReservation = false;
   bool dummyMethodMonitor = false;
   TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);
   static const char *doCmpFirst = feGetEnv("TR_AddCMPBeforeCMPXCHG");

   int lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
   if (comp->getOption(TR_MimicInterpreterFrameShape) ||
       (comp->getOption(TR_FullSpeedDebug) && node->isSyncMethodMonitor()) ||
       noInline ||
       (isMonitorValueBasedOrValueType == TR_yes) ||
       comp->getOption(TR_DisableInlineMonEnt) ||
       (firstMonEnt && (*firstMonEnt-'0') > monEntCount++))
      {
      // Don't inline: temporarily recreate the node as a call so the
      // ordinary direct-call evaluator emits the helper invocation.
      //
      TR::ILOpCodes opCode = node->getOpCodeValue();
      TR::Node::recreate(node, TR::call);
      TR::TreeEvaluator::directCallEvaluator(node, cg);
      TR::Node::recreate(node, opCode);
      cg->setImplicitExceptionPoint(NULL);
      return NULL;
      }

   if (lwOffset > 0 && comp->getOption(TR_ReservingLocks))
      {
      TR::TreeEvaluator::evaluateLockForReservation (node, &reservingLock, &normalLockPreservingReservation, cg);
      TR::TreeEvaluator::isPrimitiveMonitor (node, cg);

      if (node->isPrimitiveLockedRegion() && reservingLock)
         dummyMethodMonitor = TR::TreeEvaluator::isDummyMonitorEnter(node, cg);

      if (reservingLock && !node->isPrimitiveLockedRegion())
         dummyMethodMonitor = false;
      }

   TR::Node *objectRef = node->getFirstChild();

   static const char *disableInlineRecursiveEnv = feGetEnv("TR_DisableInlineRecursiveMonitor");
   bool inlineRecursive = disableInlineRecursiveEnv ? false : true;
   if (comp->getOption(TR_X86HLE) || lwOffset <= 0)
      inlineRecursive = false;

   // Evaluate the object reference
   //
   TR::Register *objectReg = cg->evaluate(objectRef);
   TR::Register *eaxReal = cg->allocateRegister();
   TR::Register *scratchReg = NULL;
   uint32_t numDeps = 3; // objectReg, eax, ebp

   generatePrefetchAfterHeaderAccess (node, objectReg, cg);

   cg->setImplicitExceptionPoint(NULL);

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *fallThru = generateLabelSymbol(cg);
   TR::LabelSymbol *snippetFallThru = inlineRecursive ? generateLabelSymbol(cg) : fallThru;

   startLabel->setStartInternalControlFlow();
   fallThru->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::Register *vmThreadReg = cg->getVMThreadRegister();

   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *exitLabel = NULL;

   TR_OutlinedInstructions *outlinedHelperCall;
   // In the reserving lock case below, we change the symref on the node... Here, we are going to store the original symref, so that we can restore our change.
   TR::SymbolReference *originalNodeSymRef = NULL;

   TR::Node *helperCallNode = node;

   // Value-based / value-type objects must not be locked; divert them to the
   // helper snippet when the class is not known at compile time.
   if (isMonitorValueBasedOrValueType == TR_maybe)
      TR::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(node, J9_CLASS_DISALLOWS_LOCKING_FLAGS, snippetLabel, cg);

   if (comp->getOption(TR_ReservingLocks))
      {
      // About to change the node's symref... store the original.
      originalNodeSymRef = node->getSymbolReference();

      if (reservingLock && node->isPrimitiveLockedRegion() && dummyMethodMonitor)
         {
         if (node->getSymbolReference() == cg->getSymRef(TR_methodMonitorEntry))
            node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64JitMethodMonitorExitReservedPrimitive, true, true, true));
         else
            node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64JitMonitorExitReservedPrimitive, true, true, true));

         exitLabel = generateLabelSymbol(cg);
         TR_OutlinedInstructions *outlinedExitHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, exitLabel, fallThru, cg);
         cg->getOutlinedInstructionsList().push_front(outlinedExitHelperCall);
         }

      TR_RuntimeHelper helper;
      bool success = TR::TreeEvaluator::monEntryExitHelper(true, node, reservingLock, normalLockPreservingReservation, helper, cg);
      if (success)
         node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(helper, true, true, true));

      if (reservingLock)
         {
         uint32_t reservableLwValue = RES_BIT;
         if (TR::Options::_aggressiveLockReservation)
            reservableLwValue = 0;

         // Make this integer the same size as the lock word. If we always
         // passed a 32-bit value, then on 64-bit with an uncompressed lock
         // word, the helper would have to either zero-extend the value, or
         // rely on the caller having done so even though the calling
         // convention doesn't appear to require it.
         TR::Node *reservableLwNode = NULL;
         if (cg->comp()->target().is32Bit() || fej9->generateCompressedLockWord())
            reservableLwNode = TR::Node::iconst(node, reservableLwValue);
         else
            reservableLwNode = TR::Node::lconst(node, reservableLwValue);

         helperCallNode = TR::Node::create(
            node,
            TR::call,
            2,
            objectRef,
            reservableLwNode);

         helperCallNode->setSymbolReference(node->getSymbolReference());
         helperCallNode->incReferenceCount();
         }
      }

   if (cg->comp()->target().is64Bit() && cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))
      {
      TR::LabelSymbol *JITMonitorEntrySnippetLabel = generateLabelSymbol(cg);
      TR::TreeEvaluator::transactionalMemoryJITMonitorEntry(node, cg, startLabel, snippetLabel, JITMonitorEntrySnippetLabel, objectReg, lwOffset);
      outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL,
                                                                            JITMonitorEntrySnippetLabel, (exitLabel) ? exitLabel : fallThru, cg);
      }
   else
      outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL,
                                                                            snippetLabel, (exitLabel) ? exitLabel : snippetFallThru, cg);

   if (helperCallNode != node)
      helperCallNode->recursivelyDecReferenceCount();

   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   cg->generateDebugCounter(
      outlinedHelperCall->getFirstInstruction(),
      TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
      1, TR::DebugCounter::Cheap);

   // Okay, and we've made it down here and we've successfully generated all outlined snippets, let's restore the node's symref.
   if (comp->getOption(TR_ReservingLocks))
      {
      node->setSymbolReference(originalNodeSymRef);
      }

   if (inlineRecursive)
      {
      TR::LabelSymbol *inlineRecursiveSnippetLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *jitMonitorEnterSnippetLabel = snippetLabel;
      snippetLabel = inlineRecursiveSnippetLabel;
      TR::TreeEvaluator::inlineRecursiveMonitor(node, cg, fallThru, jitMonitorEnterSnippetLabel, inlineRecursiveSnippetLabel, objectReg, lwOffset, snippetFallThru, reservingLock);
      }

   // Compare the monitor slot in the object against zero. If it succeeds
   // we are done. Else call the helper.
   // Code generated:
   //    xor eax, eax
   //    cmpxchg monitor(objectReg), ebp
   //    jne snippet
   //    label restartLabel
   //
   // Code generated for read monitor enter:
   //    xor eax, eax
   //    mov lockedReg, INC_DEC_VALUE (0x04)
   //    cmpxchg monitor(objectReg), lockedReg
   //    jne snippet
   //    label restartLabel
   //
   TR::Register *lockedReg = NULL;
   TR::InstOpCode::Mnemonic op = TR::InstOpCode::bad;

   // Pick the cmpxchg width from the lock-word size, a LOCK prefix on SMP,
   // and the XACQUIRE-prefixed forms when HLE is enabled.
   if (cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord())
      {
      op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHG8MemReg : TR::InstOpCode::CMPXCHG8MemReg;
      if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))
         op = cg->comp()->target().isSMP() ? TR::InstOpCode::XALCMPXCHG8MemReg : TR::InstOpCode::XACMPXCHG8MemReg;
      }
   else
      {
      op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHG4MemReg : TR::InstOpCode::CMPXCHG4MemReg;
      if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))
         op = cg->comp()->target().isSMP() ? TR::InstOpCode::XALCMPXCHG4MemReg : TR::InstOpCode::XACMPXCHG4MemReg;
      }

   TR::Register *objectClassReg = NULL;
   TR::Register *lookupOffsetReg = NULL;

   // Lock-word offset unknown at compile time: load it from the J9Class and
   // fall back to the lock nursery / monitor lookup cache.
   if (lwOffset <= 0)
      {
      TR::MemoryReference *objectClassMR = generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_CLAZZ, cg);
      objectClassReg = cg->allocateRegister();
      numDeps++;
      TR::X86RegMemInstruction *instr;
      if (TR::Compiler->om.compressObjectReferences())
         instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, objectClassReg, objectClassMR, cg);
      else
         instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, objectClassMR, cg);
      // This instruction may try to dereference a null memory address
      // add an implicit exception point for it.
      //
      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);

      TR::TreeEvaluator::generateVFTMaskInstruction(node, objectClassReg, cg);
      int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(objectClassReg, offsetOfLockOffset, cg), cg);
      generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, objectClassReg, 0, cg);

      generateCommonLockNurseryCodes(
         node,
         cg,
         true, //true for VMmonentEvaluator, false for VMmonexitEvaluator
         monitorLookupCacheLabel,
         fallThruFromMonitorLookupCacheLabel,
         snippetLabel,
         numDeps,
         lwOffset,
         objectClassReg,
         lookupOffsetReg,
         vmThreadReg,
         objectReg);
      }

   if (comp->getOption(TR_ReservingLocks) && reservingLock)
      {
      TR::LabelSymbol *mismatchLabel = NULL;
      if (TR::Options::_aggressiveLockReservation)
         mismatchLabel = snippetLabel;
      else
         mismatchLabel = generateLabelSymbol(cg);

#if defined(TRACE_LOCK_RESERVATION)
      {
      auto cds = cg->findOrCreate4ByteConstant(node, (int)node);
      TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);

      TR::X86MemImmInstruction * instr;
      if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
         {
         generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg); // Zero out eaxReal
         instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);
         }
      else
         instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);

      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);

      TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
      TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);

      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, eaxReal, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, eaxReal, cg);

      auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);
      TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);
      TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
      TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);

      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);

      scratchReg = cg->allocateRegister();
      numDeps++;
      TR::TreeEvaluator::generateValueTracingCode (node, vmThreadReg, scratchReg, objectReg, eaxReal, cg);
      }
#endif

      // eax = vmThread | RES_BIT: the value a reserved-by-us lock word holds.
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, eaxReal, generateX86MemoryReference(vmThreadReg, RES_BIT, cg), cg);

      TR::X86MemRegInstruction * instr;
      if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
         {
         // Use TR::InstOpCode::CMP4RegMem instead of TR::InstOpCode::CMPRegMem(...).
         instr = generateMemRegInstruction(TR::InstOpCode::CMP4MemReg, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg);
         }
      else
         instr = generateMemRegInstruction(TR::InstOpCode::CMPMemReg(cg->comp()->target().is64Bit()), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg);

      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);

      generateLabelInstruction(TR::InstOpCode::JNE4, node, mismatchLabel, cg);

      // Reserved by us: bump the recursion count (not needed for primitive
      // locked regions, which never nest).
      if (!node->isPrimitiveLockedRegion())
         {
         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            {
            // Use ADD4memImms instead of TR::InstOpCode::ADDMemImms
            generateMemImmInstruction(TR::InstOpCode::ADD4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg);
            }
         else
            generateMemImmInstruction(TR::InstOpCode::ADDMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg);
         }

      if (!TR::Options::_aggressiveLockReservation)
         {
         // Jump over the non-reservable path
         generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);

         // It's possible that the lock may be available, but not reservable. In
         // that case we should try the usual cmpxchg for non-reserving enter.
         // Otherwise we'll necessarily call the helper.
         generateLabelInstruction(TR::InstOpCode::label, node, mismatchLabel, cg);

         TR::InstOpCode::Mnemonic cmpOp = TR::InstOpCode::CMPMemImms();
         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            cmpOp = TR::InstOpCode::CMP4MemImms;

         auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);
         generateMemImmInstruction(cmpOp, node, lwMR, 0, cg);
         generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
         generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);
         lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);
         generateMemRegInstruction(op, node, lwMR, vmThreadReg, cg);
         generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
         }
      }
   else
      {
      if (TR::Options::_aggressiveLockReservation)
         {
         if (comp->getOption(TR_ReservingLocks) && normalLockPreservingReservation)
            {
            TR::X86MemImmInstruction * instr;
            if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
               instr = generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);
            else
               instr = generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);
            cg->setImplicitExceptionPoint(instr);
            instr->setNeedsGCMap(0xFF00FFFF);

            generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
            }

         generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);
         }
      else if (!comp->getOption(TR_ReservingLocks))
         {
         generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, eaxReal, eaxReal, cg);
         }
      else
         {
         // Reserving locks enabled but this lock is not being reserved:
         // load the lock word and bail to the helper if any bit other than
         // RES_BIT is set (already owned or counted).
         TR::InstOpCode::Mnemonic loadOp = TR::InstOpCode::LRegMem();
         TR::InstOpCode::Mnemonic testOp = TR::InstOpCode::TESTRegImm4();
         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            {
            loadOp = TR::InstOpCode::L4RegMem;
            testOp = TR::InstOpCode::TEST4RegImm4;
            }

         auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);
         auto instr = generateRegMemInstruction(loadOp, node, eaxReal, lwMR, cg);
         cg->setImplicitExceptionPoint(instr);
         instr->setNeedsGCMap(0xFF00FFFF);

         generateRegImmInstruction(testOp, node, eaxReal, (int32_t)~RES_BIT, cg);
         generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
         }

      if (doCmpFirst &&
          !comesFromClassLib(node, comp))
         {
         TR::X86MemImmInstruction * instr;
         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            instr = generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);
         else
            instr = generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);

         cg->setImplicitExceptionPoint(instr);
         instr->setNeedsGCMap(0xFF00FFFF);

         generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
         }

      if (node->isReadMonitor())
         {
         lockedReg = cg->allocateRegister();
         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, lockedReg, lockedReg, cg); //After lockedReg is allocated zero it out.
         generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, lockedReg, INC_DEC_VALUE, cg);
         ++numDeps;
         }
      else
         {
#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)
         // need to get monitor from cache, if we can
         lockedReg = cg->allocateRegister();
         numDeps++;
         generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lockedReg,
                                   generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg), cg);
         generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, lockedReg, lockedReg, cg);
         generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
#else
         bool conditionallyReserve = false;
         bool shouldConditionallyReserveForReservableClasses =
            comp->getOption(TR_ReservingLocks)
            && !TR::Options::_aggressiveLockReservation
            && lwOffset > 0
            && cg->getMonClass(node) != NULL;

         if (shouldConditionallyReserveForReservableClasses)
            {
            TR_PersistentClassInfo *monClassInfo = comp
               ->getPersistentInfo()
               ->getPersistentCHTable()
               ->findClassInfoAfterLocking(cg->getMonClass(node), comp);

            if (monClassInfo != NULL && monClassInfo->isReservable())
               conditionallyReserve = true;
            }

         if (!conditionallyReserve)
            {
            // we want to write thread reg into lock word
            lockedReg = vmThreadReg;
            }
         else
            {
            lockedReg = cg->allocateRegister();
            numDeps++;

            // Compute the value to put into the lock word based on the
            // current value, which is either 0 or RES_BIT ("reservable").
            //
            //    0       ==> vmThreadReg
            //    RES_BIT ==> vmThreadReg | RES_BIT | INC_DEC_VALUE
            //
            // For reservable locks, failure to reserve at this point would
            // prevent any future reservation of the same lock.

            bool b64 = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();
            generateRegRegInstruction(TR::InstOpCode::MOVRegReg(b64), node, lockedReg, eaxReal, cg);
            generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(b64), node, lockedReg, RES_BIT_POSITION, cg);
            generateRegInstruction(TR::InstOpCode::NEGReg(b64), node, lockedReg, cg);
            generateRegImmInstruction(TR::InstOpCode::ANDRegImms(b64), node, lockedReg, RES_BIT | INC_DEC_VALUE, cg);
            generateRegRegInstruction(TR::InstOpCode::ADDRegReg(b64), node, lockedReg, vmThreadReg, cg);
            }
#endif
         }

      // try to swap into lock word
      TR::X86MemRegInstruction *instr = generateMemRegInstruction(op, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), lockedReg, cg);
      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);

      generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
      }

   // Create dependencies for the registers used.
   // The dependencies must be in the order:
   //    objectReg, eaxReal, vmThreadReg
   // since the snippet needs to find them to grab the real registers from them.
   //
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);
   deps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(eaxReal, TR::RealRegister::eax, cg);
   deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);

   if (scratchReg)
      deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);

   if (lockedReg != NULL && lockedReg != vmThreadReg)
      {
      deps->addPostCondition(lockedReg, TR::RealRegister::NoReg, cg);
      }

   if (objectClassReg)
      deps->addPostCondition(objectClassReg, TR::RealRegister::NoReg, cg);

   if (lookupOffsetReg)
      deps->addPostCondition(lookupOffsetReg, TR::RealRegister::NoReg, cg);

   deps->stopAddingConditions();

#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)
   // our lock is in the object, now need to advance to next monitor in cache
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, lockedReg,
                             generateX86MemoryReference(lockedReg, fej9->getMonitorNextOffset(), cg), cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
                             generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg),
                             lockedReg, cg);
#endif

   generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);

#if defined(TRACE_LOCK_RESERVATION)
   {
   auto cds = cg->findOrCreate4ByteConstant(node, (int)node+1);
   TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);

   TR::X86RegMemInstruction *instr;
   if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
      instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg); // fix: ';' was missing here
   else
      instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);

   cg->setImplicitExceptionPoint(instr);
   instr->setNeedsGCMap(0xFF00FFFF);

   TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
   TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);

   generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, eaxReal, cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, eaxReal, cg);

   auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);
   TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);
   TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
   TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);

   generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);
   }
#endif

   cg->decReferenceCount(objectRef);
   cg->stopUsingRegister(eaxReal);
   if (scratchReg)
      cg->stopUsingRegister(scratchReg);
   if (objectClassReg)
      cg->stopUsingRegister(objectClassReg);
   if (lookupOffsetReg)
      cg->stopUsingRegister(lookupOffsetReg);

   if (lockedReg != NULL && lockedReg != vmThreadReg)
      {
      cg->stopUsingRegister(lockedReg);
      }

   return NULL;
   }
5247
5248
5249
void J9::X86::TreeEvaluator::generateValueTracingCode(
5250
TR::Node *node,
5251
TR::Register *vmThreadReg,
5252
TR::Register *scratchReg,
5253
TR::Register *valueReg,
5254
TR::CodeGenerator *cg)
5255
{
5256
if (!cg->comp()->getOption(TR_EnableValueTracing))
5257
return;
5258
// the code requires that the caller has vmThread in EBP as well as
5259
// that the caller has already setup internal control flow
5260
uint32_t vmThreadBase = offsetof(J9VMThread, debugEventData6);
5261
uint32_t vmThreadTop = offsetof(J9VMThread, debugEventData4);
5262
uint32_t vmThreadCursor = offsetof(J9VMThread, debugEventData5);
5263
TR::LabelSymbol *endLabel = generateLabelSymbol(cg);
5264
5265
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), cg);
5266
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, scratchReg, 8, cg);
5267
5268
generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadTop, cg), scratchReg, cg);
5269
generateLabelInstruction(TR::InstOpCode::JG4, node, endLabel, cg);
5270
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadBase, cg), cg);
5271
generateLabelInstruction(TR::InstOpCode::label, node, endLabel, cg);
5272
generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node, generateX86MemoryReference(scratchReg, 0, cg), node->getOpCodeValue(), cg);
5273
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 0, cg), valueReg, cg);
5274
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), scratchReg, cg);
5275
}
5276
5277
void J9::X86::TreeEvaluator::generateValueTracingCode(
   TR::Node *node,
   TR::Register *vmThreadReg,
   TR::Register *scratchReg,
   TR::Register *valueRegHigh,
   TR::Register *valueRegLow,
   TR::CodeGenerator *cg)
   {
   // Debug-only tracing: append a 16-byte record to the per-thread ring
   // buffer kept in the J9VMThread debugEventData slots.  Record layout:
   //   +0x0  IL opcode of `node` (4-byte immediate)
   //   +0x4  valueRegHigh
   //   +0x8  valueRegLow
   //   +0xc  the word loaded from [valueRegHigh] -- presumably valueRegHigh
   //         holds an address (e.g. an object whose lock word is sampled);
   //         TODO confirm against callers
   // No-op unless TR_EnableValueTracing is set.
   if (!cg->comp()->getOption(TR_EnableValueTracing))
      return;

   // the code requires that the caller has vmThread in EBP as well as
   // that the caller has already setup internal control flow
   uint32_t vmThreadBase = offsetof(J9VMThread, debugEventData6);   // buffer start
   uint32_t vmThreadTop = offsetof(J9VMThread, debugEventData4);    // buffer limit
   uint32_t vmThreadCursor = offsetof(J9VMThread, debugEventData5); // write cursor
   TR::LabelSymbol *endLabel = generateLabelSymbol(cg);

   // Advance the cursor by one record (0x10 bytes); wrap back to the buffer
   // base when the advanced cursor passes the top.
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, scratchReg, 0x10, cg);

   generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadTop, cg), scratchReg, cg);
   generateLabelInstruction(TR::InstOpCode::JG4, node, endLabel, cg);
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, scratchReg, generateX86MemoryReference(vmThreadReg, vmThreadBase, cg), cg);
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, cg);

   // Store the record fields.
   generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node, generateX86MemoryReference(scratchReg, 0, cg), node->getOpCodeValue(), cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 4, cg), valueRegHigh, cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 8, cg), valueRegLow, cg);
   // NOTE: valueRegLow is clobbered from here on -- its original value has
   // already been stored, so it is reused as scratch to capture the memory
   // word pointed at by valueRegHigh.
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, valueRegLow, generateX86MemoryReference(valueRegHigh, 0, cg), cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(scratchReg, 0xc, cg), valueRegLow, cg);

   // Publish the advanced cursor.
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(vmThreadReg, vmThreadCursor, cg), scratchReg, cg);
   }
TR::Register
*J9::X86::TreeEvaluator::VMmonexitEvaluator(
   TR::Node *node,
   TR::CodeGenerator *cg)
   {
   // Inline the common monitor-exit fast paths; uncommon cases (contended
   // exit, reserved-by-another-thread, unknown lock-word offset, value
   // types, ...) branch to an outlined snippet that calls the exit helper.
   //
   // If there is a NULLCHK above this node it will be expecting us to set
   // up the excepting instruction. If we are not going to inline an
   // appropriate excepting instruction we must make sure to reset the
   // excepting instruction since our children may have set it.
   //
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   static const char *noInline = feGetEnv("TR_NoInlineMonitor");
   static const char *firstMonExit = feGetEnv("TR_FirstMonExit");
   static int32_t monExitCount = 0;
   bool reservingLock = false;
   bool normalLockPreservingReservation = false;
   bool dummyMethodMonitor = false;
   // Full-width lock-word accesses are only needed on 64-bit when lock
   // words are not compressed.
   bool gen64BitInstr = cg->comp()->target().is64Bit() && !fej9->generateCompressedLockWord();
   int lwOffset = fej9->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
   TR_YesNoMaybe isMonitorValueBasedOrValueType = cg->isMonitorValueBasedOrValueType(node);

   if ((comp->getOption(TR_MimicInterpreterFrameShape) /*&& !comp->getOption(TR_EnableLiveMonitorMetadata)*/) ||
       noInline ||
       (isMonitorValueBasedOrValueType == TR_yes) ||
       comp->getOption(TR_DisableInlineMonExit) ||
       (firstMonExit && (*firstMonExit-'0') > monExitCount++))
      {
      // Don't inline: emit a direct call to the monitor-exit helper instead.
      //
      TR::ILOpCodes opCode = node->getOpCodeValue();
      TR::Node::recreate(node, TR::call);
      TR::TreeEvaluator::directCallEvaluator(node, cg);
      TR::Node::recreate(node, opCode);
      cg->setImplicitExceptionPoint(NULL);
      return NULL;
      }

   if (lwOffset > 0 && comp->getOption(TR_ReservingLocks))
      {
      // Fix: removed the unused local `bool dummy = false;` that was never read.
      TR::TreeEvaluator::evaluateLockForReservation (node, &reservingLock, &normalLockPreservingReservation, cg);
      if (node->isPrimitiveLockedRegion() && reservingLock)
         dummyMethodMonitor = TR::TreeEvaluator::isDummyMonitorExit(node, cg);

      if (!node->isPrimitiveLockedRegion() && reservingLock)
         dummyMethodMonitor = false;
      }

   if (dummyMethodMonitor)
      {
      // Nothing to emit for this exit; just release our reference to the child.
      cg->decReferenceCount(node->getFirstChild());
      return NULL;
      }

   static const char *disableInlineRecursiveEnv = feGetEnv("TR_DisableInlineRecursiveMonitor");
   bool inlineRecursive = disableInlineRecursiveEnv ? false : true;
   if (comp->getOption(TR_X86HLE) || lwOffset <= 0)
      inlineRecursive = false;

   // Evaluate the object reference
   //
   TR::Node *objectRef = node->getFirstChild();
   TR::Register *objectReg = cg->evaluate(objectRef);
   TR::Register *tempReg = NULL;
   uint32_t numDeps = 2; // objectReg, ebp

   cg->setImplicitExceptionPoint(NULL);
   TR::Register *vmThreadReg = cg->getVMThreadRegister();

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *fallThru = generateLabelSymbol(cg);
   // Create the monitor exit snippet
   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);

   // Value-based/value-type objects may not be locked; when the answer is
   // not known statically, emit a runtime check that diverts to the snippet.
   if (isMonitorValueBasedOrValueType == TR_maybe)
      TR::TreeEvaluator::generateCheckForValueMonitorEnterOrExit(node, J9_CLASS_DISALLOWS_LOCKING_FLAGS, snippetLabel, cg);

#if !defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)
   // Now that the object reference has been generated, see if this is the end
   // of a small synchronized block.
   // The definition of "small" depends on the method hotness and is measured
   // in instructions.
   // The following method makes use of the fact that the body of the sync
   // block has been generated but the monitor exit hasn't yet.
   //
   // NOTE(review): maxInstructions/hotness are computed but never used in
   // this function -- the consumer appears to have been removed; candidate
   // for deletion after confirming no macro expansion depends on it.
   int32_t maxInstructions;
   TR_Hotness hotness = comp->getMethodHotness();
   if (hotness == scorching) maxInstructions = 30;
   else if (hotness == hot)  maxInstructions = 20;
   else                      maxInstructions = 10;
#endif

   startLabel->setStartInternalControlFlow();
   TR::LabelSymbol *snippetFallThru = inlineRecursive ? generateLabelSymbol(cg): fallThru;
   fallThru->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::Register *eaxReal = 0;
   TR::Register *unlockedReg = 0;
   TR::Register *scratchReg = 0;

   TR::Register *objectClassReg = NULL;
   TR::Register *lookupOffsetReg = NULL;

   if (lwOffset <= 0)
      {
      // Lock-word offset unknown at compile time: load it from the object's
      // J9Class at runtime (objectClassReg ends up holding lockOffset).
      TR::MemoryReference *objectClassMR = generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_CLAZZ, cg);
      objectClassReg = cg->allocateRegister();
      TR::Instruction *instr = NULL;
      if (TR::Compiler->om.compressObjectReferences())
         instr = generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, objectClassReg, objectClassMR, cg);
      else
         instr = generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, objectClassMR, cg);
      //this instruction may try to dereference a null memory address
      //add an implicit exception point for it.
      cg->setImplicitExceptionPoint(instr);
      instr->setNeedsGCMap(0xFF00FFFF);

      TR::TreeEvaluator::generateVFTMaskInstruction(node, objectClassReg, cg);
      int32_t offsetOfLockOffset = offsetof(J9Class, lockOffset);
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, objectClassReg, generateX86MemoryReference(objectClassReg, offsetOfLockOffset, cg), cg);

      numDeps++;
      }

   TR::LabelSymbol *monitorLookupCacheLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *fallThruFromMonitorLookupCacheLabel = generateLabelSymbol(cg);

#if defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)
   // Real-time locking: the lock word holds a pointer to an OS monitor.
   TR::LabelSymbol *decCountLabel = generateLabelSymbol(cg);

   unlockedReg = cg->allocateRegister();
   tempReg = cg->allocateRegister();
   eaxReal = cg->allocateRegister();

   numDeps += 3;

   if (lwOffset <= 0)
      {
      generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, objectClassReg, 0, cg);

      generateCommonLockNurseryCodes(node,
                                     cg,
                                     false, //true for VMmonentEvaluator, false for VMmonexitEvaluator
                                     monitorLookupCacheLabel,
                                     fallThruFromMonitorLookupCacheLabel,
                                     snippetLabel,
                                     numDeps,
                                     lwOffset,
                                     objectClassReg,
                                     lookupOffsetReg,
                                     vmThreadReg,
                                     objectReg);
      }

   // load lock word
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg);

   // extract monitor from lock word
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, unlockedReg, tempReg, cg);

#define LOCK_PINNED_BIT (0x1)
   generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, unlockedReg, ~((UDATA) LOCK_PINNED_BIT), cg);

   // need a NULL test to snippet: about to dereference lock word
   generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, unlockedReg, unlockedReg, cg);
   generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);

   // if OS monitors don't match, let snippet handle it
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,
                             generateX86MemoryReference(unlockedReg, fej9->getMonitorOwnerOffset(), cg), cg);
   generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, eaxReal,
                             generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg), cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);

   // monitors match so we can unlock it
   // decrement count, maybe unlock object
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,
                             generateX86MemoryReference(unlockedReg, fej9->getMonitorEntryCountOffset(), cg), cg);
   generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, 1, cg);
   generateLabelInstruction(TR::InstOpCode::JA4, node, decCountLabel, cg);

   // leaving main-line code path
   // create the outlined path that decrements the count
      {
      TR_OutlinedInstructionsGenerator og(decCountLabel, node, cg);
      generateMemInstruction( TR::InstOpCode::DECMem(cg), node, generateX86MemoryReference(unlockedReg, fej9->getMonitorEntryCountOffset(), cg), cg);
      generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);

      og.endOutlinedInstructionSequence();
      }

   // back to main-line code path

   // unlock object...but only if lock pinned bit is clear
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, eaxReal, unlockedReg, cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);

   TR::InstOpCode::Mnemonic op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHGMemReg(gen64BitInstr) : TR::InstOpCode::CMPXCHGMemReg(gen64BitInstr);

   // compare-and-swap to unlock:
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, eaxReal, eaxReal, cg);
   cg->setImplicitExceptionPoint(generateMemRegInstruction(op, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), eaxReal, cg));

   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, eaxReal, unlockedReg, cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);

   // unlocked the object, just need to put monitor back in thread cache
   generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal,
                             generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg), cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
                             generateX86MemoryReference(unlockedReg, fej9->getMonitorNextOffset(), cg), eaxReal, cg);
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
                             generateX86MemoryReference(vmThreadReg, fej9->thisThreadMonitorCacheOffset(), cg),
                             unlockedReg, cg);

   TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, snippetLabel, fallThru, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   cg->generateDebugCounter(
      outlinedHelperCall->getFirstInstruction(),
      TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
      1, TR::DebugCounter::Cheap);

#else

   if (lwOffset <= 0)
      {
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, objectClassReg, 0, cg);

      generateCommonLockNurseryCodes(node,
                                     cg,
                                     false, //true for VMmonentEvaluator, false for VMmonexitEvaluator
                                     monitorLookupCacheLabel,
                                     fallThruFromMonitorLookupCacheLabel,
                                     snippetLabel,
                                     numDeps,
                                     lwOffset,
                                     objectClassReg,
                                     lookupOffsetReg,
                                     vmThreadReg,
                                     objectReg);
      }

   // This is a normal inlined monitor exit
   //
   // Compare the monitor slot in the object against the thread register.
   // If it succeeds we are done. Else call the helper.
   //
   // Code generated:
   //    cmp   ebp, monitor(objectReg)
   //    jne   snippet
   //    test  flags(objectReg), FLC-bit ; Only if FLC in separate word
   //    jne   snippet
   //    mov   monitor(objectReg), 0
   // label restartLabel
   //
   // Code generated for read monitor:
   //    xor   unlockedReg, unlockedReg
   //    mov   eax, INC_DEC_VALUE
   //    (lock)cmpxchg monitor(objectReg), unlockedReg
   //    jne   snippet
   // label restartLabel
   //
   if (comp->getOption(TR_ReservingLocks))
      {
      if (reservingLock)
         {
         tempReg = cg->allocateRegister();
         numDeps++;
         }
      }

   if (comp->getOption(TR_ReservingLocks))
      {
      if (reservingLock || normalLockPreservingReservation)
         {
         // Retarget the snippet's helper call at the reservation-aware helper.
         TR_RuntimeHelper helper;
         bool success = TR::TreeEvaluator::monEntryExitHelper(false, node, reservingLock, normalLockPreservingReservation, helper, cg);

         TR_ASSERT(success == true, "monEntryExitHelper: could not find runtime helper");

         node->setSymbolReference(comp->getSymRefTab()->findOrCreateRuntimeHelper(helper, true, true, true));
         }
      }
   TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::call, NULL, snippetLabel, snippetFallThru, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
   cg->generateDebugCounter(
      outlinedHelperCall->getFirstInstruction(),
      TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
      1, TR::DebugCounter::Cheap);

   if (inlineRecursive)
      {
      // Route the mainline miss through an inline recursive-exit check
      // before falling back to the helper snippet.
      TR::LabelSymbol *inlineRecursiveSnippetLabel = generateLabelSymbol(cg);
      TR::LabelSymbol *jitMonitorExitSnippetLabel = snippetLabel;
      snippetLabel = inlineRecursiveSnippetLabel;
      TR::TreeEvaluator::inlineRecursiveMonitor(node, cg, fallThru, jitMonitorExitSnippetLabel, inlineRecursiveSnippetLabel, objectReg, lwOffset, snippetFallThru, reservingLock);
      }

   bool reservingDecrementNeeded = false;

   if (node->isReadMonitor())
      {
      unlockedReg = cg->allocateRegister();
      eaxReal = cg->allocateRegister();
      generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, unlockedReg, unlockedReg, cg);
      generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, eaxReal, INC_DEC_VALUE, cg);

      TR::InstOpCode::Mnemonic op = cg->comp()->target().isSMP() ? TR::InstOpCode::LCMPXCHGMemReg(gen64BitInstr) : TR::InstOpCode::CMPXCHGMemReg(gen64BitInstr);
      cg->setImplicitExceptionPoint(generateMemRegInstruction(op, node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), unlockedReg, cg));
      numDeps += 2;
      }
   else
      {
      if (reservingLock)
         {
#if defined(TRACE_LOCK_RESERVATION)
         // Diagnostic-only: record the pre-exit lock word and object address.
         auto cds = cg->findOrCreate4ByteConstant(node, (int)node);
         TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);

         if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
            {
            generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);  // Zero out tempReg before TR::InstOpCode::LRegMem op.
            }
         cg->setImplicitExceptionPoint(generateRegMemInstruction(
            TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,
            getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));

         TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
         TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);

         generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, tempReg, cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, tempReg, cg);

         auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);
         TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);
         TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
         TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);

         generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);

         scratchReg = cg->allocateRegister();
         numDeps++;

         TR::LabelSymbol *doneTestLabel = generateLabelSymbol(cg);

         //generateLabelInstruction(TR::InstOpCode::label, node, doneTestLabel, cg);
         //generateImmSymInstruction(TR::InstOpCode::PUSHImm4, node, (uintptr_t)doneTestLabel->getStaticSymbol()->getStaticAddress(), node->getSymbolReference(), cg);
         //generateRegInstruction(TR::InstOpCode::POPReg, node, scratchReg, cg);

         TR::TreeEvaluator::generateValueTracingCode (node, vmThreadReg, scratchReg, objectReg, tempReg, cg);

         // cause crash in some cases
         if (0)
            {
            generateRegImmInstruction(TR::InstOpCode::TEST1RegImm1, node, tempReg, 0xA, cg);
            generateLabelInstruction(TR::InstOpCode::JNE4, node, doneTestLabel, cg);
            generateRegRegInstruction(TR::InstOpCode::XOR4RegReg, node, scratchReg, scratchReg, cg);
            generateRegMemInstruction(TR::InstOpCode::LRegMem(), node,
                                      scratchReg,
                                      generateX86MemoryReference(scratchReg, 0, cg), cg);
            generateLabelInstruction(TR::InstOpCode::label, node, doneTestLabel, cg);
            }
#endif
         if (node->isPrimitiveLockedRegion())
            {
            cg->setImplicitExceptionPoint(generateRegMemInstruction(
               TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,
               getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));
            // Mask out the thread ID and reservation count
            generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, tempReg, FLAGS_MASK, cg);
            // If only the RES flag is set and no other we can continue
            generateRegImmInstruction(TR::InstOpCode::XORRegImms(), node, tempReg, RES_BIT, cg);
            }
         else
            {
            // Expect lock word == vmThread | RES_BIT | REC_BIT (single recursion).
            reservingDecrementNeeded = true;
            generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg, generateX86MemoryReference(vmThreadReg, (REC_BIT | RES_BIT), cg), cg);
            cg->setImplicitExceptionPoint(generateMemRegInstruction(
               TR::InstOpCode::CMPMemReg(gen64BitInstr), node,
               getMemoryReference(objectClassReg, objectReg, lwOffset, cg), tempReg, cg));
            }
         }
      else
         {
         // Flat lock: owned exactly once by us iff lock word == vmThread.
         cg->setImplicitExceptionPoint(generateRegMemInstruction(
            TR::InstOpCode::CMPRegMem(gen64BitInstr), node, vmThreadReg,
            getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));
         }
      }

   TR::LabelSymbol *mismatchLabel = NULL;
   if (reservingLock && !TR::Options::_aggressiveLockReservation)
      mismatchLabel = generateLabelSymbol(cg);
   else
      mismatchLabel = snippetLabel;

   generateLabelInstruction(TR::InstOpCode::JNE4, node, mismatchLabel, cg);

   if (reservingDecrementNeeded)
      {
      // Subtract the reservation count
      generateMemImmInstruction(TR::InstOpCode::SUBMemImms(gen64BitInstr), node,
         getMemoryReference(objectClassReg, objectReg, lwOffset, cg), REC_BIT, cg);  // I'm not sure TR::InstOpCode::SUB4MemImms will work.
      }

   if (!node->isReadMonitor() && !reservingLock)
      {
      // Release: clear the lock word (XRS ends an HLE transaction when enabled).
      if (cg->comp()->target().cpu.supportsFeature(OMR_FEATURE_X86_HLE) && comp->getOption(TR_X86HLE))
         generateMemImmInstruction(TR::InstOpCode::XRSMemImm4(gen64BitInstr),
            node, getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);
      else
         generateMemImmInstruction(TR::InstOpCode::SMemImm4(gen64BitInstr), node,
            getMemoryReference(objectClassReg, objectReg, lwOffset, cg), 0, cg);
      }

   if (reservingLock && !TR::Options::_aggressiveLockReservation)
      {
      generateLabelInstruction(TR::InstOpCode::JMP4, node, fallThru, cg);

      // Avoid the helper for non-recursive exit in case it isn't reserved
      generateLabelInstruction(TR::InstOpCode::label, node, mismatchLabel, cg);
      auto lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);
      generateMemRegInstruction(TR::InstOpCode::CMPMemReg(gen64BitInstr), node, lwMR, vmThreadReg, cg);
      generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
      lwMR = getMemoryReference(objectClassReg, objectReg, lwOffset, cg);
      generateMemImmInstruction(TR::InstOpCode::SMemImm4(gen64BitInstr), node, lwMR, 0, cg);
      }

#endif // J9VM_OPT_REAL_TIME_LOCKING_SUPPORT

   // Create dependencies for the registers used.
   // The first dependencies must be objectReg, vmThreadReg, tempReg
   // Or, for readmonitors they must be objectReg, vmThreadReg, unlockedReg, eaxReal
   // snippet needs to find them to grab the real registers from them.
   //
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);
   deps->addPostCondition(objectReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);

#if !defined(J9VM_OPT_REAL_TIME_LOCKING_SUPPORT)
   if (node->isReadMonitor())
#endif
      {
      deps->addPostCondition(unlockedReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(eaxReal, TR::RealRegister::eax, cg);
      }

   if (lookupOffsetReg)
      deps->addPostCondition(lookupOffsetReg, TR::RealRegister::NoReg, cg);

   if (tempReg && !node->isReadMonitor())
      deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
   if (scratchReg)
      deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);
   if (objectClassReg)
      deps->addPostCondition(objectClassReg, TR::RealRegister::NoReg, cg);

   deps->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);

#if defined(TRACE_LOCK_RESERVATION)
   if (reservingLock)
      {
      // Diagnostic-only: record the post-exit lock word and object address.
      auto cds = cg->findOrCreate4ByteConstant(node, (int)node+1);
      TR::MemoryReference *tempMR = generateX86MemoryReference(cds, cg);

      cg->setImplicitExceptionPoint(generateRegMemInstruction(
         TR::InstOpCode::LRegMem(gen64BitInstr), node, tempReg,
         getMemoryReference(objectClassReg, objectReg, lwOffset, cg), cg));

      TR::SymbolReference *tempRef = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
      TR::MemoryReference *tempMR1 = generateX86MemoryReference(tempRef, cg);

      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR, tempReg, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR1, tempReg, cg);

      auto cds1 = cg->findOrCreate4ByteConstant(node, (int)node+2);
      TR::MemoryReference *tempMR3 = generateX86MemoryReference(cds1, cg);
      TR::SymbolReference *tempRef2 = comp->getSymRefTab()->createTemporary(comp->getMethodSymbol(), TR_UInt32);
      TR::MemoryReference *tempMR2 = generateX86MemoryReference(tempRef2, cg);

      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR3, objectReg, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(),node, tempMR2, objectReg, cg);
      }
#endif

   // Release all virtual registers this evaluator allocated.
   if (eaxReal)
      cg->stopUsingRegister(eaxReal);
   if (unlockedReg)
      cg->stopUsingRegister(unlockedReg);

   cg->decReferenceCount(objectRef);
   if (tempReg)
      cg->stopUsingRegister(tempReg);

   if (scratchReg)
      cg->stopUsingRegister(scratchReg);

   if (objectClassReg)
      cg->stopUsingRegister(objectClassReg);

   if (lookupOffsetReg)
      cg->stopUsingRegister(lookupOffsetReg);

   return NULL;
   }
bool J9::X86::TreeEvaluator::monEntryExitHelper(
5826
bool entry,
5827
TR::Node* node,
5828
bool reservingLock,
5829
bool normalLockPreservingReservation,
5830
TR_RuntimeHelper &helper,
5831
TR::CodeGenerator* cg)
5832
{
5833
bool methodMonitor = entry
5834
? (node->getSymbolReference() == cg->getSymRef(TR_methodMonitorEntry))
5835
: (node->getSymbolReference() == cg->getSymRef(TR_methodMonitorExit));
5836
5837
if (reservingLock)
5838
{
5839
if (node->isPrimitiveLockedRegion())
5840
{
5841
static TR_RuntimeHelper helpersCase1[2][2][2] =
5842
{
5843
{
5844
{TR_IA32JitMonitorExitReservedPrimitive, TR_IA32JitMethodMonitorExitReservedPrimitive},
5845
{TR_AMD64JitMonitorExitReservedPrimitive, TR_AMD64JitMethodMonitorExitReservedPrimitive}
5846
},
5847
{
5848
{TR_IA32JitMonitorEnterReservedPrimitive, TR_IA32JitMethodMonitorEnterReservedPrimitive},
5849
{TR_AMD64JitMonitorEnterReservedPrimitive, TR_AMD64JitMethodMonitorEnterReservedPrimitive}
5850
}
5851
};
5852
5853
helper = helpersCase1[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];
5854
return true;
5855
}
5856
else
5857
{
5858
static TR_RuntimeHelper helpersCase2[2][2][2] =
5859
{
5860
{
5861
{TR_IA32JitMonitorExitReserved, TR_IA32JitMethodMonitorExitReserved},
5862
{TR_AMD64JitMonitorExitReserved, TR_AMD64JitMethodMonitorExitReserved}
5863
},
5864
{
5865
{TR_IA32JitMonitorEnterReserved, TR_IA32JitMethodMonitorEnterReserved},
5866
{TR_AMD64JitMonitorEnterReserved, TR_AMD64JitMethodMonitorEnterReserved}
5867
}
5868
};
5869
5870
helper = helpersCase2[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];
5871
return true;
5872
}
5873
}
5874
else if (normalLockPreservingReservation)
5875
{
5876
static TR_RuntimeHelper helpersCase2[2][2][2] =
5877
{
5878
{
5879
{TR_IA32JitMonitorExitPreservingReservation, TR_IA32JitMethodMonitorExitPreservingReservation},
5880
{TR_AMD64JitMonitorExitPreservingReservation, TR_AMD64JitMethodMonitorExitPreservingReservation}
5881
},
5882
{
5883
{TR_IA32JitMonitorEnterPreservingReservation, TR_IA32JitMethodMonitorEnterPreservingReservation},
5884
{TR_AMD64JitMonitorEnterPreservingReservation, TR_AMD64JitMethodMonitorEnterPreservingReservation}
5885
}
5886
};
5887
5888
helper = helpersCase2[entry?1:0][cg->comp()->target().is64Bit()?1:0][methodMonitor?1:0];
5889
return true;
5890
}
5891
5892
return false;
5893
}
5894
5895
5896
5897
// Generate code to allocate from the object heap. Returns the register
// containing the address of the allocation.
//
// If the sizeReg is non-null, the allocation is variable length. In this case
// the elementSize is meaningful and "size" is the extra size to be added.
// Otherwise "size" contains the total size of the allocation.
//
// Also, on return the "segmentReg" register is set to the address of the
// memory segment.
//
static void genHeapAlloc(
5908
TR::Node *node,
5909
TR_OpaqueClassBlock *clazz,
5910
int32_t allocationSizeOrDataOffset,
5911
int32_t elementSize,
5912
TR::Register *sizeReg,
5913
TR::Register *eaxReal,
5914
TR::Register *segmentReg,
5915
TR::Register *tempReg,
5916
TR::LabelSymbol *failLabel,
5917
TR::CodeGenerator *cg)
5918
{
5919
5920
// Load the current heap segment and see if there is room in it. Loop if
5921
// we can't get the lock on the segment.
5922
//
5923
TR::Compilation *comp = cg->comp();
5924
TR::Register *vmThreadReg = cg->getVMThreadRegister();
5925
bool generateArraylets = comp->generateArraylets();
5926
5927
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
5928
5929
if (comp->getOptions()->realTimeGC())
5930
{
5931
#if defined(J9VM_GC_REALTIME)
5932
// this will be bogus for variable length allocations because it only includes the header size (+ arraylet ptr for arrays)
5933
UDATA sizeClass = fej9->getObjectSizeClass(allocationSizeOrDataOffset);
5934
5935
if (comp->getOption(TR_BreakOnNew))
5936
generateInstruction(TR::InstOpCode::INT3, node, cg);
5937
5938
// heap allocation, so proceed
5939
if (sizeReg)
5940
{
5941
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, eaxReal, eaxReal, cg);
5942
5943
// make sure size isn't too big
5944
// convert max object size to num elements because computing an object size from num elements may overflow
5945
TR_ASSERT(fej9->getMaxObjectSizeForSizeClass() <= UINT_MAX, "assertion failure");
5946
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, (fej9->getMaxObjectSizeForSizeClass()-allocationSizeOrDataOffset)/elementSize, cg);
5947
generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);
5948
5949
// Hybrid arraylets need a zero length test if the size is unknown.
5950
//
5951
if (!generateArraylets)
5952
{
5953
generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);
5954
generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);
5955
}
5956
5957
// need to round up to sizeof(UDATA) so we can use it to index into size class index array
5958
// conservatively just add sizeof(UDATA) bytes and round
5959
int32_t round = 0;
5960
if (elementSize < sizeof(UDATA))
5961
round = sizeof(UDATA) - 1;
5962
5963
// now compute size of object in bytes
5964
generateRegMemInstruction(TR::InstOpCode::LEARegMem(),
5965
node,
5966
segmentReg,
5967
generateX86MemoryReference(eaxReal,
5968
sizeReg,
5969
TR::MemoryReference::convertMultiplierToStride(elementSize),
5970
allocationSizeOrDataOffset + round, cg), cg);
5971
5972
5973
if (elementSize < sizeof(UDATA))
5974
generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, segmentReg, -(int32_t)sizeof(UDATA), cg);
5975
5976
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
5977
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, segmentReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);
5978
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
5979
generateLabelInstruction(TR::InstOpCode::JAE4, node, doneLabel, cg);
5980
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, segmentReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);
5981
generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);
5982
#endif
5983
5984
// get size class
5985
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
5986
node,
5987
tempReg,
5988
generateX86MemoryReference(vmThreadReg, fej9->thisThreadJavaVMOffset(), cg), cg);
5989
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
5990
node,
5991
tempReg,
5992
generateX86MemoryReference(tempReg, fej9->getRealtimeSizeClassesOffset(), cg), cg);
5993
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
5994
node,
5995
tempReg,
5996
generateX86MemoryReference(tempReg,
5997
segmentReg, TR::MemoryReference::convertMultiplierToStride(1),
5998
fej9->getSizeClassesIndexOffset(),
5999
cg),
6000
cg);
6001
6002
// tempReg now holds size class
6003
TR::MemoryReference *currentMemRef, *topMemRef, *currentMemRefBump;
6004
if (cg->comp()->target().is64Bit())
6005
{
6006
TR_ASSERT(sizeof(J9VMGCSegregatedAllocationCacheEntry) == 16, "unexpected J9VMGCSegregatedAllocationCacheEntry size");
6007
// going to play some games here
6008
// need to use tempReg to index into two arrays:
6009
// 1) allocation caches
6010
// 2) cell size array
6011
// The first one has stride 16, second one stride sizeof(UDATA)
6012
// We need a shift instruction to be able to do stride 16
6013
// To avoid two shifts, only do one for stride sizeof(UDATA) and use a multiplier in memory ref for 16
6014
// 64-bit, so shift 3 times for sizeof(UDATA) and use multiplier stride 2 in memory references
6015
generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, tempReg, 3, cg);
6016
currentMemRef = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheCurrentOffset(0), cg);
6017
topMemRef = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheTopOffset(0), cg);
6018
currentMemRefBump = generateX86MemoryReference(vmThreadReg, tempReg, TR::MemoryReference::convertMultiplierToStride(2), fej9->thisThreadAllocationCacheCurrentOffset(0), cg);
6019
}
6020
else
6021
{
6022
// size needs to be 8 or less or it there's no multiplier stride available (would need to use other branch of else)
6023
TR_ASSERT(sizeof(J9VMGCSegregatedAllocationCacheEntry) <= 8, "unexpected J9VMGCSegregatedAllocationCacheEntry size");
6024
6025
currentMemRef = generateX86MemoryReference(vmThreadReg, tempReg,
6026
TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),
6027
fej9->thisThreadAllocationCacheCurrentOffset(0), cg);
6028
topMemRef = generateX86MemoryReference(vmThreadReg, tempReg,
6029
TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),
6030
fej9->thisThreadAllocationCacheTopOffset(0), cg);
6031
currentMemRefBump = generateX86MemoryReference(vmThreadReg, tempReg,
6032
TR::MemoryReference::convertMultiplierToStride(sizeof(J9VMGCSegregatedAllocationCacheEntry)),
6033
fej9->thisThreadAllocationCacheCurrentOffset(0), cg);
6034
}
6035
// tempReg now contains size class (32-bit) or size class * sizeof(J9VMGCSegregatedAllocationCacheEntry) (64-bit)
6036
6037
// get next cell for this size class
6038
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, eaxReal, currentMemRef, cg);
6039
6040
// if null, then no cell available, use slow path
6041
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, eaxReal, topMemRef, cg);
6042
generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);
6043
6044
// have a valid cell, need to update current cell pointer
6045
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6046
node,
6047
segmentReg,
6048
generateX86MemoryReference(vmThreadReg, fej9->thisThreadJavaVMOffset(), cg), cg);
6049
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6050
node,
6051
segmentReg,
6052
generateX86MemoryReference(segmentReg, fej9->getRealtimeSizeClassesOffset(), cg), cg);
6053
if (cg->comp()->target().is64Bit())
6054
{
6055
// tempReg already has already been shifted for sizeof(UDATA)
6056
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6057
node,
6058
segmentReg,
6059
generateX86MemoryReference(segmentReg,
6060
tempReg,
6061
TR::MemoryReference::convertMultiplierToStride(1),
6062
fej9->getSmallCellSizesOffset(),
6063
cg),
6064
cg);
6065
}
6066
else
6067
{
6068
// tempReg needs to be shifted for sizeof(UDATA)
6069
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6070
node,
6071
segmentReg,
6072
generateX86MemoryReference(segmentReg,
6073
tempReg,
6074
TR::MemoryReference::convertMultiplierToStride(sizeof(UDATA)),
6075
fej9->getSmallCellSizesOffset(),
6076
cg),
6077
cg);
6078
}
6079
// segmentReg now holds cell size
6080
6081
// update current cell by cell size
6082
generateMemRegInstruction(TR::InstOpCode::ADDMemReg(), node, currentMemRefBump, segmentReg, cg);
6083
}
6084
else
6085
{
6086
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6087
node,
6088
eaxReal,
6089
generateX86MemoryReference(vmThreadReg,
6090
fej9->thisThreadAllocationCacheCurrentOffset(sizeClass),
6091
cg),
6092
cg);
6093
6094
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),
6095
node,
6096
eaxReal,
6097
generateX86MemoryReference(vmThreadReg,
6098
fej9->thisThreadAllocationCacheTopOffset(sizeClass),
6099
cg),
6100
cg);
6101
6102
generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);
6103
6104
// we have an object in eaxReal, now bump the current updatepointer
6105
TR::InstOpCode::Mnemonic opcode;
6106
uint32_t cellSize = fej9->getCellSizeForSizeClass(sizeClass);
6107
if (cellSize <= 127)
6108
opcode = TR::InstOpCode::ADDMemImms();
6109
else if (cellSize == 128)
6110
{
6111
opcode = TR::InstOpCode::SUBMemImms();
6112
cellSize = (uint32_t)-128;
6113
}
6114
else
6115
opcode = TR::InstOpCode::ADDMemImm4();
6116
6117
generateMemImmInstruction(opcode, node,
6118
generateX86MemoryReference(vmThreadReg,
6119
fej9->thisThreadAllocationCacheCurrentOffset(sizeClass),
6120
cg),
6121
cellSize, cg);
6122
}
6123
6124
// we're done
6125
return;
6126
#endif
6127
}
6128
else
6129
{
6130
bool shouldAlignToCacheBoundary = false;
6131
bool isSmallAllocation = false;
6132
6133
size_t heapAlloc_offset=offsetof(J9VMThread, heapAlloc);
6134
size_t heapTop_offset=offsetof(J9VMThread, heapTop);
6135
size_t tlhPrefetchFTA_offset= offsetof(J9VMThread, tlhPrefetchFTA);
6136
#ifdef J9VM_GC_NON_ZERO_TLH
6137
if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())
6138
{
6139
heapAlloc_offset=offsetof(J9VMThread, nonZeroHeapAlloc);
6140
heapTop_offset=offsetof(J9VMThread, nonZeroHeapTop);
6141
tlhPrefetchFTA_offset= offsetof(J9VMThread, nonZeroTlhPrefetchFTA);
6142
}
6143
#endif
6144
// Load the base of the next available heap storage. This load is done speculatively on the assumption that the
6145
// allocation will be inlined. If the assumption turns out to be false then the performance impact should be minimal
6146
// because the helper will be called in that case. It is necessary to insert this load here so that it dominates all
6147
// control paths through this internal control flow region.
6148
//
6149
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6150
node,
6151
eaxReal,
6152
generateX86MemoryReference(vmThreadReg,heapAlloc_offset, cg), cg);
6153
6154
if (comp->getOption(TR_EnableNewAllocationProfiling))
6155
{
6156
TR::LabelSymbol *doneProfilingLabel = generateLabelSymbol(cg);
6157
6158
uint32_t *globalAllocationDataPointer = fej9->getGlobalAllocationDataPointer();
6159
if (globalAllocationDataPointer)
6160
{
6161
TR::MemoryReference *gmr = generateX86MemoryReference((uintptr_t)globalAllocationDataPointer, cg);
6162
6163
generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4,
6164
node,
6165
generateX86MemoryReference((uint32_t)(uintptr_t)globalAllocationDataPointer, cg),
6166
0x07ffffff,
6167
cg);
6168
generateLabelInstruction(TR::InstOpCode::JAE4, node, doneProfilingLabel, cg);
6169
6170
generateMemInstruction(TR::InstOpCode::INC4Mem, node, gmr, cg);
6171
uint32_t *dataPointer = fej9->getAllocationProfilingDataPointer(node->getByteCodeInfo(), clazz, node->getOwningMethod(), comp);
6172
if (dataPointer)
6173
{
6174
TR::MemoryReference *mr = generateX86MemoryReference((uint32_t)(uintptr_t)dataPointer, cg);
6175
generateMemInstruction(TR::InstOpCode::INC4Mem, node, mr, cg);
6176
}
6177
6178
generateLabelInstruction(TR::InstOpCode::label, node, doneProfilingLabel, cg);
6179
}
6180
}
6181
6182
bool canSkipOverflowCheck = false;
6183
6184
// If the array length is constant, check to see if the size of the array will fit in a single arraylet leaf.
6185
// If the allocation size is too large, call the snippet.
6186
//
6187
if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))
6188
{
6189
if (comp->getOption(TR_DisableTarokInlineArrayletAllocation))
6190
generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);
6191
6192
if (sizeReg)
6193
{
6194
uint32_t maxContiguousArrayletLeafSizeInBytes =
6195
(uint32_t)(TR::Compiler->om.arrayletLeafSize() - TR::Compiler->om.sizeofReferenceAddress());
6196
6197
int32_t maxArrayletSizeInElements = maxContiguousArrayletLeafSizeInBytes/elementSize;
6198
6199
// Hybrid arraylets need a zero length test if the size is unknown.
6200
//
6201
generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);
6202
generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);
6203
6204
generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, sizeReg, maxArrayletSizeInElements, cg);
6205
generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);
6206
6207
// If the max arraylet leaf size is less than the amount of free space available on
6208
// the stack, there is no need to check for an overflow scenario.
6209
//
6210
if (maxContiguousArrayletLeafSizeInBytes <= cg->getMaxObjectSizeGuaranteedNotToOverflow() )
6211
canSkipOverflowCheck = true;
6212
}
6213
else if (TR::Compiler->om.isDiscontiguousArray(allocationSizeOrDataOffset))
6214
{
6215
// TODO: just call the helper directly and don't generate any
6216
// further instructions.
6217
//
6218
// Actually, we should never get here because we've already checked
6219
// constant lengths for discontiguity...
6220
//
6221
generateLabelInstruction(TR::InstOpCode::JMP4, node, failLabel, cg);
6222
}
6223
}
6224
6225
if (sizeReg && !canSkipOverflowCheck)
6226
{
6227
// Hybrid arraylets need a zero length test if the size is unknown.
6228
// The length could be zero.
6229
//
6230
if (!generateArraylets)
6231
{
6232
generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, sizeReg, sizeReg, cg);
6233
generateLabelInstruction(TR::InstOpCode::JE4, node, failLabel, cg);
6234
}
6235
6236
// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes
6237
// of slush will exist between the top of the heap and the end of the address space.
6238
//
6239
uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();
6240
uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;
6241
6242
if (cg->comp()->target().is64Bit() && !(maxObjectSizeInElements > 0 && maxObjectSizeInElements <= (uintptr_t)INT_MAX))
6243
{
6244
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, maxObjectSizeInElements, cg);
6245
generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, sizeReg, tempReg, cg);
6246
}
6247
else
6248
{
6249
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, (int32_t)maxObjectSizeInElements, cg);
6250
}
6251
6252
// Must be an unsigned comparison on sizes.
6253
//
6254
generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);
6255
}
6256
6257
#if !defined(J9VM_GC_THREAD_LOCAL_HEAP)
6258
// Establish a loop label in case the new heap pointer cannot be committed.
6259
//
6260
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
6261
generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);
6262
#endif
6263
6264
if (sizeReg)
6265
{
6266
// calculate variable size, rounding up if necessary to a intptr_t multiple boundary
6267
//
6268
int32_t round; // zero indicates no rounding is necessary
6269
6270
if (!generateArraylets)
6271
{
6272
// TR_ASSERT(allocationSizeOrDataOffset % fej9->getObjectAlignmentInBytes() == 0, "Array header size of %d is not a multiple of %d", allocationSizeOrDataOffset, fej9->getObjectAlignmentInBytes());
6273
}
6274
6275
6276
round = (elementSize < TR::Compiler->om.getObjectAlignmentInBytes()) ? TR::Compiler->om.getObjectAlignmentInBytes() : 0;
6277
6278
int32_t disp32 = round ? (round-1) : 0;
6279
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
6280
if ( (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray))
6281
{
6282
// All arrays in combo builds will always be at least 20 bytes in size in all specs:
6283
//
6284
// 1) class pointer + contig length + dataAddr + one or more elements
6285
// 2) class pointer + 0 + 0 (for zero length arrays) + dataAddr
6286
//
6287
TR_ASSERT(J9_GC_MINIMUM_INDEXABLE_OBJECT_SIZE >= 8, "Expecting a minimum indexable object size >= 8 (actual minimum is %d)\n", J9_GC_MINIMUM_INDEXABLE_OBJECT_SIZE);
6288
6289
generateRegMemInstruction(
6290
TR::InstOpCode::LEARegMem(),
6291
node,
6292
tempReg,
6293
generateX86MemoryReference(
6294
eaxReal,
6295
sizeReg,
6296
TR::MemoryReference::convertMultiplierToStride(elementSize),
6297
allocationSizeOrDataOffset+disp32, cg), cg);
6298
6299
if (round)
6300
{
6301
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, -round, cg);
6302
}
6303
}
6304
else
6305
#endif
6306
{
6307
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
6308
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
6309
#endif
6310
6311
6312
generateRegMemInstruction(
6313
TR::InstOpCode::LEARegMem(),
6314
node,
6315
tempReg,
6316
generateX86MemoryReference(
6317
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
6318
tempReg,
6319
#else
6320
eaxReal,
6321
#endif
6322
sizeReg,
6323
TR::MemoryReference::convertMultiplierToStride(elementSize),
6324
allocationSizeOrDataOffset+disp32, cg), cg);
6325
6326
if (round)
6327
{
6328
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, -round, cg);
6329
}
6330
6331
#ifdef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
6332
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);
6333
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
6334
generateLabelInstruction(TR::InstOpCode::JAE4, node, doneLabel, cg);
6335
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, J9_GC_MINIMUM_OBJECT_SIZE, cg);
6336
generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);
6337
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, tempReg, eaxReal, cg);
6338
#endif
6339
}
6340
}
6341
else
6342
{
6343
isSmallAllocation = allocationSizeOrDataOffset <= 0x40 ? true : false;
6344
allocationSizeOrDataOffset = (allocationSizeOrDataOffset+TR::Compiler->om.getObjectAlignmentInBytes()-1) & (-TR::Compiler->om.getObjectAlignmentInBytes());
6345
6346
#if defined(J9VM_GC_THREAD_LOCAL_HEAP)
6347
if ((node->getOpCodeValue() == TR::New) &&
6348
(comp->getMethodHotness() >= hot || node->shouldAlignTLHAlloc()))
6349
{
6350
TR_OpaqueMethodBlock *ownMethod = node->getOwningMethod();
6351
TR::Node *classChild = node->getFirstChild();
6352
char * className = NULL;
6353
TR_OpaqueClassBlock *clazz = NULL;
6354
6355
if (classChild &&
6356
classChild->getSymbolReference() &&
6357
!classChild->getSymbolReference()->isUnresolved())
6358
{
6359
TR::SymbolReference *symRef = classChild->getSymbolReference();
6360
TR::Symbol *sym = symRef->getSymbol();
6361
6362
if (sym &&
6363
sym->getKind() == TR::Symbol::IsStatic &&
6364
sym->isClassObject())
6365
{
6366
TR::StaticSymbol * staticSym = symRef->getSymbol()->castToStaticSymbol();
6367
void * staticAddress = staticSym->getStaticAddress();
6368
if (symRef->getCPIndex() >= 0)
6369
{
6370
if (!staticSym->addressIsCPIndexOfStatic() && staticAddress)
6371
{
6372
int32_t len;
6373
className = TR::Compiler->cls.classNameChars(comp,symRef, len);
6374
clazz = (TR_OpaqueClassBlock *)staticAddress;
6375
}
6376
}
6377
}
6378
}
6379
6380
uint32_t instanceSizeForAlignment = 30;
6381
static char *p= feGetEnv("TR_AlignInstanceSize");
6382
if (p)
6383
instanceSizeForAlignment = atoi(p);
6384
6385
if ((comp->getMethodHotness() >= hot) && clazz &&
6386
!cg->getCurrentEvaluationBlock()->isCold() &&
6387
TR::Compiler->cls.classInstanceSize(clazz)>=instanceSizeForAlignment)
6388
{
6389
shouldAlignToCacheBoundary = true;
6390
6391
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, eaxReal,
6392
generateX86MemoryReference(eaxReal, 63, cg), cg);
6393
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, eaxReal, 0xFFFFFFC0, cg);
6394
}
6395
}
6396
#endif // J9VM_GC_THREAD_LOCAL_HEAP
6397
6398
if ((uint32_t)allocationSizeOrDataOffset > cg->getMaxObjectSizeGuaranteedNotToOverflow())
6399
{
6400
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, eaxReal, cg);
6401
if (allocationSizeOrDataOffset <= 127)
6402
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, tempReg, allocationSizeOrDataOffset, cg);
6403
else if (allocationSizeOrDataOffset == 128)
6404
generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, tempReg, (unsigned)-128, cg);
6405
else
6406
generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, tempReg, allocationSizeOrDataOffset, cg);
6407
6408
// Check for overflow
6409
generateLabelInstruction(TR::InstOpCode::JB4, node, failLabel, cg);
6410
}
6411
else
6412
{
6413
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg,
6414
generateX86MemoryReference(eaxReal, allocationSizeOrDataOffset, cg), cg);
6415
}
6416
}
6417
6418
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),
6419
node,
6420
tempReg,
6421
generateX86MemoryReference(vmThreadReg, heapTop_offset, cg), cg);
6422
6423
generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);
6424
6425
#if defined(J9VM_GC_THREAD_LOCAL_HEAP)
6426
6427
if (shouldAlignToCacheBoundary)
6428
{
6429
// Alignment to a cache line boundary may require inserting more padding than is normally
6430
// necessary to achieve the alignment. In those cases, insert GC dark matter to describe
6431
// the space inserted.
6432
//
6433
6434
generateRegInstruction(TR::InstOpCode::PUSHReg, node, tempReg, cg);
6435
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6436
node,
6437
tempReg,
6438
generateX86MemoryReference(vmThreadReg,heapAlloc_offset, cg), cg);
6439
6440
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, eaxReal, tempReg, cg);
6441
6442
TR::LabelSymbol *doneAlignLabel = generateLabelSymbol(cg);
6443
TR::LabelSymbol *multiSlotGapLabel = generateLabelSymbol(cg);
6444
6445
generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, sizeof(uintptr_t), cg);
6446
generateLabelInstruction(TR::InstOpCode::JB4, node, doneAlignLabel, cg);
6447
generateLabelInstruction(TR::InstOpCode::JA4, node, multiSlotGapLabel, cg);
6448
6449
int32_t singleSlotHole;
6450
6451
singleSlotHole = J9_GC_SINGLE_SLOT_HOLE;
6452
6453
if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
6454
{
6455
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
6456
generateX86MemoryReference(tempReg, 0, cg), singleSlotHole, cg);
6457
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
6458
generateX86MemoryReference(tempReg, 4, cg), singleSlotHole, cg);
6459
}
6460
else
6461
{
6462
generateMemImmInstruction(
6463
TR::InstOpCode::SMemImm4(), node,
6464
generateX86MemoryReference(tempReg, 0, cg), singleSlotHole, cg);
6465
}
6466
6467
generateLabelInstruction(TR::InstOpCode::JMP4, node, doneAlignLabel, cg);
6468
generateLabelInstruction(TR::InstOpCode::label, node, multiSlotGapLabel, cg);
6469
6470
int32_t multiSlotHole;
6471
6472
multiSlotHole = J9_GC_MULTI_SLOT_HOLE;
6473
6474
generateMemImmInstruction(
6475
TR::InstOpCode::SMemImm4(), node,
6476
generateX86MemoryReference(tempReg, 0, cg),
6477
multiSlotHole, cg);
6478
6479
generateMemRegInstruction(
6480
TR::InstOpCode::SMemReg(), node,
6481
generateX86MemoryReference(tempReg, sizeof(uintptr_t), cg),
6482
eaxReal, cg);
6483
6484
generateLabelInstruction(TR::InstOpCode::label, node, doneAlignLabel, cg);
6485
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, eaxReal, tempReg, cg);
6486
generateRegInstruction(TR::InstOpCode::POPReg, node, tempReg, cg);
6487
}
6488
6489
// Make sure that the arraylet is aligned properly.
6490
//
6491
if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray) )
6492
{
6493
generateRegMemInstruction(TR::InstOpCode::LEARegMem(),node,tempReg, generateX86MemoryReference(tempReg,TR::Compiler->om.getObjectAlignmentInBytes()-1,cg),cg);
6494
if (cg->comp()->target().is64Bit())
6495
generateRegImmInstruction(TR::InstOpCode::AND8RegImm4,node,tempReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);
6496
else
6497
generateRegImmInstruction(TR::InstOpCode::AND4RegImm4,node,tempReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);
6498
}
6499
6500
generateMemRegInstruction(TR::InstOpCode::SMemReg(),
6501
node,
6502
generateX86MemoryReference(vmThreadReg, heapAlloc_offset, cg),
6503
tempReg, cg);
6504
6505
if (!isSmallAllocation && cg->enableTLHPrefetching())
6506
{
6507
TR::LabelSymbol *prefetchSnippetLabel = generateLabelSymbol(cg);
6508
TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);
6509
cg->addSnippet(new (cg->trHeapMemory()) TR::X86AllocPrefetchSnippet(cg, node, TR::Options::_TLHPrefetchSize,
6510
restartLabel, prefetchSnippetLabel,
6511
(!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())));
6512
6513
6514
bool useDirectPrefetchCall = false;
6515
bool useSharedCodeCacheSnippet = fej9->supportsCodeCacheSnippets();
6516
6517
// Generate the prefetch thunk in code cache. Only generate this once.
6518
//
6519
bool prefetchThunkGenerated = (fej9->getAllocationPrefetchCodeSnippetAddress(comp) != 0);
6520
#ifdef J9VM_GC_NON_ZERO_TLH
6521
if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())
6522
{
6523
prefetchThunkGenerated = (fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp) !=0);
6524
}
6525
#endif
6526
if (useSharedCodeCacheSnippet && prefetchThunkGenerated)
6527
{
6528
useDirectPrefetchCall = true;
6529
}
6530
6531
if (!comp->getOption(TR_EnableNewX86PrefetchTLH))
6532
{
6533
generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, tempReg, eaxReal, cg);
6534
6535
generateMemRegInstruction(TR::InstOpCode::SUB4MemReg,
6536
node,
6537
generateX86MemoryReference(vmThreadReg, tlhPrefetchFTA_offset, cg),
6538
tempReg, cg);
6539
if (!useDirectPrefetchCall)
6540
generateLabelInstruction(TR::InstOpCode::JLE4, node, prefetchSnippetLabel, cg);
6541
else
6542
{
6543
generateLabelInstruction(TR::InstOpCode::JG4, node, restartLabel, cg);
6544
TR::SymbolReference * helperSymRef = cg->getSymRefTab()->findOrCreateRuntimeHelper(TR_X86CodeCachePrefetchHelper);
6545
TR::MethodSymbol *helperSymbol = helperSymRef->getSymbol()->castToMethodSymbol();
6546
#ifdef J9VM_GC_NON_ZERO_TLH
6547
if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization())
6548
{
6549
helperSymbol->setMethodAddress(fej9->getAllocationNoZeroPrefetchCodeSnippetAddress(comp));
6550
}
6551
else
6552
{
6553
helperSymbol->setMethodAddress(fej9->getAllocationPrefetchCodeSnippetAddress(comp));
6554
}
6555
#else
6556
helperSymbol->setMethodAddress(fej9->getAllocationPrefetchCodeSnippetAddress(comp));
6557
#endif
6558
generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)helperSymbol->getMethodAddress(), helperSymRef, cg);
6559
}
6560
}
6561
else
6562
{
6563
// This currently only works when 'tlhPrefetchFTA' field is 4 bytes (on 32-bit or a
6564
// compressed references build). True 64-bit support requires this field be widened
6565
// to 64-bits.
6566
//
6567
generateRegMemInstruction(TR::InstOpCode::CMP4RegMem, node,
6568
tempReg,
6569
generateX86MemoryReference(vmThreadReg,tlhPrefetchFTA_offset, cg),
6570
cg);
6571
generateLabelInstruction(TR::InstOpCode::JAE4, node, prefetchSnippetLabel, cg);
6572
}
6573
6574
generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, cg);
6575
}
6576
6577
#else // J9VM_GC_THREAD_LOCAL_HEAP
6578
generateMemRegInstruction(TR::InstOpCode::CMPXCHGMemReg(), node, generateX86MemoryReference(vmThreadReg, heapAlloc_offset, cg), tempReg, cg);
6579
generateLabelInstruction(TR::InstOpCode::JNE4, node, loopLabel, cg);
6580
#endif // !J9VM_GC_THREAD_LOCAL_HEAP
6581
}
6582
}
6583
6584
// ------------------------------------------------------------------------------
// genHeapAlloc2
//
// Will eventually become the de facto genHeapAlloc. Needs packed array and 2TLH
// support.
// ------------------------------------------------------------------------------
6591
static void genHeapAlloc2(
6592
TR::Node *node,
6593
TR_OpaqueClassBlock *clazz,
6594
int32_t allocationSizeOrDataOffset,
6595
int32_t elementSize,
6596
TR::Register *sizeReg,
6597
TR::Register *eaxReal,
6598
TR::Register *segmentReg,
6599
TR::Register *tempReg,
6600
TR::LabelSymbol *failLabel,
6601
TR::CodeGenerator *cg)
6602
{
6603
// Load the current heap segment and see if there is room in it. Loop if
6604
// we can't get the lock on the segment.
6605
//
6606
TR::Compilation *comp = cg->comp();
6607
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
6608
TR::Register *vmThreadReg = cg->getVMThreadRegister();
6609
bool generateArraylets = comp->generateArraylets();
6610
bool isTooSmallToPrefetch = false;
6611
6612
{
6613
bool shouldAlignToCacheBoundary = false;
6614
6615
// Load the base of the next available heap storage. This load is done speculatively on the assumption that the
6616
// allocation will be inlined. If the assumption turns out to be false then the performance impact should be minimal
6617
// because the helper will be called in that case. It is necessary to insert this load here so that it dominates all
6618
// control paths through this internal control flow region.
6619
//
6620
6621
if (sizeReg)
6622
{
6623
6624
// -------------
6625
//
6626
// VARIABLE SIZE
6627
//
6628
// -------------
6629
6630
// The GC will guarantee that at least 'maxObjectSizeGuaranteedNotToOverflow' bytes
6631
// of slush will exist between the top of the heap and the end of the address space.
6632
//
6633
uintptr_t maxObjectSize = cg->getMaxObjectSizeGuaranteedNotToOverflow();
6634
uintptr_t maxObjectSizeInElements = maxObjectSize / elementSize;
6635
6636
if (cg->comp()->target().is64Bit() && !(maxObjectSizeInElements > 0 && maxObjectSizeInElements <= (uintptr_t)INT_MAX))
6637
{
6638
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, segmentReg, maxObjectSizeInElements, cg);
6639
generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, sizeReg, segmentReg, cg);
6640
}
6641
else
6642
{
6643
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, (int32_t)maxObjectSizeInElements, cg);
6644
}
6645
6646
// Must be an unsigned comparison on sizes.
6647
//
6648
generateLabelInstruction(TR::InstOpCode::JAE4, node, failLabel, cg);
6649
6650
6651
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6652
node,
6653
eaxReal,
6654
generateX86MemoryReference(vmThreadReg,
6655
offsetof(J9VMThread, heapAlloc), cg), cg);
6656
6657
6658
// calculate variable size, rounding up if necessary to a intptr_t multiple boundary
6659
//
6660
int32_t round; // zero indicates no rounding is necessary
6661
6662
if (!generateArraylets)
6663
{
6664
// TR_ASSERT(allocationSizeOrDataOffset % fej9->getObjectAlignmentInBytes() == 0, "Array header size of %d is not a multiple of %d", allocationSizeOrDataOffset, fej9->getObjectAlignmentInBytes());
6665
}
6666
6667
round = (elementSize >= TR::Compiler->om.getObjectAlignmentInBytes())? 0 : TR::Compiler->om.getObjectAlignmentInBytes();
6668
int32_t disp32 = round ? (round-1) : 0;
6669
6670
/*
6671
mov rcx, rdx ; # of array elements (1)
6672
cmp rcx, 1 (1)
6673
adc rcx, 0 ; adjust for zero length (1)
6674
6675
shl rcx, 2 (1)
6676
add rcx, 0xf ; rcx + header (8) + 7 (1)
6677
6678
and rcx,0xfffffffffffffff8 ; round down (1)
6679
*/
6680
6681
generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, segmentReg, sizeReg, cg);
6682
6683
// Artificially adjust the number of elements by 1 if the array is zero length. This works
6684
// because either the array is zero length and needs a discontiguous array length field
6685
// (occupying a slot) or it has at least 1 element which will take up a slot anyway.
6686
//
6687
// Native 64-bit array headers do not need this adjustment because the
6688
// contiguous and discontiguous array headers are the same size.
6689
//
6690
if (cg->comp()->target().is32Bit() || (cg->comp()->target().is64Bit() && comp->useCompressedPointers()))
6691
{
6692
generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, segmentReg, 1, cg);
6693
generateRegImmInstruction(TR::InstOpCode::ADC4RegImm4, node, segmentReg, 0, cg);
6694
}
6695
6696
uint8_t shiftVal = TR::MemoryReference::convertMultiplierToStride(elementSize);
6697
if (shiftVal > 0)
6698
{
6699
generateRegImmInstruction(TR::InstOpCode::SHLRegImm1(), node, segmentReg, shiftVal, cg);
6700
}
6701
6702
generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, segmentReg, allocationSizeOrDataOffset+disp32, cg);
6703
6704
if (round)
6705
{
6706
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, segmentReg, -round, cg);
6707
}
6708
6709
// Copy full object size in bytes to RCX for zero init via REP TR::InstOpCode::STOSQ
6710
//
6711
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, segmentReg, cg);
6712
6713
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, segmentReg, eaxReal, cg);
6714
}
6715
else
6716
{
6717
// ----------
6718
//
6719
// FIXED SIZE
6720
//
6721
// ----------
6722
6723
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6724
node,
6725
eaxReal,
6726
generateX86MemoryReference(vmThreadReg,
6727
offsetof(J9VMThread, heapAlloc), cg), cg);
6728
6729
if (comp->getOptLevel() < hot)
6730
isTooSmallToPrefetch = allocationSizeOrDataOffset <= 0x40 ? true : false;
6731
6732
allocationSizeOrDataOffset = (allocationSizeOrDataOffset+TR::Compiler->om.getObjectAlignmentInBytes()-1) & (-TR::Compiler->om.getObjectAlignmentInBytes());
6733
6734
#if defined(J9VM_GC_THREAD_LOCAL_HEAP)
6735
if ((node->getOpCodeValue() == TR::New) &&
6736
(comp->getMethodHotness() >= hot || node->shouldAlignTLHAlloc()))
6737
{
6738
TR_OpaqueMethodBlock *ownMethod = node->getOwningMethod();
6739
6740
TR::Node *classChild = node->getFirstChild();
6741
char * className = NULL;
6742
TR_OpaqueClassBlock *clazz = NULL;
6743
6744
if (classChild &&
6745
classChild->getSymbolReference() &&
6746
!classChild->getSymbolReference()->isUnresolved())
6747
{
6748
TR::SymbolReference *symRef = classChild->getSymbolReference();
6749
TR::Symbol *sym = symRef->getSymbol();
6750
6751
if (sym &&
6752
sym->getKind() == TR::Symbol::IsStatic &&
6753
sym->isClassObject())
6754
{
6755
TR::StaticSymbol * staticSym = symRef->getSymbol()->castToStaticSymbol();
6756
void * staticAddress = staticSym->getStaticAddress();
6757
if (symRef->getCPIndex() >= 0)
6758
{
6759
if (!staticSym->addressIsCPIndexOfStatic() && staticAddress)
6760
{
6761
int32_t len;
6762
className = TR::Compiler->cls.classNameChars(comp, symRef, len);
6763
clazz = (TR_OpaqueClassBlock *)staticAddress;
6764
}
6765
}
6766
}
6767
}
6768
6769
uint32_t instanceSizeForAlignment = 30;
6770
static char *p= feGetEnv("TR_AlignInstanceSize");
6771
if (p)
6772
instanceSizeForAlignment = atoi(p);
6773
6774
if ((comp->getMethodHotness() >= hot) && clazz &&
6775
!cg->getCurrentEvaluationBlock()->isCold() &&
6776
TR::Compiler->cls.classInstanceSize(clazz)>=instanceSizeForAlignment)
6777
{
6778
shouldAlignToCacheBoundary = true;
6779
6780
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, eaxReal,
6781
generateX86MemoryReference(eaxReal, 63, cg), cg);
6782
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, eaxReal, 0xFFFFFFC0, cg);
6783
}
6784
}
6785
#endif // J9VM_GC_THREAD_LOCAL_HEAP
6786
6787
if ((uint32_t)allocationSizeOrDataOffset > cg->getMaxObjectSizeGuaranteedNotToOverflow())
6788
{
6789
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, segmentReg, eaxReal, cg);
6790
if (allocationSizeOrDataOffset <= 127)
6791
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, segmentReg, allocationSizeOrDataOffset, cg);
6792
else if (allocationSizeOrDataOffset == 128)
6793
generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, segmentReg, (unsigned)-128, cg);
6794
else
6795
generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), node, segmentReg, allocationSizeOrDataOffset, cg);
6796
6797
// Check for overflow
6798
generateLabelInstruction(TR::InstOpCode::JB4, node, failLabel, cg);
6799
}
6800
else
6801
{
6802
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg,
6803
generateX86MemoryReference(eaxReal, allocationSizeOrDataOffset, cg), cg);
6804
}
6805
}
6806
6807
6808
// -----------
6809
// MERGED PATH
6810
// -----------
6811
6812
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(),
6813
node,
6814
segmentReg,
6815
generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapTop), cg), cg);
6816
6817
generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);
6818
6819
// ------------
6820
// 1st PREFETCH
6821
// ------------
6822
6823
if (!isTooSmallToPrefetch)
6824
generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0xc0, cg), cg);
6825
6826
if (shouldAlignToCacheBoundary)
6827
{
6828
// Alignment to a cache line boundary may require inserting more padding than is normally
6829
// necessary to achieve the alignment. In those cases, insert GC dark matter to describe
6830
// the space inserted.
6831
//
6832
6833
generateRegInstruction(TR::InstOpCode::PUSHReg, node, segmentReg, cg);
6834
generateRegMemInstruction(TR::InstOpCode::LRegMem(),
6835
node,
6836
segmentReg,
6837
generateX86MemoryReference(vmThreadReg,
6838
offsetof(J9VMThread, heapAlloc), cg), cg);
6839
6840
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, eaxReal, segmentReg, cg);
6841
6842
TR::LabelSymbol *doneAlignLabel = generateLabelSymbol(cg);
6843
TR::LabelSymbol *multiSlotGapLabel = generateLabelSymbol(cg);
6844
6845
generateRegImmInstruction(TR::InstOpCode::CMPRegImms(), node, eaxReal, sizeof(uintptr_t), cg);
6846
generateLabelInstruction(TR::InstOpCode::JB4, node, doneAlignLabel, cg);
6847
generateLabelInstruction(TR::InstOpCode::JA4, node, multiSlotGapLabel, cg);
6848
6849
int32_t singleSlotHole;
6850
6851
singleSlotHole = J9_GC_SINGLE_SLOT_HOLE;
6852
6853
if (cg->comp()->target().is64Bit() && fej9->generateCompressedLockWord())
6854
{
6855
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
6856
generateX86MemoryReference(segmentReg, 0, cg), singleSlotHole, cg);
6857
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
6858
generateX86MemoryReference(segmentReg, 4, cg), singleSlotHole, cg);
6859
}
6860
else
6861
{
6862
generateMemImmInstruction(
6863
TR::InstOpCode::SMemImm4(), node,
6864
generateX86MemoryReference(segmentReg, 0, cg), singleSlotHole, cg);
6865
}
6866
6867
generateLabelInstruction(TR::InstOpCode::JMP4, node, doneAlignLabel, cg);
6868
generateLabelInstruction(TR::InstOpCode::label, node, multiSlotGapLabel, cg);
6869
6870
int32_t multiSlotHole;
6871
6872
multiSlotHole = J9_GC_MULTI_SLOT_HOLE;
6873
6874
generateMemImmInstruction(
6875
TR::InstOpCode::SMemImm4(), node,
6876
generateX86MemoryReference(segmentReg, 0, cg),
6877
multiSlotHole, cg);
6878
6879
generateMemRegInstruction(
6880
TR::InstOpCode::SMemReg(), node,
6881
generateX86MemoryReference(segmentReg, sizeof(uintptr_t), cg),
6882
eaxReal, cg);
6883
6884
generateLabelInstruction(TR::InstOpCode::label, node, doneAlignLabel, cg);
6885
generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, eaxReal, segmentReg, cg);
6886
generateRegInstruction(TR::InstOpCode::POPReg, node, segmentReg, cg);
6887
}
6888
6889
// Make sure that the arraylet is aligned properly.
6890
//
6891
if (generateArraylets && (node->getOpCodeValue() == TR::anewarray || node->getOpCodeValue() == TR::newarray) )
6892
{
6893
generateRegMemInstruction(TR::InstOpCode::LEARegMem(),node,segmentReg, generateX86MemoryReference(tempReg,TR::Compiler->om.getObjectAlignmentInBytes()-1,cg),cg);
6894
if (cg->comp()->target().is64Bit())
6895
generateRegImmInstruction(TR::InstOpCode::AND8RegImm4,node,segmentReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);
6896
else
6897
generateRegImmInstruction(TR::InstOpCode::AND4RegImm4,node,segmentReg,-TR::Compiler->om.getObjectAlignmentInBytes(),cg);
6898
}
6899
6900
generateMemRegInstruction(TR::InstOpCode::SMemReg(),
6901
node,
6902
generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, heapAlloc), cg),
6903
segmentReg, cg);
6904
6905
if (!isTooSmallToPrefetch && node->getOpCodeValue() != TR::New)
6906
{
6907
// ------------
6908
// 2nd PREFETCH
6909
// ------------
6910
generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x100, cg), cg);
6911
6912
// ------------
6913
// 3rd PREFETCH
6914
// ------------
6915
generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x140, cg), cg);
6916
6917
// ------------
6918
// 4th PREFETCH
6919
// ------------
6920
generateMemInstruction(TR::InstOpCode::PREFETCHNTA, node, generateX86MemoryReference(segmentReg, 0x180, cg), cg);
6921
}
6922
}
6923
}
6924
6925
// Generate the code to initialize an object header - used for both new and
6926
// array new
6927
//
6928
static void genInitObjectHeader(TR::Node *node,
6929
TR_OpaqueClassBlock *clazz,
6930
TR::Register *classReg,
6931
TR::Register *objectReg,
6932
TR::Register *tempReg,
6933
bool isZeroInitialized,
6934
bool isDynamicAllocation,
6935
TR::CodeGenerator *cg)
6936
{
6937
TR::Compilation *comp = cg->comp();
6938
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
6939
6940
bool use64BitClasses = comp->target().is64Bit() &&
6941
(!TR::Compiler->om.generateCompressedObjectHeaders() ||
6942
(comp->compileRelocatableCode() && comp->getOption(TR_UseSymbolValidationManager)));
6943
6944
TR_ASSERT((isDynamicAllocation || clazz), "Cannot have a null clazz while not doing dynamic array allocation\n");
6945
6946
// --------------------------------------------------------------------------------
6947
//
6948
// Initialize CLASS field
6949
//
6950
// --------------------------------------------------------------------------------
6951
//
6952
TR::InstOpCode::Mnemonic opSMemReg = TR::InstOpCode::SMemReg(use64BitClasses);
6953
6954
TR::Register * clzReg = classReg;
6955
6956
// For dynamic array allocation, load the array class from the component class and store into clzReg
6957
if (isDynamicAllocation)
6958
{
6959
TR_ASSERT((node->getOpCodeValue() == TR::anewarray), "Dynamic allocation currently only supports reference arrays");
6960
TR_ASSERT(classReg, "must have a classReg for dynamic allocation");
6961
clzReg = tempReg;
6962
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, clzReg, generateX86MemoryReference(classReg, offsetof(J9Class, arrayClass), cg), cg);
6963
}
6964
// TODO: should be able to use a TR_ClassPointer relocation without this stuff (along with class validation)
6965
else if (cg->needClassAndMethodPointerRelocations() && !comp->getOption(TR_UseSymbolValidationManager))
6966
{
6967
TR::Register *vmThreadReg = cg->getVMThreadRegister();
6968
if (node->getOpCodeValue() == TR::newarray)
6969
{
6970
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node,tempReg,
6971
generateX86MemoryReference(vmThreadReg,offsetof(J9VMThread, javaVM), cg), cg);
6972
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg,
6973
generateX86MemoryReference(tempReg,
6974
offsetof(J9JavaVM, booleanArrayClass)+(node->getSecondChild()->getInt()-4)*sizeof(J9Class*), cg), cg);
6975
// tempReg should contain a 32 bit pointer.
6976
generateMemRegInstruction(opSMemReg, node,
6977
generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg),
6978
tempReg, cg);
6979
clzReg = tempReg;
6980
}
6981
else
6982
{
6983
TR_ASSERT((node->getOpCodeValue() == TR::New)
6984
&& classReg, "must have a classReg for TR::New in AOT mode");
6985
clzReg = classReg;
6986
}
6987
}
6988
6989
6990
// For RealTime Code Only.
6991
int32_t orFlags = 0;
6992
int32_t orFlagsClass = 0;
6993
6994
if (!clzReg)
6995
{
6996
TR::Instruction *instr = NULL;
6997
if (use64BitClasses)
6998
{
6999
if (cg->needClassAndMethodPointerRelocations() && comp->getOption(TR_UseSymbolValidationManager))
7000
instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, ((intptr_t)clazz|orFlagsClass), cg, TR_ClassPointer);
7001
else
7002
instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, ((intptr_t)clazz|orFlagsClass), cg);
7003
generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), tempReg, cg);
7004
}
7005
else
7006
{
7007
instr = generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), (int32_t)((uintptr_t)clazz|orFlagsClass), cg);
7008
}
7009
7010
// HCR in genInitObjectHeader
7011
if (instr && cg->wantToPatchClassPointer(clazz, node))
7012
comp->getStaticHCRPICSites()->push_front(instr);
7013
}
7014
else
7015
{
7016
if (orFlagsClass != 0)
7017
generateRegImmInstruction(use64BitClasses ? TR::InstOpCode::OR8RegImm4 : TR::InstOpCode::OR4RegImm4, node, clzReg, orFlagsClass, cg);
7018
generateMemRegInstruction(opSMemReg, node,
7019
generateX86MemoryReference(objectReg, TR::Compiler->om.offsetOfObjectVftField(), cg), clzReg, cg);
7020
}
7021
7022
// --------------------------------------------------------------------------------
7023
//
7024
// Initialize FLAGS field
7025
//
7026
// --------------------------------------------------------------------------------
7027
//
7028
7029
// Collect the flags to be OR'd in that are known at compile time.
7030
//
7031
7032
#ifndef J9VM_INTERP_FLAGS_IN_CLASS_SLOT
7033
// Enable macro once GC-Helper is fixed
7034
J9ROMClass *romClass = TR::Compiler->cls.romClassOf(clazz);
7035
if (romClass)
7036
{
7037
orFlags |= romClass->instanceShape;
7038
orFlags |= fej9->getStaticObjectFlags();
7039
7040
#if defined(J9VM_OPT_NEW_OBJECT_HASH)
7041
// put orFlags or 0 into header if needed
7042
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node,
7043
generateX86MemoryReference(objectReg, TMP_OFFSETOF_J9OBJECT_FLAGS, cg),
7044
orFlags, cg);
7045
7046
#endif /* !J9VM_OPT_NEW_OBJECT_HASH */
7047
}
7048
#endif /* FLAGS_IN_CLASS_SLOT */
7049
7050
// --------------------------------------------------------------------------------
7051
//
7052
// Initialize MONITOR field
7053
//
7054
// --------------------------------------------------------------------------------
7055
//
7056
// For dynamic array allocation, in case (very unlikely) the object array has a lock word, we just initialized it to 0 conservatively.
7057
// In this case, if the original array is reserved, initializing the cloned object's lock word to 0 will force the
7058
// locking to go to the slow locking path.
7059
if (isDynamicAllocation)
7060
{
7061
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
7062
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(clzReg, offsetof(J9ArrayClass, lockOffset), cg), cg);
7063
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)-1, cg);
7064
generateLabelInstruction (TR::InstOpCode::JE4, node, doneLabel, cg);
7065
generateMemImmInstruction(TR::InstOpCode::SMemImm4(comp->target().is64Bit() && !fej9->generateCompressedLockWord()),
7066
node, generateX86MemoryReference(objectReg, tempReg, 0, cg), 0, cg);
7067
generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);
7068
}
7069
else
7070
{
7071
bool initReservable = TR::Compiler->cls.classFlagReservableWordInitValue(clazz);
7072
if (!isZeroInitialized || initReservable)
7073
{
7074
bool initLw = (node->getOpCodeValue() != TR::New) || initReservable;
7075
int lwOffset = fej9->getByteOffsetToLockword(clazz);
7076
if (lwOffset == -1)
7077
initLw = false;
7078
7079
if (initLw)
7080
{
7081
int32_t initialLwValue = 0;
7082
if (initReservable)
7083
initialLwValue = OBJECT_HEADER_LOCK_RESERVED;
7084
7085
generateMemImmInstruction(TR::InstOpCode::SMemImm4(comp->target().is64Bit() && !fej9->generateCompressedLockWord()),
7086
node, generateX86MemoryReference(objectReg, lwOffset, cg), initialLwValue, cg);
7087
}
7088
}
7089
}
7090
}
7091
7092
7093
// Generate the code to initialize an array object header
7094
//
7095
static void genInitArrayHeader(
7096
TR::Node *node,
7097
TR_OpaqueClassBlock *clazz,
7098
TR::Register *classReg,
7099
TR::Register *objectReg,
7100
TR::Register *sizeReg,
7101
int32_t elementSize,
7102
int32_t arrayletDataOffset,
7103
TR::Register *tempReg,
7104
bool isZeroInitialized,
7105
bool isDynamicAllocation,
7106
bool shouldInitZeroSizedArrayHeader,
7107
TR::CodeGenerator *cg)
7108
{
7109
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
7110
7111
// Initialize the object header
7112
//
7113
genInitObjectHeader(node, clazz, classReg, objectReg, tempReg, isZeroInitialized, isDynamicAllocation, cg);
7114
7115
int32_t arraySizeOffset = fej9->getOffsetOfContiguousArraySizeField();
7116
7117
TR::MemoryReference *arraySizeMR = generateX86MemoryReference(objectReg, arraySizeOffset, cg);
7118
// Special handling of zero sized arrays.
7119
// Zero length arrays are discontiguous (i.e. they also need the discontiguous length field to be 0) because
7120
// they are indistinguishable from non-zero length discontiguous arrays. But instead of explicitly checking
7121
// for zero sized arrays we unconditionally store 0 in the third dword of the array object header. That is
7122
// safe because the 3rd dword is either array size of a zero sized array or will contain the first elements
7123
// of an array:
7124
// - Zero sized arrays have the following layout:
7125
// - The smallest array possible is a byte array with 1 element which would have a layout:
7126
// #bits per section (compressed refs): | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits |
7127
// zero sized arrays: | class | mustBeZero | size | padding | dataAddr |
7128
// smallest contiguous array: | class | size | dataAddr | 1 byte + padding | other |
7129
// This also reflects the minimum object size which is 16 bytes.
7130
int32_t arrayDiscontiguousSizeOffset = fej9->getOffsetOfDiscontiguousArraySizeField();
7131
TR::MemoryReference *arrayDiscontiguousSizeMR = generateX86MemoryReference(objectReg, arrayDiscontiguousSizeOffset, cg);
7132
7133
TR::Compilation *comp = cg->comp();
7134
7135
bool canUseFastInlineAllocation =
7136
(!comp->getOptions()->realTimeGC() &&
7137
!comp->generateArraylets()) ? true : false;
7138
7139
// Initialize the array size
7140
//
7141
if (sizeReg)
7142
{
7143
// Variable size
7144
//
7145
if (canUseFastInlineAllocation)
7146
{
7147
// Native 64-bit needs to cover the discontiguous size field
7148
//
7149
TR::InstOpCode::Mnemonic storeOp = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? TR::InstOpCode::S8MemReg : TR::InstOpCode::S4MemReg;
7150
generateMemRegInstruction(storeOp, node, arraySizeMR, sizeReg, cg);
7151
}
7152
else
7153
{
7154
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, arraySizeMR, sizeReg, cg);
7155
}
7156
// Take care of zero sized arrays as they are discontiguous and not contiguous
7157
if (shouldInitZeroSizedArrayHeader)
7158
{
7159
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayDiscontiguousSizeMR, 0, cg);
7160
}
7161
}
7162
else
7163
{
7164
// Fixed size
7165
//
7166
int32_t instanceSize = 0;
7167
if (canUseFastInlineAllocation)
7168
{
7169
// Native 64-bit needs to cover the discontiguous size field
7170
//
7171
TR::InstOpCode::Mnemonic storeOp = (comp->target().is64Bit() && !comp->useCompressedPointers()) ? TR::InstOpCode::S8MemImm4 : TR::InstOpCode::S4MemImm4;
7172
instanceSize = node->getFirstChild()->getInt();
7173
generateMemImmInstruction(storeOp, node, arraySizeMR, instanceSize, cg);
7174
}
7175
else
7176
{
7177
instanceSize = node->getFirstChild()->getInt();
7178
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arraySizeMR, instanceSize, cg);
7179
}
7180
// Take care of zero sized arrays as they are discontiguous and not contiguous
7181
if (shouldInitZeroSizedArrayHeader && (instanceSize == 0))
7182
{
7183
generateMemImmInstruction(TR::InstOpCode::S4MemImm4, node, arrayDiscontiguousSizeMR, 0, cg);
7184
}
7185
}
7186
7187
bool generateArraylets = comp->generateArraylets();
7188
7189
if (generateArraylets)
7190
{
7191
// write arraylet pointer
7192
TR::InstOpCode::Mnemonic storeOp;
7193
7194
generateRegMemInstruction(
7195
TR::InstOpCode::LEARegMem(), node,
7196
tempReg,
7197
generateX86MemoryReference(objectReg, arrayletDataOffset, cg), cg);
7198
7199
if (comp->useCompressedPointers())
7200
{
7201
storeOp = TR::InstOpCode::S4MemReg;
7202
7203
// Compress the arraylet pointer.
7204
//
7205
if (TR::Compiler->om.compressedReferenceShiftOffset() > 0)
7206
generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, TR::Compiler->om.compressedReferenceShiftOffset(), cg);
7207
}
7208
else
7209
{
7210
storeOp = TR::InstOpCode::SMemReg();
7211
}
7212
7213
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
7214
generateMemRegInstruction(storeOp, node, generateX86MemoryReference(objectReg, fej9->getFirstArrayletPointerOffset(comp), cg), tempReg, cg);
7215
}
7216
7217
}
7218
7219
7220
// ------------------------------------------------------------------------------
7221
// genZeroInitObject2
7222
// ------------------------------------------------------------------------------
7223
7224
static bool genZeroInitObject2(
7225
TR::Node *node,
7226
int32_t objectSize,
7227
int32_t elementSize,
7228
TR::Register *sizeReg,
7229
TR::Register *targetReg,
7230
TR::Register *tempReg,
7231
TR::Register *segmentReg,
7232
TR::Register *&scratchReg,
7233
TR::CodeGenerator *cg)
7234
{
7235
TR::Compilation *comp = cg->comp();
7236
7237
// set up clazz value here
7238
TR_OpaqueClassBlock *clazz = NULL;
7239
bool isArrayNew = (node->getOpCodeValue() != TR::New);
7240
comp->canAllocateInline(node, clazz);
7241
auto headerSize = isArrayNew ? TR::Compiler->om.contiguousArrayHeaderSizeInBytes() : TR::Compiler->om.objectHeaderSizeInBytes();
7242
// If we are using full refs both contiguous and discontiguous array header have the same size, in which case we must adjust header size
7243
// slightly so that rep stosb can initialize the size field of zero sized arrays appropriately
7244
// #bits per section (compressed refs): | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits | 32 bits |
7245
// zero sized arrays: | class | mustBeZero | size | padding | dataAddr |
7246
// smallest contiguous array: | class | size | dataAddr | 1 byte + padding | other |
7247
// In order for us to successfully initialize the size field of a zero sized array in compressed refs
7248
// we must subtract 8 bytes (sizeof(dataAddr)) from header size. And in case of full refs we must
7249
// subtract 16 bytes from the header in order to properly initialize the zero sized field. We can
7250
// accomplish that by simply subtracting the offset of dataAddr field, which is 8 for compressed refs
7251
// and 16 for full refs.
7252
#if defined(TR_TARGET_64BIT)
7253
if (!cg->comp()->target().is32Bit() && isArrayNew)
7254
{
7255
headerSize -= static_cast<TR_J9VMBase *>(cg->fe())->getOffsetOfContiguousDataAddrField();
7256
}
7257
#endif /* TR_TARGET_64BIT */
7258
TR_ASSERT(headerSize >= 4, "Object/Array header must be >= 4.");
7259
objectSize -= headerSize;
7260
7261
if (!minRepstosdWords)
7262
{
7263
static char *p= feGetEnv("TR_MinRepstosdWords");
7264
if (p)
7265
minRepstosdWords = atoi(p);
7266
else
7267
minRepstosdWords = MIN_REPSTOSD_WORDS; // Use default value
7268
}
7269
7270
if (sizeReg || objectSize >= minRepstosdWords)
7271
{
7272
// Zero-initialize by using REP TR::InstOpCode::STOSB.
7273
//
7274
if (sizeReg)
7275
{
7276
// -------------
7277
//
7278
// VARIABLE SIZE
7279
//
7280
// -------------
7281
// Subtract off the header size and initialize the remaining slots.
7282
//
7283
generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, tempReg, headerSize, cg);
7284
}
7285
else
7286
{
7287
// ----------
7288
// FIXED SIZE
7289
// ----------
7290
if (comp->target().is64Bit() && !IS_32BIT_SIGNED(objectSize))
7291
{
7292
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg, objectSize, cg);
7293
}
7294
else
7295
{
7296
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, objectSize, cg);
7297
}
7298
}
7299
7300
// -----------
7301
// Destination
7302
// -----------
7303
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(targetReg, headerSize, cg), cg);
7304
if (comp->target().is64Bit())
7305
{
7306
scratchReg = cg->allocateRegister();
7307
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, targetReg, cg);
7308
}
7309
else
7310
{
7311
generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, cg);
7312
}
7313
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);
7314
generateInstruction(TR::InstOpCode::REPSTOSB, node, cg);
7315
if (comp->target().is64Bit())
7316
{
7317
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg, scratchReg, cg);
7318
}
7319
else
7320
{
7321
generateRegInstruction(TR::InstOpCode::POPReg, node, targetReg, cg);
7322
}
7323
return true;
7324
}
7325
else if (objectSize > 0)
7326
{
7327
if (objectSize % 16 == 12)
7328
{
7329
// Zero-out header to avoid a 12-byte residue
7330
objectSize += 4;
7331
headerSize -= 4;
7332
}
7333
scratchReg = cg->allocateRegister(TR_FPR);
7334
generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, scratchReg, scratchReg, cg);
7335
int32_t offset = 0;
7336
while (objectSize >= 16)
7337
{
7338
generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);
7339
objectSize -= 16;
7340
offset += 16;
7341
}
7342
switch (objectSize)
7343
{
7344
case 8:
7345
generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);
7346
break;
7347
case 4:
7348
generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, headerSize + offset, cg), scratchReg, cg);
7349
break;
7350
case 0:
7351
break;
7352
default:
7353
TR_ASSERT(false, "residue size should only be 0, 4 or 8.");
7354
}
7355
return false;
7356
}
7357
else
7358
{
7359
return false;
7360
}
7361
}
7362
7363
7364
// Generate the code to initialize the data portion of an allocated object.
7365
// Zero-initialize the monitor slot in the header at the same time.
7366
// If "sizeReg" is non-null it contains the number of array elements and
7367
// "elementSize" contains the size of each element.
7368
// Otherwise the object size is in "objectSize".
7369
//
7370
static bool genZeroInitObject(
7371
TR::Node *node,
7372
int32_t objectSize,
7373
int32_t elementSize,
7374
TR::Register *sizeReg,
7375
TR::Register *targetReg,
7376
TR::Register *tempReg,
7377
TR::Register *segmentReg,
7378
TR::Register *&scratchReg,
7379
TR::CodeGenerator *cg)
7380
{
7381
// object header flags now occupy 4bytes on 64-bit
7382
TR::ILOpCodes opcode = node->getOpCodeValue();
7383
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
7384
TR::Compilation *comp = cg->comp();
7385
7386
bool isArrayNew = (opcode != TR::New) ;
7387
TR_OpaqueClassBlock *clazz = NULL;
7388
7389
// set up clazz value here
7390
comp->canAllocateInline(node, clazz);
7391
7392
int32_t numSlots = 0;
7393
int32_t startOfZeroInits = isArrayNew ? TR::Compiler->om.contiguousArrayHeaderSizeInBytes() : TR::Compiler->om.objectHeaderSizeInBytes();
7394
7395
if (comp->target().is64Bit())
7396
{
7397
// round down to the nearest word size
7398
TR_ASSERT(startOfZeroInits < 0xF8, "expecting start of zero inits to be the size of the header");
7399
startOfZeroInits &= 0xF8;
7400
}
7401
7402
numSlots = (int32_t)((objectSize - startOfZeroInits)/TR::Compiler->om.sizeofReferenceAddress());
7403
7404
bool generateArraylets = comp->generateArraylets();
7405
7406
int32_t i;
7407
7408
7409
// *** old object header ***
7410
// since i'm always confused,
7411
// here is the layout of an object
7412
//
7413
// #if defined(J9VM_THR_LOCK_NURSERY)
7414
//
7415
// on 32-bit
7416
// for an indexable object [header = 4 or 3 slots]
7417
// #if defined(J9VM_THR_LOCK_NURSERY_FAT_ARRAYS)
7418
// --clazz-- --flags-- --monitor-- --size-- <--data-->
7419
// #else
7420
// --clazz-- --flags-- --size-- <--data-->
7421
// #endif
7422
//
7423
// for a non-indexable object (if the object has sync methods, monitor
7424
// slot is part of the data slots) [header = 2 slots]
7425
// --clazz-- --flags-- <--data-->
7426
//
7427
// on 64-bit
7428
// for an indexable object [header = 3 or 2 slots]
7429
// #if defined(J9VM_THR_LOCK_NURSERY_FAT_ARRAYS)
7430
// --clazz-- --flags+size-- --monitor-- <--data-->
7431
// #else
7432
// --clazz-- --flags+size-- <--data-->
7433
// #endif
7434
//
7435
// for a non-indexable object [header = 2 slots]
7436
// --clazz-- --flags-- <--data-->
7437
//
7438
// #else
7439
//
7440
// on 32-bit
7441
// for an indexable object [header = 4 slots]
7442
// --clazz-- --flags-- --monitor-- --size-- <--data-->
7443
//
7444
// for a non-indexable object [header = 3 slots]
7445
// --clazz-- --flags-- --monitor-- <--data-->
7446
//
7447
// on 64-bit
7448
// for an indexable object [header = 3 slots]
7449
// --clazz-- --flags+size-- --monitor-- <--data-->
7450
//
7451
// for a non-indexable object [header = 3 slots]
7452
// --clazz-- --flags-- --monitor-- <--data-->
7453
//
7454
// #endif
7455
//
7456
// Packed Objects adds two more fields,
7457
//
7458
7459
if (!minRepstosdWords)
7460
{
7461
static char *p= feGetEnv("TR_MinRepstosdWords");
7462
if (p)
7463
minRepstosdWords = atoi(p);
7464
else
7465
minRepstosdWords = MIN_REPSTOSD_WORDS; // Use default value
7466
}
7467
7468
int32_t alignmentDelta = 0; // for aligning properly to get best performance from REP TR::InstOpCode::STOSD/TR::InstOpCode::STOSQ
7469
7470
if (sizeReg || (numSlots + alignmentDelta) >= minRepstosdWords)
7471
{
7472
// Zero-initialize by using REP TR::InstOpCode::STOSD/TR::InstOpCode::STOSQ.
7473
//
7474
// startOffset will be monitorSlot only for arrays
7475
7476
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, segmentReg, generateX86MemoryReference(targetReg, startOfZeroInits, cg), cg);
7477
7478
if (sizeReg)
7479
{
7480
int32_t additionalSlots = 0;
7481
7482
if (generateArraylets)
7483
{
7484
additionalSlots++;
7485
if (elementSize > sizeof(UDATA))
7486
additionalSlots++;
7487
}
7488
7489
switch (elementSize)
7490
{
7491
// Calculate the number of slots by rounding up to number of words,
7492
// adding in partialHeaderSize.adding in partialHeaderSize.
7493
//
7494
case 1:
7495
if (comp->target().is64Bit())
7496
{
7497
generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*8)+7, cg), cg);
7498
generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 3, cg);
7499
}
7500
else
7501
{
7502
generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*4)+3, cg), cg);
7503
generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tempReg, 2, cg);
7504
}
7505
break;
7506
case 2:
7507
if (comp->target().is64Bit())
7508
{
7509
generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*4)+3, cg), cg);
7510
generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 2, cg);
7511
}
7512
else
7513
{
7514
generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*2)+1, cg), cg);
7515
generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, tempReg, 1, cg);
7516
}
7517
break;
7518
case 4:
7519
if (comp->target().is64Bit())
7520
{
7521
generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg, generateX86MemoryReference(sizeReg, (additionalSlots*2)+1, cg), cg);
7522
generateRegImmInstruction(TR::InstOpCode::SHR8RegImm1, node, tempReg, 1, cg);
7523
}
7524
else
7525
{
7526
generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg,
7527
generateX86MemoryReference(sizeReg, additionalSlots, cg), cg);
7528
}
7529
break;
7530
case 8:
7531
if (comp->target().is64Bit())
7532
{
7533
generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, tempReg,
7534
generateX86MemoryReference(sizeReg, additionalSlots, cg), cg);
7535
}
7536
else
7537
{
7538
generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, tempReg,
7539
generateX86MemoryReference(NULL, sizeReg,
7540
TR::MemoryReference::convertMultiplierToStride(2),
7541
additionalSlots, cg), cg);
7542
}
7543
break;
7544
}
7545
}
7546
else
7547
{
7548
// Fixed size
7549
//
7550
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, tempReg, numSlots + alignmentDelta, cg);
7551
if (comp->target().is64Bit())
7552
{
7553
// TODO AMD64: replace both instructions with a LEA tempReg, [disp32]
7554
//
7555
generateRegRegInstruction(TR::InstOpCode::MOVSXReg8Reg4, node, tempReg, tempReg, cg);
7556
}
7557
}
7558
7559
if (comp->target().is64Bit())
7560
{
7561
scratchReg = cg->allocateRegister();
7562
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, scratchReg, targetReg, cg);
7563
}
7564
else
7565
{
7566
generateRegInstruction(TR::InstOpCode::PUSHReg, node, targetReg, cg);
7567
}
7568
7569
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, targetReg, targetReg, cg);
7570
7571
// We just pushed targetReg on the stack and zeroed it out. targetReg contained the address of the
7572
// beginning of the header. We want to use the 0-reg to initialize the monitor slot, so we use
7573
// segmentReg, which points to targetReg+startOfZeroInits and subtract the extra offset.
7574
7575
bool initLw = (node->getOpCodeValue() != TR::New);
7576
int lwOffset = fej9->getByteOffsetToLockword(clazz);
7577
initLw = false;
7578
7579
if (initLw)
7580
{
7581
TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::S4MemReg : TR::InstOpCode::SMemReg();
7582
generateMemRegInstruction(op, node, generateX86MemoryReference(segmentReg, lwOffset-startOfZeroInits, cg), targetReg, cg);
7583
}
7584
7585
TR::InstOpCode::Mnemonic op = comp->target().is64Bit() ? TR::InstOpCode::REPSTOSQ : TR::InstOpCode::REPSTOSD;
7586
generateInstruction(op, node, cg);
7587
7588
if (comp->target().is64Bit())
7589
{
7590
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg, scratchReg, cg);
7591
}
7592
else
7593
{
7594
generateRegInstruction(TR::InstOpCode::POPReg, node, targetReg, cg);
7595
}
7596
7597
return true;
7598
}
7599
7600
if (numSlots > 0)
7601
{
7602
generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
7603
7604
bool initLw = (node->getOpCodeValue() != TR::New);
7605
int lwOffset = fej9->getByteOffsetToLockword(clazz);
7606
initLw = false;
7607
7608
if (initLw)
7609
{
7610
TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::S4MemReg : TR::InstOpCode::SMemReg();
7611
generateMemRegInstruction(op, node, generateX86MemoryReference(targetReg, lwOffset, cg), tempReg, cg);
7612
}
7613
}
7614
else
7615
{
7616
bool initLw = (node->getOpCodeValue() != TR::New);
7617
int lwOffset = fej9->getByteOffsetToLockword(clazz);
7618
initLw = false;
7619
7620
if (initLw)
7621
{
7622
TR::InstOpCode::Mnemonic op = (comp->target().is64Bit() && fej9->generateCompressedLockWord()) ? TR::InstOpCode::S4MemImm4 : TR::InstOpCode::SMemImm4();
7623
generateMemImmInstruction(op, node, generateX86MemoryReference(targetReg, lwOffset, cg), 0, cg);
7624
}
7625
return false;
7626
}
7627
7628
int32_t numIterations = numSlots/maxZeroInitWordsPerIteration;
7629
if (numIterations > 1)
7630
{
7631
// Generate the initializations in a loop
7632
//
7633
int32_t numLoopSlots = numIterations*maxZeroInitWordsPerIteration;
7634
int32_t endOffset;
7635
7636
endOffset = (int32_t)(numLoopSlots*TR::Compiler->om.sizeofReferenceAddress() + startOfZeroInits);
7637
7638
generateRegImmInstruction(TR::InstOpCode::MOVRegImm4(), node, segmentReg, -((numIterations-1)*maxZeroInitWordsPerIteration), cg);
7639
7640
if (comp->target().is64Bit())
7641
generateRegRegInstruction(TR::InstOpCode::MOVSXReg8Reg4, node, segmentReg, segmentReg, cg);
7642
7643
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
7644
generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);
7645
for (i = maxZeroInitWordsPerIteration; i > 0; i--)
7646
{
7647
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
7648
generateX86MemoryReference(targetReg,
7649
segmentReg,
7650
TR::MemoryReference::convertMultiplierToStride((int32_t)TR::Compiler->om.sizeofReferenceAddress()),
7651
endOffset - TR::Compiler->om.sizeofReferenceAddress()*i, cg),
7652
tempReg, cg);
7653
}
7654
generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, segmentReg, maxZeroInitWordsPerIteration, cg);
7655
generateLabelInstruction(TR::InstOpCode::JLE4, node, loopLabel, cg);
7656
7657
// Generate the left-over initializations
7658
//
7659
for (i = 0; i < numSlots % maxZeroInitWordsPerIteration; i++)
7660
{
7661
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
7662
generateX86MemoryReference(targetReg,
7663
endOffset+TR::Compiler->om.sizeofReferenceAddress()*i, cg),
7664
tempReg, cg);
7665
}
7666
}
7667
else
7668
{
7669
// Generate the initializations inline
7670
//
7671
for (i = 0; i < numSlots; i++)
7672
{
7673
// Don't bother initializing the array-size slot
7674
//
7675
generateMemRegInstruction(TR::InstOpCode::SMemReg(), node,
7676
generateX86MemoryReference(targetReg,
7677
i*TR::Compiler->om.sizeofReferenceAddress() + startOfZeroInits, cg),
7678
tempReg, cg);
7679
}
7680
}
7681
7682
return false;
7683
}
7684
7685
TR::Register *
objectCloneEvaluator(
      TR::Node *node,
      TR::CodeGenerator *cg)
   {
   // No inline acceleration for Object.clone() is generated on this path;
   // returning NULL tells the caller that nothing was evaluated here.
   //
   // The old (commented-out) inline clone sequence was deleted to keep the
   // file clean. If it ever needs to be resurrected it can be recovered
   // from RTC or CMVC history.
   return NULL;
   }
7696
7697
7698
/**
 * @brief Generate an inline (TLH-based) allocation sequence for TR::New,
 *        TR::newarray and TR::anewarray nodes.
 *
 * The fast path bumps the thread-local heap pointer, zero-initializes the
 * required data slots, initializes the object/array header, and leaves the
 * new object's address in a register bound to eax. A heap-overflow fail
 * path branches to an outlined call to the allocation helper.
 *
 * @param node  the allocation node (TR::New / TR::newarray / TR::anewarray)
 * @param cg    the code generator
 * @return the register holding the newly allocated object, or NULL when
 *         inline allocation is not possible (inlining suppressed, helper
 *         does not preserve registers, object not inlineable, or the
 *         transformation is vetoed).
 */
TR::Register *
J9::X86::TreeEvaluator::VMnewEvaluator(
      TR::Node *node,
      TR::CodeGenerator *cg)
   {
   // See if inline allocation is appropriate.
   //
   // Don't do the inline allocation if we are generating JVMPI hooks, since
   // JVMPI needs to know about the allocation.
   //
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());

   if (comp->suppressAllocationInlining())
      return NULL;

   // If the helper does not preserve all the registers there will not be
   // enough registers to do the inline allocation.
   // Also, don't do the inline allocation if optimizing for space
   //
   TR::MethodSymbol *helperSym = node->getSymbol()->castToMethodSymbol();
   if (!helperSym->preservesAllRegisters())
      return NULL;

   TR_OpaqueClassBlock *clazz = NULL;
   TR::Register *classReg = NULL;
   bool isArrayNew = false;
   int32_t allocationSize = 0;
   int32_t objectSize = 0;
   int32_t elementSize = 0;
   int32_t dataOffset = 0;

   // NOTE(review): this local is never referenced below — every later check
   // re-queries comp->getOptions()->realTimeGC() directly.
   bool realTimeGC = comp->getOptions()->realTimeGC();
   bool generateArraylets = comp->generateArraylets();

   TR::Register *segmentReg = NULL;
   TR::Register *tempReg = NULL;
   TR::Register *targetReg = NULL;
   TR::Register *sizeReg = NULL;

   /**
    * Study of registers used in inline allocation.
    *
    * Result goes to targetReg. Unless outlinedHelperCall is used, which requires an extra register move to targetReg2.
    * targetReg2 is needed because the result needs to be CollectedReferenceRegister, but only after object is ready.
    *
    * classReg contains the J9Class for the object to be allocated. Not always used; instead, when loadaddr is not evaluated, it
    * is rematerialized like a constant (in which case, clazz contains the known value). When it is rematerialized, there are
    * 'interesting' AOT/HCR patching routines.
    *
    * sizeReg is used for array allocations to hold the number of elements. However...
    * for packed variable (objectSize==0) arrays, sizeReg behaves like segmentReg should (i.e. contains size in _bytes_): elementSize
    * is set to 1 and sizeReg is result of multiplication of real elementSize vs element count.
    *
    * segmentReg contains the size, _in bytes!_, of the object/array to be allocated. When outlining is used, it will be bound to edi.
    * This must contain the rounding (i.e. 8-aligned, so address will always end in 0x0 or 0x8). When size cannot be known (i.e.
    * dynamic array size) explicit assembly is generated to do rounding (allocationSize is reused to contain the header offset).
    * After tlh-top comparison, this register is reused as a temporary register (i.e. genHeapAlloc in non-outlined path, and
    * inside the outlined codert asm sequences). This size is not available at non-outlined zero-initialization routine and needs
    * to be re-materialized.
    *
    */

   TR::RegisterDependencyConditions *deps;

   // --------------------------------------------------------------------------------
   //
   // Find the class info and allocation size depending on the node type.
   //
   // Returns:
   //    size of object        includes the size of the array header
   //    -1                    cannot allocate inline
   //    0                     variable sized allocation
   //
   // --------------------------------------------------------------------------------

   objectSize = comp->canAllocateInline(node, clazz);
   if (objectSize < 0)
      return NULL;
   // Currently dynamic allocation is only supported on reference array.
   // We are performing dynamic array allocation if both object size and
   // class block cannot be statically determined.
   bool dynamicArrayAllocation = (node->getOpCodeValue() == TR::anewarray)
         && (objectSize == 0) && (clazz == NULL);
   allocationSize = objectSize;

   static long count = 0;
   if (!performTransformation(comp, "O^O <%3d> Inlining Allocation of %s [0x%p].\n", count++, node->getOpCode().getName(), node))
      return NULL;

   if (node->getOpCodeValue() == TR::New)
      {
      // NOTE(review): `return 0;` here (and in the array cases below) is a
      // null TR::Register* — inconsistent with the `return NULL;` used
      // elsewhere in this function, but behaviorally identical.
      if (comp->getOption(TR_DisableAllocationInlining))
         return 0;

      // realtimeGC: cannot inline if object size is too big to get a size class
      if (comp->getOptions()->realTimeGC())
         {
         if ((uint32_t) objectSize > fej9->getMaxObjectSizeForSizeClass())
            return NULL;
         }

      dataOffset = TR::Compiler->om.objectHeaderSizeInBytes(); //Not used...
      classReg = node->getFirstChild()->getRegister();
      TR_ASSERT(objectSize > 0, "assertion failure");
      }
   else
      {
      if (node->getOpCodeValue() == TR::newarray)
         {
         if (comp->getOption(TR_DisableAllocationInlining))
            return 0;

         elementSize = TR::Compiler->om.getSizeOfArrayElement(node);
         }
      else
         {
         // Must be TR::anewarray
         //
         if (comp->getOption(TR_DisableAllocationInlining))
            return 0;

         // Reference arrays: element size depends on compressed refs.
         if (comp->useCompressedPointers())
            elementSize = TR::Compiler->om.sizeofReferenceField();
         else
            elementSize = (int32_t)TR::Compiler->om.sizeofReferenceAddress();

         classReg = node->getSecondChild()->getRegister();
         // For dynamic array allocation, need to evaluate second child
         if (!classReg && dynamicArrayAllocation)
            classReg = cg->evaluate(node->getSecondChild());
         }

      isArrayNew = true;

      // The first element's offset differs between arraylet and contiguous layouts.
      if (generateArraylets)
         {
         dataOffset = fej9->getArrayletFirstElementOffset(elementSize, comp);
         }
      else
         {
         dataOffset = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
         }
      }

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *fallThru = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   fallThru->setEndInternalControlFlow();

#ifdef J9VM_GC_NON_ZERO_TLH
   // If we can skip zero init, and it is not outlined new, we use the new TLH
   // same logic also appears later, but we need to do this before generate the helper call
   //
   if (node->canSkipZeroInitialization() && !comp->getOption(TR_DisableDualTLH) && !comp->getOptions()->realTimeGC())
      {
      // For value types, it should use jitNewValue helper call which is set up before code gen
      if ((node->getOpCodeValue() == TR::New)
          && (!TR::Compiler->om.areValueTypesEnabled() || (node->getSymbolReference() != comp->getSymRefTab()->findOrCreateNewValueSymbolRef(comp->getMethodSymbol()))))
         node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewObjectNoZeroInitSymbolRef(comp->getMethodSymbol()));
      else if (node->getOpCodeValue() == TR::newarray)
         node->setSymbolReference(comp->getSymRefTab()->findOrCreateNewArrayNoZeroInitSymbolRef(comp->getMethodSymbol()));
      if (comp->getOption(TR_TraceCG))
         traceMsg(comp, "SKIPZEROINIT: for %p, change the symbol to %p ", node, node->getSymbolReference());
      }
   else
      {
      if (comp->getOption(TR_TraceCG))
         traceMsg(comp, "NOSKIPZEROINIT: for %p, keep symbol as %p ", node, node->getSymbolReference());
      }
#endif
   TR::LabelSymbol *failLabel = generateLabelSymbol(cg);

   segmentReg = cg->allocateRegister();

   tempReg = cg->allocateRegister();

   // If the size is variable, evaluate it into a register
   //
   if (objectSize == 0)
      {
      sizeReg = cg->evaluate(node->getFirstChild());
      // For variable-size arrays, allocationSize carries the header offset
      // (see the register-usage study comment above).
      allocationSize += dataOffset;
      if (comp->getOption(TR_TraceCG))
         traceMsg(comp, "allocationSize %d dataOffset %d\n", allocationSize, dataOffset);
      }
   else
      {
      sizeReg = NULL;
      }

   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   // Generate the heap allocation, and the snippet that will handle heap overflow.
   //
   TR_OutlinedInstructions *outlinedHelperCall = NULL;
   targetReg = cg->allocateRegister();
   outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(node, TR::acall, targetReg, failLabel, fallThru, cg);
   cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);

   TR::Instruction * startInstr = cg->getAppendInstruction();

   // --------------------------------------------------------------------------------
   //
   // Do the allocation from the TLH and bump pointers.
   //
   // The address of the start of the allocated heap space will be in targetReg.
   //
   // --------------------------------------------------------------------------------

   bool canUseFastInlineAllocation =
      (!comp->getOptions()->realTimeGC() &&
       !comp->generateArraylets()) ? true : false;

   bool useRepInstruction;
   bool monitorSlotIsInitialized;
   bool skipOutlineZeroInit = false;
   TR_ExtraInfoForNew *initInfo = node->getSymbolReference()->getExtraInfo();
   if (node->canSkipZeroInitialization())
      {
      skipOutlineZeroInit = true;
      }
   else if (initInfo)
      {
      // NOTE(review): this inner canSkipZeroInitialization() check is
      // unreachable — the same predicate was already false to get here.
      if (node->canSkipZeroInitialization())
         {
         initInfo->zeroInitSlots = NULL;
         initInfo->numZeroInitSlots = 0;
         skipOutlineZeroInit = true;
         }
      else if (initInfo->numZeroInitSlots <= 0)
         {
         skipOutlineZeroInit = true;
         }
      }

   if (skipOutlineZeroInit && !performTransformation(comp, "O^O OUTLINED NEW: skip outlined zero init on %s %p\n", cg->getDebug()->getName(node), node))
      skipOutlineZeroInit = false;

   // Faster inlined sequence. It does not understand arraylet shapes yet.
   //
   if (canUseFastInlineAllocation)
      {
      genHeapAlloc2(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg);
      }
   else
      {
      genHeapAlloc(node, clazz, allocationSize, elementSize, sizeReg, targetReg, segmentReg, tempReg, failLabel, cg);
      }

   // --------------------------------------------------------------------------------
   //
   // Perform zero-initialization on data slots.
   //
   // There may be information about which slots are to be zero-initialized.
   // If there is no information, all slots must be zero-initialized.
   //
   // --------------------------------------------------------------------------------

   TR::Register *scratchReg = NULL;
   bool shouldInitZeroSizedArrayHeader = true;

#ifdef J9VM_GC_NON_ZERO_TLH
   // With dual TLH enabled (and not realtime GC), allocations come from a
   // pre-zeroed TLH and the explicit zero-init below is skipped entirely.
   if (comp->getOption(TR_DisableDualTLH) || comp->getOptions()->realTimeGC())
      {
#endif
      if (!maxZeroInitWordsPerIteration)
         {
         // One-time lazy init of the loop-unrolling factor; overridable via env var.
         static char *p = feGetEnv("TR_MaxZeroInitWordsPerIteration");
         if (p)
            maxZeroInitWordsPerIteration = atoi(p);
         else
            maxZeroInitWordsPerIteration = MAX_ZERO_INIT_WORDS_PER_ITERATION; // Use default value
         }

      if (initInfo && initInfo->zeroInitSlots)
         {
         // If there are too many words to be individually initialized, initialize
         // them all
         //
         if (initInfo->numZeroInitSlots >= maxZeroInitWordsPerIteration*2-1)
            initInfo->zeroInitSlots = NULL;
         }

      if (initInfo && initInfo->zeroInitSlots)
         {
         // Zero-initialize by explicit zero stores.
         // Use the supplied bit vector to identify which slots to initialize
         //
         // Zero-initialize the monitor slot in the header at the same time.
         //
         TR_BitVectorIterator bvi(*initInfo->zeroInitSlots);
         static bool UseOldBVI = feGetEnv("TR_UseOldBVI");
         if (UseOldBVI)
            {
            // Legacy path: one 4-byte zero store per slot in the bit vector.
            generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, tempReg, tempReg, cg);
            while (bvi.hasMoreElements())
               {
               generateMemRegInstruction(TR::InstOpCode::S4MemReg, node,
                                         generateX86MemoryReference(targetReg, bvi.getNextElement()*4 +dataOffset, cg),
                                         tempReg, cg);
               }
            }
         else
            {
            // Coalescing path: use a zeroed XMM register and pick MOVD (4B),
            // MOVQ (8B) or MOVDQU (16B) stores depending on how many
            // consecutive 4-byte slots (tracked via span/lastSpan) are set.
            int32_t lastElementIndex = -1;
            int32_t nextE = -2;
            int32_t span = 0;
            int32_t lastSpan = -1;
            scratchReg = cg->allocateRegister(TR_FPR);
            generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, scratchReg, scratchReg, cg);
            while (bvi.hasMoreElements())
               {
               nextE = bvi.getNextElement();
               if (-1 == lastElementIndex) lastElementIndex = nextE;
               span = nextE - lastElementIndex;
               TR_ASSERT(span>=0, "SPAN < 0");
               if (span < 3)
                  {
                  // Keep accumulating a run; emit nothing yet.
                  lastSpan = span;
                  continue;
                  }
               else if (span == 3)
                  {
                  // Exactly 4 consecutive slots: one 16-byte store covers them.
                  generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
                  lastSpan = -1;
                  lastElementIndex = -1;
                  }
               else if (span > 3)
                  {
                  // Gap encountered: flush the pending run with the narrowest
                  // store that covers it, then start a new run at nextE.
                  if (lastSpan == 0)
                     {
                     generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
                     }
                  else if (lastSpan == 1)
                     {
                     generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, node,generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
                     }
                  else
                     {
                     generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
                     }
                  lastElementIndex = nextE;
                  lastSpan = 0;
                  }
               }
            // Flush any run still pending when the bit vector is exhausted.
            if (lastSpan == 0)
               {
               generateMemRegInstruction(TR::InstOpCode::MOVDMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
               }
            else if (lastSpan == 1)
               {
               generateMemRegInstruction(TR::InstOpCode::MOVQMemReg, node,generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset, cg), scratchReg, cg);
               }
            else if (lastSpan == 2)
               {
               // Three slots left: back the 16-byte store up by 4 so it stays
               // within [dataOffset-4, ...) — requires dataOffset >= 4.
               TR_ASSERT(dataOffset >= 4, "dataOffset must be >= 4.");
               generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(targetReg, lastElementIndex*4 +dataOffset - 4, cg), scratchReg, cg);
               }
            }

         useRepInstruction = false;

         // When all lock words are inherited, the monitor slot need not be
         // (and is not) explicitly initialized here.
         J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;
         if (jvm->lockwordMode == LOCKNURSERY_ALGORITHM_ALL_INHERIT)
            monitorSlotIsInitialized = false;
         else
            monitorSlotIsInitialized = true;
         }
      else if ((!initInfo || initInfo->numZeroInitSlots > 0) &&
               !node->canSkipZeroInitialization())
         {
         // Initialize all slots
         //
         if (canUseFastInlineAllocation)
            {
            useRepInstruction = genZeroInitObject2(node, objectSize, elementSize, sizeReg, targetReg, tempReg, segmentReg, scratchReg, cg);
            shouldInitZeroSizedArrayHeader = false;
            }
         else
            {
            useRepInstruction = genZeroInitObject(node, objectSize, elementSize, sizeReg, targetReg, tempReg, segmentReg, scratchReg, cg);
            }

         J9JavaVM * jvm = fej9->getJ9JITConfig()->javaVM;
         if (jvm->lockwordMode == LOCKNURSERY_ALGORITHM_ALL_INHERIT)
            monitorSlotIsInitialized = false;
         else
            monitorSlotIsInitialized = true;
         }
      else
         {
         // Skip data initialization
         //
         if (canUseFastInlineAllocation)
            {
            // Even though we can skip the data initialization, for arrays of unknown size we still have
            // to initialize at least one slot to cover the discontiguous length field in case the array
            // is zero sized. This is because the length is not checked at runtime and is only needed
            // for non-native 64-bit targets where the discontiguous length slot is already initialized
            // via the contiguous length slot.
            //
            if (node->getOpCodeValue() != TR::New &&
                (comp->target().is32Bit() || comp->useCompressedPointers()))
               {
               generateMemImmInstruction(TR::InstOpCode::SMemImm4(), node,
                                         generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg),
                                         0, cg);
               shouldInitZeroSizedArrayHeader = false;
               }
            }

         monitorSlotIsInitialized = false;
         useRepInstruction = false;
         }
#ifdef J9VM_GC_NON_ZERO_TLH
      }
   else
      {
      monitorSlotIsInitialized = false;
      useRepInstruction = false;
      }
#endif

   // --------------------------------------------------------------------------------
   // Initialize the header
   // --------------------------------------------------------------------------------
   // If dynamic array allocation, must pass in classReg to initialize the array header
   if ((fej9->inlinedAllocationsMustBeVerified() && !comp->getOption(TR_UseSymbolValidationManager) && node->getOpCodeValue() == TR::anewarray) || dynamicArrayAllocation)
      {
      genInitArrayHeader(
            node,
            clazz,
            classReg,
            targetReg,
            sizeReg,
            elementSize,
            dataOffset,
            tempReg,
            monitorSlotIsInitialized,
            true,
            shouldInitZeroSizedArrayHeader,
            cg);
      }
   else if (isArrayNew)
      {
      genInitArrayHeader(
            node,
            clazz,
            NULL,
            targetReg,
            sizeReg,
            elementSize,
            dataOffset,
            tempReg,
            monitorSlotIsInitialized,
            false,
            shouldInitZeroSizedArrayHeader,
            cg);
      }
   else
      {
      genInitObjectHeader(node, clazz, classReg, targetReg, tempReg, monitorSlotIsInitialized, false, cg);
      }
   TR::Register *discontiguousDataAddrOffsetReg = NULL;
#ifdef TR_TARGET_64BIT
   if (isArrayNew)
      {
      /* Here we'll update dataAddr slot for both fixed and variable length arrays. Fixed length arrays are
       * simple as we just need to check first child of the node for array size. For variable length arrays
       * runtime size checks are needed to determine whether to use contiguous or discontiguous header layout.
       *
       * In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use
       * discontiguous header layout.
       */
      TR::MemoryReference *dataAddrSlotMR = NULL;
      TR::MemoryReference *dataAddrMR = NULL;
      if (TR::Compiler->om.compressObjectReferences() && NULL != sizeReg)
         {
         /* We need to check sizeReg at runtime to determine correct offset of dataAddr field.
          * Here we deal only with compressed refs because dataAddr field offset for discontiguous
          * and contiguous arrays is the same in full refs.
          */
         if (comp->getOption(TR_TraceCG))
            traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node);

         TR_ASSERT_FATAL_WITH_NODE(node,
            (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8,
            "Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. "
            "But was %d bytes for discontigous and %d bytes for contiguous array.\n",
            fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());

         // Branch-free select: CMP sizeReg,1 sets carry iff size==0, ADC then
         // yields 1 (zero-size → discontiguous) or 0, which scales by 8 in the
         // memory references below to pick the right dataAddr field offset.
         discontiguousDataAddrOffsetReg = cg->allocateRegister();
         generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, discontiguousDataAddrOffsetReg, discontiguousDataAddrOffsetReg, cg);
         generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, sizeReg, 1, cg);
         generateRegImmInstruction(TR::InstOpCode::ADCRegImm4(), node, discontiguousDataAddrOffsetReg, 0, cg);
         dataAddrMR = generateX86MemoryReference(targetReg, discontiguousDataAddrOffsetReg, 3, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
         dataAddrSlotMR = generateX86MemoryReference(targetReg, discontiguousDataAddrOffsetReg, 3, fej9->getOffsetOfContiguousDataAddrField(), cg);
         }
      else if (NULL == sizeReg && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0)
         {
         if (comp->getOption(TR_TraceCG))
            traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node);

         dataAddrMR = generateX86MemoryReference(targetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg);
         dataAddrSlotMR = generateX86MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg);
         }
      else
         {
         if (comp->getOption(TR_TraceCG))
            {
            traceMsg(comp,
               "Node (%p): Dealing with either full/compressed refs fixed length non-zero size array "
               "or full refs variable length array.\n",
               node);
            }

         if (!TR::Compiler->om.compressObjectReferences())
            {
            TR_ASSERT_FATAL_WITH_NODE(node,
               fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(),
               "dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. "
               "But was %d bytes for discontiguous and %d bytes for contiguous array.\n",
               fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField());
            }

         dataAddrMR = generateX86MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg);
         dataAddrSlotMR = generateX86MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg);
         }

      // write first data element address to dataAddr slot
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tempReg, dataAddrMR, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, dataAddrSlotMR, tempReg, cg);
      }
#endif /* TR_TARGET_64BIT */

   // Record an AOT relocation so the inlined allocation can be re-validated
   // (class shape/size) when the compiled body is loaded.
   if (fej9->inlinedAllocationsMustBeVerified() && (node->getOpCodeValue() == TR::New ||
       node->getOpCodeValue() == TR::anewarray) )
      {
      startInstr = startInstr->getNext();
      TR_OpaqueClassBlock *classToValidate = clazz;

      TR_RelocationRecordInformation *recordInfo =
         (TR_RelocationRecordInformation *) comp->trMemory()->allocateMemory(sizeof(TR_RelocationRecordInformation), heapAlloc);
      recordInfo->data1 = allocationSize;
      recordInfo->data2 = node->getInlinedSiteIndex();
      recordInfo->data3 = (uintptr_t) failLabel;
      recordInfo->data4 = (uintptr_t) startInstr;

      TR::SymbolReference * classSymRef;
      TR_ExternalRelocationTargetKind reloKind;

      if (node->getOpCodeValue() == TR::New)
         {
         classSymRef = node->getFirstChild()->getSymbolReference();
         reloKind = TR_VerifyClassObjectForAlloc;
         }
      else
         {
         classSymRef = node->getSecondChild()->getSymbolReference();
         reloKind = TR_VerifyRefArrayForAlloc;

         if (comp->getOption(TR_UseSymbolValidationManager))
            classToValidate = comp->fej9()->getComponentClassFromArrayClass(classToValidate);
         }

      if (comp->getOption(TR_UseSymbolValidationManager))
         {
         TR_ASSERT(classToValidate, "classToValidate should not be NULL, clazz=%p\n", clazz);
         recordInfo->data5 = (uintptr_t)classToValidate;
         }

      cg->addExternalRelocation(new (cg->trHeapMemory()) TR::BeforeBinaryEncodingExternalRelocation(startInstr,
                                                            (uint8_t *) classSymRef,
                                                            (uint8_t *) recordInfo,
                                                            reloKind, cg),
                                __FILE__, __LINE__, node);
      }

   // Count the post-conditions needed on the fall-through label.
   int32_t numDeps = 4;
   if (classReg)
      numDeps += 2;
   if (sizeReg)
      numDeps += 2;

   if (scratchReg)
      numDeps++;

   if (outlinedHelperCall)
      {
      if (node->getOpCodeValue() == TR::New)
         numDeps++;
      else
         numDeps += 2;
      }

   // Create dependencies for the allocation registers here.
   // The size and class registers, if they exist, must be the first
   // dependencies since the heap allocation snippet needs to find them to grab
   // the real registers from them.
   //
   deps = generateRegisterDependencyConditions((uint8_t)0, numDeps, cg);

   if (sizeReg)
      deps->addPostCondition(sizeReg, TR::RealRegister::NoReg, cg);
   if (classReg)
      deps->addPostCondition(classReg, TR::RealRegister::NoReg, cg);

   // The helper returns the object in eax; the VM thread lives in ebp.
   deps->addPostCondition(targetReg, TR::RealRegister::eax, cg);
   deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);

   if (useRepInstruction)
      {
      // REP STOS zero-init requires ecx (count) and edi (destination).
      deps->addPostCondition(tempReg, TR::RealRegister::ecx, cg);
      deps->addPostCondition(segmentReg, TR::RealRegister::edi, cg);
      }
   else
      {
      deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
      if (segmentReg)
         deps->addPostCondition(segmentReg, TR::RealRegister::NoReg, cg);
      }

   if (NULL != discontiguousDataAddrOffsetReg)
      {
      deps->addPostCondition(discontiguousDataAddrOffsetReg, TR::RealRegister::NoReg, cg);
      cg->stopUsingRegister(discontiguousDataAddrOffsetReg);
      }

   if (scratchReg)
      {
      deps->addPostCondition(scratchReg, TR::RealRegister::NoReg, cg);
      cg->stopUsingRegister(scratchReg);
      }

   if (outlinedHelperCall)
      {
      // Union in the helper-call children's registers so they survive across
      // the outlined path.
      TR::Node *callNode = outlinedHelperCall->getCallNode();
      TR::Register *reg;

      if (callNode->getFirstChild() == node->getFirstChild())
         {
         reg = callNode->getFirstChild()->getRegister();
         if (reg)
            deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
         }

      if (node->getOpCodeValue() != TR::New)
         if (callNode->getSecondChild() == node->getSecondChild())
            {
            reg = callNode->getSecondChild()->getRegister();
            if (reg)
               deps->unionPostCondition(reg, TR::RealRegister::NoReg, cg);
            }
      }

   deps->stopAddingConditions();

   generateLabelInstruction(TR::InstOpCode::label, node, fallThru, deps, cg);

   if (outlinedHelperCall) // 64bit or TR_newstructRef||TR_anewarraystructRef
      {
      // Copy the newly allocated object into a collected reference register now that it is a valid object.
      //
      TR::Register *targetReg2 = cg->allocateCollectedReferenceRegister();
      TR::RegisterDependencyConditions *deps2 = generateRegisterDependencyConditions(0, 1, cg);
      deps2->addPostCondition(targetReg2, TR::RealRegister::eax, cg);
      generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, targetReg2, targetReg, deps2, cg);
      cg->stopUsingRegister(targetReg);
      targetReg = targetReg2;
      }

   cg->stopUsingRegister(segmentReg);
   cg->stopUsingRegister(tempReg);

   // Decrement use counts on the children
   //
   cg->decReferenceCount(node->getFirstChild());
   if (isArrayNew)
      cg->decReferenceCount(node->getSecondChild());

   node->setRegister(targetReg);
   return targetReg;
   }
8383
8384
8385
// Generate instructions to type-check a store into a reference-type array.
8386
// The code sequence determines if the destination is an array of "java/lang/Object" instances,
8387
// or if the source object has the correct type (i.e. equal to the component type of the array).
8388
//
8389
void
8390
J9::X86::TreeEvaluator::VMarrayStoreCHKEvaluator(
8391
TR::Node *node,
8392
TR::Node *sourceChild,
8393
TR::Node *destinationChild,
8394
TR_X86ScratchRegisterManager *scratchRegisterManager,
8395
TR::LabelSymbol *wrtbarLabel,
8396
TR::Instruction *prevInstr,
8397
TR::CodeGenerator *cg)
8398
{
8399
TR::Compilation *comp = cg->comp();
8400
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
8401
TR::Register *sourceReg = sourceChild->getRegister();
8402
TR::Register *destReg = destinationChild->getRegister();
8403
TR::LabelSymbol *helperCallLabel = generateLabelSymbol(cg);
8404
8405
static char *disableArrayStoreCheckOpts = feGetEnv("TR_disableArrayStoreCheckOpts");
8406
if (!disableArrayStoreCheckOpts || !debug("noInlinedArrayStoreCHKs"))
8407
{
8408
// If the component type of the array is equal to the type of the source reference,
8409
// then the store always succeeds. The component type of the array is stored in a
8410
// field of the J9ArrayClass that represents the type of the array.
8411
//
8412
8413
TR::Register *sourceClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8414
TR::Register *destComponentClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8415
8416
TR::Instruction* instr;
8417
8418
if (TR::Compiler->om.compressObjectReferences())
8419
{
8420
8421
// FIXME: Add check for hint when doing the arraystore check as below when class pointer compression
8422
// is enabled.
8423
8424
TR::MemoryReference *destTypeMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8425
8426
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, destComponentClassReg, destTypeMR, cg); // class pointer is 32 bits
8427
TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);
8428
8429
// -------------------------------------------------------------------------
8430
//
8431
// If the component type is java.lang.Object then the store always succeeds.
8432
//
8433
// -------------------------------------------------------------------------
8434
8435
TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16);
8436
8437
TR_ASSERT((((uintptr_t)objectClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
8438
instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) objectClass), cg);
8439
8440
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8441
8442
// HCR in VMarrayStoreCHKEvaluator
8443
if (cg->wantToPatchClassPointer(objectClass, node))
8444
comp->getStaticHCRPICSites()->push_front(instr);
8445
8446
// here we may have to convert the TR_OpaqueClassBlock into a J9Class pointer
8447
// and store it in destComponentClassReg
8448
// ..
8449
8450
TR::MemoryReference *destCompTypeMR =
8451
generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);
8452
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destCompTypeMR, cg);
8453
8454
// here we may have to convert the J9Class pointer from destComponentClassReg into
8455
// a TR_OpaqueClassBlock and store it back into destComponentClassReg
8456
// ..
8457
8458
TR::MemoryReference *sourceRegClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8459
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, sourceClassReg, sourceRegClassMR, cg);
8460
TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);
8461
8462
generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, destComponentClassReg, sourceClassReg, cg); // compare only 32 bits
8463
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8464
8465
// -------------------------------------------------------------------------
8466
// // Check the source class cast cache
8467
//
8468
// -------------------------------------------------------------------------
8469
8470
generateMemRegInstruction(
8471
TR::InstOpCode::CMP4MemReg,
8472
node,
8473
generateX86MemoryReference(sourceClassReg, offsetof(J9Class, castClassCache), cg), destComponentClassReg, cg);
8474
}
8475
else // no class pointer compression
8476
{
8477
TR::MemoryReference *sourceClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8478
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceClassReg, sourceClassMR, cg);
8479
TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);
8480
8481
TR::MemoryReference *destClassMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8482
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destClassMR, cg);
8483
TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);
8484
TR::MemoryReference *destCompTypeMR =
8485
generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);
8486
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destCompTypeMR, cg);
8487
8488
generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, destComponentClassReg, sourceClassReg, cg);
8489
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8490
8491
// -------------------------------------------------------------------------
8492
//
8493
// Check the source class cast cache
8494
//
8495
// -------------------------------------------------------------------------
8496
8497
generateMemRegInstruction(
8498
TR::InstOpCode::CMPMemReg(),
8499
node,
8500
generateX86MemoryReference(sourceClassReg, offsetof(J9Class, castClassCache), cg), destComponentClassReg, cg);
8501
}
8502
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8503
8504
instr = NULL;
8505
/*
8506
TR::Instruction *instr;
8507
8508
8509
// -------------------------------------------------------------------------
8510
//
8511
// If the component type is java.lang.Object then the store always succeeds.
8512
//
8513
// -------------------------------------------------------------------------
8514
8515
TR_OpaqueClassBlock *objectClass = fej9->getSystemClassFromClassName("java/lang/Object", 16);
8516
8517
if (comp->target().is64Bit())
8518
{
8519
if (TR::Compiler->om.compressObjectReferences())
8520
{
8521
TR_ASSERT((((uintptr_t)objectClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
8522
instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) objectClass), cg);
8523
}
8524
else // 64 bit but no class pointer compression
8525
{
8526
if ((uintptr_t)objectClass <= (uintptr_t)0x7fffffff)
8527
{
8528
instr = generateRegImmInstruction(TR::InstOpCode::CMP8RegImm4, node, destComponentClassReg, (uintptr_t) objectClass, cg);
8529
}
8530
else
8531
{
8532
TR::Register *objectClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8533
instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, objectClassReg, (uintptr_t) objectClass, cg);
8534
generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, destComponentClassReg, objectClassReg, cg);
8535
scratchRegisterManager->reclaimScratchRegister(objectClassReg);
8536
}
8537
}
8538
}
8539
else
8540
{
8541
instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (int32_t)(uintptr_t) objectClass, cg);
8542
}
8543
8544
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8545
8546
// HCR in VMarrayStoreCHKEvaluator
8547
if (cg->wantToPatchClassPointer(objectClass, node))
8548
comp->getStaticHCRPICSites()->push_front(instr);
8549
*/
8550
8551
8552
// ---------------------------------------------
8553
//
8554
// If isInstanceOf (objectClass,ArrayComponentClass,true,true) was successful and stored during VP, we need to test again the real arrayComponentClass
8555
// Need to relocate address of arrayComponentClass under aot sharedcache
8556
// Need to possibility of class unloading.
8557
// --------------------------------------------
8558
8559
8560
if (!(comp->getOption(TR_DisableArrayStoreCheckOpts)) && node->getArrayComponentClassInNode() )
8561
{
8562
TR_OpaqueClassBlock *arrayComponentClass = (TR_OpaqueClassBlock *) node->getArrayComponentClassInNode();
8563
if (comp->target().is64Bit())
8564
{
8565
if (TR::Compiler->om.compressObjectReferences())
8566
{
8567
TR_ASSERT((((uintptr_t)arrayComponentClass) >> 32) == 0, "TR_OpaqueClassBlock must fit on 32 bits when using class pointer compression");
8568
instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (uint32_t) ((uint64_t) arrayComponentClass), cg);
8569
8570
if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
8571
comp->getStaticPICSites()->push_front(instr);
8572
8573
}
8574
else // 64 bit but no class pointer compression
8575
{
8576
if ((uintptr_t)arrayComponentClass <= (uintptr_t)0x7fffffff)
8577
{
8578
instr = generateRegImmInstruction(TR::InstOpCode::CMP8RegImm4, node, destComponentClassReg, (uintptr_t) arrayComponentClass, cg);
8579
if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
8580
comp->getStaticPICSites()->push_front(instr);
8581
8582
}
8583
else
8584
{
8585
TR::Register *arrayComponentClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8586
instr = generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, arrayComponentClassReg, (uintptr_t) arrayComponentClass, cg);
8587
generateRegRegInstruction(TR::InstOpCode::CMP8RegReg, node, destComponentClassReg, arrayComponentClassReg, cg);
8588
scratchRegisterManager->reclaimScratchRegister(arrayComponentClassReg);
8589
}
8590
}
8591
}
8592
else
8593
{
8594
instr = generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, destComponentClassReg, (int32_t)(uintptr_t) arrayComponentClass, cg);
8595
if (fej9->isUnloadAssumptionRequired(arrayComponentClass, comp->getCurrentMethod()))
8596
comp->getStaticPICSites()->push_front(instr);
8597
8598
}
8599
8600
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8601
8602
// HCR in VMarrayStoreCHKEvaluator
8603
if (cg->wantToPatchClassPointer(arrayComponentClass, node))
8604
comp->getStaticHCRPICSites()->push_front(instr);
8605
8606
}
8607
8608
8609
8610
8611
// For compressed references:
8612
// destComponentClassReg contains the class offset so we may need to generate code
8613
// to convert from class offset to real J9Class pointer
8614
8615
// -------------------------------------------------------------------------
8616
//
8617
// Compare source and dest class depths
8618
//
8619
// -------------------------------------------------------------------------
8620
8621
// Get the depth of array component type in testerReg
8622
//
8623
bool eliminateDepthMask = (J9AccClassDepthMask == 0xffff);
8624
TR::MemoryReference *destComponentClassDepthMR =
8625
generateX86MemoryReference(destComponentClassReg, offsetof(J9Class,classDepthAndFlags), cg);
8626
8627
// DMDM 32-bit only???
8628
if (comp->target().is32Bit())
8629
{
8630
scratchRegisterManager->reclaimScratchRegister(destComponentClassReg);
8631
}
8632
8633
TR::Register *destComponentClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();
8634
8635
if (eliminateDepthMask)
8636
{
8637
if (comp->target().is64Bit())
8638
generateRegMemInstruction(TR::InstOpCode::MOVZXReg8Mem2, node, destComponentClassDepthReg, destComponentClassDepthMR, cg);
8639
else
8640
generateRegMemInstruction(TR::InstOpCode::MOVZXReg4Mem2, node, destComponentClassDepthReg, destComponentClassDepthMR, cg);
8641
}
8642
else
8643
{
8644
generateRegMemInstruction(
8645
TR::InstOpCode::LRegMem(),
8646
node,
8647
destComponentClassDepthReg,
8648
destComponentClassDepthMR, cg);
8649
}
8650
8651
if (!eliminateDepthMask)
8652
{
8653
if (comp->target().is64Bit())
8654
{
8655
TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
8656
if (!(J9AccClassDepthMask & 0x80000000))
8657
generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, destComponentClassDepthReg, J9AccClassDepthMask, cg);
8658
}
8659
else
8660
{
8661
generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, destComponentClassDepthReg, J9AccClassDepthMask, cg);
8662
}
8663
}
8664
8665
// For compressed references:
8666
// temp2 contains the class offset so we may need to generate code
8667
// to convert from class offset to real J9Class pointer
8668
8669
// Get the depth of type of object being stored into the array in testerReg2
8670
//
8671
8672
TR::MemoryReference *mr = generateX86MemoryReference(sourceClassReg, offsetof(J9Class,classDepthAndFlags), cg);
8673
8674
// There aren't enough registers available on 32-bit across this internal
8675
// control flow region. Give one back and manually and force the source
8676
// class to be rematerialized later.
8677
//
8678
if (comp->target().is32Bit())
8679
{
8680
scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
8681
}
8682
8683
TR::Register *sourceClassDepthReg = NULL;
8684
if (eliminateDepthMask)
8685
{
8686
generateMemRegInstruction(TR::InstOpCode::CMP2MemReg, node, mr, destComponentClassDepthReg, cg);
8687
}
8688
else
8689
{
8690
sourceClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();
8691
generateRegMemInstruction(
8692
TR::InstOpCode::LRegMem(),
8693
node,
8694
sourceClassDepthReg,
8695
mr, cg);
8696
8697
if (comp->target().is64Bit())
8698
{
8699
TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
8700
if (!(J9AccClassDepthMask & 0x80000000))
8701
generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
8702
}
8703
else
8704
{
8705
generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
8706
}
8707
generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, sourceClassDepthReg, destComponentClassDepthReg, cg);
8708
}
8709
8710
/*TR::Register *sourceClassDepthReg = scratchRegisterManager->findOrCreateScratchRegister();
8711
generateRegMemInstruction(
8712
TR::InstOpCode::LRegMem(),
8713
node,
8714
sourceClassDepthReg,
8715
mr, cg);
8716
8717
if (comp->target().is64Bit())
8718
{
8719
TR_ASSERT(!(J9AccClassDepthMask & 0x80000000), "AMD64: need to use a second register for AND mask");
8720
if (!(J9AccClassDepthMask & 0x80000000))
8721
generateRegImmInstruction(TR::InstOpCode::AND8RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
8722
}
8723
else
8724
{
8725
generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, sourceClassDepthReg, J9AccClassDepthMask, cg);
8726
}
8727
8728
generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, sourceClassDepthReg, destComponentClassDepthReg, cg);*/
8729
8730
generateLabelInstruction(TR::InstOpCode::JBE4, node, helperCallLabel, cg);
8731
if (sourceClassDepthReg != NULL)
8732
scratchRegisterManager->reclaimScratchRegister(sourceClassDepthReg);
8733
8734
8735
// For compressed references:
8736
// destComponentClassReg contains the class offset so we may need to generate code
8737
// to convert from class offset to real J9Class pointer
8738
8739
if (comp->target().is32Bit())
8740
{
8741
// Rematerialize the source class.
8742
//
8743
sourceClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8744
TR::MemoryReference *sourceClassMR = generateX86MemoryReference(sourceReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8745
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceClassReg, sourceClassMR, cg);
8746
TR::TreeEvaluator::generateVFTMaskInstruction(node, sourceClassReg, cg);
8747
}
8748
8749
TR::MemoryReference *tempMR = generateX86MemoryReference(sourceClassReg, offsetof(J9Class,superclasses), cg);
8750
8751
if (comp->target().is32Bit())
8752
{
8753
scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
8754
}
8755
8756
TR::Register *sourceSuperClassReg = scratchRegisterManager->findOrCreateScratchRegister();
8757
8758
generateRegMemInstruction(
8759
TR::InstOpCode::LRegMem(),
8760
node,
8761
sourceSuperClassReg,
8762
tempMR,
8763
cg);
8764
8765
TR::MemoryReference *leaMR =
8766
generateX86MemoryReference(sourceSuperClassReg, destComponentClassDepthReg, logBase2(sizeof(uintptr_t)), 0, cg);
8767
8768
// For compressed references:
8769
// leaMR is a memory reference to a J9Class
8770
// destComponentClassReg contains a TR_OpaqueClassBlock
8771
// We may need to convert superClass to a class offset before doing the comparison
8772
8773
if (comp->target().is32Bit())
8774
{
8775
8776
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, sourceSuperClassReg, leaMR, cg);
8777
8778
// Rematerialize destination component class
8779
//
8780
TR::MemoryReference *destClassMR = generateX86MemoryReference(destReg, TR::Compiler->om.offsetOfObjectVftField(), cg);
8781
8782
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, destComponentClassReg, destClassMR, cg);
8783
TR::TreeEvaluator::generateVFTMaskInstruction(node, destComponentClassReg, cg);
8784
TR::MemoryReference *destCompTypeMR =
8785
generateX86MemoryReference(destComponentClassReg, offsetof(J9ArrayClass, componentType), cg);
8786
8787
generateMemRegInstruction(TR::InstOpCode::CMPMemReg(), node, destCompTypeMR, sourceSuperClassReg, cg);
8788
}
8789
else
8790
{
8791
generateRegMemInstruction(TR::InstOpCode::CMP4RegMem, node, destComponentClassReg, leaMR, cg);
8792
}
8793
8794
scratchRegisterManager->reclaimScratchRegister(destComponentClassReg);
8795
scratchRegisterManager->reclaimScratchRegister(destComponentClassDepthReg);
8796
scratchRegisterManager->reclaimScratchRegister(sourceClassReg);
8797
scratchRegisterManager->reclaimScratchRegister(sourceSuperClassReg);
8798
8799
generateLabelInstruction(TR::InstOpCode::JE4, node, wrtbarLabel, cg);
8800
}
8801
8802
// The fast paths failed; execute the type-check helper call.
8803
//
8804
TR::LabelSymbol* helperReturnLabel = generateLabelSymbol(cg);
8805
TR::Node *helperCallNode = TR::Node::createWithSymRef(TR::call, 2, 2, sourceChild, destinationChild, node->getSymbolReference());
8806
helperCallNode->copyByteCodeInfo(node);
8807
generateLabelInstruction(TR::InstOpCode::JMP4, helperCallNode, helperCallLabel, cg);
8808
TR_OutlinedInstructions* outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(helperCallNode, TR::call, NULL, helperCallLabel, helperReturnLabel, cg);
8809
cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
8810
generateLabelInstruction(TR::InstOpCode::label, helperCallNode, helperReturnLabel, cg);
8811
cg->decReferenceCount(sourceChild);
8812
cg->decReferenceCount(destinationChild);
8813
}
8814
8815
8816
// Check that two objects are compatible for use in an arraycopy operation.
8817
// If not, an ArrayStoreException is thrown.
8818
//
8819
TR::Register *J9::X86::TreeEvaluator::VMarrayCheckEvaluator(TR::Node *node, TR::CodeGenerator *cg)
8820
{
8821
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
8822
bool use64BitClasses = cg->comp()->target().is64Bit() && !TR::Compiler->om.generateCompressedObjectHeaders();
8823
8824
TR::Node *object1 = node->getFirstChild();
8825
TR::Node *object2 = node->getSecondChild();
8826
TR::Register *object1Reg = cg->evaluate(object1);
8827
TR::Register *object2Reg = cg->evaluate(object2);
8828
8829
TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
8830
TR::LabelSymbol *fallThrough = generateLabelSymbol(cg);
8831
TR::Instruction *instr;
8832
TR::LabelSymbol *snippetLabel = NULL;
8833
TR::Snippet *snippet = NULL;
8834
TR::Register *tempReg = cg->allocateRegister();
8835
8836
startLabel->setStartInternalControlFlow();
8837
fallThrough->setEndInternalControlFlow();
8838
generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
8839
8840
// If the objects are the same and one of them is known to be an array, they
8841
// are compatible.
8842
//
8843
if (node->isArrayChkPrimitiveArray1() ||
8844
node->isArrayChkReferenceArray1() ||
8845
node->isArrayChkPrimitiveArray2() ||
8846
node->isArrayChkReferenceArray2())
8847
{
8848
generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, object1Reg, object2Reg, cg);
8849
generateLabelInstruction(TR::InstOpCode::JE4, node, fallThrough, cg);
8850
}
8851
8852
else
8853
{
8854
// Neither object is known to be an array
8855
// Check that object 1 is an array. If not, throw exception.
8856
//
8857
TR::InstOpCode::Mnemonic testOpCode;
8858
if ((J9AccClassRAMArray >= CHAR_MIN) && (J9AccClassRAMArray <= CHAR_MAX))
8859
testOpCode = TR::InstOpCode::TEST1MemImm1;
8860
else
8861
testOpCode = TR::InstOpCode::TEST4MemImm4;
8862
8863
if (TR::Compiler->om.compressObjectReferences())
8864
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, tempReg, generateX86MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8865
else
8866
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8867
8868
TR::TreeEvaluator::generateVFTMaskInstruction(node, tempReg, cg);
8869
generateMemImmInstruction(testOpCode, node, generateX86MemoryReference(tempReg, offsetof(J9Class, classDepthAndFlags), cg), J9AccClassRAMArray, cg);
8870
if (!snippetLabel)
8871
{
8872
snippetLabel = generateLabelSymbol(cg);
8873
instr = generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
8874
snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), snippetLabel, instr);
8875
cg->addSnippet(snippet);
8876
}
8877
else
8878
generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
8879
}
8880
8881
// Test equality of the object classes.
8882
//
8883
generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, tempReg, generateX86MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8884
generateRegMemInstruction(TR::InstOpCode::XORRegMem(use64BitClasses), node, tempReg, generateX86MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8885
TR::TreeEvaluator::generateVFTMaskInstruction(node, tempReg, cg);
8886
8887
// If either object is known to be a primitive array, we are done. Either
8888
// the equality test fails and we throw the exception or it succeeds and
8889
// we finish.
8890
//
8891
if (node->isArrayChkPrimitiveArray1() || node->isArrayChkPrimitiveArray2())
8892
{
8893
if (!snippetLabel)
8894
{
8895
snippetLabel = generateLabelSymbol(cg);
8896
instr = generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
8897
snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), snippetLabel, instr);
8898
cg->addSnippet(snippet);
8899
}
8900
else
8901
generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
8902
}
8903
8904
// Otherwise, there is more testing to do. If the classes are equal we
8905
// are done, and branch to the fallThrough label.
8906
//
8907
else
8908
{
8909
generateLabelInstruction(TR::InstOpCode::JE4, node, fallThrough, cg);
8910
8911
// If either object is not known to be a reference array type, check it
8912
// We already know that object1 is an array type but we may have to now
8913
// check object2.
8914
//
8915
if (!node->isArrayChkReferenceArray1())
8916
{
8917
8918
if (TR::Compiler->om.compressObjectReferences())
8919
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, tempReg, generateX86MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8920
else
8921
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(object1Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8922
8923
TR::TreeEvaluator::generateVFTMaskInstruction(node, tempReg, cg);
8924
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(tempReg, offsetof(J9Class, classDepthAndFlags), cg), cg);
8925
// X = (ramclass->ClassDepthAndFlags)>>J9AccClassRAMShapeShift
8926
8927
// X & OBJECT_HEADER_SHAPE_MASK
8928
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_MASK << J9AccClassRAMShapeShift), cg);
8929
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_POINTERS << J9AccClassRAMShapeShift), cg);
8930
8931
if (!snippetLabel)
8932
{
8933
snippetLabel = generateLabelSymbol(cg);
8934
instr = generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
8935
snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), snippetLabel, instr);
8936
cg->addSnippet(snippet);
8937
}
8938
else
8939
generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
8940
}
8941
if (!node->isArrayChkReferenceArray2())
8942
{
8943
// Check that object 2 is an array. If not, throw exception.
8944
//
8945
TR::InstOpCode::Mnemonic testOpCode;
8946
if ((J9AccClassRAMArray >= CHAR_MIN) && (J9AccClassRAMArray <= CHAR_MAX))
8947
testOpCode = TR::InstOpCode::TEST1MemImm1;
8948
else
8949
testOpCode = TR::InstOpCode::TEST4MemImm4;
8950
8951
// Check that object 2 is an array. If not, throw exception.
8952
//
8953
if (TR::Compiler->om.compressObjectReferences())
8954
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, tempReg, generateX86MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8955
else
8956
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(object2Reg, TR::Compiler->om.offsetOfObjectVftField(), cg), cg);
8957
TR::TreeEvaluator::generateVFTMaskInstruction(node, tempReg, cg);
8958
generateMemImmInstruction(testOpCode, node, generateX86MemoryReference(tempReg, offsetof(J9Class, classDepthAndFlags), cg), J9AccClassRAMArray, cg);
8959
if (!snippetLabel)
8960
{
8961
snippetLabel = generateLabelSymbol(cg);
8962
instr = generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
8963
snippet = new (cg->trHeapMemory()) TR::X86CheckFailureSnippet(cg, node->getSymbolReference(), snippetLabel, instr);
8964
cg->addSnippet(snippet);
8965
}
8966
else
8967
generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
8968
8969
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, tempReg, generateX86MemoryReference(tempReg, offsetof(J9Class, classDepthAndFlags), cg), cg);
8970
generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_MASK << J9AccClassRAMShapeShift), cg);
8971
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (OBJECT_HEADER_SHAPE_POINTERS << J9AccClassRAMShapeShift), cg);
8972
8973
generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);
8974
}
8975
8976
// Now both objects are known to be reference arrays, so they are
8977
// compatible for arraycopy.
8978
}
8979
8980
// Now generate the fall-through label
8981
//
8982
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4, cg);
8983
deps->addPostCondition(object1Reg, TR::RealRegister::NoReg, cg);
8984
deps->addPostCondition(object2Reg, TR::RealRegister::NoReg, cg);
8985
deps->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
8986
deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
8987
8988
generateLabelInstruction(TR::InstOpCode::label, node, fallThrough, deps, cg);
8989
8990
cg->stopUsingRegister(tempReg);
8991
cg->decReferenceCount(object1);
8992
cg->decReferenceCount(object2);
8993
8994
return NULL;
8995
}
8996
8997
8998
#ifdef LINUX
8999
#if defined(TR_TARGET_32BIT)
9000
static void
9001
addFPXMMDependencies(
9002
TR::CodeGenerator *cg,
9003
TR::RegisterDependencyConditions *dependencies)
9004
{
9005
TR_LiveRegisters *lr = cg->getLiveRegisters(TR_FPR);
9006
if (!lr || lr->getNumberOfLiveRegisters() > 0)
9007
{
9008
for (int regIndex = TR::RealRegister::FirstXMMR; regIndex <= TR::RealRegister::LastXMMR; regIndex++)
9009
{
9010
TR::Register *dummy = cg->allocateRegister(TR_FPR);
9011
dummy->setPlaceholderReg();
9012
dependencies->addPostCondition(dummy, (TR::RealRegister::RegNum)regIndex, cg);
9013
cg->stopUsingRegister(dummy);
9014
}
9015
}
9016
}
9017
#endif
9018
9019
#define J9TIME_NANOSECONDS_PER_SECOND ((I_64) 1000000000)
9020
#if defined(TR_TARGET_64BIT)
9021
static bool
9022
inlineNanoTime(
9023
TR::Node *node,
9024
TR::CodeGenerator *cg)
9025
{
9026
TR::Compilation *comp = cg->comp();
9027
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9028
9029
if (debug("traceInlInlining"))
9030
diagnostic("nanoTime called by %s\n", comp->signature());
9031
9032
if (fej9->supportsFastNanoTime())
9033
{ // Fully Inlined Version
9034
9035
// First, evaluate resultAddress if provided. There's no telling how
9036
// many regs that address computation needs, so let's get it out of the
9037
// way before we start using registers for other things.
9038
//
9039
9040
TR::Register *resultAddress;
9041
if (node->getNumChildren() == 1)
9042
{
9043
resultAddress = cg->evaluate(node->getFirstChild());
9044
}
9045
else
9046
{
9047
TR_ASSERT(node->getNumChildren() == 0, "nanoTime must have zero or one children");
9048
resultAddress = NULL;
9049
}
9050
9051
TR::SymbolReference *gtod = comp->getSymRefTab()->findOrCreateRuntimeHelper(TR_AMD64clockGetTime);
9052
TR::Node *timevalNode = TR::Node::createWithSymRef(node, TR::loadaddr, 0, cg->getNanoTimeTemp());
9053
TR::Node *clockSourceNode = TR::Node::create(node, TR::iconst, 0, CLOCK_MONOTONIC);
9054
TR::Node *callNode = TR::Node::createWithSymRef(TR::call, 2, 2, clockSourceNode, timevalNode, gtod);
9055
// TODO: Use performCall
9056
TR::Linkage *linkage = cg->getLinkage(gtod->getSymbol()->getMethodSymbol()->getLinkageConvention());
9057
linkage->buildDirectDispatch(callNode, false);
9058
9059
TR::Register *result = cg->allocateRegister();
9060
TR::Register *reg = cg->allocateRegister();
9061
9062
TR::MemoryReference *tv_sec;
9063
9064
// result = tv_sec * 1,000,000,000 (converts seconds to nanoseconds)
9065
9066
tv_sec = generateX86MemoryReference(timevalNode, cg, false);
9067
generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, result, tv_sec, cg);
9068
generateRegRegImmInstruction(TR::InstOpCode::IMUL8RegRegImm4, node, result, result, J9TIME_NANOSECONDS_PER_SECOND, cg);
9069
9070
// reg = tv_usec
9071
generateRegMemInstruction(TR::InstOpCode::L8RegMem, node, reg, generateX86MemoryReference(*tv_sec, offsetof(struct timespec, tv_nsec), cg), cg);
9072
9073
// result = reg + result
9074
generateRegMemInstruction(TR::InstOpCode::LEA8RegMem, node, result, generateX86MemoryReference(reg, result, 0, cg), cg);
9075
9076
cg->stopUsingRegister(reg);
9077
9078
// Store the result to memory if necessary
9079
if (resultAddress)
9080
{
9081
generateMemRegInstruction(TR::InstOpCode::S8MemReg, node, generateX86MemoryReference(resultAddress, 0, cg), result, cg);
9082
9083
cg->decReferenceCount(node->getFirstChild());
9084
if (node->getReferenceCount() == 1 && cg->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() == TR::treetop)
9085
{
9086
// Result is not needed in a register, so free it up
9087
//
9088
cg->stopUsingRegister(result);
9089
result = NULL;
9090
}
9091
}
9092
9093
node->setRegister(result);
9094
9095
return true;
9096
}
9097
else
9098
{ // Inlined call to Port Library
9099
return false;
9100
}
9101
}
9102
#else // !64bit
9103
static bool
9104
inlineNanoTime(
9105
TR::Node *node,
9106
TR::CodeGenerator *cg)
9107
{
9108
TR::Compilation *comp = cg->comp();
9109
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9110
9111
if (debug("traceInlInlining"))
9112
diagnostic("nanoTime called by %s\n", comp->signature());
9113
9114
TR::RealRegister *espReal = cg->machine()->getRealRegister(TR::RealRegister::esp);
9115
TR::Register *vmThreadReg = cg->getVMThreadRegister();
9116
TR::Register *temp2 = 0;
9117
9118
if (fej9->supportsFastNanoTime())
9119
{
9120
TR::Register *resultAddress;
9121
if (node->getNumChildren() == 1)
9122
{
9123
resultAddress = cg->evaluate(node->getFirstChild());
9124
generateRegInstruction(TR::InstOpCode::PUSHReg, node, resultAddress, cg);
9125
generateImmInstruction(TR::InstOpCode::PUSHImm4, node, CLOCK_MONOTONIC, cg);
9126
}
9127
else
9128
{
9129
// Leave space on the stack for the 64-bit result
9130
//
9131
9132
generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, espReal, 8, cg);
9133
9134
resultAddress = cg->allocateRegister();
9135
generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, resultAddress, espReal, cg); // save away esp before the push
9136
generateRegInstruction(TR::InstOpCode::PUSHReg, node, resultAddress, cg);
9137
generateImmInstruction(TR::InstOpCode::PUSHImm4, node, CLOCK_MONOTONIC, cg);
9138
cg->stopUsingRegister(resultAddress);
9139
resultAddress = espReal;
9140
}
9141
9142
// 64-bit issues on the call instructions below
9143
9144
// Build register dependencies and call the method in the system library
9145
// directly. Since this is a "C"-style call, ebx, esi and edi are preserved
9146
//
9147
int32_t extraFPDeps = (uint8_t)(TR::RealRegister::LastXMMR - TR::RealRegister::FirstXMMR+1);
9148
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4 + extraFPDeps, cg);
9149
TR::Register *temp1 = cg->allocateRegister();
9150
deps->addPostCondition(temp1, TR::RealRegister::eax, cg);
9151
cg->stopUsingRegister(temp1);
9152
temp1 = cg->allocateRegister();
9153
deps->addPostCondition(temp1, TR::RealRegister::ecx, cg);
9154
cg->stopUsingRegister(temp1);
9155
temp1 = cg->allocateRegister();
9156
deps->addPostCondition(temp1, TR::RealRegister::edx, cg);
9157
cg->stopUsingRegister(temp1);
9158
deps->addPostCondition(cg->getMethodMetaDataRegister(), TR::RealRegister::ebp, cg);
9159
9160
// add the XMM dependencies
9161
addFPXMMDependencies(cg, deps);
9162
deps->stopAddingConditions();
9163
9164
TR::X86ImmInstruction *callInstr = generateImmInstruction(TR::InstOpCode::CALLImm4, node, (int32_t)&clock_gettime, deps, cg);
9165
9166
generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, espReal, 8, cg);
9167
9168
TR::Register *eaxReal = cg->allocateRegister();
9169
TR::Register *edxReal = cg->allocateRegister();
9170
9171
// load usec to a register
9172
TR::Register *reglow = cg->allocateRegister();
9173
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, reglow, generateX86MemoryReference(resultAddress, 4, cg), cg);
9174
9175
9176
TR::RegisterDependencyConditions *dep1 = generateRegisterDependencyConditions((uint8_t)2, 2, cg);
9177
dep1->addPreCondition(eaxReal, TR::RealRegister::eax, cg);
9178
dep1->addPreCondition(edxReal, TR::RealRegister::edx, cg);
9179
dep1->addPostCondition(eaxReal, TR::RealRegister::eax, cg);
9180
dep1->addPostCondition(edxReal, TR::RealRegister::edx, cg);
9181
9182
9183
// load second to eax then multiply by 1,000,000,000
9184
9185
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, edxReal, generateX86MemoryReference(resultAddress, 0, cg), cg);
9186
generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, eaxReal, J9TIME_NANOSECONDS_PER_SECOND, cg);
9187
generateRegRegInstruction(TR::InstOpCode::IMUL4AccReg, node, eaxReal, edxReal, dep1, cg);
9188
9189
9190
// add the two parts then store it back
9191
generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, eaxReal, reglow, cg);
9192
generateRegImmInstruction(TR::InstOpCode::ADC4RegImm4, node, edxReal, 0x0, cg);
9193
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(resultAddress, 0, cg), eaxReal, cg);
9194
generateMemRegInstruction(TR::InstOpCode::S4MemReg, node, generateX86MemoryReference(resultAddress, 4, cg), edxReal, cg);
9195
9196
cg->stopUsingRegister(eaxReal);
9197
cg->stopUsingRegister(edxReal);
9198
cg->stopUsingRegister(reglow);
9199
9200
TR::Register *lowReg = cg->allocateRegister();
9201
TR::Register *highReg = cg->allocateRegister();
9202
9203
if (node->getNumChildren() == 1)
9204
{
9205
if (node->getReferenceCount() > 1 ||
9206
cg->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() != TR::treetop)
9207
{
9208
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, lowReg, generateX86MemoryReference(resultAddress, 0, cg), cg);
9209
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, highReg, generateX86MemoryReference(resultAddress, 4, cg), cg);
9210
9211
TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);
9212
node->setRegister(result);
9213
}
9214
cg->decReferenceCount(node->getFirstChild());
9215
}
9216
else
9217
{
9218
// The result of the call is now on the stack. Get it into registers.
9219
//
9220
generateRegInstruction(TR::InstOpCode::POPReg, node, lowReg, cg);
9221
generateRegInstruction(TR::InstOpCode::POPReg, node, highReg, cg);
9222
TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);
9223
node->setRegister(result);
9224
}
9225
}
9226
else
9227
{
9228
// This code is busted. The hires clock is measured in microseconds, not
9229
// nanoseconds, and this code doesn't correct for that. The above code
9230
// will be faster anyway, and it should be upgraded to support AOT, so
9231
// then we'll never need the hires clock version again.
9232
static char *useHiResClock = feGetEnv("TR_useHiResClock");
9233
if (!useHiResClock)
9234
return false;
9235
// Leave space on the stack for the 64-bit result
9236
//
9237
temp2 = cg->allocateRegister();
9238
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, temp2, generateX86MemoryReference(vmThreadReg, offsetof(J9VMThread, javaVM), cg), cg);
9239
generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, temp2, generateX86MemoryReference(temp2, offsetof(J9JavaVM, portLibrary), cg), cg);
9240
generateRegInstruction(TR::InstOpCode::PUSHReg, node, espReal, cg);
9241
generateRegInstruction(TR::InstOpCode::PUSHReg, node, temp2, cg);
9242
9243
int32_t extraFPDeps = (uint8_t)(TR::RealRegister::LastXMMR - TR::RealRegister::FirstXMMR+1);
9244
9245
// Build register dependencies and call the method in the port library
9246
// directly. Since this is a "C"-style call, ebx, esi and edi are preserved
9247
//
9248
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)4 + extraFPDeps, cg);
9249
TR::Register *temp1 = cg->allocateRegister();
9250
deps->addPostCondition(temp1, TR::RealRegister::ecx, cg);
9251
cg->stopUsingRegister(temp1);
9252
9253
TR::Register *lowReg = cg->allocateRegister();
9254
deps->addPostCondition(lowReg, TR::RealRegister::eax, cg);
9255
9256
TR::Register *highReg = cg->allocateRegister();
9257
deps->addPostCondition(highReg, TR::RealRegister::edx, cg);
9258
9259
deps->addPostCondition(cg->getMethodMetaDataRegister(), TR::RealRegister::ebp, cg);
9260
9261
// add the XMM dependencies
9262
addFPXMMDependencies(cg, deps);
9263
deps->stopAddingConditions();
9264
9265
generateCallMemInstruction(TR::InstOpCode::CALLMem, node, generateX86MemoryReference(temp2, offsetof(OMRPortLibrary, time_hires_clock), cg), deps, cg);
9266
cg->stopUsingRegister(temp2);
9267
9268
generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, espReal, 8, cg);
9269
9270
TR::RegisterPair *result = cg->allocateRegisterPair(lowReg, highReg);
9271
node->setRegister(result);
9272
}
9273
9274
return true;
9275
}
9276
#endif
9277
#endif // LINUX
9278
9279
// Convert serial String.hashCode computation into vectorization copy and implement with SSE instruction
9280
//
9281
// Conversion process example:
9282
//
9283
// str[8] = example string representing 8 characters (compressed or decompressed)
9284
//
9285
// The serial method for creating the hash:
9286
// hash = 0, offset = 0, count = 8
9287
// for (int i = offset; i < offset+count; ++i) {
9288
// hash = (hash << 5) - hash + str[i];
9289
// }
9290
//
9291
// Note that ((hash << 5) - hash) is equivalent to hash * 31
9292
//
9293
// Expanding out the for loop:
9294
// hash = ((((((((0*31+str[0])*31+str[1])*31+str[2])*31+str[3])*31+str[4])*31+str[5])*31+str[6])*31+str[7])
9295
//
9296
// Simplified:
9297
// hash = (31^7)*str[0] + (31^6)*str[1] + (31^5)*str[2] + (31^4)*str[3]
9298
// + (31^3)*str[4] + (31^2)*str[5] + (31^1)*str[6] + (31^0)*str[7]
9299
//
9300
// Rearranged:
9301
// hash = (31^7)*str[0] + (31^3)*str[4]
9302
// + (31^6)*str[1] + (31^2)*str[5]
9303
// + (31^5)*str[2] + (31^1)*str[6]
9304
// + (31^4)*str[3] + (31^0)*str[7]
9305
//
9306
// Factor out [31^3, 31^2, 31^1, 31^0]:
9307
// hash = 31^3*((31^4)*str[0] + str[4]) Vector[0]
9308
// + 31^2*((31^4)*str[1] + str[5]) Vector[1]
9309
// + 31^1*((31^4)*str[2] + str[6]) Vector[2]
9310
// + 31^0*((31^4)*str[3] + str[7]) Vector[3]
9311
//
9312
// Keep factoring out any 31^4 if possible (this example has no such case). If the string was 12 characters long then:
9313
// 31^3*((31^8)*str[0] + (31^4)*str[4] + (31^0)*str[8]) would become 31^3*(31^4((31^4)*str[0] + str[4]) + (31^0)*str[8])
9314
//
9315
// Vectorization is done by simultaneously calculating the four sums that hash is made of (each -> is a successive step):
9316
// Vector[0] = str[0] -> multiply 31^4 -> add str[4] -> multiply 31^3
9317
// Vector[1] = str[1] -> multiply 31^4 -> add str[5] -> multiply 31^2
9318
// Vector[2] = str[2] -> multiply 31^4 -> add str[6] -> multiply 31^1
9319
// Vector[3] = str[3] -> multiply 31^4 -> add str[7] -> multiply 1
9320
//
9321
// Adding these four vectorized values together produces the required hash.
9322
// If the number of characters in the string is not a multiple of 4, then the remainder of the hash is calculated serially.
9323
//
9324
// Implementation overview:
9325
//
9326
// start_label
9327
// if size < threshold, goto serial_label, current threshold is 4
9328
// xmm0 = load 16 bytes align constant [923521, 923521, 923521, 923521]
9329
// xmm1 = 0
9330
// SSEloop
9331
// xmm2 = decompressed: load 8 byte value in lower 8 bytes.
9332
// compressed: load 4 byte value in lower 4 bytes
9333
// xmm1 = xmm1 * xmm0
9334
// if(isCompressed)
9335
// movzxbd xmm2, xmm2
9336
// else
9337
// movzxwd xmm2, xmm2
9338
// xmm1 = xmm1 + xmm2
9339
// i = i + 4;
9340
// cmp i, end -3
9341
// jl SSEloop
9342
// xmm0 = load 16 bytes align [31^3, 31^2, 31, 1]
9343
// xmm1 = xmm1 * xmm0 value contains [a0, a1, a2, a3]
9344
// xmm0 = xmm1
9345
// xmm0 = xmm0 >> 64 bits
9346
// xmm1 = xmm1 + xmm0 reduce add [a0+a2, a1+a3, .., ...]
9347
// xmm0 = xmm1
9348
// xmm0 = xmm0 >> 32 bits
9349
// xmm1 = xmm1 + xmm0 reduce add [a0+a2 + a1+a3, .., .., ..]
9350
// movd xmm1, GPR1
9351
//
9352
// serial_label
9353
//
9354
// cmp i end
9355
// jge end
9356
// serial_loop
9357
// GPR2 = GPR1
9358
// GPR1 = GPR1 << 5
9359
// GPR1 = GPR1 - GPR2
9360
// GPR2 = load c[i]
9361
// add GPR1, GPR2
9362
// inc i
9363
// cmp i, end
9364
// jl serial_loop
9365
//
9366
// end_label
9367
// Inlined, SSE-vectorized String.hashCode computation; the algorithm and its
// derivation are described in the large block comment preceding this function.
//
// Children: child 0 = the character array, child 1 = the offset (must be a
// constant zero), child 2 = the character count.
//
// Returns the GPR holding the hash, or NULL when inlining is disabled.
static TR::Register* inlineStringHashCode(TR::Node* node, bool isCompressed, TR::CodeGenerator* cg)
   {
   if (!cg->getSupportsInlineStringHashCode())
      {
      return NULL;
      }
   else
      {
      TR_ASSERT(node->getChild(1)->getOpCodeValue() == TR::iconst && node->getChild(1)->getInt() == 0, "String hashcode offset can only be const zero.");

      // size:  characters consumed per vector iteration.
      // shift: converts a character index to a byte offset (0 = Latin-1 bytes,
      //        1 = UTF-16 halfwords).
      const int size = 4;
      auto shift = isCompressed ? 0 : 1;

      auto address = cg->evaluate(node->getChild(0));
      auto length = cg->evaluate(node->getChild(2));
      auto index = cg->allocateRegister();
      auto hash = cg->allocateRegister();
      auto tmp = cg->allocateRegister();
      auto hashXMM = cg->allocateRegister(TR_VRF);
      auto tmpXMM = cg->allocateRegister(TR_VRF);
      auto multiplierXMM = cg->allocateRegister(TR_VRF);

      auto begLabel = generateLabelSymbol(cg);
      auto endLabel = generateLabelSymbol(cg);
      auto loopLabel = generateLabelSymbol(cg);
      begLabel->setStartInternalControlFlow();
      endLabel->setEndInternalControlFlow();
      // Every virtual register live across the internal control flow region
      // must appear in the dependencies attached to the merge label below.
      auto deps = generateRegisterDependencyConditions((uint8_t)6, (uint8_t)6, cg);
      deps->addPreCondition(address, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(index, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(length, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(multiplierXMM, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(tmpXMM, TR::RealRegister::NoReg, cg);
      deps->addPreCondition(hashXMM, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(address, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(index, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(length, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(multiplierXMM, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(tmpXMM, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(hashXMM, TR::RealRegister::NoReg, cg);

      // index = length mod 4; if the residue is zero (the AND set ZF), start
      // with a full group of 4 characters instead (CMOVE loads the constant 4).
      // NOTE(review): a zero-length input would also take the CMOVE path and
      // hash 4 characters preceding index 0 -- presumably callers guarantee
      // length > 0; confirm.
      generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, index, length, cg);
      generateRegImmInstruction(TR::InstOpCode::AND4RegImms, node, index, size-1, cg); // mod size
      generateRegMemInstruction(TR::InstOpCode::CMOVE4RegMem, node, index, generateX86MemoryReference(cg->findOrCreate4ByteConstant(node, size), cg), cg);

      // Prepend zeros
      // Seed hashXMM with the first 'index' characters: load the 4-character
      // window that ends at character 'index', zero the bytes that precede the
      // first character, then widen each character to a 32-bit lane.
         {
         TR::Compilation *comp = cg->comp();

         static uint64_t MASKDECOMPRESSED[] = { 0x0000000000000000ULL, 0xffffffffffffffffULL };
         static uint64_t MASKCOMPRESSED[] = { 0xffffffff00000000ULL, 0x0000000000000000ULL };
         generateRegMemInstruction(isCompressed ? TR::InstOpCode::MOVDRegMem : TR::InstOpCode::MOVQRegMem, node, hashXMM, generateX86MemoryReference(address, index, shift, -(size << shift) + TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);
         // tmp = base of the mask table; indexing it by index's byte offset
         // selects a mask that keeps exactly the valid characters.
         generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, isCompressed ? MASKCOMPRESSED : MASKDECOMPRESSED), cg), cg);

         auto mr = generateX86MemoryReference(tmp, index, shift, 0, cg);
         if (comp->target().cpu.supportsAVX())
            {
            // AVX PAND accepts the unaligned memory operand directly.
            generateRegMemInstruction(TR::InstOpCode::PANDRegMem, node, hashXMM, mr, cg);
            }
         else
            {
            // Legacy SSE: stage the mask through a register via unaligned load.
            generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, tmpXMM, mr, cg);
            generateRegRegInstruction(TR::InstOpCode::PANDRegReg, node, hashXMM, tmpXMM, cg);
            }
         generateRegRegInstruction(isCompressed ? TR::InstOpCode::PMOVZXBDRegReg : TR::InstOpCode::PMOVZXWDRegReg, node, hashXMM, hashXMM, cg);
         }

      // Reduction Loop
      // Per iteration: hashXMM = hashXMM * 31^4 (per lane) + next 4 characters,
      // covering characters [index, length) in groups of 4.
         {
         static uint32_t multiplier[] = { 31*31*31*31, 31*31*31*31, 31*31*31*31, 31*31*31*31 };
         generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);
         generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, index, length, cg);
         generateLabelInstruction(TR::InstOpCode::JGE4, node, endLabel, cg);
         generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, multiplierXMM, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, multiplier), cg), cg);
         generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);
         generateRegRegInstruction(TR::InstOpCode::PMULLDRegReg, node, hashXMM, multiplierXMM, cg);
         generateRegMemInstruction(isCompressed ? TR::InstOpCode::PMOVZXBDRegMem : TR::InstOpCode::PMOVZXWDRegMem, node, tmpXMM, generateX86MemoryReference(address, index, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);
         generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, index, 4, cg);
         generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);
         generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, index, length, cg);
         generateLabelInstruction(TR::InstOpCode::JL4, node, loopLabel, cg);
         generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
         }

      // Finalization
      // Weight the four partial sums by [31^3, 31^2, 31, 1], then reduce-add
      // across lanes: PSHUFD 0x0e folds the high quadword onto the low one,
      // PSHUFD 0x01 folds lane 1 onto lane 0.
         {
         static uint32_t multiplier[] = { 31*31*31, 31*31, 31, 1 };
         generateRegMemInstruction(TR::InstOpCode::PMULLDRegMem, node, hashXMM, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, multiplier), cg), cg);
         generateRegRegImmInstruction(TR::InstOpCode::PSHUFDRegRegImm1, node, tmpXMM, hashXMM, 0x0e, cg);
         generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);
         generateRegRegImmInstruction(TR::InstOpCode::PSHUFDRegRegImm1, node, tmpXMM, hashXMM, 0x01, cg);
         generateRegRegInstruction(TR::InstOpCode::PADDDRegReg, node, hashXMM, tmpXMM, cg);
         }

      // Move the finished 32-bit hash from lane 0 into a GPR.
      generateRegRegInstruction(TR::InstOpCode::MOVDReg4Reg, node, hash, hashXMM, cg);

      cg->stopUsingRegister(index);
      cg->stopUsingRegister(tmp);
      cg->stopUsingRegister(hashXMM);
      cg->stopUsingRegister(tmpXMM);
      cg->stopUsingRegister(multiplierXMM);

      node->setRegister(hash);
      cg->decReferenceCount(node->getChild(0));
      cg->recursivelyDecReferenceCount(node->getChild(1));
      cg->decReferenceCount(node->getChild(2));
      return hash;
      }
   }
9476
9477
static bool
9478
getNodeIs64Bit(
9479
TR::Node *node,
9480
TR::CodeGenerator *cg)
9481
{
9482
/* This function is intended to allow existing 32-bit instruction selection code
9483
* to be reused, almost unchanged, to do the corresponding 64-bit logic on AMD64.
9484
* It compiles away to nothing on IA32, thus preserving performance and code size
9485
* on IA32, while allowing the logic to be generalized to suit AMD64.
9486
*
9487
* Don't use this function for 64-bit logic on IA32; instead, either (1) use
9488
* separate logic, or (2) use a different test for 64-bitness. Usually this is
9489
* not a hindrance, because 64-bit code on IA32 uses register pairs and other
9490
* things that are totally different from their 32-bit counterparts.
9491
*/
9492
9493
TR_ASSERT(cg->comp()->target().is64Bit() || node->getSize() <= 4, "64-bit nodes on 32-bit platforms shouldn't use getNodeIs64Bit");
9494
return cg->comp()->target().is64Bit() && node->getSize() > 4;
9495
}
9496
9497
static
9498
TR::Register *intOrLongClobberEvaluate(
9499
TR::Node *node,
9500
bool nodeIs64Bit,
9501
TR::CodeGenerator *cg)
9502
{
9503
if (nodeIs64Bit)
9504
{
9505
TR_ASSERT(getNodeIs64Bit(node, cg), "nodeIs64Bit must be consistent with node size");
9506
return cg->longClobberEvaluate(node);
9507
}
9508
else
9509
{
9510
TR_ASSERT(!getNodeIs64Bit(node, cg), "nodeIs64Bit must be consistent with node size");
9511
return cg->intClobberEvaluate(node);
9512
}
9513
}
9514
9515
/**
9516
* \brief
9517
* Generate inlined instructions equivalent to com/ibm/jit/JITHelpers.intrinsicIndexOfLatin1 or com/ibm/jit/JITHelpers.intrinsicIndexOfUTF16
9518
*
9519
* \param node
9520
* The tree node
9521
*
9522
* \param cg
9523
* The Code Generator
9524
*
9525
* \param isLatin1
9526
* True when the string is Latin1, False when the string is UTF16
9527
*
9528
* Note that this version does not support discontiguous arrays
9529
*/
9530
static TR::Register* inlineIntrinsicIndexOf(TR::Node* node, TR::CodeGenerator* cg, bool isLatin1)
   {
   // PSHUFB control masks that broadcast the search character sitting in
   // element 0 of an XMM register: byte 0 into every byte (Latin-1), or bytes
   // {0,1} into every halfword (UTF-16).
   static uint8_t MASKOFSIZEONE[] =
      {
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00,
      };
   static uint8_t MASKOFSIZETWO[] =
      {
      0x00, 0x01, 0x00, 0x01,
      0x00, 0x01, 0x00, 0x01,
      0x00, 0x01, 0x00, 0x01,
      0x00, 0x01, 0x00, 0x01,
      };

   // width: bytes examined per vector chunk; shift: log2(bytes per character).
   uint8_t width = 16;
   uint8_t shift = 0;
   uint8_t* shuffleMask = NULL;
   auto compareOp = TR::InstOpCode::bad;
   if(isLatin1)
      {
      shuffleMask = MASKOFSIZEONE;
      compareOp = TR::InstOpCode::PCMPEQBRegReg;
      shift = 0;
      }
   else
      {
      shuffleMask = MASKOFSIZETWO;
      compareOp = TR::InstOpCode::PCMPEQWRegReg;
      shift = 1;
      }

   // Children: 0 = receiver (dropped without evaluation below), 1 = array,
   // 2 = character to find, 3 = starting offset, 4 = length; offset/length
   // are in characters.
   auto array = cg->evaluate(node->getChild(1));
   auto ch = cg->evaluate(node->getChild(2));
   auto offset = cg->evaluate(node->getChild(3));
   auto length = cg->evaluate(node->getChild(4));

   auto ECX = cg->allocateRegister();
   auto result = cg->allocateRegister();
   auto scratch = cg->allocateRegister();
   auto scratchXMM = cg->allocateRegister(TR_VRF);
   auto valueXMM = cg->allocateRegister(TR_VRF);

   // ECX must be assigned the real ecx because SHR4RegCL below shifts by CL;
   // everything else only needs to survive the internal control flow region.
   auto dependencies = generateRegisterDependencyConditions((uint8_t)7, (uint8_t)7, cg);
   dependencies->addPreCondition(ECX, TR::RealRegister::ecx, cg);
   dependencies->addPreCondition(array, TR::RealRegister::NoReg, cg);
   dependencies->addPreCondition(length, TR::RealRegister::NoReg, cg);
   dependencies->addPreCondition(result, TR::RealRegister::NoReg, cg);
   dependencies->addPreCondition(scratch, TR::RealRegister::NoReg, cg);
   dependencies->addPreCondition(scratchXMM, TR::RealRegister::NoReg, cg);
   dependencies->addPreCondition(valueXMM, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(ECX, TR::RealRegister::ecx, cg);
   dependencies->addPostCondition(array, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(length, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(result, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(scratch, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(scratchXMM, TR::RealRegister::NoReg, cg);
   dependencies->addPostCondition(valueXMM, TR::RealRegister::NoReg, cg);

   auto begLabel = generateLabelSymbol(cg);
   auto endLabel = generateLabelSymbol(cg);
   auto loopLabel = generateLabelSymbol(cg);
   begLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   // Broadcast the search character into every lane of valueXMM.
   generateRegRegInstruction(TR::InstOpCode::MOVDRegReg4, node, valueXMM, ch, cg);
   generateRegMemInstruction(TR::InstOpCode::PSHUFBRegMem, node, valueXMM, generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, shuffleMask), cg), cg);

   // result tracks the current character index; start the search at 'offset'.
   generateRegRegInstruction(TR::InstOpCode::MOV4RegReg, node, result, offset, cg);

   // Peel a first, possibly misaligned, iteration: round the starting address
   // down to a 16-byte boundary (scratch) and keep the misalignment in bytes
   // (ECX); if already aligned (AND set ZF), fall straight into the main loop.
   generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, scratch, generateX86MemoryReference(array, result, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, ECX, scratch, cg);
   generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, scratch, ~(width - 1), cg);
   generateRegImmInstruction(TR::InstOpCode::ANDRegImms(), node, ECX, width - 1, cg);
   generateLabelInstruction(TR::InstOpCode::JE1, node, loopLabel, cg);

   // Compare the aligned 16-byte chunk, collect one match bit per byte with
   // PMOVMSKB, then shift the mask right by the misalignment (CL) so bits for
   // bytes before the search start are discarded.
   generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, scratchXMM, generateX86MemoryReference(scratch, 0, cg), cg);
   generateRegRegInstruction(compareOp, node, scratchXMM, valueXMM, cg);
   generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, scratch, scratchXMM, cg);
   generateRegInstruction(TR::InstOpCode::SHR4RegCL, node, scratch, cg);
   generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, scratch, scratch, cg);
   generateLabelInstruction(TR::InstOpCode::JNE1, node, endLabel, cg);
   if (shift)
      {
      // Convert the misalignment from bytes to characters.
      generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, ECX, shift, cg);
      }
   // Advance the character index to the next 16-byte boundary.
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, result, width >> shift, cg);
   generateRegRegInstruction(TR::InstOpCode::SUB4RegReg, node, result, ECX, cg);
   generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, result, length, cg);
   generateLabelInstruction(TR::InstOpCode::JGE1, node, endLabel, cg);

   // Main loop: all loads are now 16-byte aligned; scan one chunk per
   // iteration until a lane matches or the index reaches 'length'.
   generateLabelInstruction(TR::InstOpCode::label, node, loopLabel, cg);
   generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, scratchXMM, generateX86MemoryReference(array, result, shift, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), cg);
   generateRegRegInstruction(compareOp, node, scratchXMM, valueXMM, cg);
   generateRegRegInstruction(TR::InstOpCode::PMOVMSKB4RegReg, node, scratch, scratchXMM, cg);
   generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, scratch, scratch, cg);
   generateLabelInstruction(TR::InstOpCode::JNE1, node, endLabel, cg);
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImms, node, result, width >> shift, cg);
   generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, result, length, cg);
   generateLabelInstruction(TR::InstOpCode::JL1, node, loopLabel, cg);
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, dependencies, cg);

   // scratch holds the match mask of the last chunk examined; BSF locates the
   // lowest set bit, i.e. the byte position of the first match, which is then
   // converted to characters and added to the index.
   // NOTE(review): on the no-match path scratch is 0 and BSF's destination is
   // architecturally undefined for a zero source; result then compares
   // >= length, so the CMOVGE below yields -1 regardless -- confirm this
   // reliance is intended.
   generateRegRegInstruction(TR::InstOpCode::BSF4RegReg, node, scratch, scratch, cg);
   if (shift)
      {
      generateRegImmInstruction(TR::InstOpCode::SHR4RegImm1, node, scratch, shift, cg);
      }
   generateRegRegInstruction(TR::InstOpCode::ADDRegReg(), node, result, scratch, cg);
   // A "match" at or beyond 'length' means not found: replace with -1.
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, result, length, cg);
   generateRegMemInstruction(TR::InstOpCode::CMOVGERegMem(), node, result, generateX86MemoryReference(cg->comp()->target().is32Bit() ? cg->findOrCreate4ByteConstant(node, -1) : cg->findOrCreate8ByteConstant(node, -1), cg), cg);

   cg->stopUsingRegister(ECX);
   cg->stopUsingRegister(scratch);
   cg->stopUsingRegister(scratchXMM);
   cg->stopUsingRegister(valueXMM);

   node->setRegister(result);
   cg->recursivelyDecReferenceCount(node->getChild(0));
   cg->decReferenceCount(node->getChild(1));
   cg->decReferenceCount(node->getChild(2));
   cg->decReferenceCount(node->getChild(3));
   cg->decReferenceCount(node->getChild(4));
   return result;
   }
9658
9659
/**
9660
* \brief
9661
* Generate inlined instructions equivalent to sun/misc/Unsafe.compareAndSwapObject or jdk/internal/misc/Unsafe.compareAndSwapObject
9662
*
9663
* \param node
9664
* The tree node
9665
*
9666
* \param cg
9667
* The Code Generator
9668
*
9669
*/
9670
static TR::Register* inlineCompareAndSwapObjectNative(TR::Node* node, TR::CodeGenerator* cg)
   {
   TR::Compilation *comp = cg->comp();

   TR_ASSERT(!TR::Compiler->om.canGenerateArraylets() || node->isUnsafeGetPutCASCallOnNonArray(), "This evaluator does not support arraylets.");

   cg->recursivelyDecReferenceCount(node->getChild(0)); // The Unsafe
   TR::Node* objectNode = node->getChild(1);
   TR::Node* offsetNode = node->getChild(2);
   TR::Node* oldValueNode = node->getChild(3);
   TR::Node* newValueNode = node->getChild(4);

   TR::Register* object = cg->evaluate(objectNode);
   TR::Register* offset = cg->evaluate(offsetNode);
   TR::Register* oldValue = cg->evaluate(oldValueNode);
   TR::Register* newValue = cg->evaluate(newValueNode);
   TR::Register* result = cg->allocateRegister();
   TR::Register* EAX = cg->allocateRegister();
   TR::Register* tmp = cg->allocateRegister();

   // References occupy a full 8 bytes only on 64-bit without compressed refs.
   bool use64BitClasses = comp->target().is64Bit() && !comp->useCompressedPointers();

   if (comp->target().is32Bit())
      {
      // The evaluated long offset is a register pair on 32-bit; use its low half.
      // Assume that the offset is positive and not pathologically large (i.e., > 2^31).
      offset = offset->getLowOrder();
      }

#if defined(OMR_GC_CONCURRENT_SCAVENGER)
   switch (TR::Compiler->om.readBarrierType())
      {
      case gc_modron_readbar_none:
         break;
      case gc_modron_readbar_always:
         // Unconditionally invoke the software read barrier on the field
         // address, passed to the helper via vmThread->floatTemp1.
         generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, generateX86MemoryReference(object, offset, 0, cg), cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), tmp, cg);
         generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);
         break;
      case gc_modron_readbar_range_check:
         {
         // Invoke the read barrier only when the current field value lies in
         // the evacuate region [base, top]; the barrier call is kept out of line.
         generateRegMemInstruction(TR::InstOpCode::LRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(object, offset, 0, cg), cg);

         TR::LabelSymbol* begLabel = generateLabelSymbol(cg);
         TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
         TR::LabelSymbol* rdbarLabel = generateLabelSymbol(cg);
         begLabel->setStartInternalControlFlow();
         endLabel->setEndInternalControlFlow();

         TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)1, 1, cg);
         deps->addPreCondition(tmp, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(tmp, TR::RealRegister::NoReg, cg);

         generateLabelInstruction(TR::InstOpCode::label, node, begLabel, cg);

         generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateBaseAddressOffset(), cg), cg);
         generateLabelInstruction(TR::InstOpCode::JAE4, node, rdbarLabel, cg);

            {
            // Out-of-line path: value >= evacuate base; re-check against the
            // top and call the barrier if the value is inside the region.
            TR_OutlinedInstructionsGenerator og(rdbarLabel, node, cg);
            generateRegMemInstruction(TR::InstOpCode::CMPRegMem(use64BitClasses), node, tmp, generateX86MemoryReference(cg->getVMThreadRegister(), comp->fej9()->thisThreadGetEvacuateTopAddressOffset(), cg), cg);
            generateLabelInstruction(TR::InstOpCode::JA4, node, endLabel, cg);
            generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, tmp, generateX86MemoryReference(object, offset, 0, cg), cg);
            generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), tmp, cg);
            generateHelperCallInstruction(node, TR_softwareReadBarrier, NULL, cg);
            generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);

            og.endOutlinedInstructionSequence();
            }

         generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
         }
         break;
      default:
         TR_ASSERT(false, "Unsupported Read Barrier Type.");
         break;
      }
#endif

   // CMPXCHG implicitly compares against (and on failure updates) eax, so
   // stage the old value there; stage the new value in tmp so that shifting
   // for compressed refs does not clobber the evaluated children.
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, EAX, oldValue, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tmp, newValue, cg);
   if (TR::Compiler->om.compressedReferenceShiftOffset() != 0)
      {
      // Compress the reference values; a known-null value needs no shift.
      if (!oldValueNode->isNull())
         {
         generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, EAX, TR::Compiler->om.compressedReferenceShiftOffset(), cg);
         }
      if (!newValueNode->isNull())
         {
         generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tmp, TR::Compiler->om.compressedReferenceShiftOffset(), cg);
         }
      }

   // Locked compare-and-exchange on the field; result = 1 on success, 0 on
   // failure (SETE reads ZF from the CMPXCHG, MOVZX widens to 32 bits).
   TR::RegisterDependencyConditions* deps = generateRegisterDependencyConditions((uint8_t)1, 1, cg);
   deps->addPreCondition(EAX, TR::RealRegister::eax, cg);
   deps->addPostCondition(EAX, TR::RealRegister::eax, cg);
   generateMemRegInstruction(use64BitClasses ? TR::InstOpCode::LCMPXCHG8MemReg : TR::InstOpCode::LCMPXCHG4MemReg, node, generateX86MemoryReference(object, offset, 0, cg), tmp, deps, cg);
   generateRegInstruction(TR::InstOpCode::SETE1Reg, node, result, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, result, result, cg);

   // We could insert a runtime test for whether the write actually succeeded or not.
   // However, since in practice it will almost always succeed we do not want to
   // penalize general runtime performance especially if it is still correct to do
   // a write barrier even if the store never actually happened.
   TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(node, objectNode, newValueNode, NULL, cg->generateScratchRegisterManager(), cg);

   cg->stopUsingRegister(tmp);
   cg->stopUsingRegister(EAX);
   node->setRegister(result);
   // Child 0 was released above; release the four evaluated children.
   for (int32_t i = 1; i < node->getNumChildren(); i++)
      {
      cg->decReferenceCount(node->getChild(i));
      }
   return result;
   }
9784
9785
/** Replaces a call to an Unsafe CAS method with inline instructions.
9786
@return true if the call was replaced, false if it was not.
9787
9788
Note that this function must have behaviour consistent with the OMR function
9789
willNotInlineCompareAndSwapNative in omr/compiler/x/codegen/OMRCodeGenerator.cpp
9790
*/
9791
static bool
9792
inlineCompareAndSwapNative(
9793
TR::Node *node,
9794
int8_t size,
9795
bool isObject,
9796
TR::CodeGenerator *cg)
9797
{
9798
TR::Node *firstChild = node->getFirstChild();
9799
TR::Node *objectChild = node->getSecondChild();
9800
TR::Node *offsetChild = node->getChild(2);
9801
TR::Node *oldValueChild = node->getChild(3);
9802
TR::Node *newValueChild = node->getChild(4);
9803
TR::Compilation *comp = cg->comp();
9804
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
9805
9806
TR::InstOpCode::Mnemonic op;
9807
9808
if (TR::Compiler->om.canGenerateArraylets() && !node->isUnsafeGetPutCASCallOnNonArray())
9809
return false;
9810
9811
static char *disableCASInlining = feGetEnv("TR_DisableCASInlining");
9812
9813
if (disableCASInlining /* || comp->useCompressedPointers() */)
9814
return false;
9815
9816
// size = 4 --> CMPXCHG4
9817
// size = 8 --> if 64-bit -> CMPXCHG8
9818
// else if proc supports CMPXCHG8B -> CMPXCHG8B
9819
// else return false
9820
//
9821
// Do this early so we can return early without additional evaluations.
9822
//
9823
if (size == 4)
9824
{
9825
op = TR::InstOpCode::LCMPXCHG4MemReg;
9826
}
9827
else if (size == 8 && comp->target().is64Bit())
9828
{
9829
op = TR::InstOpCode::LCMPXCHG8MemReg;
9830
}
9831
else
9832
{
9833
if (!comp->target().cpu.supportsFeature(OMR_FEATURE_X86_CX8))
9834
return false;
9835
9836
op = TR::InstOpCode::LCMPXCHG8BMem;
9837
}
9838
9839
// In Java9 the sun.misc.Unsafe JNI methods have been moved to jdk.internal,
9840
// with a set of wrappers remaining in sun.misc to delegate to the new package.
9841
// We can be called in this function for the wrappers (which we will
9842
// not be converting to assembly), the new jdk.internal JNI methods or the
9843
// Java8 sun.misc JNI methods (both of which we will convert). We can
9844
// differentiate between these cases by testing with isNative() on the method.
9845
{
9846
TR::MethodSymbol *methodSymbol = node->getSymbol()->getMethodSymbol();
9847
if (methodSymbol && !methodSymbol->isNative())
9848
return false;
9849
}
9850
9851
cg->recursivelyDecReferenceCount(firstChild);
9852
9853
TR::Register *objectReg = cg->evaluate(objectChild);
9854
9855
TR::Register *offsetReg = NULL;
9856
int32_t offset = 0;
9857
9858
if (offsetChild->getOpCode().isLoadConst() && !offsetChild->getRegister() && IS_32BIT_SIGNED(offsetChild->getLongInt()))
9859
{
9860
offset = (int32_t)(offsetChild->getLongInt());
9861
}
9862
else
9863
{
9864
offsetReg = cg->evaluate(offsetChild);
9865
9866
// Assume that the offset is positive and not pathologically large (i.e., > 2^31).
9867
//
9868
if (comp->target().is32Bit())
9869
offsetReg = offsetReg->getLowOrder();
9870
}
9871
cg->decReferenceCount(offsetChild);
9872
9873
TR::MemoryReference *mr;
9874
9875
if (offsetReg)
9876
mr = generateX86MemoryReference(objectReg, offsetReg, 0, cg);
9877
else
9878
mr = generateX86MemoryReference(objectReg, offset, cg);
9879
9880
bool bumpedRefCount = false;
9881
TR::Node *translatedNode = newValueChild;
9882
if (comp->useCompressedPointers() &&
9883
isObject &&
9884
(newValueChild->getDataType() != TR::Address))
9885
{
9886
bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);
9887
9888
translatedNode = newValueChild;
9889
if (translatedNode->getOpCode().isConversion())
9890
translatedNode = translatedNode->getFirstChild();
9891
if (translatedNode->getOpCode().isRightShift()) // optional
9892
translatedNode = translatedNode->getFirstChild();
9893
9894
translatedNode = newValueChild;
9895
if (useShiftedOffsets)
9896
{
9897
while ((translatedNode->getNumChildren() > 0) &&
9898
(translatedNode->getOpCodeValue() != TR::a2l))
9899
translatedNode = translatedNode->getFirstChild();
9900
9901
if (translatedNode->getOpCodeValue() == TR::a2l)
9902
translatedNode = translatedNode->getFirstChild();
9903
9904
// this is required so that different registers are
9905
// allocated for the actual store and translated values
9906
bumpedRefCount = true;
9907
translatedNode->incReferenceCount();
9908
}
9909
}
9910
9911
TR::Register *newValueRegister = cg->evaluate(newValueChild);
9912
9913
TR::Register *oldValueRegister = (size == 8) ?
9914
cg->longClobberEvaluate(oldValueChild) : cg->intClobberEvaluate(oldValueChild);
9915
bool killOldValueRegister = (oldValueChild->getReferenceCount() > 1) ? true : false;
9916
cg->decReferenceCount(oldValueChild);
9917
9918
TR::RegisterDependencyConditions *deps;
9919
TR_X86ScratchRegisterManager *scratchRegisterManagerForRealTime = NULL;
9920
TR::Register *storeAddressRegForRealTime = NULL;
9921
9922
if (comp->getOptions()->realTimeGC() && isObject)
9923
{
9924
scratchRegisterManagerForRealTime = cg->generateScratchRegisterManager();
9925
9926
// If reference is unresolved, need to resolve it right here before the barrier starts
9927
// Otherwise, we could get stopped during the resolution and that could invalidate any tests we would have performend
9928
// beforehand
9929
// For simplicity, just evaluate the store address into storeAddressRegForRealTime right now
9930
storeAddressRegForRealTime = scratchRegisterManagerForRealTime->findOrCreateScratchRegister();
9931
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, storeAddressRegForRealTime, mr, cg);
9932
if (node->getSymbolReference()->isUnresolved())
9933
{
9934
TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);
9935
9936
// storeMR was created against a (i)wrtbar node which is a store. The unresolved data snippet that
9937
// was created set the checkVolatility bit based on that node being a store. Since the resolution
9938
// is now going to occur on a LEA instruction, which does not require any memory fence and hence
9939
// no volatility check, we need to clear that "store" ness of the unresolved data snippet
9940
TR::UnresolvedDataSnippet *snippet = mr->getUnresolvedDataSnippet();
9941
if (snippet)
9942
snippet->resetUnresolvedStore();
9943
}
9944
9945
TR::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(
9946
node,
9947
mr,
9948
storeAddressRegForRealTime,
9949
objectChild,
9950
translatedNode,
9951
NULL,
9952
scratchRegisterManagerForRealTime,
9953
cg);
9954
}
9955
9956
TR::MemoryReference *cmpxchgMR = mr;
9957
9958
if (op == TR::InstOpCode::LCMPXCHG8BMem)
9959
{
9960
int numDeps = 4;
9961
if (storeAddressRegForRealTime != NULL)
9962
{
9963
numDeps++;
9964
cmpxchgMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);
9965
}
9966
9967
if (scratchRegisterManagerForRealTime)
9968
numDeps += scratchRegisterManagerForRealTime->numAvailableRegisters();
9969
9970
deps = generateRegisterDependencyConditions(numDeps, numDeps, cg);
9971
deps->addPreCondition(oldValueRegister->getLowOrder(), TR::RealRegister::eax, cg);
9972
deps->addPreCondition(oldValueRegister->getHighOrder(), TR::RealRegister::edx, cg);
9973
deps->addPreCondition(newValueRegister->getLowOrder(), TR::RealRegister::ebx, cg);
9974
deps->addPreCondition(newValueRegister->getHighOrder(), TR::RealRegister::ecx, cg);
9975
deps->addPostCondition(oldValueRegister->getLowOrder(), TR::RealRegister::eax, cg);
9976
deps->addPostCondition(oldValueRegister->getHighOrder(), TR::RealRegister::edx, cg);
9977
deps->addPostCondition(newValueRegister->getLowOrder(), TR::RealRegister::ebx, cg);
9978
deps->addPostCondition(newValueRegister->getHighOrder(), TR::RealRegister::ecx, cg);
9979
9980
if (scratchRegisterManagerForRealTime)
9981
scratchRegisterManagerForRealTime->addScratchRegistersToDependencyList(deps);
9982
9983
deps->stopAddingConditions();
9984
9985
generateMemInstruction(op, node, cmpxchgMR, deps, cg);
9986
}
9987
else
9988
{
9989
int numDeps = 1;
9990
if (storeAddressRegForRealTime != NULL)
9991
{
9992
numDeps++;
9993
cmpxchgMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);
9994
}
9995
9996
if (scratchRegisterManagerForRealTime)
9997
numDeps += scratchRegisterManagerForRealTime->numAvailableRegisters();
9998
9999
deps = generateRegisterDependencyConditions(numDeps, numDeps, cg);
10000
deps->addPreCondition(oldValueRegister, TR::RealRegister::eax, cg);
10001
deps->addPostCondition(oldValueRegister, TR::RealRegister::eax, cg);
10002
10003
if (scratchRegisterManagerForRealTime)
10004
scratchRegisterManagerForRealTime->addScratchRegistersToDependencyList(deps);
10005
10006
deps->stopAddingConditions();
10007
10008
generateMemRegInstruction(op, node, cmpxchgMR, newValueRegister, deps, cg);
10009
}
10010
10011
if (killOldValueRegister)
10012
cg->stopUsingRegister(oldValueRegister);
10013
10014
if (storeAddressRegForRealTime)
10015
scratchRegisterManagerForRealTime->reclaimScratchRegister(storeAddressRegForRealTime);
10016
10017
TR::Register *resultReg = cg->allocateRegister();
10018
generateRegInstruction(TR::InstOpCode::SETE1Reg, node, resultReg, cg);
10019
generateRegRegInstruction(TR::InstOpCode::MOVZXReg4Reg1, node, resultReg, resultReg, cg);
10020
10021
// Non-realtime: Generate a write barrier for this kind of object.
10022
//
10023
if (!comp->getOptions()->realTimeGC() && isObject)
10024
{
10025
// We could insert a runtime test for whether the write actually succeeded or not.
10026
// However, since in practice it will almost always succeed we do not want to
10027
// penalize general runtime performance especially if it is still correct to do
10028
// a write barrier even if the store never actually happened.
10029
//
10030
// A branch
10031
//
10032
TR_X86ScratchRegisterManager *scratchRegisterManager = cg->generateScratchRegisterManager();
10033
10034
TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(
10035
node,
10036
objectChild,
10037
translatedNode,
10038
NULL,
10039
scratchRegisterManager,
10040
cg);
10041
}
10042
10043
node->setRegister(resultReg);
10044
10045
cg->decReferenceCount(newValueChild);
10046
cg->decReferenceCount(objectChild);
10047
if (bumpedRefCount)
10048
cg->decReferenceCount(translatedNode);
10049
10050
return true;
10051
}
10052
10053
10054
// Generate inline code if possible for a call to an inline method. The call
10055
// may be direct or indirect; if it is indirect a guard will be generated around
10056
// the inline code and a fall-back to the indirect call.
10057
// Returns true if the call was inlined, otherwise a regular call sequence must
10058
// be issued by the caller of this method.
10059
//
10060
bool J9::X86::TreeEvaluator::VMinlineCallEvaluator(
      TR::Node *node,
      bool isIndirect,
      TR::CodeGenerator *cg)
   {
   // Attempt to replace a recognized method call with an inline instruction
   // sequence.  Returns true if the call was fully inlined (the caller must
   // not emit the regular call sequence); returns false to request a normal
   // call.  The node's register/reference-count bookkeeping is completed by
   // whichever path handles the node.
   TR::MethodSymbol *methodSymbol = node->getSymbol()->castToMethodSymbol();
   TR::ResolvedMethodSymbol *resolvedMethodSymbol = node->getSymbol()->getResolvedMethodSymbol();

   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());

   bool callWasInlined = false;
   TR::Compilation *comp = cg->comp();

   if (methodSymbol)
      {
      switch (methodSymbol->getRecognizedMethod())
         {
         case TR::sun_nio_ch_NativeThread_current:
            // The spec says that on systems that do not require signaling
            // that this method should return -1. I'm not sure what do realtime
            // systems do here
            if (!comp->getOptions()->realTimeGC() && node->getNumChildren()>0)
               {
               // Inline: load the OS thread handle out of the J9VMThread.
               TR::Register *nativeThreadReg = cg->allocateRegister();
               TR::Register *nativeThreadRegHigh = NULL;
               TR::Register *vmThreadReg = cg->getVMThreadRegister();
               int32_t numDeps = 2;

               if (comp->target().is32Bit())
                  {
                  // 32-bit needs a register pair to hold the 64-bit handle.
                  nativeThreadRegHigh = cg->allocateRegister();
                  numDeps ++;
                  }

               TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, (uint8_t)numDeps, cg);
               deps->addPostCondition(nativeThreadReg, TR::RealRegister::NoReg, cg);
               if (comp->target().is32Bit())
                  {
                  deps->addPostCondition(nativeThreadRegHigh, TR::RealRegister::NoReg, cg);
                  }
               deps->addPostCondition(vmThreadReg, TR::RealRegister::ebp, cg);

               if (comp->target().is64Bit())
                  {
                  TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
                  TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
                  startLabel->setStartInternalControlFlow();
                  generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

                  // nativeThreadReg = vmThread->osThread
                  generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, nativeThreadReg,
                                            generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg), cg);
                  // nativeThreadReg = osThread->handle
                  generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, nativeThreadReg,
                                            generateX86MemoryReference(nativeThreadReg, offsetof(J9Thread, handle), cg), cg);
                  doneLabel->setEndInternalControlFlow();
                  generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);
                  }
               else
                  {
                  // 32-bit: load the 64-bit osThread pointer / handle in two
                  // 4-byte halves.
                  TR::MemoryReference *lowMR = generateX86MemoryReference(vmThreadReg, fej9->thisThreadOSThreadOffset(), cg);
                  TR::MemoryReference *highMR = generateX86MemoryReference(*lowMR, 4, cg);

                  TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
                  TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
                  startLabel->setStartInternalControlFlow();
                  generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

                  generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadReg, lowMR, cg);
                  generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadRegHigh, highMR, cg);

                  TR::MemoryReference *lowHandleMR = generateX86MemoryReference(nativeThreadReg, offsetof(J9Thread, handle), cg);
                  // NOTE(review): highHandleMR is derived from *lowMR (the
                  // osThread slot) rather than *lowHandleMR (the handle slot).
                  // This looks suspicious — confirm whether the high half of
                  // the handle was intended here.
                  TR::MemoryReference *highHandleMR = generateX86MemoryReference(*lowMR, 4, cg);

                  generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadReg, lowHandleMR, cg);
                  generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, nativeThreadRegHigh, highHandleMR, cg);

                  doneLabel->setEndInternalControlFlow();
                  generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);
                  }

               if (comp->target().is32Bit())
                  {
                  TR::RegisterPair *longRegister = cg->allocateRegisterPair(nativeThreadReg, nativeThreadRegHigh);
                  node->setRegister(longRegister);
                  }
               else
                  {
                  node->setRegister(nativeThreadReg);
                  }
               cg->recursivelyDecReferenceCount(node->getFirstChild());
               return true;
               }
            return false; // Call the native version of NativeThread.current()

         case TR::jdk_internal_misc_Unsafe_copyMemory0:
         case TR::sun_misc_Unsafe_copyMemory:
            {
            // Transform Unsafe.copyMemory(src, srcOff, dest, destOff, len)
            // into an arraycopy over computed addresses when permitted.
            if (comp->canTransformUnsafeCopyToArrayCopy()
               && methodSymbol->isNative()
               && performTransformation(comp, "O^O Call arraycopy instead of Unsafe.copyMemory: %s\n", cg->getDebug()->getName(node)))
               {
               TR::Node *src = node->getChild(1);
               TR::Node *srcOffset = node->getChild(2);
               TR::Node *dest = node->getChild(3);
               TR::Node *destOffset = node->getChild(4);
               TR::Node *len = node->getChild(5);

               if (comp->target().is32Bit())
                  {
                  // Narrow 64-bit offsets/length for 32-bit address math.
                  srcOffset = TR::Node::create(TR::l2i, 1, srcOffset);
                  destOffset = TR::Node::create(TR::l2i, 1, destOffset);
                  len = TR::Node::create(TR::l2i, 1, len);
                  src = TR::Node::create(TR::aiadd, 2, src, srcOffset);
                  dest = TR::Node::create(TR::aiadd, 2, dest, destOffset);
                  }
               else
                  {
                  src = TR::Node::create(TR::aladd, 2, src, srcOffset);
                  dest = TR::Node::create(TR::aladd, 2, dest, destOffset);
                  }

               TR::Node *arraycopyNode = TR::Node::createArraycopy(src, dest, len);
               TR::TreeEvaluator::arraycopyEvaluator(arraycopyNode,cg);

               // Child 0 is the Unsafe receiver, which the arraycopy did not
               // consume; release it either way.
               if (node->getChild(0)->getRegister())
                  cg->decReferenceCount(node->getChild(0));
               else
                  node->getChild(0)->recursivelyDecReferenceCount();

               cg->decReferenceCount(node->getChild(1));
               cg->decReferenceCount(node->getChild(2));
               cg->decReferenceCount(node->getChild(3));
               cg->decReferenceCount(node->getChild(4));
               cg->decReferenceCount(node->getChild(5));

               return true;
               }
            return false; // Perform the original Unsafe.copyMemory call
            }

         case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
            {
            if(node->isSafeForCGToFastPathUnsafeCall())
               return inlineCompareAndSwapNative(node, 4, false, cg);
            }
            break;

         case TR::sun_misc_Unsafe_compareAndSwapLong_jlObjectJJJ_Z:
            {
            if(node->isSafeForCGToFastPathUnsafeCall())
               return inlineCompareAndSwapNative(node, 8, false, cg);
            }
            break;

         case TR::sun_misc_Unsafe_compareAndSwapObject_jlObjectJjlObjectjlObject_Z:
            {
            // Env var escape hatch to fall back to the old CAS-object path.
            static bool UseOldCompareAndSwapObject = (bool)feGetEnv("TR_UseOldCompareAndSwapObject");
            if(node->isSafeForCGToFastPathUnsafeCall())
               {
               if (UseOldCompareAndSwapObject)
                  // Reference width: 8 bytes only on non-compressed 64-bit.
                  return inlineCompareAndSwapNative(node, (comp->target().is64Bit() && !comp->useCompressedPointers()) ? 8 : 4, true, cg);
               else
                  {
                  inlineCompareAndSwapObjectNative(node, cg);
                  return true;
                  }
               }
            }
            break;

         case TR::java_util_concurrent_atomic_Fences_reachabilityFence:
            {
            // No code needed; just release the argument.
            cg->decReferenceCount(node->getChild(0));
            break;
            }

         case TR::java_util_concurrent_atomic_Fences_orderAccesses:
            {
            if (comp->target().cpu.supportsMFence())
               {
               TR::InstOpCode fenceOp;
               fenceOp.setOpCodeValue(TR::InstOpCode::MFENCE);
               generateInstruction(fenceOp.getOpCodeValue(), node, cg);
               }

            cg->decReferenceCount(node->getChild(0));
            break;
            }

         case TR::java_util_concurrent_atomic_Fences_orderReads:
            {
            if (comp->target().cpu.requiresLFence() &&
                comp->target().cpu.supportsLFence())
               {
               TR::InstOpCode fenceOp;
               fenceOp.setOpCodeValue(TR::InstOpCode::LFENCE);
               generateInstruction(fenceOp.getOpCodeValue(), node, cg);
               }

            cg->decReferenceCount(node->getChild(0));
            break;
            }

         case TR::java_util_concurrent_atomic_Fences_orderWrites:
            {
            if (comp->target().cpu.supportsSFence())
               {
               TR::InstOpCode fenceOp;
               fenceOp.setOpCodeValue(TR::InstOpCode::SFENCE);
               generateInstruction(fenceOp.getOpCodeValue(), node, cg);
               }

            cg->decReferenceCount(node->getChild(0));
            break;
            }

         case TR::java_lang_Object_clone:
            {
            return (objectCloneEvaluator(node, cg) != NULL);
            break;
            }

         default:
            break;
         }
      }

   if (!resolvedMethodSymbol)
      return false;

   // NOTE(review): this check is redundant after the early return above;
   // kept as-is to avoid any behavioral change.
   if (resolvedMethodSymbol)
      {
      switch (resolvedMethodSymbol->getRecognizedMethod())
         {
#ifdef LINUX
         case TR::java_lang_System_nanoTime:
            {
            TR_ASSERT(!isIndirect, "Indirect call to nanoTime");
            callWasInlined = inlineNanoTime(node, cg);
            break;
            }
#endif
         default:
            break;
         }
      }

   return callWasInlined;
   }
10303
10304
10305
/**
10306
* \brief
10307
* Generate instructions to conditionally branch to a write barrier helper call
10308
*
10309
* \param branchOp
10310
* The branch instruction to jump to the write barrier helper call
10311
*
10312
* \param node
10313
* The write barrier node
10314
*
10315
* \param gcMode
10316
* The GC Mode
10317
*
10318
* \param owningObjectReg
10319
* The register holding the owning object
10320
*
10321
* \param sourceReg
10322
* The register holding the source object
10323
*
10324
* \param doneLabel
10325
* The label to jump to when returning from the write barrier helper
10326
*
10327
* \param cg
10328
* The Code Generator
10329
*
10330
* Note that RealTimeGC is handled separately in a different method.
10331
*/
10332
static void generateWriteBarrierCall(
   TR::InstOpCode::Mnemonic branchOp,
   TR::Node* node,
   MM_GCWriteBarrierType gcMode,
   TR::Register* owningObjectReg,
   TR::Register* sourceReg,
   TR::LabelSymbol* doneLabel,
   TR::CodeGenerator* cg)
   {
   TR::Compilation *comp = cg->comp();
   TR_ASSERT(gcMode != gc_modron_wrtbar_satb && !comp->getOptions()->realTimeGC(), "This helper is not for RealTimeGC.");

   uint8_t helperArgCount = 0; // Number of arguments passed on the runtime helper.
   TR::SymbolReference *wrtBarSymRef = NULL;

   // Select the write-barrier helper (and its argument count) based on the
   // node shape and the active GC write-barrier policy.
   if (node->getOpCodeValue() == TR::arraycopy)
      {
      // Batch barrier: a single argument (the owning object) covers the
      // whole copied range.
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierBatchStoreSymbolRef();
      helperArgCount = 1;
      }
   else if (gcMode == gc_modron_wrtbar_cardmark_and_oldcheck)
      {
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalAndConcurrentMarkSymbolRef();
      helperArgCount = 2;
      }
   else if (gcMode == gc_modron_wrtbar_always)
      {
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
      helperArgCount = 2;
      }
   else if (comp->generateArraylets())
      {
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
      helperArgCount = 2;
      }
   else
      {
      // Default case is a generational barrier (non-concurrent).
      //
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreGenerationalSymbolRef();
      helperArgCount = 2;
      }

   TR::LabelSymbol* wrtBarLabel = generateLabelSymbol(cg);

   // Branch to the out-of-line helper call on the caller-chosen condition.
   generateLabelInstruction(branchOp, node, wrtBarLabel, cg);

   TR_OutlinedInstructionsGenerator og(wrtBarLabel, node, cg);

   // Helper arguments are passed in vmThread scratch slots, not registers:
   // floatTemp1 = owning object, floatTemp2 = stored source (if needed).
   generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), owningObjectReg, cg);
   if (helperArgCount > 1)
      {
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), sourceReg, cg);
      }
   generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)wrtBarSymRef->getMethodAddress(), wrtBarSymRef, cg);
   // Resume the mainline path after the helper returns.
   generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);

   og.endOutlinedInstructionSequence();
   }
10391
10392
static void reportFlag(bool value, char *name, TR::CodeGenerator *cg)
10393
{
10394
if (value)
10395
traceMsg(cg->comp(), " %s", name);
10396
}
10397
10398
// Determine whether `mask` occupies a single byte-aligned region high enough
// that a 1-byte TEST at some byte offset (0..3) can replace a 4-byte TEST.
// Returns the byte offset, or -1 if no such offset exists (or the
// transformation is declined).
static int32_t byteOffsetForMask(int32_t mask, TR::CodeGenerator *cg)
   {
   // Scan from the most significant byte down: pick the highest offset whose
   // byte boundary clears no set bits of the mask.
   int32_t byteIndex = 3;
   while (byteIndex >= 0)
      {
      const int32_t shift = 8 * byteIndex;
      if (((mask >> shift) << shift) == mask)
         break;
      --byteIndex;
      }

   if (byteIndex != -1
      && performTransformation(cg->comp(), "O^O TREE EVALUATION: Use 1-byte TEST with offset %d for mask %08x\n", byteIndex, mask))
      return byteIndex;

   return -1;
   }
10414
10415
10416
#define REPORT_FLAG(name) reportFlag((name), #name, cg)
10417
10418
void J9::X86::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(
   TR::Node *node,
   TR::MemoryReference *storeMRForRealTime, // RTJ only
   TR::Register *storeAddressRegForRealTime, // RTJ only
   TR::Node *destOwningObject, // only NULL for ME, always evaluated except for AC (evaluated below)
   TR::Node *sourceObject, // NULL for ME and AC(Array Copy?)
   TR::Register *srcReg, // should only be provided when sourceObject == NULL (ME Multimidlet)
   TR_X86ScratchRegisterManager *srm,
   TR::CodeGenerator *cg)
   {
   // Emit the real-time (metronome) GC write barrier for a reference store
   // whose actual store instruction is generated elsewhere by the caller.
   // The inline fast path skips the barrier when it is not enabled or the
   // overwritten reference is null; otherwise control branches to an
   // outlined helper call.
   TR::Compilation *comp = cg->comp();
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   TR_ASSERT(comp->getOptions()->realTimeGC(),"Call the non real-time barrier");
   auto gcMode = TR::Compiler->om.writeBarrierType();

   // Honour skipWrtBar hints on the node (or, for ArrayStoreCHK, on its
   // wrtbar child) by downgrading to "no barrier".
   if (node->getOpCode().isWrtBar() && node->skipWrtBar())
      gcMode = gc_modron_wrtbar_none;
   else if ((node->getOpCodeValue() == TR::ArrayStoreCHK) &&
            node->getFirstChild()->getOpCode().isWrtBar() &&
            node->getFirstChild()->skipWrtBar())
      gcMode = gc_modron_wrtbar_none;

   // PR98283: it is not acceptable to emit a label symbol twice so always generate a new label here
   // we can clean up the API later in a less risky manner
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   // srcReg could only be NULL at this point for arraycopy
   if (sourceObject)
      {
      // The source must already be evaluated by the caller; borrow its register.
      TR_ASSERT(!srcReg, "assertion failure");
      srcReg = sourceObject->getRegister();
      TR_ASSERT(srcReg, "assertion failure");
      }

   // Identify the wrtbar node (if any) that carries barrier-related flags.
   TR::Node *wrtbarNode;
   switch (node->getOpCodeValue())
      {
      case TR::ArrayStoreCHK:
         wrtbarNode = node->getFirstChild();
         break;
      case TR::arraycopy:
         wrtbarNode = NULL;
         break;
      case TR::awrtbari:
      case TR::awrtbar:
         wrtbarNode = node;
         break;
      default:
         wrtbarNode = NULL;
         break;
      }

   bool doInternalControlFlow;

   if (node->getOpCodeValue() == TR::ArrayStoreCHK)
      {
      // TR::ArrayStoreCHK will create its own internal control flow.
      //
      doInternalControlFlow = false;
      }
   else
      {
      doInternalControlFlow = true;
      }

   if (comp->getOption(TR_TraceCG) /*&& comp->getOption(TR_TraceOptDetails)*/)
      {
      traceMsg(comp, " | Real Time Write barrier info:\n");
      traceMsg(comp, " | GC mode = %d:%s\n", gcMode, cg->getDebug()->getWriteBarrierKindName(gcMode));
      traceMsg(comp, " | Node = %s %s sourceObject = %s\n",
         cg->getDebug()->getName(node->getOpCodeValue()),
         cg->getDebug()->getName(node),
         sourceObject? cg->getDebug()->getName(sourceObject) : "(none)");
      traceMsg(comp, " | Action flags:");
      REPORT_FLAG(doInternalControlFlow);
      traceMsg(comp, "\n");
      }

   //
   // Phase 2: Generate the appropriate code.
   //
   TR::Register *owningObjectReg;
   TR::Register *tempReg = NULL;

   owningObjectReg = cg->evaluate(destOwningObject);

   if (doInternalControlFlow)
      {
      TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
      startLabel->setStartInternalControlFlow();
      generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
      doneLabel->setEndInternalControlFlow();
      }

   if (comp->getOption(TR_BreakOnWriteBarrier))
      {
      // Debug aid: trap into the debugger at every write barrier.
      generateInstruction(TR::InstOpCode::INT3, node, cg);
      }

   // Class/static stores use the class-store barrier helper; everything else
   // uses the plain real-time store barrier.
   TR::SymbolReference *wrtBarSymRef = NULL;
   if (wrtbarNode && (wrtbarNode->getOpCodeValue()==TR::awrtbar || wrtbarNode->isUnsafeStaticWrtBar()))
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierClassStoreRealTimeGCSymbolRef();
   else
      wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreRealTimeGCSymbolRef();

   TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);

   // TR IL doesn't have a way to express the address of a field in an object, so we need some sneakiness here:
   // 1) create a dummy node for this argument to the call
   // 2) explicitly set that node's register to storeAddressRegForRealTime, preventing it from being evaluated
   // (will just push storeAddressRegForRealTime for the call)
   //
   TR::Node *dummyDestAddressNode = TR::Node::create(node, TR::aconst, 0, 0);
   dummyDestAddressNode->setRegister(storeAddressRegForRealTime);
   TR::Node *callNode = TR::Node::createWithSymRef(TR::call, 3, 3, sourceObject, dummyDestAddressNode, destOwningObject, wrtBarSymRef);

   if (comp->getOption(TR_DisableInlineWriteBarriersRT))
      {
      // Inline fast path disabled: always call the helper.
      cg->evaluate(callNode);
      }
   else
      {
      TR_OutlinedInstructions *outlinedHelperCall = new (cg->trHeapMemory()) TR_OutlinedInstructions(callNode, TR::call, NULL, snippetLabel, doneLabel, cg);

      // have to disassemble the call node we just created, first have to give it a ref count 1
      callNode->setReferenceCount(1);
      cg->recursivelyDecReferenceCount(callNode);

      cg->getOutlinedInstructionsList().push_front(outlinedHelperCall);
      cg->generateDebugCounter(
         outlinedHelperCall->getFirstInstruction(),
         TR::DebugCounter::debugCounterName(comp, "helperCalls/%s/(%s)/%d/%d", node->getOpCode().getName(), comp->signature(), node->getByteCodeInfo().getCallerIndex(), node->getByteCodeInfo().getByteCodeIndex()),
         1, TR::DebugCounter::Cheap);

      if (comp->getOption(TR_CountWriteBarriersRT))
         {
         // Debug aid: count barriers in vmThread->debugEventData6.
         TR::MemoryReference *barrierCountMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, debugEventData6), cg);
         generateMemInstruction(TR::InstOpCode::INCMem(comp->target().is64Bit()), node, barrierCountMR, cg);
         }

      tempReg = srm->findOrCreateScratchRegister();

      // if barrier not enabled, nothing to do
      TR::MemoryReference *fragmentParentMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getFragmentParentOffset(), cg);
      generateRegMemInstruction(TR::InstOpCode::LRegMem(comp->target().is64Bit()), node, tempReg, fragmentParentMR, cg);
      TR::MemoryReference *globalFragmentIDMR = generateX86MemoryReference(tempReg, fej9->getRememberedSetGlobalFragmentOffset(), cg);
      generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, globalFragmentIDMR, 0, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);

      // now check if double barrier is enabled and definitely execute the barrier if it is
      // if (vmThread->localFragmentIndex == 0) goto snippetLabel
      TR::MemoryReference *localFragmentIndexMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getLocalFragmentOffset(), cg);
      generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, localFragmentIndexMR, 0, cg);
      generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);

      // null test on the reference we're about to store over: if it is null goto doneLabel
      // if (destObject->field == null) goto doneLabel
      TR::MemoryReference *nullTestMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);
      if (comp->target().is64Bit() && comp->useCompressedPointers())
         generateMemImmInstruction(TR::InstOpCode::CMP4MemImms, node, nullTestMR, 0, cg);
      else
         generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, nullTestMR, 0, cg);
      generateLabelInstruction(TR::InstOpCode::JNE4, node, snippetLabel, cg);

      // fall-through means write barrier not needed, just do the store
      }

   if (doInternalControlFlow)
      {
      // Merge point: build the post-conditions that must hold at doneLabel.
      int32_t numPostConditions = 2 + srm->numAvailableRegisters();

      numPostConditions += 4;

      if (srcReg)
         {
         numPostConditions++;
         }

      TR::RegisterDependencyConditions *conditions =
         generateRegisterDependencyConditions((uint8_t) 0, numPostConditions, cg);

      conditions->addPostCondition(owningObjectReg, TR::RealRegister::NoReg, cg);
      if (srcReg)
         {
         conditions->addPostCondition(srcReg, TR::RealRegister::NoReg, cg);
         }

      conditions->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);

      if (!comp->getOption(TR_DisableInlineWriteBarriersRT))
         {
         TR_ASSERT(storeAddressRegForRealTime != NULL, "assertion failure");
         conditions->addPostCondition(storeAddressRegForRealTime, TR::RealRegister::NoReg, cg);

         TR_ASSERT(tempReg != NULL, "assertion failure");
         conditions->addPostCondition(tempReg, TR::RealRegister::NoReg, cg);
         }

      // Keep the store address' base/index registers alive across the merge
      // unless the destination is a local object (no barrier interaction).
      if (destOwningObject->getOpCode().hasSymbolReference()
         && destOwningObject->getSymbol()
         && !destOwningObject->getSymbol()->isLocalObject())
         {
         if (storeMRForRealTime->getBaseRegister())
            {
            conditions->unionPostCondition(storeMRForRealTime->getBaseRegister(), TR::RealRegister::NoReg, cg);
            }
         if (storeMRForRealTime->getIndexRegister())
            {
            conditions->unionPostCondition(storeMRForRealTime->getIndexRegister(), TR::RealRegister::NoReg, cg);
            }
         }

      srm->addScratchRegistersToDependencyList(conditions);
      conditions->stopAddingConditions();

      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, conditions, cg);

      srm->stopUsingRegisters();
      }
   else
      {
      // ArrayStoreCHK owns the internal control flow; just place the label.
      TR_ASSERT(node->getOpCodeValue() == TR::ArrayStoreCHK, "assertion failure");
      generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, cg);
      }
   }
10643
10644
10645
10646
10647
void J9::X86::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(
10648
TR::Node *node,
10649
TR::Node *destOwningObject, // only NULL for ME, always evaluated except for AC (evaluated below)
10650
TR::Node *sourceObject, // NULL for ME and AC(Array Copy?)
10651
TR::Register *srcReg, // should only be provided when sourceObject == NULL (ME Multimidlet)
10652
TR_X86ScratchRegisterManager *srm,
10653
TR::CodeGenerator *cg)
10654
{
10655
TR::Compilation *comp = cg->comp();
10656
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
10657
TR_ASSERT(!(comp->getOptions()->realTimeGC()),"Call the real-time barrier");
10658
auto gcMode = TR::Compiler->om.writeBarrierType();
10659
10660
if (node->getOpCode().isWrtBar() && node->skipWrtBar())
10661
gcMode = gc_modron_wrtbar_none;
10662
else if ((node->getOpCodeValue() == TR::ArrayStoreCHK) &&
10663
node->getFirstChild()->getOpCode().isWrtBar() &&
10664
node->getFirstChild()->skipWrtBar())
10665
gcMode = gc_modron_wrtbar_none;
10666
10667
// PR98283: it is not acceptable to emit a label symbol twice so always generate a new label here
10668
// we can clean up the API later in a less risky manner
10669
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
10670
10671
TR::LabelSymbol *cardMarkDoneLabel = NULL;
10672
10673
bool isSourceNonNull;
10674
10675
// If a source node is provided, derive the source object register from it.
10676
// The source node must be evaluated before this function is called so it must
10677
// always be in a register.
10678
//
10679
if (sourceObject)
10680
{
10681
TR_ASSERT(!srcReg, "assertion failure");
10682
srcReg = sourceObject->getRegister();
10683
TR_ASSERT(srcReg, "assertion failure");
10684
isSourceNonNull = sourceObject->isNonNull();
10685
}
10686
else
10687
{
10688
isSourceNonNull = false;
10689
}
10690
10691
10692
// srcReg could only be NULL at this point for arraycopy
10693
10694
//
10695
// Phase 1: Decide what parts of this logic we need to do
10696
//
10697
10698
TR::Node *wrtbarNode;
10699
switch (node->getOpCodeValue())
10700
{
10701
case TR::ArrayStoreCHK:
10702
wrtbarNode = node->getFirstChild();
10703
break;
10704
case TR::arraycopy:
10705
wrtbarNode = NULL;
10706
break;
10707
case TR::awrtbari:
10708
case TR::awrtbar:
10709
wrtbarNode = node;
10710
break;
10711
default:
10712
wrtbarNode = NULL;
10713
break;
10714
}
10715
10716
bool doInlineCardMarkingWithoutOldSpaceCheck, doIsDestAHeapObjectCheck;
10717
10718
if (wrtbarNode)
10719
{
10720
TR_ASSERT(wrtbarNode->getOpCode().isWrtBar(), "Expected node " POINTER_PRINTF_FORMAT " to be a WrtBar", wrtbarNode);
10721
// Note: for gc_modron_wrtbar_cardmark_and_oldcheck we let the helper do the card mark (ie. we don't inline it)
10722
doInlineCardMarkingWithoutOldSpaceCheck =
10723
(gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_incremental)
10724
&& !wrtbarNode->getSymbol()->isLocalObject()
10725
&& !wrtbarNode->isNonHeapObjectWrtBar();
10726
10727
doIsDestAHeapObjectCheck = doInlineCardMarkingWithoutOldSpaceCheck && !wrtbarNode->isHeapObjectWrtBar();
10728
}
10729
else
10730
{
10731
// TR::arraycopy or TR::ArrayStoreCHK
10732
//
10733
// Old space checks will be done out-of-line, and if a card mark policy requires an old space check
10734
// as well then both will be done out-of-line.
10735
//
10736
doInlineCardMarkingWithoutOldSpaceCheck = doIsDestAHeapObjectCheck = (gcMode == gc_modron_wrtbar_cardmark || gcMode == gc_modron_wrtbar_cardmark_incremental);
10737
}
10738
10739
10740
// for Tarok gc_modron_wrtbar_cardmark
10741
//
10742
// doIsDestAHeapObjectCheck = true (if req) OK
10743
// doIsDestInOldSpaceCheck = false OK
10744
// doInlineCardMarkingWithoutOldSpaceCheck = maybe OK
10745
// doCheckConcurrentMarkActive = false OK
10746
// dirtyCardTableOutOfLine = false OK
10747
10748
10749
bool doIsDestInOldSpaceCheck =
10750
gcMode == gc_modron_wrtbar_oldcheck
10751
|| gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
10752
|| gcMode == gc_modron_wrtbar_always
10753
;
10754
10755
bool unsafeCallBarrier = false;
10756
if (doIsDestInOldSpaceCheck &&
10757
(gcMode == gc_modron_wrtbar_cardmark
10758
|| gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
10759
|| gcMode == gc_modron_wrtbar_cardmark_incremental) &&
10760
(node->getOpCodeValue()==TR::icall)) {
10761
TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();
10762
if (symbol != NULL && symbol->getRecognizedMethod())
10763
unsafeCallBarrier = true;
10764
}
10765
10766
bool doCheckConcurrentMarkActive =
10767
(gcMode == gc_modron_wrtbar_cardmark
10768
|| gcMode == gc_modron_wrtbar_cardmark_and_oldcheck
10769
|| gcMode == gc_modron_wrtbar_cardmark_incremental
10770
) && (doInlineCardMarkingWithoutOldSpaceCheck || (doIsDestInOldSpaceCheck && wrtbarNode) || unsafeCallBarrier);
10771
10772
// Use out-of-line instructions to dirty the card table.
10773
//
10774
bool dirtyCardTableOutOfLine = true;
10775
10776
if (gcMode == gc_modron_wrtbar_cardmark_incremental)
10777
{
10778
// Override these settings for policies that don't support concurrent mark.
10779
//
10780
doCheckConcurrentMarkActive = false;
10781
dirtyCardTableOutOfLine = false;
10782
}
10783
10784
// For practical applications, adding an explicit test for NULL is not worth the pathlength cost
10785
// especially since storing null values is not the dominant case.
10786
//
10787
static char *doNullCheckOnWrtBar = feGetEnv("TR_doNullCheckOnWrtBar");
10788
bool doSrcIsNullCheck = (doNullCheckOnWrtBar && doIsDestInOldSpaceCheck && srcReg && !isSourceNonNull);
10789
10790
bool doInternalControlFlow;
10791
10792
if (node->getOpCodeValue() == TR::ArrayStoreCHK)
10793
{
10794
// TR::ArrayStoreCHK will create its own internal control flow.
10795
//
10796
doInternalControlFlow = false;
10797
}
10798
else
10799
{
10800
doInternalControlFlow =
10801
(doIsDestInOldSpaceCheck
10802
|| doIsDestAHeapObjectCheck
10803
|| doCheckConcurrentMarkActive
10804
|| doSrcIsNullCheck);
10805
}
10806
10807
if (comp->getOption(TR_TraceCG) /*&& comp->getOption(TR_TraceOptDetails)*/)
10808
{
10809
traceMsg(comp, " | Write barrier info:\n");
10810
traceMsg(comp, " | GC mode = %d:%s\n", gcMode, cg->getDebug()->getWriteBarrierKindName(gcMode));
10811
traceMsg(comp, " | Node = %s %s sourceObject = %s\n",
10812
cg->getDebug()->getName(node->getOpCodeValue()),
10813
cg->getDebug()->getName(node),
10814
sourceObject? cg->getDebug()->getName(sourceObject) : "(none)");
10815
traceMsg(comp, " | Action flags:");
10816
REPORT_FLAG(doInternalControlFlow);
10817
REPORT_FLAG(doCheckConcurrentMarkActive);
10818
REPORT_FLAG(doInlineCardMarkingWithoutOldSpaceCheck);
10819
REPORT_FLAG(dirtyCardTableOutOfLine);
10820
REPORT_FLAG(doIsDestAHeapObjectCheck);
10821
REPORT_FLAG(doIsDestInOldSpaceCheck);
10822
REPORT_FLAG(isSourceNonNull);
10823
REPORT_FLAG(doSrcIsNullCheck);
10824
traceMsg(comp, "\n");
10825
}
10826
10827
//
10828
// Phase 2: Generate the appropriate code.
10829
//
10830
TR::Register *owningObjectReg;
10831
TR::Register *tempReg = NULL;
10832
10833
owningObjectReg = cg->evaluate(destOwningObject);
10834
10835
if (doInternalControlFlow)
10836
{
10837
TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
10838
startLabel->setStartInternalControlFlow();
10839
generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
10840
doneLabel->setEndInternalControlFlow();
10841
}
10842
10843
if (comp->getOption(TR_BreakOnWriteBarrier))
10844
{
10845
generateInstruction(TR::InstOpCode::INT3, node, cg);
10846
}
10847
10848
TR::MemoryReference *fragmentParentMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getFragmentParentOffset(), cg);
10849
TR::MemoryReference *localFragmentIndexMR = generateX86MemoryReference(cg->getVMThreadRegister(), fej9->thisThreadRememberedSetFragmentOffset() + fej9->getLocalFragmentOffset(), cg);
10850
TR_OutlinedInstructions *inlineCardMarkPath = NULL;
10851
if (doInlineCardMarkingWithoutOldSpaceCheck && doCheckConcurrentMarkActive)
10852
{
10853
TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, privateFlags), cg);
10854
generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);
10855
10856
// Branch to outlined instructions to inline card dirtying.
10857
//
10858
TR::LabelSymbol *inlineCardMarkLabel = generateLabelSymbol(cg);
10859
10860
generateLabelInstruction(TR::InstOpCode::JNE4, node, inlineCardMarkLabel, cg);
10861
10862
// Dirty the card table.
10863
//
10864
TR_OutlinedInstructionsGenerator og(inlineCardMarkLabel, node, cg);
10865
TR::Register *tempReg = srm->findOrCreateScratchRegister();
10866
10867
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, owningObjectReg, cg);
10868
10869
if (comp->getOptions()->isVariableHeapBaseForBarrierRange0())
10870
{
10871
TR::MemoryReference *vhbMR =
10872
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
10873
generateRegMemInstruction(TR::InstOpCode::SUBRegMem(), node, tempReg, vhbMR, cg);
10874
}
10875
else
10876
{
10877
uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
10878
10879
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
10880
{
10881
TR::Register *chbReg = srm->findOrCreateScratchRegister();
10882
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
10883
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
10884
srm->reclaimScratchRegister(chbReg);
10885
}
10886
else
10887
{
10888
generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
10889
}
10890
}
10891
10892
if (doIsDestAHeapObjectCheck)
10893
{
10894
cardMarkDoneLabel = doIsDestInOldSpaceCheck ? generateLabelSymbol(cg) : doneLabel;
10895
10896
if (comp->getOptions()->isVariableHeapSizeForBarrierRange0())
10897
{
10898
TR::MemoryReference *vhsMR =
10899
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
10900
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR, cg);
10901
}
10902
else
10903
{
10904
uintptr_t chs = comp->getOptions()->getHeapSizeForBarrierRange0();
10905
10906
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chs) || TR::Compiler->om.nativeAddressesCanChangeSize()))
10907
{
10908
TR::Register *chsReg = srm->findOrCreateScratchRegister();
10909
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chsReg, chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
10910
generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, tempReg, chsReg, cg);
10911
srm->reclaimScratchRegister(chsReg);
10912
}
10913
else
10914
{
10915
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
10916
}
10917
}
10918
10919
generateLabelInstruction(TR::InstOpCode::JAE4, node, cardMarkDoneLabel, cg);
10920
}
10921
10922
generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tempReg, comp->getOptions()->getHeapAddressToCardAddressShift(), cg);
10923
10924
// Mark the card
10925
//
10926
const uint8_t dirtyCard = 1;
10927
10928
TR::MemoryReference *cardTableMR;
10929
10930
if (comp->getOptions()->isVariableActiveCardTableBase())
10931
{
10932
TR::MemoryReference *actbMR =
10933
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, activeCardTableBase), cg);
10934
generateRegMemInstruction(TR::InstOpCode::ADDRegMem(), node, tempReg, actbMR, cg);
10935
cardTableMR = generateX86MemoryReference(tempReg, 0, cg);
10936
}
10937
else
10938
{
10939
uintptr_t actb = comp->getOptions()->getActiveCardTableBase();
10940
10941
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(actb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
10942
{
10943
TR::Register *tempReg3 = srm->findOrCreateScratchRegister();
10944
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg3, actb, cg, TR_ACTIVE_CARD_TABLE_BASE);
10945
cardTableMR = generateX86MemoryReference(tempReg3, tempReg, 0, cg);
10946
srm->reclaimScratchRegister(tempReg3);
10947
}
10948
else
10949
{
10950
cardTableMR = generateX86MemoryReference(NULL, tempReg, 0, (int32_t)actb, cg);
10951
cardTableMR->setReloKind(TR_ACTIVE_CARD_TABLE_BASE);
10952
}
10953
}
10954
10955
generateMemImmInstruction(TR::InstOpCode::S1MemImm1, node, cardTableMR, dirtyCard, cg);
10956
srm->reclaimScratchRegister(tempReg);
10957
generateLabelInstruction(TR::InstOpCode::JMP4, node, doneLabel, cg);
10958
10959
og.endOutlinedInstructionSequence();
10960
}
10961
else if (doInlineCardMarkingWithoutOldSpaceCheck && !dirtyCardTableOutOfLine)
10962
{
10963
// Dirty the card table.
10964
//
10965
TR::Register *tempReg = srm->findOrCreateScratchRegister();
10966
10967
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, owningObjectReg, cg);
10968
10969
if (comp->getOptions()->isVariableHeapBaseForBarrierRange0())
10970
{
10971
TR::MemoryReference *vhbMR =
10972
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapBaseForBarrierRange0), cg);
10973
generateRegMemInstruction(TR::InstOpCode::SUBRegMem(), node, tempReg, vhbMR, cg);
10974
}
10975
else
10976
{
10977
uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
10978
10979
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
10980
{
10981
TR::Register *chbReg = srm->findOrCreateScratchRegister();
10982
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
10983
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
10984
srm->reclaimScratchRegister(chbReg);
10985
}
10986
else
10987
{
10988
generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
10989
}
10990
}
10991
10992
if (doIsDestAHeapObjectCheck)
10993
{
10994
cardMarkDoneLabel = doIsDestInOldSpaceCheck ? generateLabelSymbol(cg) : doneLabel;
10995
10996
if (comp->getOptions()->isVariableHeapSizeForBarrierRange0())
10997
{
10998
TR::MemoryReference *vhsMR =
10999
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
11000
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR, cg);
11001
}
11002
else
11003
{
11004
uintptr_t chs = comp->getOptions()->getHeapSizeForBarrierRange0();
11005
11006
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chs) || TR::Compiler->om.nativeAddressesCanChangeSize()))
11007
{
11008
TR::Register *chsReg = srm->findOrCreateScratchRegister();
11009
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chsReg, chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
11010
generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, tempReg, chsReg, cg);
11011
srm->reclaimScratchRegister(chsReg);
11012
}
11013
else
11014
{
11015
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, tempReg, (int32_t)chs, cg, TR_HEAP_SIZE_FOR_BARRIER_RANGE);
11016
}
11017
}
11018
11019
generateLabelInstruction(TR::InstOpCode::JAE4, node, cardMarkDoneLabel, cg);
11020
}
11021
11022
generateRegImmInstruction(TR::InstOpCode::SHRRegImm1(), node, tempReg, comp->getOptions()->getHeapAddressToCardAddressShift(), cg);
11023
11024
// Mark the card
11025
//
11026
const uint8_t dirtyCard = 1;
11027
11028
TR::MemoryReference *cardTableMR;
11029
11030
if (comp->getOptions()->isVariableActiveCardTableBase())
11031
{
11032
TR::MemoryReference *actbMR =
11033
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, activeCardTableBase), cg);
11034
generateRegMemInstruction(TR::InstOpCode::ADDRegMem(), node, tempReg, actbMR, cg);
11035
cardTableMR = generateX86MemoryReference(tempReg, 0, cg);
11036
}
11037
else
11038
{
11039
uintptr_t actb = comp->getOptions()->getActiveCardTableBase();
11040
11041
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(actb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
11042
{
11043
TR::Register *tempReg3 = srm->findOrCreateScratchRegister();
11044
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, tempReg3, actb, cg, TR_ACTIVE_CARD_TABLE_BASE);
11045
cardTableMR = generateX86MemoryReference(tempReg3, tempReg, 0, cg);
11046
srm->reclaimScratchRegister(tempReg3);
11047
}
11048
else
11049
{
11050
cardTableMR = generateX86MemoryReference(NULL, tempReg, 0, (int32_t)actb, cg);
11051
cardTableMR->setReloKind(TR_ACTIVE_CARD_TABLE_BASE);
11052
}
11053
}
11054
11055
generateMemImmInstruction(TR::InstOpCode::S1MemImm1, node, cardTableMR, dirtyCard, cg);
11056
11057
srm->reclaimScratchRegister(tempReg);
11058
}
11059
11060
if (doIsDestAHeapObjectCheck && doIsDestInOldSpaceCheck)
11061
{
11062
generateLabelInstruction(TR::InstOpCode::label, node, cardMarkDoneLabel, cg);
11063
}
11064
11065
if (doSrcIsNullCheck)
11066
{
11067
generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, srcReg, srcReg, cg);
11068
generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);
11069
}
11070
11071
if (doIsDestInOldSpaceCheck)
11072
{
11073
static char *disableWrtbarOpt = feGetEnv("TR_DisableWrtbarOpt");
11074
11075
TR::InstOpCode::Mnemonic branchOp;
11076
auto gcModeForSnippet = gcMode;
11077
11078
bool skipSnippetIfSrcNotOld = false;
11079
bool skipSnippetIfDestOld = false;
11080
bool skipSnippetIfDestRemembered = false;
11081
11082
TR::LabelSymbol *labelAfterBranchToSnippet = NULL;
11083
11084
if (gcMode == gc_modron_wrtbar_always)
11085
{
11086
// Always call the write barrier helper.
11087
//
11088
// TODO: this should be an inline call.
11089
//
11090
branchOp = TR::InstOpCode::JMP4;
11091
}
11092
else if (doCheckConcurrentMarkActive)
11093
{
11094
//TR_ASSERT(wrtbarNode, "Must not be an arraycopy");
11095
11096
// If the concurrent mark thread IS active then call the gencon write barrier in the helper
11097
// to perform card marking and any necessary remembered set updates.
11098
//
11099
// This is expected to be true for only a very small percentage of the time and hence
11100
// handling it out of line is justified.
11101
//
11102
if (!comp->getOption(TR_DisableWriteBarriersRangeCheck)
11103
&& (node->getOpCodeValue() == TR::awrtbari)
11104
&& doInternalControlFlow)
11105
{
11106
bool is64Bit = comp->target().is64Bit(); // On compressed refs, owningObjectReg is already uncompressed, and the vmthread fields are 64 bits
11107
labelAfterBranchToSnippet = generateLabelSymbol(cg);
11108
// AOT support to be implemented in another PR
11109
if (!comp->getOptions()->isVariableHeapSizeForBarrierRange0() && !comp->compileRelocatableCode() && !disableWrtbarOpt)
11110
{
11111
uintptr_t che = comp->getOptions()->getHeapBaseForBarrierRange0() + comp->getOptions()->getHeapSizeForBarrierRange0();
11112
if (comp->target().is64Bit() && !IS_32BIT_SIGNED(che))
11113
{
11114
generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, owningObjectReg, generateX86MemoryReference(cg->findOrCreate8ByteConstant(node, che), cg), cg);
11115
}
11116
else
11117
{
11118
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, owningObjectReg, (int32_t)che, cg);
11119
}
11120
}
11121
else
11122
{
11123
uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
11124
TR::Register *tempOwningObjReg = srm->findOrCreateScratchRegister();
11125
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempOwningObjReg, owningObjectReg, cg);
11126
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
11127
{
11128
TR::Register *chbReg = srm->findOrCreateScratchRegister();
11129
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
11130
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempOwningObjReg, chbReg, cg);
11131
srm->reclaimScratchRegister(chbReg);
11132
}
11133
else
11134
{
11135
generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempOwningObjReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
11136
}
11137
TR::MemoryReference *vhsMR1 =
11138
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
11139
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempOwningObjReg, vhsMR1, cg);
11140
srm->reclaimScratchRegister(tempOwningObjReg);
11141
}
11142
11143
generateLabelInstruction(TR::InstOpCode::JAE1, node, doneLabel, cg);
11144
11145
skipSnippetIfSrcNotOld = true;
11146
}
11147
else
11148
{
11149
skipSnippetIfDestOld = true;
11150
}
11151
11152
// See if we can do a TR::InstOpCode::TEST1MemImm1
11153
//
11154
int32_t byteOffset = byteOffsetForMask(J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);
11155
if (byteOffset != -1)
11156
{
11157
TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), byteOffset + offsetof(J9VMThread, privateFlags), cg);
11158
generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE >> (8*byteOffset), cg);
11159
}
11160
else
11161
{
11162
TR::MemoryReference *vmThreadPrivateFlagsMR = generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, privateFlags), cg);
11163
generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, vmThreadPrivateFlagsMR, J9_PRIVATE_FLAGS_CONCURRENT_MARK_ACTIVE, cg);
11164
}
11165
11166
generateWriteBarrierCall(TR::InstOpCode::JNE4, node, gc_modron_wrtbar_cardmark_and_oldcheck, owningObjectReg, srcReg, doneLabel, cg);
11167
11168
// If the destination object is old and not remembered then process the remembered
11169
// set update out-of-line with the generational helper.
11170
//
11171
skipSnippetIfDestRemembered = true;
11172
gcModeForSnippet = gc_modron_wrtbar_oldcheck;
11173
}
11174
else if (gcMode == gc_modron_wrtbar_oldcheck)
11175
{
11176
// For pure generational barriers if the object is old and remembered then the helper
11177
// can be skipped.
11178
//
11179
skipSnippetIfDestOld = true;
11180
skipSnippetIfDestRemembered = true;
11181
}
11182
else
11183
{
11184
skipSnippetIfDestOld = true;
11185
skipSnippetIfDestRemembered = false;
11186
}
11187
11188
if (skipSnippetIfSrcNotOld || skipSnippetIfDestOld)
11189
{
11190
TR_ASSERT((!skipSnippetIfSrcNotOld || !skipSnippetIfDestOld), "At most one of skipSnippetIfSrcNotOld and skipSnippetIfDestOld can be true");
11191
TR_ASSERT(skipSnippetIfDestOld || (srcReg != NULL), "Expected to have a source register for wrtbari");
11192
11193
bool is64Bit = comp->target().is64Bit(); // On compressed refs, owningObjectReg is already uncompressed, and the vmthread fields are 64 bits
11194
bool checkDest = skipSnippetIfDestOld; // Otherwise, check the src value
11195
bool skipSnippetIfOld = skipSnippetIfDestOld; // Otherwise, skip if the checked value (source or destination) is not old
11196
labelAfterBranchToSnippet = generateLabelSymbol(cg);
11197
// AOT support to be implemented in another PR
11198
if (!comp->getOptions()->isVariableHeapSizeForBarrierRange0() && !comp->compileRelocatableCode() && !disableWrtbarOpt)
11199
{
11200
uintptr_t che = comp->getOptions()->getHeapBaseForBarrierRange0() + comp->getOptions()->getHeapSizeForBarrierRange0();
11201
if (comp->target().is64Bit() && !IS_32BIT_SIGNED(che))
11202
{
11203
generateRegMemInstruction(TR::InstOpCode::CMP8RegMem, node, checkDest ? owningObjectReg : srcReg, generateX86MemoryReference(cg->findOrCreate8ByteConstant(node, che), cg), cg);
11204
}
11205
else
11206
{
11207
generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, checkDest ? owningObjectReg : srcReg, (int32_t)che, cg);
11208
}
11209
}
11210
else
11211
{
11212
uintptr_t chb = comp->getOptions()->getHeapBaseForBarrierRange0();
11213
TR::Register *tempReg = srm->findOrCreateScratchRegister();
11214
generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, tempReg, checkDest ? owningObjectReg : srcReg, cg);
11215
if (comp->target().is64Bit() && (!IS_32BIT_SIGNED(chb) || TR::Compiler->om.nativeAddressesCanChangeSize()))
11216
{
11217
TR::Register *chbReg = srm->findOrCreateScratchRegister();
11218
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, node, chbReg, chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
11219
generateRegRegInstruction(TR::InstOpCode::SUBRegReg(), node, tempReg, chbReg, cg);
11220
srm->reclaimScratchRegister(chbReg);
11221
}
11222
else
11223
{
11224
generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), node, tempReg, (int32_t)chb, cg, TR_HEAP_BASE_FOR_BARRIER_RANGE);
11225
}
11226
TR::MemoryReference *vhsMR1 =
11227
generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, heapSizeForBarrierRange0), cg);
11228
generateRegMemInstruction(TR::InstOpCode::CMPRegMem(), node, tempReg, vhsMR1, cg);
11229
}
11230
11231
branchOp = skipSnippetIfOld ? TR::InstOpCode::JB4 : TR::InstOpCode::JAE4; // For branch to snippet
11232
TR::InstOpCode::Mnemonic reverseBranchOp = skipSnippetIfOld ? TR::InstOpCode::JAE4 : TR::InstOpCode::JB4; // For branch past snippet
11233
11234
// Now performing check for remembered
11235
if (skipSnippetIfDestRemembered)
11236
{
11237
// Set up for branch *past* snippet call for previous comparison
11238
generateLabelInstruction(reverseBranchOp, node, labelAfterBranchToSnippet, cg);
11239
11240
int32_t byteOffset = byteOffsetForMask(J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST, cg);
11241
if (byteOffset != -1)
11242
{
11243
TR::MemoryReference *MR = generateX86MemoryReference(owningObjectReg, byteOffset + TR::Compiler->om.offsetOfHeaderFlags(), cg);
11244
generateMemImmInstruction(TR::InstOpCode::TEST1MemImm1, node, MR, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST >> (8*byteOffset), cg);
11245
}
11246
else
11247
{
11248
TR::MemoryReference *MR = generateX86MemoryReference(owningObjectReg, TR::Compiler->om.offsetOfHeaderFlags(), cg);
11249
generateMemImmInstruction(TR::InstOpCode::TEST4MemImm4, node, MR, J9_OBJECT_HEADER_REMEMBERED_MASK_FOR_TEST, cg);
11250
}
11251
branchOp=TR::InstOpCode::JE4;
11252
}
11253
}
11254
11255
generateWriteBarrierCall(branchOp, node, gcModeForSnippet, owningObjectReg, srcReg, doneLabel, cg);
11256
11257
if (labelAfterBranchToSnippet)
11258
generateLabelInstruction(TR::InstOpCode::label, node, labelAfterBranchToSnippet, cg);
11259
}
11260
11261
int32_t numPostConditions = 2 + srm->numAvailableRegisters();
11262
11263
if (srcReg)
11264
{
11265
numPostConditions++;
11266
}
11267
11268
TR::RegisterDependencyConditions *conditions =
11269
generateRegisterDependencyConditions((uint8_t) 0, numPostConditions, cg);
11270
11271
conditions->addPostCondition(owningObjectReg, TR::RealRegister::NoReg, cg);
11272
if (srcReg)
11273
{
11274
conditions->addPostCondition(srcReg, TR::RealRegister::NoReg, cg);
11275
}
11276
11277
conditions->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
11278
11279
srm->addScratchRegistersToDependencyList(conditions);
11280
conditions->stopAddingConditions();
11281
11282
generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, conditions, cg);
11283
11284
srm->stopUsingRegisters();
11285
}
11286
11287
11288
// Emit the actual reference store for a write-barriered store node and return
// the generated store instruction (so the caller can mark it as an implicit
// exception point if needed).  Compressed references are stored as 32-bit
// values; otherwise the pointer-width store opcode for this target is used.
static TR::Instruction *
doReferenceStore(
   TR::Node *node,
   TR::MemoryReference *storeMR,
   TR::Register *sourceReg,
   bool usingCompressedPointers,
   TR::CodeGenerator *cg)
   {
   TR::Compilation *compilation = cg->comp();

   // Pick the store width: 4 bytes for a compressed reference, otherwise the
   // natural pointer size for the target.
   TR::InstOpCode::Mnemonic storeOpCode;
   if (usingCompressedPointers)
      storeOpCode = TR::InstOpCode::S4MemReg;
   else
      storeOpCode = TR::InstOpCode::SMemReg();

   TR::Instruction *storeInstruction = generateMemRegInstruction(storeOpCode, node, storeMR, sourceReg, cg);

   // For real-time GC, the data reference has already been resolved into an earlier LEA instruction
   // so this padding isn't needed even if the node symbol is marked as unresolved (the store
   // instruction above is storing through a register that contains the resolved address).
   bool needsPadding = !compilation->getOptions()->realTimeGC() && node->getSymbolReference()->isUnresolved();
   if (needsPadding)
      TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);

   return storeInstruction;
   }
11310
11311
11312
// Evaluate a reference store together with its GC write barrier.
//
// The ordering of store vs. barrier depends on the GC policy: non-realtime
// collectors store first and then run the barrier; realtime GC runs the
// barrier first and performs the store afterwards through a pre-resolved
// address register (see comments in the body).
//
// node                     - the store node (awrtbar/awrtbari) or a check node wrapping it
// storeMR                  - memory reference for the destination field
// scratchRegisterManager   - supplies scratch registers for the barrier/store-address sequence
// destOwningObject         - node for the object being stored into
// sourceObject             - node for the reference value being stored
// isImplicitExceptionPoint - if true, the emitted store instruction is registered as the
//                            implicit exception point on the code generator
// nullAdjusted             - presumably true when the caller has already evaluated the
//                            translated (compressed) source value into a register — TODO confirm
void J9::X86::TreeEvaluator::VMwrtbarWithStoreEvaluator(
   TR::Node *node,
   TR::MemoryReference *storeMR,
   TR_X86ScratchRegisterManager *scratchRegisterManager,
   TR::Node *destOwningObject,
   TR::Node *sourceObject,
   bool isImplicitExceptionPoint,
   TR::CodeGenerator *cg,
   bool nullAdjusted)
   {
   TR_ASSERT(storeMR, "assertion failure");

   TR::Compilation *comp = cg->comp();

   TR::Register *owningObjectRegister = cg->evaluate(destOwningObject);
   TR::Register *sourceRegister = cg->evaluate(sourceObject);

   auto gcMode = TR::Compiler->om.writeBarrierType();
   bool isRealTimeGC = (comp->getOptions()->realTimeGC())? true:false;

   bool usingCompressedPointers = false;
   bool usingLowMemHeap = false;
   bool useShiftedOffsets = (TR::Compiler->om.compressedReferenceShiftOffset() != 0);
   TR::Node *translatedStore = NULL;

   // NOTE:
   //
   // If you change this code you also need to change writeBarrierEvaluator() in TreeEvaluator.cpp
   //
   // Detect a compressed-refs store: either a check node whose first child is an
   // indirect store of a non-address (i.e. compressed) value, or the indirect
   // store node itself.  translatedStore then names the actual store node.
   if (comp->useCompressedPointers() &&
       ((node->getOpCode().isCheck() && node->getFirstChild()->getOpCode().isIndirect() &&
        (node->getFirstChild()->getSecondChild()->getDataType() != TR::Address)) ||
        (node->getOpCode().isIndirect() && (node->getSecondChild()->getDataType() != TR::Address))))
      {
      if (node->getOpCode().isCheck())
         translatedStore = node->getFirstChild();
      else
         translatedStore = node;

      usingLowMemHeap = true;
      usingCompressedPointers = true;
      }

   // translatedSourceReg holds the value actually written to memory; for
   // compressed refs with a shift (or without a low-mem heap) it is the
   // compressed/translated value rather than the raw source register.
   TR::Register *translatedSourceReg = sourceRegister;
   if (usingCompressedPointers && (!usingLowMemHeap || useShiftedOffsets))
      {
      // handle stores of null values here

      if (nullAdjusted)
         translatedSourceReg = translatedStore->getSecondChild()->getRegister();
      else
         {
         translatedSourceReg = cg->evaluate(translatedStore->getSecondChild());
         if (!usingLowMemHeap)
            {
            // If the (uncompressed) source is NULL, store NULL rather than the
            // translated value: test the source and conditionally move it over
            // the translated register when zero.
            generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), translatedStore, sourceRegister, sourceRegister, cg);
            generateRegRegInstruction(TR::InstOpCode::CMOVERegReg(), translatedStore, translatedSourceReg, sourceRegister, cg);
            }
         }
      }

   TR::Instruction *storeInstr = NULL;
   TR::Register *storeAddressRegForRealTime = NULL;

   if (isRealTimeGC)
      {
      // Realtime GC evaluates storeMR into a register here and then uses it to do the store after the write barrier

      // If reference is unresolved, need to resolve it right here before the barrier starts
      // Otherwise, we could get stopped during the resolution and that could invalidate any tests we would have performend
      // beforehand
      // For simplicity, just evaluate the store address into storeAddressRegForRealTime right now
      storeAddressRegForRealTime = scratchRegisterManager->findOrCreateScratchRegister();
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, storeAddressRegForRealTime, storeMR, cg);
      if (node->getSymbolReference()->isUnresolved())
         {
         TR::TreeEvaluator::padUnresolvedDataReferences(node, *node->getSymbolReference(), cg);

         // storeMR was created against a (i)wrtbar node which is a store. The unresolved data snippet that
         // was created set the checkVolatility bit based on that node being a store. Since the resolution
         // is now going to occur on a LEA instruction, which does not require any memory fence and hence
         // no volatility check, we need to clear that "store" ness of the unresolved data snippet
         TR::UnresolvedDataSnippet *snippet = storeMR->getUnresolvedDataSnippet();
         if (snippet)
            snippet->resetUnresolvedStore();
         }
      }
   else
      {
      // Non-realtime does the store first, then the write barrier.
      //
      storeInstr = doReferenceStore(node, storeMR, translatedSourceReg, usingCompressedPointers, cg);
      }

   if (TR::Compiler->om.writeBarrierType() == gc_modron_wrtbar_always && !isRealTimeGC)
      {
      // "Always" barrier: unconditionally call the write-barrier helper,
      // passing the owning object and source through thread-local temps.
      TR::RegisterDependencyConditions *deps = NULL;
      TR::LabelSymbol *doneWrtBarLabel = generateLabelSymbol(cg);

      // On 32-bit, skip the helper call entirely when the stored value is NULL
      // (requires an internal control-flow region and explicit dependencies).
      if (comp->target().is32Bit() && sourceObject->isNonNull() == false)
         {
         TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
         startLabel->setStartInternalControlFlow();
         doneWrtBarLabel->setEndInternalControlFlow();

         generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
         generateRegRegInstruction(TR::InstOpCode::TESTRegReg(), node, sourceRegister, sourceRegister, cg);
         generateLabelInstruction(TR::InstOpCode::JE4, node, doneWrtBarLabel, cg);

         deps = generateRegisterDependencyConditions(0, 3, cg);
         deps->addPostCondition(sourceRegister, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(owningObjectRegister, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(cg->getVMThreadRegister(), TR::RealRegister::ebp, cg);
         deps->stopAddingConditions();
         }

      // Pass arguments to the helper via the vmthread floatTemp1/floatTemp2 slots.
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp1), cg), owningObjectRegister, cg);
      generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(cg->getVMThreadRegister(), offsetof(J9VMThread, floatTemp2), cg), sourceRegister, cg);

      TR::SymbolReference* wrtBarSymRef = comp->getSymRefTab()->findOrCreateWriteBarrierStoreSymbolRef();
      generateImmSymInstruction(TR::InstOpCode::CALLImm4, node, (uintptr_t)wrtBarSymRef->getMethodAddress(), wrtBarSymRef, cg);

      generateLabelInstruction(TR::InstOpCode::label, node, doneWrtBarLabel, deps, cg);
      }
   else
      {
      // Delegate the barrier sequence to the store-less evaluator appropriate
      // for the GC policy (realtime vs. non-realtime).
      if (isRealTimeGC)
         {
         TR::TreeEvaluator::VMwrtbarRealTimeWithoutStoreEvaluator(
            node,
            storeMR,
            storeAddressRegForRealTime,
            destOwningObject,
            sourceObject,
            NULL,
            scratchRegisterManager,
            cg);
         }
      else
         {
         TR::TreeEvaluator::VMwrtbarWithoutStoreEvaluator(
            node,
            destOwningObject,
            sourceObject,
            NULL,
            scratchRegisterManager,
            cg);
         }
      }

   // Realtime GCs must do the write barrier first and then the store.
   //
   if (isRealTimeGC)
      {
      TR_ASSERT(storeAddressRegForRealTime, "assertion failure");
      // Store through the address resolved earlier by the LEA above.
      TR::MemoryReference *myStoreMR = generateX86MemoryReference(storeAddressRegForRealTime, 0, cg);
      storeInstr = doReferenceStore(node, myStoreMR, translatedSourceReg, usingCompressedPointers, cg);
      scratchRegisterManager->reclaimScratchRegister(storeAddressRegForRealTime);
      }

   // Release node reference counts now that all uses have been emitted.
   // NOTE(review): when usingLowMemHeap && !useShiftedOffsets, sourceObject is
   // intentionally not decremented here — presumably the caller owns that
   // reference in the low-mem-heap case; confirm against callers.
   if (!usingLowMemHeap || useShiftedOffsets)
      cg->decReferenceCount(sourceObject);

   cg->decReferenceCount(destOwningObject);
   storeMR->decNodeReferenceCounts(cg);

   if (isImplicitExceptionPoint)
      cg->setImplicitExceptionPoint(storeInstr);
   }
11481
11482
11483
void J9::X86::TreeEvaluator::generateVFTMaskInstruction(TR::Node *node, TR::Register *reg, TR::CodeGenerator *cg)
   {
   // Strip the VM's flag bits from the class (VFT) pointer held in reg,
   // leaving a usable class pointer.  The mask comes from the object model;
   // pick the shortest AND encoding that can represent it.
   // (Fix: removed unused local fej9 and reuse the cached mask value instead
   // of querying maskOfObjectVftField() a second time.)
   uintptr_t mask = TR::Compiler->om.maskOfObjectVftField();
   bool is64Bit = cg->comp()->target().is64Bit(); // even with compressed object headers, a 64-bit mask operation is safe, though it may waste 1 byte because of the rex prefix
   if (~mask == 0)
      {
      // Mask is all ones: nothing to clear, so no mask instruction is required.
      }
   else if (~mask <= 127)
      {
      // The mask's low byte sign-extends to the full mask value, so a 1-byte
      // immediate AND suffices.
      generateRegImmInstruction(TR::InstOpCode::ANDRegImms(is64Bit), node, reg, mask, cg);
      }
   else
      {
      generateRegImmInstruction(TR::InstOpCode::ANDRegImm4(is64Bit), node, reg, mask, cg);
      }
   }
11501
11502
11503
void
11504
VMgenerateCatchBlockBBStartPrologue(
11505
TR::Node *node,
11506
TR::Instruction *fenceInstruction,
11507
TR::CodeGenerator *cg)
11508
{
11509
TR::Compilation *comp = cg->comp();
11510
TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
11511
11512
if (comp->getJittedMethodSymbol()->usesSinglePrecisionMode() &&
11513
cg->enableSinglePrecisionMethods())
11514
{
11515
cg->setLastCatchAppendInstruction(fenceInstruction);
11516
}
11517
11518
TR::Block *block = node->getBlock();
11519
if (fej9->shouldPerformEDO(block, comp))
11520
{
11521
TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg);
11522
TR::LabelSymbol *restartLabel = generateLabelSymbol(cg);
11523
11524
generateMemInstruction(TR::InstOpCode::DEC4Mem, node, generateX86MemoryReference((intptr_t)comp->getRecompilationInfo()->getCounterAddress(), cg), cg);
11525
generateLabelInstruction(TR::InstOpCode::JE4, node, snippetLabel, cg);
11526
generateLabelInstruction(TR::InstOpCode::label, node, restartLabel, cg);
11527
cg->addSnippet(new (cg->trHeapMemory()) TR::X86ForceRecompilationSnippet(cg, node, restartLabel, snippetLabel));
11528
}
11529
11530
}
11531
11532
11533
TR::Register *
J9::X86::TreeEvaluator::tstartEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Begin a hardware (RTM) transaction.  On abort, control resumes at the
   // fallback path with the abort status in EAX; that status is decoded to
   // decide between the transient-failure successor (retry after a spin
   // delay) and the persistent-failure successor.
   // (Fix: removed unused locals gotoPersistentFailure, gotoFallThrough and
   // fallBackConditions, which were created but never referenced.)
   /*
   xbegin fall_back_path
   mov monReg, [obj+Lw_offset]
   cmp monReg, 0;
   je fallThroughLabel
   cmp monReg, rbp
   je fallThroughLabel
   xabort
   fall_back_path:
   test eax, 0x2
   jne gotoTransientFailureNodeLabel
   test eax, 0x00000001
   je persistentFailureLabel
   test eax, 0x01000000
   jne gotoTransientFailureNodeLabel
   jmp persistentFailLabel
   gotoTransientFailureNodeLabel:
   mov counterReg,100
   spinLabel:
   dec counterReg
   jne spinLabel
   jmp TransientFailureNodeLabel
   */
   TR::Compilation *comp = cg->comp();
   // Children: persistent-failure branch, transient-failure branch,
   // fall-through branch, and the monitored object.
   TR::Node *persistentFailureNode = node->getFirstChild();
   TR::Node *transientFailureNode = node->getSecondChild();
   TR::Node *fallThroughNode = node->getThirdChild();
   TR::Node *objNode = node->getChild(3);
   TR::Node *GRANode = NULL;

   TR::LabelSymbol *startLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   startLabel->setStartInternalControlFlow();
   TR::LabelSymbol *endLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   endLabel->setEndInternalControlFlow();

   TR::LabelSymbol *gotoTransientFailure = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *transientFailureLabel = transientFailureNode->getBranchDestination()->getNode()->getLabel();
   TR::LabelSymbol *persistentFailureLabel = persistentFailureNode->getBranchDestination()->getNode()->getLabel();
   TR::LabelSymbol *fallBackPathLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   TR::LabelSymbol *fallThroughLabel = fallThroughNode->getBranchDestination()->getNode()->getLabel();

   TR::Register *objReg = cg->evaluate(objNode);
   TR::Register *accReg = cg->allocateRegister();
   TR::Register *monReg = cg->allocateRegister();
   TR::RegisterDependencyConditions *endLabelConditions;
   TR::RegisterDependencyConditions *fallThroughConditions = NULL;
   TR::RegisterDependencyConditions *persistentConditions = NULL;
   TR::RegisterDependencyConditions *transientConditions = NULL;

   // Each successor may carry a GRA (global register allocation) child whose
   // register dependencies must be attached to the branch to that successor.
   if (fallThroughNode->getNumChildren() != 0)
      {
      GRANode = fallThroughNode->getFirstChild();
      cg->evaluate(GRANode);
      List<TR::Register> popRegisters(cg->trMemory());
      fallThroughConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);
      cg->decReferenceCount(GRANode);
      }

   if (persistentFailureNode->getNumChildren() != 0)
      {
      GRANode = persistentFailureNode->getFirstChild();
      cg->evaluate(GRANode);
      List<TR::Register> popRegisters(cg->trMemory());
      persistentConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);
      cg->decReferenceCount(GRANode);
      }

   if (transientFailureNode->getNumChildren() != 0)
      {
      GRANode = transientFailureNode->getFirstChild();
      cg->evaluate(GRANode);
      List<TR::Register> popRegisters(cg->trMemory());
      transientConditions = generateRegisterDependencyConditions(GRANode, cg, 0, &popRegisters);
      cg->decReferenceCount(GRANode);
      }

   //startLabel
   //add place holder register so that eax would not contain any useful value before xbegin
   TR::Register *dummyReg = cg->allocateRegister();
   dummyReg->setPlaceholderReg();
   TR::RegisterDependencyConditions *startLabelConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);
   startLabelConditions->addPostCondition(dummyReg, TR::RealRegister::eax, cg);
   startLabelConditions->stopAddingConditions();
   cg->stopUsingRegister(dummyReg);
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, startLabelConditions, cg);

   //xbegin fall_back_path
   generateLongLabelInstruction(TR::InstOpCode::XBEGIN4, node, fallBackPathLabel, cg);
   //mov monReg, obj+offset
   int32_t lwOffset = cg->fej9()->getByteOffsetToLockword((TR_OpaqueClassBlock *) cg->getMonClass(node));
   TR::MemoryReference *objLockRef = generateX86MemoryReference(objReg, lwOffset, cg);
   // Load the lockword; use a 4-byte load when lockwords are compressed.
   if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())
      {
      generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, monReg, objLockRef, cg);
      }
   else
      {
      generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, monReg, objLockRef, cg);
      }

   // Unlocked object (lockword == 0): proceed transactionally.
   if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())
      {
      generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, monReg, 0, cg);
      }
   else
      {
      generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, monReg, 0, cg);
      }

   if (fallThroughConditions)
      generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, fallThroughConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, cg);

   // Lock already held by this thread (lockword == vmThread): also proceed.
   TR::Register *vmThreadReg = cg->getVMThreadRegister();
   if (comp->target().is64Bit() && cg->fej9()->generateCompressedLockWord())
      {
      generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, monReg, vmThreadReg, cg);
      }
   else
      {
      generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, monReg, vmThreadReg, cg);
      }

   if (fallThroughConditions)
      generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, fallThroughConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::JE4, node, fallThroughLabel, cg);

   //xabort
   // Lock held by another thread: abort the transaction explicitly.
   generateImmInstruction(TR::InstOpCode::XABORT, node, 0x01, cg);

   cg->stopUsingRegister(monReg);
   //fall_back_path:
   generateLabelInstruction(TR::InstOpCode::label, node, fallBackPathLabel, cg);

   // On abort, the status lands in EAX; pin accReg there for the decode below.
   endLabelConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);
   endLabelConditions->addPostCondition(accReg, TR::RealRegister::eax, cg);
   endLabelConditions->stopAddingConditions();

   // test eax, 0x2 -- "retry possible" abort bit: transient failure
   generateRegImmInstruction(TR::InstOpCode::TEST1AccImm1, node, accReg, 0x2, cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, gotoTransientFailure, cg);

   // abort because of nonzero lockword is also transient failure
   generateRegImmInstruction(TR::InstOpCode::TEST4AccImm4, node, accReg, 0x00000001, cg);
   if (persistentConditions)
      generateLabelInstruction(TR::InstOpCode::JE4, node, persistentFailureLabel, persistentConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::JE4, node, persistentFailureLabel, cg);

   generateRegImmInstruction(TR::InstOpCode::TEST4AccImm4, node, accReg, 0x01000000, cg);
   // je gotransientFailureNodeLabel
   generateLabelInstruction(TR::InstOpCode::JNE4, node, gotoTransientFailure, cg);

   if (persistentConditions)
      generateLabelInstruction(TR::InstOpCode::JMP4, node, persistentFailureLabel, persistentConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::JMP4, node, persistentFailureLabel, cg);
   cg->stopUsingRegister(accReg);

   // gotoTransientFailureLabel:
   if (transientConditions)
      generateLabelInstruction(TR::InstOpCode::label, node, gotoTransientFailure, transientConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::label, node, gotoTransientFailure, cg);

   //delay
   // Short PAUSE-based spin (100 iterations) before retrying, to give the
   // conflicting thread a chance to finish.
   TR::Register *counterReg = cg->allocateRegister();
   generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, counterReg, 100, cg);
   TR::LabelSymbol *spinLabel = TR::LabelSymbol::create(cg->trHeapMemory(),cg);
   generateLabelInstruction(TR::InstOpCode::label, node, spinLabel, cg);
   generateInstruction(TR::InstOpCode::PAUSE, node, cg);
   generateInstruction(TR::InstOpCode::PAUSE, node, cg);
   generateInstruction(TR::InstOpCode::PAUSE, node, cg);
   generateInstruction(TR::InstOpCode::PAUSE, node, cg);
   generateInstruction(TR::InstOpCode::PAUSE, node, cg);
   generateRegInstruction(TR::InstOpCode::DEC4Reg, node, counterReg, cg);
   TR::RegisterDependencyConditions *loopConditions = generateRegisterDependencyConditions((uint8_t)0, 1, cg);
   loopConditions->addPostCondition(counterReg, TR::RealRegister::NoReg, cg);
   loopConditions->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::JNE4, node, spinLabel, loopConditions, cg);
   cg->stopUsingRegister(counterReg);

   if(transientConditions)
      generateLabelInstruction(TR::InstOpCode::JMP4, node, transientFailureLabel, transientConditions, cg);
   else
      generateLabelInstruction(TR::InstOpCode::JMP4, node, transientFailureLabel, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, endLabelConditions, cg);
   cg->decReferenceCount(objNode);
   cg->decReferenceCount(persistentFailureNode);
   cg->decReferenceCount(transientFailureNode);
   return NULL;
   }
11734
11735
TR::Register *
J9::X86::TreeEvaluator::tfinishEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Commit the currently running hardware transaction by emitting XEND.
   // Produces no result register.
   generateInstruction(TR::InstOpCode::XEND, node, cg);
   return NULL;
   }
11741
11742
TR::Register *
J9::X86::TreeEvaluator::tabortEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Explicitly abort the enclosing hardware transaction, passing abort
   // code 0x04 in the XABORT immediate.  Produces no result register.
   generateImmInstruction(TR::InstOpCode::XABORT, node, 0x04, cg);
   return NULL;
   }
11748
11749
TR::Register *
J9::X86::TreeEvaluator::directCallEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate a direct call node.  Recognized methods and internal natives
   // are intercepted and inlined here; anything else is delegated to the
   // parent directCallEvaluator.
   // (Fix: removed unused local `comp` -- cg->comp() was fetched but never
   // referenced anywhere in this function.)
   static bool useJapaneseCompression = (feGetEnv("TR_JapaneseComp") != NULL);
   TR::SymbolReference *symRef = node->getSymbolReference();

   bool callInlined = false;
   TR::Register *returnRegister = NULL;
   TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();

#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
   // Crypto acceleration gets first crack at the call.
   if (cg->inlineCryptoMethod(node, returnRegister))
      {
      return returnRegister;
      }
#endif

   // Some JIT helpers are implemented as evaluators rather than real calls.
   if (symbol->isHelper())
      {
      switch (symRef->getReferenceNumber())
         {
         case TR_checkAssignable:
            return TR::TreeEvaluator::checkcastinstanceofEvaluator(node, cg);
         default:
            break;
         }
      }

   switch (symbol->getMandatoryRecognizedMethod())
      {
      // Intrinsic string searches; fall back to a real call if unsupported.
      case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfLatin1:
         if (!cg->getSupportsInlineStringIndexOf())
            break;
         else
            return inlineIntrinsicIndexOf(node, cg, true);
      case TR::com_ibm_jit_JITHelpers_intrinsicIndexOfUTF16:
         if (!cg->getSupportsInlineStringIndexOf())
            break;
         else
            return inlineIntrinsicIndexOf(node, cg, false);
      case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big:
      case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Little:
         return TR::TreeEvaluator::encodeUTF16Evaluator(node, cg);

      case TR::java_lang_String_hashCodeImplDecompressed:
         returnRegister = inlineStringHashCode(node, false, cg);
         callInlined = (returnRegister != NULL);
         break;
      case TR::java_lang_String_hashCodeImplCompressed:
         returnRegister = inlineStringHashCode(node, true, cg);
         callInlined = (returnRegister != NULL);
         break;
      default:
         break;
      }

   if (cg->getSupportsInlineStringCaseConversion())
      {
      switch (symbol->getRecognizedMethod())
         {
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicUTF16:
            return TR::TreeEvaluator::toUpperIntrinsicUTF16Evaluator(node, cg);
         case TR::com_ibm_jit_JITHelpers_toUpperIntrinsicLatin1:
            return TR::TreeEvaluator::toUpperIntrinsicLatin1Evaluator(node, cg);
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicUTF16:
            return TR::TreeEvaluator::toLowerIntrinsicUTF16Evaluator(node, cg);
         case TR::com_ibm_jit_JITHelpers_toLowerIntrinsicLatin1:
            return TR::TreeEvaluator::toLowerIntrinsicLatin1Evaluator(node, cg);
         default:
            break;
         }
      }

   switch (symbol->getRecognizedMethod())
      {
      case TR::java_nio_Bits_keepAlive:
      case TR::java_lang_ref_Reference_reachabilityFence:
         {
         TR_ASSERT(node->getNumChildren() == 1, "keepAlive is assumed to have just one argument");

         // The only purpose of keepAlive is to prevent an otherwise
         // unreachable object from being garbage collected, because we don't
         // want its finalizer to be called too early.  There's no need to
         // generate a full-blown call site just for this purpose.

         TR::Register *valueToKeepAlive = cg->evaluate(node->getFirstChild());

         // In theory, a value could be kept alive on the stack, rather than in
         // a register.  It is unfortunate that the following deps will force
         // the value into a register for no reason.  However, in many common
         // cases, this label will have no effect on the generated code, and
         // will only affect GC maps.
         //
         TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)1, (uint8_t)1, cg);
         deps->addPreCondition (valueToKeepAlive, TR::RealRegister::NoReg, cg);
         deps->addPostCondition (valueToKeepAlive, TR::RealRegister::NoReg, cg);
         new (cg->trHeapMemory()) TR::X86LabelInstruction(TR::InstOpCode::label, node, generateLabelSymbol(cg), deps, cg);
         cg->decReferenceCount(node->getFirstChild());

         return NULL; // keepAlive has no return value
         }

      case TR::java_math_BigDecimal_noLLOverflowAdd:
      case TR::java_math_BigDecimal_noLLOverflowMul:
         if (cg->getSupportsBDLLHardwareOverflowCheck())
            {
            // Eat this call as its only here to anchor where a long lookaside overflow check
            // needs to be done.  There should be a TR::icmpeq node following
            // this one where the real overflow check will be inserted.
            //
            cg->recursivelyDecReferenceCount(node->getFirstChild());
            cg->recursivelyDecReferenceCount(node->getSecondChild());
            cg->evaluate(node->getChild(2));
            cg->decReferenceCount(node->getChild(2));
            returnRegister = cg->allocateRegister();
            node->setRegister(returnRegister);
            return returnRegister;
            }

         break;
      case TR::java_lang_StringLatin1_inflate:
         if (cg->getSupportsInlineStringLatin1Inflate())
            {
            return TR::TreeEvaluator::inlineStringLatin1Inflate(node, cg);
            }
         break;
      case TR::java_lang_Math_sqrt:
      case TR::java_lang_StrictMath_sqrt:
      case TR::java_lang_System_nanoTime:
      case TR::java_util_concurrent_atomic_Fences_orderAccesses:
      case TR::java_util_concurrent_atomic_Fences_orderReads:
      case TR::java_util_concurrent_atomic_Fences_orderWrites:
      case TR::java_util_concurrent_atomic_Fences_reachabilityFence:
      case TR::sun_nio_ch_NativeThread_current:
      case TR::sun_misc_Unsafe_copyMemory:
         // Try the VM inline path first; fall back to a normal call.
         if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))
            {
            returnRegister = node->getRegister();
            }
         else
            {
            returnRegister = TR::TreeEvaluator::performCall(node, false, true, cg);
            }

         callInlined = true;
         break;

      case TR::java_lang_String_compress:
         return TR::TreeEvaluator::compressStringEvaluator(node, cg, useJapaneseCompression);

      case TR::java_lang_String_compressNoCheck:
         return TR::TreeEvaluator::compressStringNoCheckEvaluator(node, cg, useJapaneseCompression);

      case TR::java_lang_String_andOR:
         return TR::TreeEvaluator::andORStringEvaluator(node, cg);

      default:
         break;
      }

   // If the method to be called is marked as an inline method, see if it can
   // actually be generated inline.
   //
   if (!callInlined && (symbol->isVMInternalNative() || symbol->isJITInternalNative()))
      {
      if (TR::TreeEvaluator::VMinlineCallEvaluator(node, false, cg))
         return node->getRegister();
      else
         return TR::TreeEvaluator::performCall(node, false, true, cg);
      }
   else if (callInlined)
      {
      return returnRegister;
      }

   // Call was not inlined.  Delegate to the parent directCallEvaluator.
   //
   return J9::TreeEvaluator::directCallEvaluator(node, cg);
   }
11930
11931
TR::Register *
J9::X86::TreeEvaluator::inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Inline StringLatin1.inflate(src, srcOff, dst, dstOff, len): widen `len`
   // Latin-1 bytes from src[srcOff..] into UTF-16 chars at dst[dstOff..].
   // Main loop processes 16 bytes per iteration with SSE unpack-with-zero;
   // then up to 8 residual bytes are handled with one MOVQ/PUNPCKLBW; the
   // final < 8 bytes are copied byte-by-byte via a computed jump into a
   // table of fixed-size copy instruction pairs.
   TR_ASSERT_FATAL(cg->comp()->target().is64Bit(), "StringLatin1.inflate only supported on 64-bit targets");
   TR_ASSERT_FATAL(cg->getSupportsInlineStringLatin1Inflate(), "Inlining of StringLatin1.inflate not supported");
   TR_ASSERT_FATAL(!TR::Compiler->om.canGenerateArraylets(), "StringLatin1.inflate intrinsic is not supported with arraylets");
   TR_ASSERT_FATAL_WITH_NODE(node, node->getNumChildren() == 5, "Wrong number of children in inlineStringLatin1Inflate");

   intptr_t headerOffsetConst = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
   uint8_t vectorLengthConst = 16;   // bytes processed per vector iteration (one XMM register)

   // Children: src array, src offset, dst array, dst offset, length.
   // Offsets and length are clobber-evaluated since they are mutated below.
   TR::Register *srcBufferReg = cg->evaluate(node->getChild(0));
   TR::Register *srcOffsetReg = cg->gprClobberEvaluate(node->getChild(1), TR::InstOpCode::MOV4RegReg);
   TR::Register *destBufferReg = cg->evaluate(node->getChild(2));
   TR::Register *destOffsetReg = cg->gprClobberEvaluate(node->getChild(3), TR::InstOpCode::MOV4RegReg);
   TR::Register *lengthReg = cg->gprClobberEvaluate(node->getChild(4), TR::InstOpCode::MOV4RegReg);

   TR::Register *xmmHighReg = cg->allocateRegister(TR_FPR);
   TR::Register *xmmLowReg = cg->allocateRegister(TR_FPR);
   TR::Register *zeroReg = cg->allocateRegister(TR_FPR);
   TR::Register *scratchReg = cg->allocateRegister(TR_GPR);

   int depCount = 9;
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)0, depCount, cg);
   deps->addPostCondition(xmmHighReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(xmmLowReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(zeroReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(lengthReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(srcBufferReg, TR::RealRegister::NoReg, cg);
   deps->addPostCondition(destBufferReg, TR::RealRegister::NoReg, cg);
   // scratch/srcOffset/destOffset are pinned to eax/ecx/edx so that the
   // encoded size of the residue copy instructions below is fixed.
   deps->addPostCondition(scratchReg, TR::RealRegister::eax, cg);
   deps->addPostCondition(srcOffsetReg, TR::RealRegister::ecx, cg);
   deps->addPostCondition(destOffsetReg, TR::RealRegister::edx, cg);
   deps->stopAddingConditions();

   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *copyResidueLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *afterCopy8Label = generateLabelSymbol(cg);

   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::Node *destOffsetNode = node->getChild(3);

   if (!destOffsetNode->isConstZeroValue())
      {
      // dest offset measured in characters, convert it to bytes
      generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, destOffsetReg, destOffsetReg, cg);
      }

   // Zero length: nothing to do.
   generateRegRegInstruction(TR::InstOpCode::TEST4RegReg, node, lengthReg, lengthReg, cg);
   generateLabelInstruction(TR::InstOpCode::JE4, node, doneLabel, cg);

   // Fewer than 8 bytes: skip straight to the byte-by-byte residue copy.
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lengthReg, 8, cg);
   generateLabelInstruction(TR::InstOpCode::JL4, node, afterCopy8Label, cg);

   // make sure the register is zero before interleaving
   generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, zeroReg, zeroReg, cg);

   TR::LabelSymbol *startLoop = generateLabelSymbol(cg);
   TR::LabelSymbol *endLoop = generateLabelSymbol(cg);

   // vectorized add in loop, 16 bytes per iteration
   // use srcOffsetReg for loop counter, add starting offset to lengthReg, subtract 16 (xmm register size)
   // to prevent reading/writing beyond the end of the array
   generateRegMemInstruction(TR::InstOpCode::LEA4RegMem, node, scratchReg, generateX86MemoryReference(lengthReg, srcOffsetReg, 0, -vectorLengthConst, cg), cg);

   generateLabelInstruction(TR::InstOpCode::label, node, startLoop, cg);
   generateRegRegInstruction(TR::InstOpCode::CMP4RegReg, node, srcOffsetReg, scratchReg, cg);
   generateLabelInstruction(TR::InstOpCode::JG4, node, endLoop, cg);

   // Load 16 source bytes, then interleave each half with zero bytes to
   // produce 16 UTF-16 chars (32 bytes) in two stores.
   generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmHighReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, headerOffsetConst, cg), cg);

   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmLowReg, xmmHighReg, cg);
   generateRegRegInstruction(TR::InstOpCode::PUNPCKHBWRegReg, node, xmmLowReg, zeroReg, cg);
   generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst + vectorLengthConst, cg), xmmLowReg, cg);

   generateRegRegInstruction(TR::InstOpCode::PUNPCKLBWRegReg, node, xmmHighReg, zeroReg, cg);
   generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst, cg), xmmHighReg, cg);

   // increase src offset by size of imm register
   // increase dest offset by double, to account for the byte->char inflation
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, srcOffsetReg, vectorLengthConst, cg);
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, destOffsetReg, 2 * vectorLengthConst, cg);

   // LOOP BACK
   generateLabelInstruction(TR::InstOpCode::JMP4, node, startLoop, cg);
   generateLabelInstruction(TR::InstOpCode::label, node, endLoop, cg);

   // AND length with 15 to compute residual remainder
   // then copy and interleave 8 bytes from src buffer with 0s into dest buffer if possible
   generateRegImmInstruction(TR::InstOpCode::AND4RegImm4, node, lengthReg, vectorLengthConst - 1, cg);

   generateRegImmInstruction(TR::InstOpCode::CMP4RegImm4, node, lengthReg, 8, cg);
   generateLabelInstruction(TR::InstOpCode::JL1, node, afterCopy8Label, cg);

   // 8-byte residue: one 64-bit load, unpack low half with zeros, one store.
   generateRegMemInstruction(TR::InstOpCode::MOVQRegMem, node, xmmLowReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, headerOffsetConst, cg), cg);
   generateRegRegInstruction(TR::InstOpCode::PUNPCKLBWRegReg, node, xmmLowReg, zeroReg, cg);
   generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, headerOffsetConst, cg), xmmLowReg, cg);
   generateRegImmInstruction(TR::InstOpCode::SUB4RegImm4, node, lengthReg, 8, cg);

   generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, srcOffsetReg, 8, cg);
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, destOffsetReg, 16, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, afterCopy8Label, cg);

   // handle residual (< 8 bytes left) & jump to copy instructions based on the number of bytes left
   // calculate how many bytes to skip based on length;

   const int copy_instruction_size = 5 // size of MOVZXReg2Mem1
                                    +4; // size of S2MemReg

   // since copy_instruction_size could change depending on which registers are allocated to scratchReg, srcOffsetReg and destOffsetReg
   // we reserve them to be eax, ecx, edx, respectively

   // Jump-table arithmetic: with k bytes left, skip the first (7-k) copy
   // pairs so exactly k pairs execute.
   generateRegRegImmInstruction(TR::InstOpCode::IMUL4RegRegImm4, node, lengthReg, lengthReg, -copy_instruction_size, cg);
   generateRegImmInstruction(TR::InstOpCode::ADD4RegImm4, node, lengthReg, copy_instruction_size * 7, cg);

   bool is64bit = cg->comp()->target().is64Bit();
   // calculate address to jump too
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, scratchReg, generateX86MemoryReference(copyResidueLabel, cg), cg);
   generateRegRegInstruction(TR::InstOpCode::ADDRegReg(is64bit), node, lengthReg, scratchReg, cg);

   // Turn the offsets into absolute element addresses for the residue copy.
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, srcOffsetReg, generateX86MemoryReference(srcBufferReg, srcOffsetReg, 0, 0, cg), cg);
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(is64bit), node, destOffsetReg, generateX86MemoryReference(destBufferReg, destOffsetReg, 0, 0, cg), cg);

   generateRegInstruction(TR::InstOpCode::JMPReg, node, lengthReg, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, copyResidueLabel, cg);

   // Seven fixed-size load/store pairs copying bytes 6..0 (highest first);
   // each pair widens one byte to one char via a zero-extending load.
   for (int i = 0; i < 7; i++)
      {
      generateRegMemInstruction(TR::InstOpCode::MOVZXReg2Mem1, node, scratchReg, generateX86MemoryReference(srcOffsetReg, headerOffsetConst + 6 - i, cg), cg);
      generateMemRegInstruction(TR::InstOpCode::S2MemReg, node, generateX86MemoryReference(destOffsetReg, headerOffsetConst + 2 * (6 - i), cg), scratchReg, cg);
      }

   generateLabelInstruction(TR::InstOpCode::label, node, doneLabel, deps, cg);
   doneLabel->setEndInternalControlFlow();

   cg->stopUsingRegister(srcOffsetReg);
   cg->stopUsingRegister(destOffsetReg);
   cg->stopUsingRegister(lengthReg);

   cg->stopUsingRegister(xmmHighReg);
   cg->stopUsingRegister(xmmLowReg);
   cg->stopUsingRegister(zeroReg);
   cg->stopUsingRegister(scratchReg);

   for (int i = 0; i < 5; i++)
      {
      cg->decReferenceCount(node->getChild(i));
      }

   // The Java method is void; no result register.
   return NULL;
   }
12087
12088
TR::Register *
J9::X86::TreeEvaluator::encodeUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Lower JITHelpers.transformedEncodeUTF16{Big,Little} to a runtime helper
   // call with a hand-built register linkage (fixed GPR/FPR assignments).
   //
   // tree looks like:
   // icall com.ibm.jit.JITHelpers.encodeUTF16{Big,Little}()
   // input ptr
   // output ptr
   // input length (in elements)
   // Number of elements translated is returned

   TR::MethodSymbol *symbol = node->getSymbol()->castToMethodSymbol();
   bool bigEndian = symbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big;

   // Set up register dependencies
   const int gprClobberCount = 2;
   const int maxFprClobberCount = 5;
   const int fprClobberCount = bigEndian ? 5 : 4; // xmm4 only needed for big-endian
   TR::Register *srcPtrReg, *dstPtrReg, *lengthReg, *resultReg;
   TR::Register *gprClobbers[gprClobberCount], *fprClobbers[maxFprClobberCount];
   // Copy-evaluate the arguments; the kill* flags record whether a fresh
   // copy register was made that must be marked dead afterwards.
   bool killSrc = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(0), srcPtrReg, cg);
   bool killDst = TR::TreeEvaluator::stopUsingCopyRegAddr(node->getChild(1), dstPtrReg, cg);
   bool killLen = TR::TreeEvaluator::stopUsingCopyRegInteger(node->getChild(2), lengthReg, cg);
   resultReg = cg->allocateRegister();
   for (int i = 0; i < gprClobberCount; i++)
      gprClobbers[i] = cg->allocateRegister();
   for (int i = 0; i < fprClobberCount; i++)
      fprClobbers[i] = cg->allocateRegister(TR_FPR);

   int depCount = 11;
   TR::RegisterDependencyConditions *deps =
      generateRegisterDependencyConditions((uint8_t)0, depCount, cg);

   // Helper linkage: args in esi/edi/edx, result in eax.
   deps->addPostCondition(srcPtrReg, TR::RealRegister::esi, cg);
   deps->addPostCondition(dstPtrReg, TR::RealRegister::edi, cg);
   deps->addPostCondition(lengthReg, TR::RealRegister::edx, cg);
   deps->addPostCondition(resultReg, TR::RealRegister::eax, cg);

   // Registers the helper clobbers.
   deps->addPostCondition(gprClobbers[0], TR::RealRegister::ecx, cg);
   deps->addPostCondition(gprClobbers[1], TR::RealRegister::ebx, cg);

   deps->addPostCondition(fprClobbers[0], TR::RealRegister::xmm0, cg);
   deps->addPostCondition(fprClobbers[1], TR::RealRegister::xmm1, cg);
   deps->addPostCondition(fprClobbers[2], TR::RealRegister::xmm2, cg);
   deps->addPostCondition(fprClobbers[3], TR::RealRegister::xmm3, cg);
   if (bigEndian)
      deps->addPostCondition(fprClobbers[4], TR::RealRegister::xmm4, cg);

   deps->stopAddingConditions();

   // Generate helper call
   TR_RuntimeHelper helper;
   if (cg->comp()->target().is64Bit())
      helper = bigEndian ? TR_AMD64encodeUTF16Big : TR_AMD64encodeUTF16Little;
   else
      helper = bigEndian ? TR_IA32encodeUTF16Big : TR_IA32encodeUTF16Little;

   generateHelperCallInstruction(node, helper, deps, cg);

   // Free up registers
   for (int i = 0; i < gprClobberCount; i++)
      cg->stopUsingRegister(gprClobbers[i]);
   for (int i = 0; i < fprClobberCount; i++)
      cg->stopUsingRegister(fprClobbers[i]);

   for (uint16_t i = 0; i < node->getNumChildren(); i++)
      cg->decReferenceCount(node->getChild(i));

   // Kill any argument copy registers that were created above.
   TR_LiveRegisters *liveRegs = cg->getLiveRegisters(TR_GPR);
   if (killSrc)
      liveRegs->registerIsDead(srcPtrReg);
   if (killDst)
      liveRegs->registerIsDead(dstPtrReg);
   if (killLen)
      liveRegs->registerIsDead(lengthReg);

   node->setRegister(resultReg);
   return resultReg;
   }
12166
12167
12168
/**
 * \brief Evaluates a compressString intrinsic node by delegating to a hand-written runtime helper.
 *
 * Children: (0) source array object, (1) destination array object, (2) start index, (3) length.
 * The helper uses a private linkage, so each argument is pinned to the fixed real register
 * the helper expects via post-conditions on the dependency set.
 *
 * \param node            the compressString call node
 * \param cg              the code generator
 * \param japaneseMethod  selects the "J" (Japanese-encoding) variant of the helper
 * \return virtual register holding the helper's result (bound to edx)
 */
TR::Register *
J9::X86::TreeEvaluator::compressStringEvaluator(
      TR::Node *node,
      TR::CodeGenerator *cg,
      bool japaneseMethod)
   {
   TR::Node *srcObjNode, *dstObjNode, *startNode, *lengthNode;
   TR::Register *srcObjReg, *dstObjReg, *lengthReg, *startReg;
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4;

   srcObjNode = node->getChild(0);
   dstObjNode = node->getChild(1);
   startNode = node->getChild(2);
   lengthNode = node->getChild(3);

   // Make clobberable copies of shared child registers; each flag records whether a
   // copy was made so the copy can be killed after the call.
   stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);
   stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegAddr(dstObjNode, dstObjReg, cg);
   stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);
   stopUsingCopyReg4 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);

   // Advance both object pointers past the contiguous array header so the helper
   // receives addresses of the first element.
   uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, dstObjReg, hdrSize, cg);

   // Now that we have all the registers, set up the dependencies.
   // Register assignments match the helper's hand-written linkage; ebx is a scratch
   // register the helper clobbers ("dummy").
   TR::RegisterDependencyConditions *dependencies =
      generateRegisterDependencyConditions((uint8_t)0, 6, cg);
   TR::Register *resultReg = cg->allocateRegister();
   TR::Register *dummy = cg->allocateRegister();
   dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);
   dependencies->addPostCondition(dstObjReg, TR::RealRegister::edi, cg);
   dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);
   dependencies->addPostCondition(startReg, TR::RealRegister::eax, cg);
   dependencies->addPostCondition(resultReg, TR::RealRegister::edx, cg);
   dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);
   dependencies->stopAddingConditions();

   // Pick the 32- or 64-bit flavour of the helper, Japanese variant if requested.
   TR_RuntimeHelper helper;
   if (cg->comp()->target().is64Bit())
      helper = japaneseMethod ? TR_AMD64compressStringJ : TR_AMD64compressString;
   else
      helper = japaneseMethod ? TR_IA32compressStringJ : TR_IA32compressString;
   generateHelperCallInstruction(node, helper, dependencies, cg);
   cg->stopUsingRegister(dummy);

   for (uint16_t i = 0; i < node->getNumChildren(); i++)
      cg->decReferenceCount(node->getChild(i));

   // Kill any copy registers made above so the register assigner can reuse them.
   if (stopUsingCopyReg1)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);
   if (stopUsingCopyReg2)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(dstObjReg);
   if (stopUsingCopyReg3)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);
   if (stopUsingCopyReg4)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);
   node->setRegister(resultReg);
   return resultReg;
   }
12228
12229
/*
 * The CaseConversionManager stores information about a case conversion. It selects the lower-bound and
 * upper-bound constants depending on whether the conversion is toLowerCase or toUpperCase, and chooses a
 * byte or word element width depending on whether the string is compressed. stringCaseConversionHelper
 * queries the manager for this information when generating the actual instructions.
 */
12234
class J9::X86::TreeEvaluator::CaseConversionManager {
   public:
   /**
    * \brief Selects the 16-byte SIMD constants for the requested conversion.
    *
    * \param isCompressedString  true for Latin-1 (byte) strings, false for UTF-16 (word) strings
    * \param toLowerCase         true for toLowerCase, false for toUpperCase
    */
   CaseConversionManager(bool isCompressedString, bool toLowerCase):_isCompressedString(isCompressedString), _toLowerCase(toLowerCase)
      {
      if (isCompressedString)
         {
         // Byte-element tables: 16 x uint8_t == exactly one 16-byte vector constant.
         static uint8_t UPPERCASE_A_ASCII_MINUS1_bytes[] =
            {
            'A'-1, 'A'-1, 'A'-1, 'A'-1,
            'A'-1, 'A'-1, 'A'-1, 'A'-1,
            'A'-1, 'A'-1, 'A'-1, 'A'-1,
            'A'-1, 'A'-1, 'A'-1, 'A'-1
            };

         static uint8_t UPPERCASE_Z_ASCII_bytes[] =
            {
            'Z', 'Z', 'Z', 'Z',
            'Z', 'Z', 'Z', 'Z',
            'Z', 'Z', 'Z', 'Z',
            'Z', 'Z', 'Z', 'Z'
            };

         static uint8_t LOWERCASE_A_ASCII_MINUS1_bytes[] =
            {
            'a'-1, 'a'-1, 'a'-1, 'a'-1,
            'a'-1, 'a'-1, 'a'-1, 'a'-1,
            'a'-1, 'a'-1, 'a'-1, 'a'-1,
            'a'-1, 'a'-1, 'a'-1, 'a'-1
            };

         static uint8_t LOWERCASE_Z_ASCII_bytes[] =
            {
            'z', 'z', 'z', 'z',
            'z', 'z', 'z', 'z',
            'z', 'z', 'z', 'z',
            'z', 'z', 'z', 'z',
            };

         // 0x20 is the ASCII case bit: 'a' - 'A' == 0x20.
         static uint8_t CONVERSION_DIFF_bytes[] =
            {
            0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20,
            };

         // Fixed: this table was declared uint16_t, so the 16-byte constant picked up
         // by findOrCreate16ByteConstant held alternating 0x7f/0x00 bytes on little-endian,
         // making the PCMPGTB "> maximum ascii" check spuriously fail the vector loop for
         // any non-NUL byte in an odd lane. It must be uint8_t like the other byte tables.
         static uint8_t ASCII_UPPERBND_bytes[] =
            {
            0x7f, 0x7f, 0x7f, 0x7f,
            0x7f, 0x7f, 0x7f, 0x7f,
            0x7f, 0x7f, 0x7f, 0x7f,
            0x7f, 0x7f, 0x7f, 0x7f,
            };

         // toLowerCase converts [A..Z]; toUpperCase converts [a..z]. The lower bound is
         // stored minus one so a signed "greater than" compare implements ">=".
         if (toLowerCase)
            {
            _lowerBndMinus1 = UPPERCASE_A_ASCII_MINUS1_bytes;
            _upperBnd = UPPERCASE_Z_ASCII_bytes;
            }
         else
            {
            _lowerBndMinus1 = LOWERCASE_A_ASCII_MINUS1_bytes;
            _upperBnd = LOWERCASE_Z_ASCII_bytes;
            }
         _conversionDiff = CONVERSION_DIFF_bytes;
         _asciiMax = ASCII_UPPERBND_bytes;
         }
      else
         {
         // Word-element tables: 8 x uint16_t == one 16-byte vector constant.
         static uint16_t UPPERCASE_A_ASCII_MINUS1_words[] =
            {
            'A'-1, 'A'-1, 'A'-1, 'A'-1,
            'A'-1, 'A'-1, 'A'-1, 'A'-1
            };

         static uint16_t LOWERCASE_A_ASCII_MINUS1_words[] =
            {
            'a'-1, 'a'-1, 'a'-1, 'a'-1,
            'a'-1, 'a'-1, 'a'-1, 'a'-1
            };

         static uint16_t UPPERCASE_Z_ASCII_words[] =
            {
            'Z', 'Z', 'Z', 'Z',
            'Z', 'Z', 'Z', 'Z'
            };

         static uint16_t LOWERCASE_Z_ASCII_words[] =
            {
            'z', 'z', 'z', 'z',
            'z', 'z', 'z', 'z'
            };

         static uint16_t CONVERSION_DIFF_words[] =
            {
            0x20, 0x20, 0x20, 0x20,
            0x20, 0x20, 0x20, 0x20
            };
         static uint16_t ASCII_UPPERBND_words[] =
            {
            0x7f, 0x7f, 0x7f, 0x7f,
            0x7f, 0x7f, 0x7f, 0x7f
            };

         if (toLowerCase)
            {
            _lowerBndMinus1 = UPPERCASE_A_ASCII_MINUS1_words;
            _upperBnd = UPPERCASE_Z_ASCII_words;
            }
         else
            {
            _lowerBndMinus1 = LOWERCASE_A_ASCII_MINUS1_words;
            _upperBnd = LOWERCASE_Z_ASCII_words;
            }
         _conversionDiff = CONVERSION_DIFF_words;
         _asciiMax = ASCII_UPPERBND_words;
         }
      }

   inline bool isCompressedString(){return _isCompressedString;};
   inline bool toLowerCase(){return _toLowerCase;};
   // Each getter returns a pointer to a 16-byte vector constant (byte or word elements
   // depending on isCompressedString()).
   inline void * getLowerBndMinus1(){ return _lowerBndMinus1; };
   inline void * getUpperBnd(){ return _upperBnd; };
   inline void * getConversionDiff(){ return _conversionDiff; };
   inline void * getAsciiMax(){ return _asciiMax; };

   private:
   void * _lowerBndMinus1;   // lower bound of convertible range, minus one
   void * _upperBnd;         // upper bound of convertible range
   void * _asciiMax;         // maximum valid ASCII value (0x7f per element)
   void * _conversionDiff;   // case bit to add/subtract (0x20 per element)
   bool _isCompressedString;
   bool _toLowerCase;
   };
12368
12369
/**
 * \brief Evaluates a toUpperCase intrinsic on a compressed (Latin-1) string.
 */
TR::Register *
J9::X86::TreeEvaluator::toUpperIntrinsicLatin1Evaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Byte-element tables, converting the [a..z] range upward.
   CaseConversionManager conversionManager(true /* isCompressedString */, false /* toLowerCase */);
   return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, conversionManager);
   }
12375
12376
12377
/**
 * \brief Evaluates a toLowerCase intrinsic on a compressed (Latin-1) string.
 */
TR::Register *
J9::X86::TreeEvaluator::toLowerIntrinsicLatin1Evaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Byte-element tables, converting the [A..Z] range downward.
   CaseConversionManager conversionManager(true /* isCompressedString */, true /* toLowerCase */);
   return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, conversionManager);
   }
12383
12384
/**
 * \brief Evaluates a toUpperCase intrinsic on a decompressed (UTF-16) string.
 */
TR::Register *
J9::X86::TreeEvaluator::toUpperIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Word-element tables, converting the [a..z] range upward.
   CaseConversionManager conversionManager(false /* isCompressedString */, false /* toLowerCase */);
   return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, conversionManager);
   }
12390
12391
/**
 * \brief Evaluates a toLowerCase intrinsic on a decompressed (UTF-16) string.
 */
TR::Register *
J9::X86::TreeEvaluator::toLowerIntrinsicUTF16Evaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Word-element tables, converting the [A..Z] range downward.
   CaseConversionManager conversionManager(false /* isCompressedString */, true /* toLowerCase */);
   return TR::TreeEvaluator::stringCaseConversionHelper(node, cg, conversionManager);
   }
12397
12398
/**
 * \brief Allocates a virtual register of the requested kind and registers it with the
 *        dependency conditions as both a post- and a pre-condition on NoReg (i.e. the
 *        register assigner may pick any real register, but the value must be live
 *        across the dependency point).
 *
 * \param cg    the code generator
 * \param deps  dependency conditions to append to
 * \param rk    register kind to allocate (defaults to a GPR)
 * \return the newly allocated virtual register
 */
static TR::Register* allocateRegAndAddCondition(TR::CodeGenerator *cg, TR::RegisterDependencyConditions * deps, TR_RegisterKinds rk=TR_GPR)
   {
   TR::Register* virtReg = cg->allocateRegister(rk);
   deps->addPostCondition(virtReg, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(virtReg, TR::RealRegister::NoReg, cg);
   return virtReg;
   }
12405
12406
12407
/**
12408
* \brief This evaluator is used to perform string toUpper and toLower conversion.
12409
*
12410
* This JIT HW optimized conversion helper is designed to convert strings that contains only ascii characters.
12411
* If a string contains non ascii characters, HW optimized routine will return NULL and fall back to the software implementation, which is able to convert a broader range of characters.
12412
*
12413
* There are the following steps in the generated assembly code:
12414
* 1. preparation (load value into register, calculate length etc)
12415
* 2. vectorized case conversion loop
12416
* 3. handle residue with non vectorized case conversion loop
12417
* 4. handle invalid case
12418
*
12419
* \param node
12420
* \param cg
12421
* \param manager Contains info about the conversion: whether it's toUpper or toLower conversion, the valid range of characters, etc
12422
*
12423
* This version does not support discontiguous arrays
12424
*/
12425
TR::Register *
J9::X86::TreeEvaluator::stringCaseConversionHelper(TR::Node *node, TR::CodeGenerator *cg, CaseConversionManager &manager)
   {
   // Attach a debug comment to the most recently generated instruction (no-op without a debug object).
   #define iComment(str) if (debug) debug->addInstructionComment(cursor, (const_cast<char*>(str)));
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions((uint8_t)14, (uint8_t)14, cg);
   // Children: (1) source array, (2) destination array, (3) length in bytes.
   // Child 0 is not evaluated here but its reference count is decremented below.
   TR::Register *srcArray = cg->evaluate(node->getChild(1));
   deps->addPostCondition(srcArray, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(srcArray, TR::RealRegister::NoReg, cg);

   TR::Register *dstArray = cg->evaluate(node->getChild(2));
   deps->addPostCondition(dstArray, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(dstArray, TR::RealRegister::NoReg, cg);

   TR::Register *length = cg->intClobberEvaluate(node->getChild(3));
   deps->addPostCondition(length, TR::RealRegister::NoReg, cg);
   deps->addPreCondition(length, TR::RealRegister::NoReg, cg);

   TR::Register* counter = allocateRegAndAddCondition(cg, deps);
   TR::Register* residueStartLength = allocateRegAndAddCondition(cg, deps);
   TR::Register *singleChar = residueStartLength; // residueStartLength and singleChar do not overlap and can share the same register
   TR::Register *result = allocateRegAndAddCondition(cg, deps);

   TR::Register* xmmRegLowerBndMinus1 = allocateRegAndAddCondition(cg, deps, TR_FPR); // 'A-1' for toLowerCase, 'a-1' for toUpperCase
   TR::Register* xmmRegUpperBnd = allocateRegAndAddCondition(cg, deps, TR_FPR); // 'Z' for toLowerCase, 'z' for toUpperCase
   TR::Register* xmmRegConversionDiff = allocateRegAndAddCondition(cg, deps, TR_FPR); // 0x20 per element ('a' - 'A')
   TR::Register* xmmRegMinus1 = allocateRegAndAddCondition(cg, deps, TR_FPR); // all bits set; used for ">= 0" test and bit inversion
   TR::Register* xmmRegAsciiUpperBnd = allocateRegAndAddCondition(cg, deps, TR_FPR); // 0x7f per element
   TR::Register* xmmRegArrayContentCopy0 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegArrayContentCopy1 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR::Register* xmmRegArrayContentCopy2 = allocateRegAndAddCondition(cg, deps, TR_FPR);
   TR_Debug *debug = cg->getDebug();
   TR::Instruction * cursor = NULL;

   uint32_t strideSize = 16;   // one 16-byte XMM register per vector iteration
   uintptr_t headerSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();

   static uint16_t MINUS1[] =
      {
      0xffff, 0xffff, 0xffff, 0xffff,
      0xffff, 0xffff, 0xffff, 0xffff,
      };

   TR::LabelSymbol *failLabel = generateLabelSymbol(cg);
   // Under decompressed string case for 32bits platforms, bail out if string is larger than INT_MAX32/2 since # character to # byte
   // conversion will cause overflow.
   if (!cg->comp()->target().is64Bit() && !manager.isCompressedString())
      {
      generateRegImmInstruction(TR::InstOpCode::CMPRegImm4(), node, length, (uint16_t) 0x8000, cg);
      generateLabelInstruction(TR::InstOpCode::JGE4, node, failLabel, cg);
      }

   // 1. preparation (load value into registers, calculate length etc)
   auto lowerBndMinus1 = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getLowerBndMinus1()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegLowerBndMinus1, lowerBndMinus1, cg); iComment("lower bound ascii value minus one");

   auto upperBnd = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getUpperBnd()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegUpperBnd, upperBnd, cg); iComment("upper bound ascii value");

   auto conversionDiff = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getConversionDiff()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegConversionDiff, conversionDiff, cg); iComment("case conversion diff value");

   auto minus1 = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, MINUS1), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegMinus1, minus1, cg); iComment("-1");

   auto asciiUpperBnd = generateX86MemoryReference(cg->findOrCreate16ByteConstant(node, manager.getAsciiMax()), cg);
   cursor = generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegAsciiUpperBnd, asciiUpperBnd, cg); iComment("maximum ascii value ");

   // Optimistically assume success; overwritten with 0 on the fail path.
   generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, node, result, 1, cg);

   // initialize the loop counter
   cursor = generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, counter, counter, cg); iComment("initialize loop counter");

   //calculate the residueStartLength. Later instructions compare the counter with this length and decide when to jump to the residue handling sequence
   generateRegRegInstruction(TR::InstOpCode::MOVRegReg(), node, residueStartLength, length, cg);
   generateRegImmInstruction(TR::InstOpCode::SUBRegImms(), node, residueStartLength, strideSize-1, cg);

   // 2. vectorized case conversion loop
   TR::LabelSymbol *startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *endLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *residueStartLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *storeToArrayLabel = generateLabelSymbol(cg);

   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();
   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::LabelSymbol *caseConversionMainLoopLabel = generateLabelSymbol(cg);
   generateLabelInstruction(TR::InstOpCode::label, node, caseConversionMainLoopLabel, cg);
   // Fewer than strideSize elements left: fall through to the scalar residue loop.
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, counter, residueStartLength, cg);
   generateLabelInstruction(TR::InstOpCode::JGE4, node, residueStartLabel, cg);

   auto srcArrayMemRef = generateX86MemoryReference(srcArray, counter, 0, headerSize, cg);
   generateRegMemInstruction(TR::InstOpCode::MOVDQURegMem, node, xmmRegArrayContentCopy0, srcArrayMemRef, cg);

   //detect invalid characters
   // A character is valid ASCII iff (signed) element > -1 AND NOT (element > 0x7f).
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy0, cg);
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy1, xmmRegMinus1, cg); iComment(" > -1");
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy2, xmmRegAsciiUpperBnd, cg); iComment(" > maximum ascii value");
   cursor = generateRegRegInstruction(TR::InstOpCode::PANDNRegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg); iComment(" >-1 && !(> maximum ascii value) valid when all bits are set");
   cursor = generateRegRegInstruction(TR::InstOpCode::PXORRegReg, node, xmmRegArrayContentCopy2, xmmRegMinus1, cg); iComment("reverse all bits");
   // PTEST sets ZF iff the register is all zero, i.e. every element was valid.
   generateRegRegInstruction(TR::InstOpCode::PTESTRegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy2, cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, failLabel, cg); iComment("jump out if invalid chars are detected");

   //calculate case conversion with vector registers
   // Build a mask of elements inside the convertible range, AND it with the 0x20 case
   // bit, then add/subtract it so out-of-range elements pass through unchanged.
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg);
   generateRegRegInstruction(TR::InstOpCode::MOVDQURegReg, node, xmmRegArrayContentCopy2, xmmRegArrayContentCopy0, cg);
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy0, xmmRegLowerBndMinus1, cg); iComment(manager.toLowerCase() ? " > 'A-1'" : "> 'a-1'");
   cursor = generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PCMPGTBRegReg: TR::InstOpCode::PCMPGTWRegReg, node,
                                      xmmRegArrayContentCopy1, xmmRegUpperBnd, cg); iComment(manager.toLowerCase()? " > 'Z'" : " > 'z'");
   cursor = generateRegRegInstruction(TR::InstOpCode::PANDNRegReg, node, xmmRegArrayContentCopy1, xmmRegArrayContentCopy0, cg); iComment(const_cast<char*> (manager.toLowerCase()? " >='A' && !( >'Z')": " >='a' && !( >'z')"));
   generateRegRegInstruction(TR::InstOpCode::PANDRegReg, node, xmmRegArrayContentCopy1, xmmRegConversionDiff, cg);

   // toLowerCase adds 0x20 ('A' -> 'a'); toUpperCase subtracts it.
   if (manager.toLowerCase())
      generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PADDBRegReg: TR::InstOpCode::PADDWRegReg, node,
                                xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg);
   else
      generateRegRegInstruction(manager.isCompressedString()? TR::InstOpCode::PSUBBRegReg: TR::InstOpCode::PSUBWRegReg, node,
                                xmmRegArrayContentCopy2, xmmRegArrayContentCopy1, cg);

   auto dstArrayMemRef = generateX86MemoryReference(dstArray, counter, 0, headerSize, cg);
   generateMemRegInstruction(TR::InstOpCode::MOVDQUMemReg, node, dstArrayMemRef, xmmRegArrayContentCopy2, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, counter, strideSize, cg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, caseConversionMainLoopLabel, cg);

   // 3. handle residue with non vectorized case conversion loop
   generateLabelInstruction(TR::InstOpCode::label, node, residueStartLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::CMPRegReg(), node, counter, length, cg);
   generateLabelInstruction(TR::InstOpCode::JGE4, node, endLabel, cg);
   srcArrayMemRef = generateX86MemoryReference(srcArray, counter, 0, headerSize, cg);
   generateRegMemInstruction( manager.isCompressedString()? TR::InstOpCode::MOVZXReg4Mem1: TR::InstOpCode::MOVZXReg4Mem2, node, singleChar, srcArrayMemRef, cg);

   // use unsigned compare to detect invalid range
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, 0x7F, cg);
   generateLabelInstruction(TR::InstOpCode::JA4, node, failLabel, cg);

   // Characters below the convertible range are stored unchanged.
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, manager.toLowerCase()? 'A': 'a', cg);
   generateLabelInstruction(TR::InstOpCode::JB4, node, storeToArrayLabel, cg);

   // Characters above the convertible range are stored unchanged.
   generateRegImmInstruction(TR::InstOpCode::CMP4RegImms, node, singleChar, manager.toLowerCase()? 'Z': 'z', cg);
   generateLabelInstruction(TR::InstOpCode::JA4, node, storeToArrayLabel, cg);

   // LEA adds 0x20 without disturbing the flags; SUB is fine on the upper-case path.
   if (manager.toLowerCase())
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(),
                                node,
                                singleChar,
                                generateX86MemoryReference(singleChar, 0x20, cg),
                                cg);

   else generateRegImmInstruction(TR::InstOpCode::SUB4RegImms, node, singleChar, 0x20, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, storeToArrayLabel, cg);

   dstArrayMemRef = generateX86MemoryReference(dstArray, counter, 0, headerSize, cg);
   generateMemRegInstruction(manager.isCompressedString()? TR::InstOpCode::S1MemReg: TR::InstOpCode::S2MemReg, node, dstArrayMemRef, singleChar, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, counter, manager.isCompressedString()? 1: 2, cg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, residueStartLabel, cg);

   // 4. handle invalid case
   // result = 0 signals the caller to fall back to the software implementation.
   generateLabelInstruction(TR::InstOpCode::label, node, failLabel, cg);
   generateRegRegInstruction(TR::InstOpCode::XORRegReg(), node, result, result, cg);

   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
   node->setRegister(result);

   cg->stopUsingRegister(length);
   cg->stopUsingRegister(counter);
   cg->stopUsingRegister(residueStartLength);

   cg->stopUsingRegister(xmmRegLowerBndMinus1);
   cg->stopUsingRegister(xmmRegUpperBnd);
   cg->stopUsingRegister(xmmRegConversionDiff);
   cg->stopUsingRegister(xmmRegMinus1);
   cg->stopUsingRegister(xmmRegAsciiUpperBnd);
   cg->stopUsingRegister(xmmRegArrayContentCopy0);
   cg->stopUsingRegister(xmmRegArrayContentCopy1);
   cg->stopUsingRegister(xmmRegArrayContentCopy2);


   cg->decReferenceCount(node->getChild(0));
   cg->decReferenceCount(node->getChild(1));
   cg->decReferenceCount(node->getChild(2));
   cg->decReferenceCount(node->getChild(3));
   return result;
   }
12613
12614
/**
 * \brief Evaluates a compressStringNoCheck intrinsic node by delegating to a runtime helper.
 *
 * Identical in structure to compressStringEvaluator, but the "NoCheck" helper variant
 * produces no result value, so no result register is bound and NULL is returned.
 * Children: (0) source array object, (1) destination array object, (2) start index, (3) length.
 *
 * \param node            the compressStringNoCheck call node
 * \param cg              the code generator
 * \param japaneseMethod  selects the "J" (Japanese-encoding) variant of the helper
 * \return NULL (the helper produces no value)
 */
TR::Register *
J9::X86::TreeEvaluator::compressStringNoCheckEvaluator(
      TR::Node *node,
      TR::CodeGenerator *cg,
      bool japaneseMethod)
   {
   TR::Node *srcObjNode, *dstObjNode, *startNode, *lengthNode;
   TR::Register *srcObjReg, *dstObjReg, *lengthReg, *startReg;
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4;

   srcObjNode = node->getChild(0);
   dstObjNode = node->getChild(1);
   startNode = node->getChild(2);
   lengthNode = node->getChild(3);

   // Make clobberable copies of shared child registers; flags record whether copies were made.
   stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);
   stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegAddr(dstObjNode, dstObjReg, cg);
   stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);
   stopUsingCopyReg4 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);

   // Advance both object pointers past the contiguous array header.
   uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, cg);
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, dstObjReg, hdrSize, cg);

   // Now that we have all the registers, set up the dependencies.
   // Register assignments match the helper's hand-written linkage; ebx is a scratch
   // register the helper clobbers ("dummy").
   TR::RegisterDependencyConditions *dependencies =
      generateRegisterDependencyConditions((uint8_t)0, 5, cg);
   dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);
   dependencies->addPostCondition(dstObjReg, TR::RealRegister::edi, cg);
   dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);
   dependencies->addPostCondition(startReg, TR::RealRegister::eax, cg);
   TR::Register *dummy = cg->allocateRegister();
   dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);
   dependencies->stopAddingConditions();

   // Pick the 32- or 64-bit flavour of the helper, Japanese variant if requested.
   TR_RuntimeHelper helper;
   if (cg->comp()->target().is64Bit())
      helper = japaneseMethod ? TR_AMD64compressStringNoCheckJ : TR_AMD64compressStringNoCheck;
   else
      helper = japaneseMethod ? TR_IA32compressStringNoCheckJ : TR_IA32compressStringNoCheck;

   generateHelperCallInstruction(node, helper, dependencies, cg);
   cg->stopUsingRegister(dummy);

   for (uint16_t i = 0; i < node->getNumChildren(); i++)
      cg->decReferenceCount(node->getChild(i));

   // Kill any copy registers made above so the register assigner can reuse them.
   if (stopUsingCopyReg1)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);
   if (stopUsingCopyReg2)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(dstObjReg);
   if (stopUsingCopyReg3)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);
   if (stopUsingCopyReg4)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);
   return NULL;
   }
12672
12673
12674
/**
 * \brief Evaluates an andORString intrinsic node by delegating to a runtime helper.
 *
 * Children: (0) source array object, (1) start index, (2) length. Arguments are pinned
 * to the fixed real registers expected by the helper's hand-written linkage.
 *
 * \param node the andORString call node
 * \param cg   the code generator
 * \return virtual register holding the helper's result (bound to edx)
 */
TR::Register *
J9::X86::TreeEvaluator::andORStringEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR::Node *srcObjNode, *startNode, *lengthNode;
   TR::Register *srcObjReg, *lengthReg, *startReg;
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3;

   srcObjNode = node->getChild(0);
   startNode = node->getChild(1);
   lengthNode = node->getChild(2);

   // Make clobberable copies of shared child registers; flags record whether copies were made.
   stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyRegAddr(srcObjNode, srcObjReg, cg);
   stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyRegInteger(startNode, startReg, cg);
   stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyRegInteger(lengthNode, lengthReg, cg);

   // Advance the object pointer past the contiguous array header.
   uintptr_t hdrSize = TR::Compiler->om.contiguousArrayHeaderSizeInBytes();
   generateRegImmInstruction(TR::InstOpCode::ADDRegImms(), node, srcObjReg, hdrSize, cg);

   // Now that we have all the registers, set up the dependencies.
   // Register assignments match the helper's hand-written linkage; ebx is a scratch
   // register the helper clobbers ("dummy").
   TR::RegisterDependencyConditions *dependencies =
      generateRegisterDependencyConditions((uint8_t)0, 5, cg);
   TR::Register *resultReg = cg->allocateRegister();
   dependencies->addPostCondition(srcObjReg, TR::RealRegister::esi, cg);
   dependencies->addPostCondition(lengthReg, TR::RealRegister::ecx, cg);
   dependencies->addPostCondition(startReg, TR::RealRegister::eax, cg);
   dependencies->addPostCondition(resultReg, TR::RealRegister::edx, cg);
   TR::Register *dummy = cg->allocateRegister();
   dependencies->addPostCondition(dummy, TR::RealRegister::ebx, cg);
   dependencies->stopAddingConditions();

   TR_RuntimeHelper helper =
      cg->comp()->target().is64Bit() ? TR_AMD64andORString : TR_IA32andORString;
   generateHelperCallInstruction(node, helper, dependencies, cg);
   cg->stopUsingRegister(dummy);

   for (uint16_t i = 0; i < node->getNumChildren(); i++)
      cg->decReferenceCount(node->getChild(i));

   // Kill any copy registers made above so the register assigner can reuse them.
   if (stopUsingCopyReg1)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(srcObjReg);
   if (stopUsingCopyReg2)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(startReg);
   if (stopUsingCopyReg3)
      cg->getLiveRegisters(TR_GPR)->registerIsDead(lengthReg);
   node->setRegister(resultReg);
   return resultReg;
   }
12721
12722
/*
 * Generates instructions that fill in, at runtime, the J9JITWatchedStaticFieldData.fieldAddress and
 * J9JITWatchedStaticFieldData.fieldClass for static fields, and the J9JITWatchedInstanceFieldData.offset
 * for instance fields. Used for fieldwatch support.
 *
 * Generated sequence:
 *
 * cmp J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset, -1
 * je unresolvedLabel
 * restartLabel:
 * ....
 *
 * unresolvedLabel:
 * mov J9JITWatchedStaticFieldData.fieldClass, J9Class (static field only)
 * call helper
 * mov J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset, resultReg
 * jmp restartLabel
 */
12740
void
12741
J9::X86::TreeEvaluator::generateFillInDataBlockSequenceForUnresolvedField (TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *dataSnippetRegister)
12742
{
12743
TR::Compilation *comp = cg->comp();
12744
TR::SymbolReference *symRef = node->getSymbolReference();
12745
bool is64Bit = comp->target().is64Bit();
12746
bool isStatic = symRef->getSymbol()->getKind() == TR::Symbol::IsStatic;
12747
TR_RuntimeHelper helperIndex = isWrite? (isStatic ? TR_jitResolveStaticFieldSetterDirect: TR_jitResolveFieldSetterDirect):
12748
(isStatic ? TR_jitResolveStaticFieldDirect: TR_jitResolveFieldDirect);
12749
TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
12750
auto linkageProperties = linkage->getProperties();
12751
intptr_t offsetInDataBlock = isStatic ? offsetof(J9JITWatchedStaticFieldData, fieldAddress): offsetof(J9JITWatchedInstanceFieldData, offset);
12752
12753
TR::LabelSymbol* startLabel = generateLabelSymbol(cg);
12754
TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
12755
TR::LabelSymbol* unresolveLabel = generateLabelSymbol(cg);
12756
startLabel->setStartInternalControlFlow();
12757
endLabel->setEndInternalControlFlow();
12758
12759
// 64bit needs 2 argument registers (return register and first argument are the same),
12760
// 32bit only one return register
12761
// both 64/32bits need dataBlockReg
12762
uint8_t numOfConditions = is64Bit ? 3: 2;
12763
if (isStatic) // needs fieldClassReg
12764
{
12765
numOfConditions++;
12766
}
12767
TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(numOfConditions, numOfConditions, cg);
12768
TR::Register *resultReg = NULL;
12769
TR::Register *dataBlockReg = cg->allocateRegister();
12770
deps->addPreCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
12771
deps->addPostCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
12772
12773
generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);
12774
generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, dataBlockReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);
12775
generateMemImmInstruction(TR::InstOpCode::CMPMemImms(), node, generateX86MemoryReference(dataBlockReg, offsetInDataBlock, cg), -1, cg);
12776
generateLabelInstruction(TR::InstOpCode::JE4, node, unresolveLabel, cg);
12777
12778
{
12779
TR_OutlinedInstructionsGenerator og(unresolveLabel, node ,cg);
12780
if (isStatic)
12781
{
12782
// Fills in J9JITWatchedStaticFieldData.fieldClass
12783
TR::Register *fieldClassReg;
12784
if (isWrite)
12785
{
12786
fieldClassReg = cg->allocateRegister();
12787
generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(sideEffectRegister, comp->fej9()->getOffsetOfClassFromJavaLangClassField(), cg), cg);
12788
}
12789
else
12790
{
12791
fieldClassReg = sideEffectRegister;
12792
}
12793
generateMemRegInstruction(TR::InstOpCode::SMemReg(is64Bit), node, generateX86MemoryReference(dataBlockReg, (intptr_t)(offsetof(J9JITWatchedStaticFieldData, fieldClass)), cg), fieldClassReg, cg);
12794
deps->addPreCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
12795
deps->addPostCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
12796
if (isWrite)
12797
{
12798
cg->stopUsingRegister(fieldClassReg);
12799
}
12800
}
12801
12802
TR::ResolvedMethodSymbol *methodSymbol = node->getByteCodeInfo().getCallerIndex() == -1 ? comp->getMethodSymbol(): comp->getInlinedResolvedMethodSymbol(node->getByteCodeInfo().getCallerIndex());
12803
if (is64Bit)
12804
{
12805
TR::Register *cpAddressReg = cg->allocateRegister();
12806
TR::Register *cpIndexReg = cg->allocateRegister();
12807
generateRegImm64SymInstruction(TR::InstOpCode::MOV8RegImm64, node, cpAddressReg, (uintptr_t) methodSymbol->getResolvedMethod()->constantPool(), comp->getSymRefTab()->findOrCreateConstantPoolAddressSymbolRef(methodSymbol), cg);
12808
generateRegImmInstruction(TR::InstOpCode::MOV8RegImm4, node, cpIndexReg, symRef->getCPIndex(), cg);
12809
deps->addPreCondition(cpAddressReg, linkageProperties.getArgumentRegister(0, false /* isFloat */), cg);
12810
deps->addPostCondition(cpAddressReg, linkageProperties.getArgumentRegister(0, false /* isFloat */), cg);
12811
deps->addPreCondition(cpIndexReg, linkageProperties.getArgumentRegister(1, false /* isFloat */), cg);
12812
deps->addPostCondition(cpIndexReg, linkageProperties.getArgumentRegister(1, false /* isFloat */), cg);
12813
cg->stopUsingRegister(cpIndexReg);
12814
resultReg = cpAddressReg; // for 64bit private linkage both the first argument reg and the return reg are rax
12815
}
12816
else
12817
{
12818
generateImmInstruction(TR::InstOpCode::PUSHImm4, node, symRef->getCPIndex(), cg);
12819
generateImmSymInstruction(TR::InstOpCode::PUSHImm4, node, (uintptr_t) methodSymbol->getResolvedMethod()->constantPool(), comp->getSymRefTab()->findOrCreateConstantPoolAddressSymbolRef(methodSymbol), cg);
12820
resultReg = cg->allocateRegister();
12821
deps->addPreCondition(resultReg, linkageProperties.getIntegerReturnRegister(), cg);
12822
deps->addPostCondition(resultReg, linkageProperties.getIntegerReturnRegister(), cg);
12823
}
12824
TR::Instruction *call = generateHelperCallInstruction(node, helperIndex, NULL, cg);
12825
call->setNeedsGCMap(0xFF00FFFF);
12826
12827
/*
12828
For instance field offset, the result returned by the vmhelper includes header size.
12829
subtract the header size to get the offset needed by field watch helpers
12830
*/
12831
if (!isStatic)
12832
{
12833
generateRegImmInstruction(TR::InstOpCode::SubRegImm4(is64Bit, false /*isWithBorrow*/), node, resultReg, TR::Compiler->om.objectHeaderSizeInBytes(), cg);
12834
}
12835
12836
//store result into J9JITWatchedStaticFieldData.fieldAddress / J9JITWatchedInstanceFieldData.offset
12837
generateMemRegInstruction(TR::InstOpCode::SMemReg(is64Bit), node, generateX86MemoryReference(dataBlockReg, offsetInDataBlock, cg), resultReg, cg);
12838
generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
12839
12840
og.endOutlinedInstructionSequence();
12841
}
12842
12843
deps->stopAddingConditions();
12844
generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);
12845
cg->stopUsingRegister(dataBlockReg);
12846
cg->stopUsingRegister(resultReg);
12847
}
12848
12849
/*
 * Generate the reporting field access helper call with required arguments
 *
 * jitReportInstanceFieldRead
 * arg1 pointer to static data block
 * arg2 object being read
 *
 * jitReportInstanceFieldWrite
 * arg1 pointer to static data block
 * arg2 object being written to (represented by sideEffectRegister)
 * arg3 pointer to value being written
 *
 * jitReportStaticFieldRead
 * arg1 pointer to static data block
 *
 * jitReportStaticFieldWrite
 * arg1 pointer to static data block
 * arg2 pointer to value being written
 *
 */
void generateReportFieldAccessOutlinedInstructions(TR::Node *node, TR::LabelSymbol *endLabel, TR::Snippet *dataSnippet, bool isWrite, TR::RegisterDependencyConditions *deps, TR::CodeGenerator *cg, TR::Register *sideEffectRegister, TR::Register *valueReg)
   {
   bool is64Bit = cg->comp()->target().is64Bit();
   // Static symbols use the static-field report helpers; everything else is treated as an instance field.
   bool isInstanceField = node->getSymbolReference()->getSymbol()->getKind() != TR::Symbol::IsStatic;
   // NOTE(review): owningMethod is not referenced below in this function — possibly left over.
   J9Method *owningMethod = (J9Method *)node->getOwningMethod();

   // Select the VM report helper matching the access kind (read/write x instance/static).
   TR_RuntimeHelper helperIndex = isWrite ? (isInstanceField ? TR_jitReportInstanceFieldWrite: TR_jitReportStaticFieldWrite):
                                            (isInstanceField ? TR_jitReportInstanceFieldRead: TR_jitReportStaticFieldRead);

   TR::Linkage *linkage = cg->getLinkage(runtimeHelperLinkage(helperIndex));
   auto linkageProperties = linkage->getProperties();

   TR::Register *valueReferenceReg = NULL;
   TR::MemoryReference *valueMR = NULL;
   TR::Register *dataBlockReg = cg->allocateRegister();
   bool reuseValueReg = false;

   /*
    * For reporting field write, reference to the valueNode (valueNode is evaluated in valueReg) is needed so we need to store
    * the value on to a stack location first and pass the stack location address as an argument
    * to the VM helper
    */
   if (isWrite)
      {
      valueMR = cg->machine()->getDummyLocalMR(node->getType());
      if (!valueReg->getRegisterPair())
         {
         if (valueReg->getKind() == TR_GPR)
            {
            // Pick a 4- vs 8-byte store based on the rounded size of the spill slot.
            TR::AutomaticSymbol *autoSymbol = valueMR->getSymbolReference().getSymbol()->getAutoSymbol();
            generateMemRegInstruction(TR::InstOpCode::SMemReg(autoSymbol->getRoundedSize() == 8), node, valueMR, valueReg, cg);
            }
         else if (valueReg->isSinglePrecision())
            generateMemRegInstruction(TR::InstOpCode::MOVSSMemReg, node, valueMR, valueReg, cg);
         else
            generateMemRegInstruction(TR::InstOpCode::MOVSDMemReg, node, valueMR, valueReg, cg);
         // valueReg and valueReferenceReg are different. Add conditions for valueReg here
         deps->addPreCondition(valueReg, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(valueReg, TR::RealRegister::NoReg, cg);
         valueReferenceReg = cg->allocateRegister();
         }
      else
         { // 32bit long
         // Spill both halves of the register pair to adjacent 4-byte slots.
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, valueMR, valueReg->getLowOrder(), cg);
         generateMemRegInstruction(TR::InstOpCode::SMemReg(), node, generateX86MemoryReference(*valueMR, 4, cg), valueReg->getHighOrder(), cg);

         // Add the dependency for higher half register here
         deps->addPostCondition(valueReg->getHighOrder(), TR::RealRegister::NoReg, cg);
         deps->addPreCondition(valueReg->getHighOrder(), TR::RealRegister::NoReg, cg);

         // on 32bit reuse lower half register to save one register
         // lower half register dependency will be added later when using as valueReferenceReg and a call argument
         // to keep consistency with the other call arguments
         valueReferenceReg = valueReg->getLowOrder();
         reuseValueReg = true;
         }

      //store the stack location into a register
      generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, valueReferenceReg, valueMR, cg);
      }

   // First helper argument: address of the watched-field data block snippet.
   generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, dataBlockReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);
   int numArgs = 0;
   if (is64Bit)
      {
      // 64-bit: bind each argument to its linkage argument register, in helper argument order.
      deps->addPreCondition(dataBlockReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
      deps->addPostCondition(dataBlockReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
      numArgs++;

      if (isInstanceField)
         {
         deps->addPreCondition(sideEffectRegister, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         deps->addPostCondition(sideEffectRegister, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         numArgs++;
         }

      if (isWrite)
         {
         deps->addPreCondition(valueReferenceReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         deps->addPostCondition(valueReferenceReg, linkageProperties.getArgumentRegister(numArgs, false /* isFloat */), cg);
         }
      }
   else
      {
      // 32-bit: push arguments on the stack in reverse order so the data block ends up on top.
      if (isWrite)
         {
         generateRegInstruction(TR::InstOpCode::PUSHReg, node, valueReferenceReg, cg);
         deps->addPostCondition(valueReferenceReg, TR::RealRegister::NoReg, cg);
         deps->addPreCondition(valueReferenceReg, TR::RealRegister::NoReg, cg);
         }

      if (isInstanceField)
         {
         generateRegInstruction(TR::InstOpCode::PUSHReg, node, sideEffectRegister, cg);
         deps->addPreCondition(sideEffectRegister, TR::RealRegister::NoReg, cg);
         deps->addPostCondition(sideEffectRegister, TR::RealRegister::NoReg, cg);
         }
      generateRegInstruction(TR::InstOpCode::PUSHReg, node, dataBlockReg, cg);
      deps->addPreCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(dataBlockReg, TR::RealRegister::NoReg, cg);
      }

   TR::Instruction *call = generateHelperCallInstruction(node, helperIndex, NULL, cg);
   call->setNeedsGCMap(0xFF00FFFF);
   // Restore the value of lower part register
   if (isWrite && valueReg->getRegisterPair() && valueReg->getKind() == TR_GPR)
      generateRegMemInstruction(TR::InstOpCode::L4RegMem, node, valueReg->getLowOrder(), valueMR, cg);
   if (!reuseValueReg)
      cg->stopUsingRegister(valueReferenceReg);
   generateLabelInstruction(TR::InstOpCode::JMP4, node, endLabel, cg);
   cg->stopUsingRegister(dataBlockReg);
   }
12981
12982
/*
 * Get the number of register dependencies needed to generate the out-of-line sequence reporting field accesses
 */
static uint8_t getNumOfConditionsForReportFieldAccess(TR::Node *node, bool isResolved, bool isWrite, bool isInstanceField, TR::CodeGenerator *cg)
   {
   // The data block pointer is always passed, so start from one dependency.
   uint8_t conditionCount = 1;

   // A class register is needed when the field is unresolved, is an instance
   // field, or when class/method pointers must be relocated.
   if (!isResolved || isInstanceField || cg->needClassAndMethodPointerRelocations())
      conditionCount += 1;

   /* Field write report needs
    * a) value being written
    * b) the reference to the value being written
    *
    * The following cases are considered
    * 1. For 32bits using register pair(long), the valueReg is actually 2 registers,
    *    and valueReferenceReg reuses one reg in valueReg to avoid running out of registers on 32bits
    * 2. For 32bits and 64bits no register pair, valueReferenceReg and valueReg are 2 different registers
    */
   if (isWrite)
      conditionCount += 2;

   // Instance field report also passes the base object.
   if (isInstanceField)
      conditionCount += 1;

   return conditionCount;
   }
13007
13008
void
J9::X86::TreeEvaluator::generateTestAndReportFieldWatchInstructions(TR::CodeGenerator *cg, TR::Node *node, TR::Snippet *dataSnippet, bool isWrite, TR::Register *sideEffectRegister, TR::Register *valueReg, TR::Register *dataSnippetRegister)
   {
   // Emits the inline J9ClassHasWatchedFields test plus the out-of-line report
   // sequence (generateReportFieldAccessOutlinedInstructions) taken when the
   // class flag is set.
   bool isResolved = !node->getSymbolReference()->isUnresolved();
   TR::LabelSymbol* startLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* endLabel = generateLabelSymbol(cg);
   TR::LabelSymbol* fieldReportLabel = generateLabelSymbol(cg);
   startLabel->setStartInternalControlFlow();
   endLabel->setEndInternalControlFlow();

   generateLabelInstruction(TR::InstOpCode::label, node, startLabel, cg);

   TR::Register *fieldClassReg = NULL;
   TR::MemoryReference *classFlagsMemRef = NULL;
   TR_J9VMBase *fej9 = (TR_J9VMBase *)(cg->fe());
   // Indirect opcodes correspond to instance field accesses.
   bool isInstanceField = node->getOpCode().isIndirect();
   bool fieldClassNeedsRelocation = cg->needClassAndMethodPointerRelocations();

   // Build a memory reference to the field class' classFlags word, choosing how
   // to obtain the J9Class pointer based on access kind and resolution state.
   if (isInstanceField)
      {
      // Instance field: load the J9Class from the object's header.
      TR::Register *objReg = sideEffectRegister;
      fieldClassReg = cg->allocateRegister();
      generateLoadJ9Class(node, fieldClassReg, objReg, cg);
      classFlagsMemRef = generateX86MemoryReference(fieldClassReg, (uintptr_t)(fej9->getOffsetOfClassFlags()), cg);
      }
   else
      {
      if (isResolved)
         {
         if (!fieldClassNeedsRelocation)
            {
            // For non-AOT (JIT and JITServer) compiles we don't need to use sideEffectRegister here as the class information is available to us at compile time.
            J9Class *fieldClass = static_cast<TR::J9WatchedStaticFieldSnippet *>(dataSnippet)->getFieldClass();
            classFlagsMemRef = generateX86MemoryReference((uintptr_t)fieldClass + fej9->getOffsetOfClassFlags(), cg);
            }
         else
            {
            // If this is an AOT compile, we generate instructions to load the fieldClass directly from the snippet because the fieldClass in an AOT body will be invalid
            // if we load using the dataSnippet's helper query at compile time.
            fieldClassReg = cg->allocateRegister();
            generateRegMemInstruction(TR::InstOpCode::LEARegMem(), node, fieldClassReg, generateX86MemoryReference(dataSnippet->getSnippetLabel(), cg), cg);
            generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(fieldClassReg, offsetof(J9JITWatchedStaticFieldData, fieldClass), cg), cg);
            classFlagsMemRef = generateX86MemoryReference(fieldClassReg, fej9->getOffsetOfClassFlags(), cg);
            }
         }
      else
         {
         if (isWrite)
            {
            // Unresolved static write: sideEffectRegister holds the java/lang/Class object;
            // load the J9Class from it into a fresh register.
            fieldClassReg = cg->allocateRegister();
            generateRegMemInstruction(TR::InstOpCode::LRegMem(), node, fieldClassReg, generateX86MemoryReference(sideEffectRegister, fej9->getOffsetOfClassFromJavaLangClassField(), cg), cg);
            }
         else
            {
            // Unresolved static read: reuse sideEffectRegister directly as the class register.
            fieldClassReg = sideEffectRegister;
            }
         classFlagsMemRef = generateX86MemoryReference(fieldClassReg, fej9->getOffsetOfClassFlags(), cg);
         }
      }

   // Inline fast path: branch to the outlined report sequence only when the
   // class' J9ClassHasWatchedFields flag is set.
   generateMemImmInstruction(TR::InstOpCode::TEST2MemImm2, node, classFlagsMemRef, J9ClassHasWatchedFields, cg);
   generateLabelInstruction(TR::InstOpCode::JNE4, node, fieldReportLabel, cg);

   uint8_t numOfConditions = getNumOfConditionsForReportFieldAccess(node, !node->getSymbolReference()->isUnresolved(), isWrite, isInstanceField, cg);
   TR::RegisterDependencyConditions *deps = generateRegisterDependencyConditions(numOfConditions, numOfConditions, cg);
   if (isInstanceField || !isResolved || fieldClassNeedsRelocation)
      {
      deps->addPreCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
      deps->addPostCondition(fieldClassReg, TR::RealRegister::NoReg, cg);
      }

      {
      // Outlined slow path: call the appropriate report helper, then jump back to endLabel.
      TR_OutlinedInstructionsGenerator og(fieldReportLabel, node ,cg);
      generateReportFieldAccessOutlinedInstructions(node, endLabel, dataSnippet, isWrite, deps, cg, sideEffectRegister, valueReg);
      og.endOutlinedInstructionSequence();
      }
   deps->stopAddingConditions();
   generateLabelInstruction(TR::InstOpCode::label, node, endLabel, deps, cg);

   // NOTE(review): this condition does not exactly mirror the allocation sites above —
   // when fieldClassNeedsRelocation is true but the field is unresolved and not written,
   // fieldClassReg aliases sideEffectRegister here; confirm stopping it is intended.
   if (isInstanceField || (!isResolved && isWrite) || fieldClassNeedsRelocation)
      {
      cg->stopUsingRegister(fieldClassReg);
      }
   }
13092
13093
TR::Register *
J9::X86::TreeEvaluator::generateConcurrentScavengeSequence(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Perform the load through the read barrier, then optionally emit a
   // prefetch for hot String-typed fields.
   TR::Register *loadedObject = TR::TreeEvaluator::performHeapLoadWithReadBarrier(node, cg);

   TR::SymbolReference *symRef = node->getSymbolReference();
   bool considerPrefetch = !symRef->isUnresolved()
      && symRef->getSymbol()->getKind() == TR::Symbol::IsShadow
      && symRef->getCPIndex() >= 0
      && cg->comp()->getMethodHotness() >= scorching;

   if (considerPrefetch)
      {
      int32_t sigLen;
      const char *signature = symRef->getOwningMethod(cg->comp())->fieldSignatureChars(symRef->getCPIndex(), sigLen);

      // Only fields whose signature mentions java/lang/String get the prefetch.
      if (signature && strstr(signature, "Ljava/lang/String;"))
         generateMemInstruction(TR::InstOpCode::PREFETCHT0, node, generateX86MemoryReference(loadedObject, 0, cg), cg);
      }

   return loadedObject;
   }
13114
13115
TR::Register *
J9::X86::TreeEvaluator::irdbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child first so the field watch helper can
   // report the access, then let the load evaluator do the actual load.
   TR::Node *sideEffectChild = node->getFirstChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, NULL);

   TR::Register *loadResult = NULL;
   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
      {
      // With a read barrier active, only compressed-refs address loads take the
      // concurrent scavenge sequence here.
      bool isAddressLoad = node->getOpCode().hasSymbolReference()
         && node->getSymbolReference()->getSymbol()->getDataType() == TR::Address;
      if (cg->comp()->useCompressedPointers() && isAddressLoad)
         {
         loadResult = TR::TreeEvaluator::generateConcurrentScavengeSequence(node, cg);
         node->setRegister(loadResult);
         }
      }
   else
      {
      // No read barrier required: perform a regular load.
      loadResult = TR::TreeEvaluator::iloadEvaluator(node, cg);
      }

   // The first child is also consumed by the load evaluator, which
   // evaluates and decrements it; skipping the decrement here avoids
   // doing it twice.
   return loadResult;
   }
13151
13152
TR::Register *
J9::X86::TreeEvaluator::irdbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child so the field watch helper can report the
   // read, then delegate the load itself to the plain iload evaluator.
   TR::Node *sideEffectChild = node->getFirstChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, NULL);

   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::iloadEvaluator(node, cg);
   }
13169
13170
TR::Register *
J9::X86::TreeEvaluator::ardbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child so the field watch helper can report the
   // read, then delegate the load itself to the plain aload evaluator.
   TR::Node *sideEffectChild = node->getFirstChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, NULL);

   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::aloadEvaluator(node, cg);
   }
13187
13188
TR::Register *
J9::X86::TreeEvaluator::ardbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the side-effect child so the field watch helper can report the
   // access, then perform the load through the appropriate path.
   TR::Register *sideEffectReg = cg->evaluate(node->getFirstChild());

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, NULL);

   TR::Register *loadResult = NULL;
   if (TR::Compiler->om.readBarrierType() != gc_modron_readbar_none)
      {
      // Read barrier active: use the concurrent scavenge load sequence and
      // mark the result as a collected reference.
      loadResult = TR::TreeEvaluator::generateConcurrentScavengeSequence(node, cg);
      loadResult->setContainsCollectedReference();
      node->setRegister(loadResult);
      }
   else
      {
      // No read barrier required: perform a regular load.
      loadResult = TR::TreeEvaluator::aloadEvaluator(node, cg);
      }

   // The first child is also consumed by the load evaluator, which
   // evaluates and decrements it; skipping the decrement here avoids
   // doing it twice.
   return loadResult;
   }
13219
13220
TR::Register *J9::X86::TreeEvaluator::fwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getFirstChild());
   TR::Node *sideEffectChild = node->getSecondChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
   }
13240
13241
TR::Register *J9::X86::TreeEvaluator::fwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getSecondChild());
   TR::Node *sideEffectChild = node->getThirdChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
   }
13261
13262
#ifdef TR_TARGET_32BIT
13263
TR::Register *J9::X86::I386::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getFirstChild());
   TR::Node *sideEffectChild = node->getSecondChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
13283
13284
TR::Register *J9::X86::I386::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getSecondChild());
   TR::Node *sideEffectChild = node->getThirdChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::dstoreEvaluator(node, cg);
   }
13304
#endif
13305
13306
#ifdef TR_TARGET_64BIT
13307
TR::Register *J9::X86::AMD64::TreeEvaluator::dwrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getFirstChild());
   TR::Node *sideEffectChild = node->getSecondChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
   }
13327
13328
TR::Register *J9::X86::AMD64::TreeEvaluator::dwrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the value and side-effect children up front so the field watch
   // helper can report the write, then delegate the store itself.
   TR::Register *storedValueReg = cg->evaluate(node->getSecondChild());
   TR::Node *sideEffectChild = node->getThirdChild();
   TR::Register *sideEffectReg = cg->evaluate(sideEffectChild);

   if (cg->comp()->getOption(TR_EnableFieldWatch))
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   // The value child's reference count is left untouched: the store evaluator
   // also evaluates it and performs the decrement, so doing it here would
   // double-decrement.
   cg->decReferenceCount(sideEffectChild);
   return TR::TreeEvaluator::floatingPointStoreEvaluator(node, cg);
   }
13348
#endif
13349
13350
TR::Register *J9::X86::TreeEvaluator::awrtbariEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Indirect address write barriers share the direct evaluator's logic,
   // which checks node->getOpCode().isIndirect() itself.
   return TR::TreeEvaluator::awrtbarEvaluator(node, cg);
   }
13354
13355
TR::Register *J9::X86::TreeEvaluator::awrtbarEvaluator(TR::Node *node, TR::CodeGenerator *cg)
   {
   // Evaluate the children needed for side effects, then hand the write
   // barrier itself to writeBarrierEvaluator, which also owns the reference
   // count decrements for the children evaluated here.
   TR::Register *storedValueReg = NULL;
   TR::Register *sideEffectReg = NULL;

   if (node->getOpCode().isIndirect())
      {
      // Let the shared helper pick the correct value child for the indirect form.
      TR::Node *valueChild = NULL;
      TR::TreeEvaluator::getIndirectWrtbarValueNode(cg, node, valueChild, false);
      storedValueReg = cg->evaluate(valueChild);
      sideEffectReg = cg->evaluate(node->getThirdChild());
      }
   else
      {
      storedValueReg = cg->evaluate(node->getFirstChild());
      sideEffectReg = cg->evaluate(node->getSecondChild());
      }

   // Array shadow stores are excluded from the field watch report.
   if (cg->comp()->getOption(TR_EnableFieldWatch) && !node->getSymbolReference()->getSymbol()->isArrayShadowSymbol())
      TR::TreeEvaluator::rdWrtbarHelperForFieldWatch(node, cg, sideEffectReg, storedValueReg);

   return TR::TreeEvaluator::writeBarrierEvaluator(node, cg);
   }
13388
13389