GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/runtime/compiler/x/codegen/X86PrivateLinkage.cpp
1
/*******************************************************************************
2
* Copyright (c) 2000, 2022 IBM Corp. and others
3
*
4
* This program and the accompanying materials are made available under
5
* the terms of the Eclipse Public License 2.0 which accompanies this
6
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7
* or the Apache License, Version 2.0 which accompanies this distribution and
8
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9
*
10
* This Source Code may also be made available under the following
11
* Secondary Licenses when the conditions for such availability set
12
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13
* General Public License, version 2 with the GNU Classpath
14
* Exception [1] and GNU General Public License, version 2 with the
15
* OpenJDK Assembly Exception [2].
16
*
17
* [1] https://www.gnu.org/software/classpath/license.html
18
* [2] http://openjdk.java.net/legal/assembly-exception.html
19
*
20
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
21
*******************************************************************************/
22
23
#include "codegen/X86PrivateLinkage.hpp"
24
25
#include "codegen/Linkage_inlines.hpp"
26
#include "codegen/LiveRegister.hpp"
27
#include "codegen/Machine.hpp"
28
#include "codegen/MemoryReference.hpp"
29
#include "codegen/Register.hpp"
30
#include "codegen/RegisterDependency.hpp"
31
#include "codegen/RegisterPair.hpp"
32
#include "codegen/Snippet.hpp"
33
#include "codegen/UnresolvedDataSnippet.hpp"
34
#include "compile/Method.hpp"
35
#include "compile/ResolvedMethod.hpp"
36
#include "compile/VirtualGuard.hpp"
37
#if defined(J9VM_OPT_JITSERVER)
38
#include "control/CompilationThread.hpp"
39
#endif /* defined(J9VM_OPT_JITSERVER) */
40
#include "env/CHTable.hpp"
41
#include "env/CompilerEnv.hpp"
42
#include "env/J2IThunk.hpp"
43
#include "env/PersistentCHTable.hpp"
44
#include "env/StackMemoryRegion.hpp"
45
#include "env/jittypes.h"
46
#include "env/VMJ9.h"
47
#include "il/DataTypes.hpp"
48
#include "il/Node.hpp"
49
#include "il/Node_inlines.hpp"
50
#include "il/ParameterSymbol.hpp"
51
#include "il/TreeTop.hpp"
52
#include "il/TreeTop_inlines.hpp"
53
#include "infra/SimpleRegex.hpp"
54
#include "env/VMJ9.h"
55
#include "x/codegen/X86Instruction.hpp"
56
#include "x/codegen/CallSnippet.hpp"
57
#include "x/codegen/FPTreeEvaluator.hpp"
58
#include "runtime/J9Profiler.hpp"
59
#include "runtime/J9ValueProfiler.hpp"
60
#include "OMR/Bytes.hpp"
61
62
#ifdef TR_TARGET_64BIT
63
#include "x/amd64/codegen/AMD64GuardedDevirtualSnippet.hpp"
64
#else
65
#include "x/codegen/GuardedDevirtualSnippet.hpp"
66
#endif
67
68
inline uint32_t gcd(uint32_t a, uint32_t b)
69
{
70
while (b != 0)
71
{
72
uint32_t t = b;
73
b = a % b;
74
a = t;
75
}
76
return a;
77
}
78
79
inline uint32_t lcm(uint32_t a, uint32_t b)
80
{
81
return a * b / gcd(a, b);
82
}
83
84
J9::X86::PrivateLinkage::PrivateLinkage(TR::CodeGenerator *cg) : J9::PrivateLinkage(cg)
85
{
86
// Stack alignment basic requirement:
87
// X86-32: 4 bytes, per hardware requirement
88
// X86-64: 16 bytes, required by both Linux and Windows
89
// Stack alignment additional requirement:
90
// Stack alignment has to match the alignment requirement for local object address
91
_properties.setOutgoingArgAlignment(lcm(cg->comp()->target().is32Bit() ? 4 : 16,
92
cg->fej9()->getLocalObjectAlignmentInBytes()));
93
}
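// Illustrative example of the alignment computation above (hypothetical values;
// the real inputs come from the target and the VM): on a 64-bit target with the
// common 8-byte local object alignment, gcd(16, 8) == 8, so the outgoing-arg
// alignment is lcm(16, 8) == 16*8/8 == 16. With a hypothetical 32-byte object
// alignment it would grow to lcm(16, 32) == 32, and on a 32-bit target with
// 8-byte objects it would be lcm(4, 8) == 8.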
94
95
const TR::X86LinkageProperties& J9::X86::PrivateLinkage::getProperties()
96
{
97
return _properties;
98
}
99
100
////////////////////////////////////////////////
101
//
102
// Argument manipulation
103
//
104
105
static const TR::RealRegister::RegNum NOT_ASSIGNED = (TR::RealRegister::RegNum)-1;
106
107
108
void J9::X86::PrivateLinkage::copyLinkageInfoToParameterSymbols()
109
{
110
TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
111
ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
112
TR::ParameterSymbol *paramCursor;
113
const TR::X86LinkageProperties &properties = getProperties();
114
int32_t maxIntArgs, maxFloatArgs;
115
int32_t numIntArgs = 0, numFloatArgs = 0;
116
117
maxIntArgs = properties.getNumIntegerArgumentRegisters();
118
maxFloatArgs = properties.getNumFloatArgumentRegisters();
119
for (paramCursor = paramIterator.getFirst(); paramCursor != NULL; paramCursor = paramIterator.getNext())
120
{
121
// If we're out of registers, just stop now instead of looping doing nothing
122
//
123
if (numIntArgs >= maxIntArgs && numFloatArgs >= maxFloatArgs)
124
break;
125
126
// Assign linkage registers of each type until we run out
127
//
128
switch(paramCursor->getDataType())
129
{
130
case TR::Float:
131
case TR::Double:
132
if (numFloatArgs < maxFloatArgs)
133
paramCursor->setLinkageRegisterIndex(numFloatArgs++);
134
break;
135
default:
136
if (numIntArgs < maxIntArgs)
137
paramCursor->setLinkageRegisterIndex(numIntArgs++);
138
break;
139
}
140
}
141
}
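// Illustrative walk-through of the loop above (hypothetical register counts; the
// real limits come from the linkage properties): with 4 integer and 8 float
// argument registers, a signature (int, float, Object, double, long) is assigned
// linkage register indices int:0, float:0, Object:1 (integer class), double:1,
// long:2. Once both counters reach their maxima the loop exits early and any
// remaining parameters keep their stack locations.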
142
143
void J9::X86::PrivateLinkage::copyGlRegDepsToParameterSymbols(TR::Node *bbStart, TR::CodeGenerator *cg)
144
{
145
TR_ASSERT(bbStart->getOpCodeValue() == TR::BBStart, "assertion failure");
146
if (bbStart->getNumChildren() > 0)
147
{
148
TR::Node *glRegDeps = bbStart->getFirstChild();
149
if (!glRegDeps) // No global register info, so nothing to do
150
return;
151
152
TR_ASSERT(glRegDeps->getOpCodeValue() == TR::GlRegDeps, "First child of first Node must be a GlRegDeps");
153
154
uint16_t childNum;
155
for (childNum=0; childNum < glRegDeps->getNumChildren(); childNum++)
156
{
157
TR::Node *child = glRegDeps->getChild(childNum);
158
TR::ParameterSymbol *sym = child->getSymbol()->getParmSymbol();
159
sym->setAssignedGlobalRegisterIndex(cg->getGlobalRegister(child->getGlobalRegisterNumber()));
160
}
161
}
162
}
163
164
TR::Instruction *J9::X86::PrivateLinkage::copyStackParametersToLinkageRegisters(TR::Instruction *procEntryInstruction)
165
{
166
TR_ASSERT(procEntryInstruction && procEntryInstruction->getOpCodeValue() == TR::InstOpCode::proc, "assertion failure");
167
TR::Instruction *intrpPrev = procEntryInstruction->getPrev(); // The instruction before the interpreter entry point
168
movLinkageRegisters(intrpPrev, false);
169
return intrpPrev->getNext();
170
}
171
172
TR::Instruction *J9::X86::PrivateLinkage::movLinkageRegisters(TR::Instruction *cursor, bool isStore)
173
{
174
TR_ASSERT(cursor, "assertion failure");
175
176
TR::Machine *machine = cg()->machine();
177
TR::RealRegister *rspReal = machine->getRealRegister(TR::RealRegister::esp);
178
179
TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
180
ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
181
TR::ParameterSymbol *paramCursor;
182
183
// Copy from stack all parameters that belong in linkage regs
184
//
185
for (paramCursor = paramIterator.getFirst();
186
paramCursor != NULL;
187
paramCursor = paramIterator.getNext())
188
{
189
int8_t lri = paramCursor->getLinkageRegisterIndex();
190
191
if (lri != NOT_LINKAGE) // This param should be in a linkage reg
192
{
193
TR_MovDataTypes movDataType = paramMovType(paramCursor);
194
TR::RealRegister *reg = machine->getRealRegister(getProperties().getArgumentRegister(lri, isFloat(movDataType)));
195
TR::MemoryReference *memRef = generateX86MemoryReference(rspReal, paramCursor->getParameterOffset(), cg());
196
197
if (isStore)
198
{
199
// stack := lri
200
cursor = generateMemRegInstruction(cursor, TR::Linkage::movOpcodes(MemReg, movDataType), memRef, reg, cg());
201
}
202
else
203
{
204
// lri := stack
205
cursor = generateRegMemInstruction(cursor, TR::Linkage::movOpcodes(RegMem, movDataType), reg, memRef, cg());
206
}
207
}
208
}
209
210
return cursor;
211
}
212
213
214
// Copies parameters from where they enter the method (either on stack or in a
215
// linkage register) to their "home location" where the method body will expect
216
// to find them (either on stack or in a global register).
217
//
218
TR::Instruction *J9::X86::PrivateLinkage::copyParametersToHomeLocation(TR::Instruction *cursor, bool parmsHaveBeenStored)
219
{
220
TR::Machine *machine = cg()->machine();
221
TR::RealRegister *framePointer = machine->getRealRegister(TR::RealRegister::vfp);
222
223
TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
224
ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
225
TR::ParameterSymbol *paramCursor;
226
227
const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
228
TR_ASSERT(noReg == 0, "noReg must be zero so zero-initializing movStatus will work");
229
230
TR::MovStatus movStatus[TR::RealRegister::NumRegisters] = {{(TR::RealRegister::RegNum)0,(TR::RealRegister::RegNum)0,(TR_MovDataTypes)0}};
231
232
// We must always do the stores first, then the reg-reg copies, then the
233
// loads, so that we never clobber a register we will need later. However,
234
// the logic is simpler if we do the loads and stores in the same loop.
235
// Therefore, we maintain a separate instruction cursor for the loads.
236
//
237
// We defer the initialization of loadCursor until we generate the first
238
// load. Otherwise, if we happen to generate some stores first, then the
239
// store cursor would get ahead of the loadCursor, and the instructions
240
// would end up in the wrong order despite our efforts.
241
//
242
TR::Instruction *loadCursor = NULL;
243
244
// Phase 1: generate RegMem and MemReg movs, and collect information about
245
// the required RegReg movs.
246
//
247
for (paramCursor = paramIterator.getFirst();
248
paramCursor != NULL;
249
paramCursor = paramIterator.getNext())
250
{
251
int8_t lri = paramCursor->getLinkageRegisterIndex(); // How the parameter enters the method
252
TR::RealRegister::RegNum ai // Where method body expects to find it
253
= (TR::RealRegister::RegNum)paramCursor->getAssignedGlobalRegisterIndex();
254
int32_t offset = paramCursor->getParameterOffset(); // Location of the parameter's stack slot
255
TR_MovDataTypes movDataType = paramMovType(paramCursor); // What sort of MOV instruction does it need?
256
257
// Copy the parameter to wherever it should be
258
//
259
if (lri == NOT_LINKAGE) // It's on the stack
260
{
261
if (ai == NOT_ASSIGNED) // It only needs to be on the stack
262
{
263
// Nothing to do
264
}
265
else // Method body expects it to be in the ai register
266
{
267
if (loadCursor == NULL)
268
loadCursor = cursor;
269
270
if (debug("traceCopyParametersToHomeLocation"))
271
diagnostic("copyParametersToHomeLocation: Loading %d\n", ai);
272
// ai := stack
273
loadCursor = generateRegMemInstruction(
274
loadCursor,
275
TR::Linkage::movOpcodes(RegMem, movDataType),
276
machine->getRealRegister(ai),
277
generateX86MemoryReference(framePointer, offset, cg()),
278
cg()
279
);
280
}
281
}
282
else // It's in a linkage register
283
{
284
TR::RealRegister::RegNum sourceIndex = getProperties().getArgumentRegister(lri, isFloat(movDataType));
285
286
// Copy to the stack if necessary
287
//
288
if (ai == NOT_ASSIGNED || hasToBeOnStack(paramCursor))
289
{
290
if (parmsHaveBeenStored)
291
{
292
if (debug("traceCopyParametersToHomeLocation"))
293
diagnostic("copyParametersToHomeLocation: Skipping store of %d because parmsHaveBeenStored already\n", sourceIndex);
294
}
295
else
296
{
297
if (debug("traceCopyParametersToHomeLocation"))
298
diagnostic("copyParametersToHomeLocation: Storing %d\n", sourceIndex);
299
// stack := lri
300
cursor = generateMemRegInstruction(
301
cursor,
302
TR::Linkage::movOpcodes(MemReg, movDataType),
303
generateX86MemoryReference(framePointer, offset, cg()),
304
machine->getRealRegister(sourceIndex),
305
cg()
306
);
307
}
308
}
309
310
// Copy to the ai register if necessary
311
//
312
if (ai != NOT_ASSIGNED && ai != sourceIndex)
313
{
314
// This parameter needs a RegReg move. We don't know yet whether
315
// we need the value in the target register, so for now we just
316
// remember that we need to do this and keep going.
317
//
318
TR_ASSERT(movStatus[ai ].sourceReg == noReg, "Each target reg must have only one source");
319
TR_ASSERT(movStatus[sourceIndex].targetReg == noReg, "Each source reg must have only one target");
320
if (debug("traceCopyParametersToHomeLocation"))
321
diagnostic("copyParametersToHomeLocation: Planning to move %d to %d\n", sourceIndex, ai);
322
movStatus[ai].sourceReg = sourceIndex;
323
movStatus[sourceIndex].targetReg = ai;
324
movStatus[sourceIndex].outgoingDataType = movDataType;
325
}
326
327
if (debug("traceCopyParametersToHomeLocation") && ai == sourceIndex)
328
{
329
diagnostic("copyParametersToHomeLocation: Parameter #%d already in register %d\n", lri, ai);
330
}
331
}
332
}
333
334
// Phase 2: Iterate through the parameters again to insert the RegReg moves.
335
//
336
for (paramCursor = paramIterator.getFirst();
337
paramCursor != NULL;
338
paramCursor = paramIterator.getNext())
339
{
340
if (paramCursor->getLinkageRegisterIndex() == NOT_LINKAGE)
341
continue;
342
343
const TR::RealRegister::RegNum paramReg =
344
getProperties().getArgumentRegister(paramCursor->getLinkageRegisterIndex(), isFloat(paramMovType(paramCursor)));
345
346
if (movStatus[paramReg].targetReg == 0)
347
{
348
// This parameter does not need to be copied anywhere
349
if (debug("traceCopyParametersToHomeLocation"))
350
diagnostic("copyParametersToHomeLocation: Not moving %d\n", paramReg);
351
}
352
else
353
{
354
if (debug("traceCopyParametersToHomeLocation"))
355
diagnostic("copyParametersToHomeLocation: Preparing to move %d\n", paramReg);
356
357
// If a mov's target register is the source for another mov, we need
358
// to do that other mov first. The idea is to find the end point of
359
// the chain of movs starting with paramReg and ending with a
360
// register whose current value is not needed; then do that chain of
361
// movs in reverse order.
362
//
363
TR_ASSERT(noReg == 0, "noReg must be zero (not %d) for zero-filled initialization to work", noReg);
364
365
TR::RealRegister::RegNum regCursor;
366
367
// Find the last target in the chain
368
//
369
regCursor = movStatus[paramReg].targetReg;
370
while(movStatus[regCursor].targetReg != noReg)
371
{
372
// Haven't found the end yet
373
regCursor = movStatus[regCursor].targetReg;
374
TR_ASSERT(regCursor != paramReg, "Can't yet handle cyclic dependencies");
375
376
// TODO:AMD64 Use scratch register to break cycles
377
//
378
// A properly-written pickRegister should never
379
// cause cycles to occur in the first place. However, we may want
380
// to consider adding cycle-breaking logic so that (1) pickRegister
381
// has more flexibility, and (2) we're more robust against
382
// otherwise harmless bugs in pickRegister.
383
}
384
385
// Work our way backward along the chain, generating all the necessary movs
386
//
387
while(movStatus[regCursor].sourceReg != noReg)
388
{
389
TR::RealRegister::RegNum source = movStatus[regCursor].sourceReg;
390
if (debug("traceCopyParametersToHomeLocation"))
391
diagnostic("copyParametersToHomeLocation: Moving %d to %d\n", source, regCursor);
392
// regCursor := regCursor.sourceReg
393
cursor = generateRegRegInstruction(
394
cursor,
395
TR::Linkage::movOpcodes(RegReg, movStatus[source].outgoingDataType),
396
machine->getRealRegister(regCursor),
397
machine->getRealRegister(source),
398
cg()
399
);
400
// Update movStatus as we go so we don't generate redundant movs
401
movStatus[regCursor].sourceReg = noReg;
402
movStatus[source ].targetReg = noReg;
403
// Continue with the next register in the chain
404
regCursor = source;
405
}
406
}
407
}
408
409
// Return the last instruction we inserted, whether or not it was a load.
410
//
411
return loadCursor? loadCursor : cursor;
412
}
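// Illustrative example of the Phase 2 chain logic above (hypothetical registers):
// suppose the parameter arriving in rax must end up in rsi, while the parameter
// arriving in rsi must end up in rdx. Phase 1 records rax->rsi and rsi->rdx in
// movStatus. Starting from rax, the walk follows targetReg links to the end of
// the chain (rdx, whose incoming value is not needed) and then emits the moves
// in reverse order (first rdx := rsi, then rsi := rax) so no live value is
// clobbered. A cycle such as rax->rsi->rax is asserted against rather than
// broken, as the TODO above notes.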
413
414
static TR::Instruction *initializeLocals(TR::Instruction *cursor,
415
int32_t lowOffset,
416
uint32_t count,
417
int32_t pointerSize,
418
TR::RealRegister *framePointer,
419
TR::RealRegister *sourceReg,
420
TR::RealRegister *loopReg,
421
TR::CodeGenerator *cg)
422
{
423
TR::Compilation *comp = cg->comp();
424
int32_t offset = lowOffset;
425
426
if (count <= 4)
427
{
428
// For a small number, just generate a sequence of stores.
429
//
430
for (int32_t i=0; i < count; i++, offset += pointerSize)
431
{
432
cursor = new (cg->trHeapMemory()) TR::X86MemRegInstruction(
433
cursor,
434
TR::InstOpCode::SMemReg(),
435
generateX86MemoryReference(framePointer, offset, cg),
436
sourceReg,
437
cg);
438
}
439
}
440
else
441
{
442
// For a large number, generate a loop.
443
//
444
// for (loopReg = count-1; loopReg >= 0; loopReg--)
445
// framePointer[offset + loopReg * pointerSize] = sourceReg;
446
//
447
TR_ASSERT(count > 0, "positive count required for dword RegImm instruction");
448
449
cursor = new (cg->trHeapMemory()) TR::X86RegMemInstruction(
450
cursor,
451
TR::InstOpCode::LEARegMem(),
452
loopReg,
453
generateX86MemoryReference(sourceReg, count-1, cg),
454
cg);
455
456
TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
457
cursor = new (cg->trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::label, loopLabel, cg);
458
459
cursor = new (cg->trHeapMemory()) TR::X86MemRegInstruction(
460
cursor,
461
TR::InstOpCode::SMemReg(),
462
generateX86MemoryReference(
463
framePointer,
464
loopReg,
465
TR::MemoryReference::convertMultiplierToStride(pointerSize),
466
offset,
467
cg),
468
sourceReg,
469
cg);
470
471
cursor = new (cg->trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::SUB4RegImms, loopReg, 1, cg);
472
cursor = new (cg->trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::JAE4, loopLabel, cg);
473
}
474
475
return cursor;
476
}
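// Illustrative expansion of initializeLocals (hypothetical operands, assuming
// sourceReg was zeroed by the caller and pointerSize == 8): with count == 2 it
// emits two plain stores,
//   mov [vfp + lowOffset],     sourceReg
//   mov [vfp + lowOffset + 8], sourceReg
// while with count == 8 it emits a descending-index loop of roughly this shape:
//   lea loopReg, [sourceReg + 7]                      ; loopReg = count - 1
//   loop: mov [vfp + loopReg*8 + lowOffset], sourceReg
//         sub loopReg, 1
//         jae loop                                    ; stop once the index wraps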
477
478
479
#define STACKCHECKBUFFER 512
480
481
void J9::X86::PrivateLinkage::createPrologue(TR::Instruction *cursor)
482
{
483
#if defined(DEBUG)
484
// TODO:AMD64: Get this into the debug DLL
485
486
class TR_DebugFrameSegmentInfo
487
{
488
private:
489
490
TR_DebugFrameSegmentInfo *_next;
491
const char *_description;
492
TR::RealRegister *_register;
493
int32_t _lowOffset;
494
uint8_t _size;
495
TR::Compilation * _comp;
496
497
public:
498
499
TR_ALLOC(TR_Memory::CodeGenerator)
500
501
TR_DebugFrameSegmentInfo(
502
TR::Compilation * c,
503
int32_t lowOffset,
504
uint8_t size,
505
const char *description,
506
TR_DebugFrameSegmentInfo *next,
507
TR::RealRegister *reg=NULL
508
):
509
_comp(c),
510
_next(next),
511
_description(description),
512
_register(reg),
513
_lowOffset(lowOffset),
514
_size(size)
515
{}
516
517
TR::Compilation * comp() { return _comp; }
518
519
TR_DebugFrameSegmentInfo *getNext(){ return _next; }
520
521
TR_DebugFrameSegmentInfo *sort()
522
{
523
TR_DebugFrameSegmentInfo *result;
524
TR_DebugFrameSegmentInfo *tail = _next? _next->sort() : NULL;
525
TR_DebugFrameSegmentInfo *before=NULL, *after;
526
for (after = tail; after; before=after, after=after->_next)
527
{
528
if (after->_lowOffset > _lowOffset)
529
break;
530
}
531
_next = after;
532
if (before)
533
{
534
before->_next = this;
535
result = tail;
536
}
537
else
538
{
539
result = this;
540
}
541
return result;
542
}
543
544
void print(TR_Debug *debug)
545
{
546
if (_next)
547
_next->print(debug);
548
if (_size > 0)
549
{
550
diagnostic(" % 4d: % 4d -> % 4d (% 4d) %5.5s %s\n",
551
_lowOffset, _lowOffset, _lowOffset + _size - 1, _size,
552
_register? debug->getName(_register, TR_DoubleWordReg) : "",
553
_description
554
);
555
}
556
else
557
{
558
diagnostic(" % 4d: % 4d -> ---- (% 4d) %5.5s %s\n",
559
_lowOffset, _lowOffset, _size,
560
_register? debug->getName(_register, TR_DoubleWordReg) : "",
561
_description
562
);
563
}
564
}
565
566
};
567
568
TR_DebugFrameSegmentInfo *debugFrameSlotInfo=NULL;
569
#endif
570
bool trace = comp()->getOption(TR_TraceCG);
571
572
TR::RealRegister *espReal = machine()->getRealRegister(TR::RealRegister::esp);
573
TR::RealRegister *scratchReg = machine()->getRealRegister(getProperties().getIntegerScratchRegister(0));
574
TR::RealRegister *metaDataReg = machine()->getRealRegister(getProperties().getMethodMetaDataRegister());
575
576
TR::ResolvedMethodSymbol *bodySymbol = comp()->getJittedMethodSymbol();
577
ListIterator<TR::ParameterSymbol> paramIterator(&(bodySymbol->getParameterList()));
578
TR::ParameterSymbol *paramCursor;
579
580
const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
581
const TR::X86LinkageProperties &properties = getProperties();
582
583
const uint32_t outgoingArgSize = cg()->getLargestOutgoingArgSize();
584
585
// We will set this to zero after generating the first instruction (and thus
586
// satisfying the size constraint).
587
uint8_t minInstructionSize = getMinimumFirstInstructionSize();
588
589
// Entry breakpoint
590
//
591
if (comp()->getOption(TR_EntryBreakPoints))
592
{
593
if (minInstructionSize > 0)
594
{
595
// We don't want the breakpoint to get patched, so generate a sacrificial no-op
596
//
597
cursor = new (trHeapMemory()) TR::X86PaddingInstruction(cursor, minInstructionSize, TR_AtomicNoOpPadding, cg());
598
}
599
cursor = new (trHeapMemory()) TR::Instruction(TR::InstOpCode::INT3, cursor, cg());
600
}
601
602
// Compute the nature of the preserved regs
603
//
604
uint32_t preservedRegsSize = 0;
605
uint32_t registerSaveDescription = 0; // bit N corresponds to real reg N, with 1=preserved
606
607
// Preserved register index
608
for (int32_t pindex = 0; pindex < properties.getMaxRegistersPreservedInPrologue(); pindex++)
609
{
610
TR::RealRegister *reg = machine()->getRealRegister(properties.getPreservedRegister((uint32_t)pindex));
611
if (reg->getHasBeenAssignedInMethod() && reg->getState() != TR::RealRegister::Locked)
612
{
613
preservedRegsSize += properties.getPointerSize();
614
registerSaveDescription |= reg->getRealRegisterMask();
615
}
616
}
617
618
cg()->setRegisterSaveDescription(registerSaveDescription);
619
620
// Compute frame size
621
//
622
// allocSize: bytes to be subtracted from the stack pointer when allocating the frame
623
// peakSize: maximum bytes of stack this method might consume before encountering another stack check
624
//
625
const int32_t localSize = _properties.getOffsetToFirstLocal() - bodySymbol->getLocalMappingCursor();
626
TR_ASSERT(localSize >= 0, "assertion failure");
627
628
// Note that the return address doesn't appear here because it is allocated by the call instruction
629
//
630
{
631
int32_t frameSize = localSize + preservedRegsSize + ( _properties.getReservesOutgoingArgsInPrologue()? outgoingArgSize : 0 );
632
uint32_t stackSize = frameSize + _properties.getRetAddressWidth();
633
uint32_t adjust = OMR::align(stackSize, _properties.getOutgoingArgAlignment()) - stackSize;
634
cg()->setStackFramePaddingSizeInBytes(adjust);
635
cg()->setFrameSizeInBytes(frameSize + adjust);
636
if (trace)
637
traceMsg(comp(),
638
"Stack size was %d, and is adjusted by +%d (alignment %d, return address width %d)\n",
639
stackSize,
640
cg()->getStackFramePaddingSizeInBytes(),
641
_properties.getOutgoingArgAlignment(),
642
_properties.getRetAddressWidth());
643
}
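// Worked example of the frame-size arithmetic above (hypothetical sizes, assuming
// outgoing args are reserved in the prologue): with localSize = 40,
// preservedRegsSize = 16 and outgoingArgSize = 24, frameSize is 80; adding an
// 8-byte return address gives stackSize = 88, rounding up to a 16-byte
// outgoing-arg alignment gives 96, so adjust = 8 and the recorded frame size is
// 80 + 8 = 88.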
644
auto allocSize = cg()->getFrameSizeInBytes();
645
646
// Here we conservatively assume there is a call in this method that will require space for its return address
647
const int32_t peakSize = allocSize + _properties.getPointerSize();
648
649
bool doOverflowCheck = !comp()->isDLT();
650
651
// Small: entire stack usage fits in STACKCHECKBUFFER, so if sp is within
652
// the soft limit before buying the frame, then the whole frame will fit
653
// within the hard limit.
654
//
655
// Medium: the additional stack required after bumping the sp fits in
656
// STACKCHECKBUFFER, so if sp after the bump is within the soft limit, the
657
// whole frame will fit within the hard limit.
658
//
659
// Large: No shortcuts. Calculate the maximum extent of stack needed and
660
// compare that against the soft limit. (We have to use the soft limit here
661
// if for no other reason than that's the one used for asyncchecks.)
662
//
663
const bool frameIsSmall = peakSize < STACKCHECKBUFFER;
664
const bool frameIsMedium = !frameIsSmall;
665
666
if (trace)
667
{
668
traceMsg(comp(), "\nFrame size: %c%c locals=%d frame=%d peak=%d\n",
669
frameIsSmall? 'S':'-', frameIsMedium? 'M':'-',
670
localSize, cg()->getFrameSizeInBytes(), peakSize);
671
}
672
673
#if defined(DEBUG)
674
for (
675
paramCursor = paramIterator.getFirst();
676
paramCursor != NULL;
677
paramCursor = paramIterator.getNext()
678
){
679
TR::RealRegister::RegNum ai = (TR::RealRegister::RegNum)paramCursor->getAssignedGlobalRegisterIndex();
680
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
681
paramCursor->getOffset(), paramCursor->getSize(), "Parameter",
682
debugFrameSlotInfo,
683
(ai==NOT_ASSIGNED)? NULL : machine()->getRealRegister(ai)
684
);
685
}
686
687
ListIterator<TR::AutomaticSymbol> autoIterator(&bodySymbol->getAutomaticList());
688
TR::AutomaticSymbol *autoCursor;
689
for (
690
autoCursor = autoIterator.getFirst();
691
autoCursor != NULL;
692
autoCursor = autoIterator.getNext()
693
){
694
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
695
autoCursor->getOffset(), autoCursor->getSize(), "Local",
696
debugFrameSlotInfo
697
);
698
}
699
700
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
701
0, getProperties().getPointerSize(), "Return address",
702
debugFrameSlotInfo
703
);
704
#endif
705
706
// Set the VFP state for the TR::InstOpCode::proc instruction
707
//
708
if (_properties.getAlwaysDedicateFramePointerRegister())
709
{
710
cg()->initializeVFPState(getProperties().getFramePointerRegister(), 0);
711
}
712
else
713
{
714
cg()->initializeVFPState(TR::RealRegister::esp, 0);
715
}
716
717
// In FSD, we must save linkage regs to the incoming argument area because
718
// the stack overflow check doesn't preserve them.
719
//
720
bool parmsHaveBeenBackSpilled = false;
721
if (comp()->getOption(TR_FullSpeedDebug))
722
{
723
cursor = movLinkageRegisters(cursor, true);
724
parmsHaveBeenBackSpilled = true;
725
}
726
727
// Allocating the frame "speculatively" means bumping the stack pointer before checking for overflow
728
//
729
TR::GCStackAtlas *atlas = cg()->getStackAtlas();
730
bool doAllocateFrameSpeculatively = false;
731
if (metaDataReg)
732
{
733
// Generate stack overflow check
734
doAllocateFrameSpeculatively = frameIsMedium;
735
736
if (doAllocateFrameSpeculatively)
737
{
738
// Subtract allocSize from esp before stack overflow check
739
740
TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize);
741
TR_ASSERT(allocSize >= 1, "If allocSize were 0, the frame would be small, not medium");
742
743
const TR::InstOpCode::Mnemonic subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? TR::InstOpCode::SUBRegImms() : TR::InstOpCode::SUBRegImm4();
744
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg());
745
746
minInstructionSize = 0; // The SUB satisfies the constraint
747
}
748
749
TR::Instruction* jitOverflowCheck = NULL;
750
if (doOverflowCheck)
751
{
752
TR::X86VFPSaveInstruction* vfp = generateVFPSaveInstruction(cursor, cg());
753
cursor = generateStackOverflowCheckInstruction(vfp, TR::InstOpCode::CMPRegMem(), espReal, generateX86MemoryReference(metaDataReg, cg()->getStackLimitOffset(), cg()), cg());
754
755
TR::LabelSymbol* begLabel = generateLabelSymbol(cg());
756
TR::LabelSymbol* endLabel = generateLabelSymbol(cg());
757
TR::LabelSymbol* checkLabel = generateLabelSymbol(cg());
758
begLabel->setStartInternalControlFlow();
759
endLabel->setEndInternalControlFlow();
760
checkLabel->setStartOfColdInstructionStream();
761
762
cursor = generateLabelInstruction(cursor, TR::InstOpCode::label, begLabel, cg());
763
cursor = generateLabelInstruction(cursor, TR::InstOpCode::JBE4, checkLabel, cg());
764
cursor = generateLabelInstruction(cursor, TR::InstOpCode::label, endLabel, cg());
765
766
// At this point, cg()->getAppendInstruction() is already in the cold code section.
767
generateVFPRestoreInstruction(vfp, cursor->getNode(), cg());
768
generateLabelInstruction(TR::InstOpCode::label, cursor->getNode(), checkLabel, cg());
769
generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, cursor->getNode(), machine()->getRealRegister(TR::RealRegister::edi), allocSize, cg());
770
if (doAllocateFrameSpeculatively)
771
{
772
generateRegImmInstruction(TR::InstOpCode::ADDRegImm4(), cursor->getNode(), espReal, allocSize, cg());
773
}
774
TR::SymbolReference* helper = comp()->getSymRefTab()->findOrCreateStackOverflowSymbolRef(NULL);
775
jitOverflowCheck = generateImmSymInstruction(TR::InstOpCode::CALLImm4, cursor->getNode(), (uintptr_t)helper->getMethodAddress(), helper, cg());
776
jitOverflowCheck->setNeedsGCMap(0xFF00FFFF);
777
if (doAllocateFrameSpeculatively)
778
{
779
generateRegImmInstruction(TR::InstOpCode::SUBRegImm4(), cursor->getNode(), espReal, allocSize, cg());
780
}
781
generateLabelInstruction(TR::InstOpCode::JMP4, cursor->getNode(), endLabel, cg());
782
}
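// Illustrative main-line sequence for a medium frame (hypothetical allocSize of
// 600 bytes): the prologue speculatively executes sub esp, 600, then
// cmp esp, [vmThread + stackLimitOffset] and a jbe to the cold path. The cold
// path loads 600 into edi, temporarily re-adds 600 to esp, calls the stack
// overflow helper, subtracts 600 again and jumps back to the main line.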
783
784
if (cg()->canEmitBreakOnDFSet())
785
cursor = generateBreakOnDFSet(cg(), cursor);
786
787
if (atlas)
788
{
789
uint32_t numberOfParmSlots = atlas->getNumberOfParmSlotsMapped();
790
TR_GCStackMap *map;
791
if (_properties.getNumIntegerArgumentRegisters() == 0)
792
{
793
map = atlas->getParameterMap();
794
}
795
else
796
{
797
map = new (trHeapMemory(), numberOfParmSlots) TR_GCStackMap(numberOfParmSlots);
798
map->copy(atlas->getParameterMap());
799
800
// Before this point, the parameter map considers all parms to be on the stack.
801
// Fix it to have register parameters in registers.
802
//
803
TR::ParameterSymbol *paramCursor = paramIterator.getFirst();
804
805
for (
806
paramCursor = paramIterator.getFirst();
807
paramCursor != NULL;
808
paramCursor = paramIterator.getNext()
809
){
810
int32_t intRegArgIndex = paramCursor->getLinkageRegisterIndex();
811
if (intRegArgIndex >= 0 &&
812
paramCursor->isReferencedParameter() &&
813
paramCursor->isCollectedReference())
814
{
815
// In FSD, the register parameters have already been backspilled.
816
// They exist in both registers and on the stack.
817
//
818
if (!parmsHaveBeenBackSpilled)
819
map->resetBit(paramCursor->getGCMapIndex());
820
821
map->setRegisterBits(TR::RealRegister::gprMask((getProperties().getIntegerArgumentRegister(intRegArgIndex))));
822
}
823
}
824
}
825
826
if (jitOverflowCheck)
827
jitOverflowCheck->setGCMap(map);
828
829
atlas->setParameterMap(map);
830
}
831
}
832
833
bodySymbol->setProloguePushSlots(preservedRegsSize / properties.getPointerSize());
834
835
// Allocate the stack frame
836
//
837
if (allocSize == 0)
838
{
839
// No need to do anything
840
}
841
else if (!doAllocateFrameSpeculatively)
842
{
843
TR_ASSERT(minInstructionSize <= 5, "Can't guarantee SUB instruction will be at least %d bytes", minInstructionSize);
844
const TR::InstOpCode::Mnemonic subOp = (allocSize <= 127 && getMinimumFirstInstructionSize() <= 3)? TR::InstOpCode::SUBRegImms() : TR::InstOpCode::SUBRegImm4();
845
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, subOp, espReal, allocSize, cg());
846
}
847
848
//Support to paint allocated frame slots.
849
//
850
if (( comp()->getOption(TR_PaintAllocatedFrameSlotsDead) || comp()->getOption(TR_PaintAllocatedFrameSlotsFauxObject) ) && allocSize!=0)
851
{
852
uint32_t paintValue32 = 0;
853
uint64_t paintValue64 = 0;
854
855
TR::RealRegister *paintReg = NULL;
856
TR::RealRegister *frameSlotIndexReg = machine()->getRealRegister(TR::RealRegister::edi);
857
uint32_t paintBound = 0;
858
uint32_t paintSlotsOffset = 0;
859
uint32_t paintSize = allocSize-sizeof(uintptr_t);
860
861
//Paint the slots with deadf00d
862
//
863
if (comp()->getOption(TR_PaintAllocatedFrameSlotsDead))
864
{
865
if (comp()->target().is64Bit())
866
paintValue64 = (uint64_t)CONSTANT64(0xdeadf00ddeadf00d);
867
else
868
paintValue32 = 0xdeadf00d;
869
}
870
//Paint stack slots with an arbitrary object-aligned address.
871
//
872
else
873
{
874
if (comp()->target().is64Bit())
875
{
876
paintValue64 = ((uintptr_t) ((uintptr_t)comp()->getOptions()->getHeapBase() + (uintptr_t) 4096));
877
}
878
else
879
{
880
paintValue32 = ((uintptr_t) ((uintptr_t)comp()->getOptions()->getHeapBase() + (uintptr_t) 4096));
881
}
882
}
883
884
TR::LabelSymbol *startLabel = generateLabelSymbol(cg());
885
886
//Load the 64 bit paint value into a paint reg.
887
#ifdef TR_TARGET_64BIT
888
paintReg = machine()->getRealRegister(TR::RealRegister::r8);
889
cursor = new (trHeapMemory()) TR::AMD64RegImm64Instruction(cursor, TR::InstOpCode::MOV8RegImm64, paintReg, paintValue64, cg());
890
#endif
891
892
//Perform the paint.
893
//
894
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::MOVRegImm4(), frameSlotIndexReg, paintSize, cg());
895
cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::label, startLabel, cg());
896
if (comp()->target().is64Bit())
897
cursor = new (trHeapMemory()) TR::X86MemRegInstruction(cursor, TR::InstOpCode::S8MemReg, generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintReg, cg());
898
else
899
cursor = new (trHeapMemory()) TR::X86MemImmInstruction(cursor, TR::InstOpCode::SMemImm4(), generateX86MemoryReference(espReal, frameSlotIndexReg, 0,(uint8_t) paintSlotsOffset, cg()), paintValue32, cg());
900
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::SUBRegImms(), frameSlotIndexReg, sizeof(intptr_t),cg());
901
cursor = new (trHeapMemory()) TR::X86RegImmInstruction(cursor, TR::InstOpCode::CMPRegImm4(), frameSlotIndexReg, paintBound, cg());
902
cursor = new (trHeapMemory()) TR::X86LabelInstruction(cursor, TR::InstOpCode::JGE4, startLabel,cg());
903
}
904
905
// Save preserved regs
906
//
907
cursor = savePreservedRegisters(cursor);
908
909
// Insert some counters
910
//
911
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#preserved", preservedRegsSize >> getProperties().getParmSlotShift(), TR::DebugCounter::Expensive);
912
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:inline", 1, TR::DebugCounter::Expensive);
913
914
// Initialize any local pointers that could otherwise confuse the GC.
915
//
916
TR::RealRegister *framePointer = machine()->getRealRegister(TR::RealRegister::vfp);
917
if (atlas)
918
{
919
TR_ASSERT(_properties.getNumScratchRegisters() >= 2, "Need two scratch registers to initialize reference locals");
920
TR::RealRegister *loopReg = machine()->getRealRegister(properties.getIntegerScratchRegister(1));
921
922
int32_t numReferenceLocalSlotsToInitialize = atlas->getNumberOfSlotsToBeInitialized();
923
int32_t numInternalPointerSlotsToInitialize = 0;
924
925
if (atlas->getInternalPointerMap())
926
{
927
numInternalPointerSlotsToInitialize = atlas->getNumberOfDistinctPinningArrays() +
928
atlas->getInternalPointerMap()->getNumInternalPointers();
929
}
930
931
if (numReferenceLocalSlotsToInitialize > 0 || numInternalPointerSlotsToInitialize > 0)
932
{
933
cursor = new (trHeapMemory()) TR::X86RegRegInstruction(cursor, TR::InstOpCode::XORRegReg(), scratchReg, scratchReg, cg());
934
935
// Initialize locals that are live on entry
936
//
937
if (numReferenceLocalSlotsToInitialize > 0)
938
{
939
cursor = initializeLocals(
940
cursor,
941
atlas->getLocalBaseOffset(),
942
numReferenceLocalSlotsToInitialize,
943
properties.getPointerSize(),
944
framePointer, scratchReg, loopReg,
945
cg());
946
947
#if defined(DEBUG)
948
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
949
atlas->getLocalBaseOffset(),
950
numReferenceLocalSlotsToInitialize * properties.getPointerSize(), "Initialized live vars",
951
debugFrameSlotInfo);
952
#endif
953
}
954
955
// Initialize internal pointers and their pinning arrays
956
//
957
if (numInternalPointerSlotsToInitialize > 0)
958
{
959
cursor = initializeLocals(
960
cursor,
961
atlas->getOffsetOfFirstInternalPointer(),
962
numInternalPointerSlotsToInitialize,
963
properties.getPointerSize(),
964
framePointer, scratchReg, loopReg,
965
cg());
966
967
#if defined(DEBUG)
968
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
969
atlas->getOffsetOfFirstInternalPointer(),
970
numInternalPointerSlotsToInitialize * properties.getPointerSize(),
971
"Initialized internal pointers",
972
debugFrameSlotInfo);
973
#endif
974
}
975
}
976
}
977
978
#if defined(DEBUG)
979
debugFrameSlotInfo = new (trHeapMemory()) TR_DebugFrameSegmentInfo(comp(),
980
-localSize - preservedRegsSize - outgoingArgSize,
981
outgoingArgSize, "Outgoing args",
982
debugFrameSlotInfo
983
);
984
#endif
985
986
// Move parameters to where the method body will expect to find them
987
// TODO: If we separate the stores from the reg moves, we could do the stores
988
// before buying the stack frame, thereby using tiny offsets and thus smaller
989
// instructions.
990
//
991
cursor = copyParametersToHomeLocation(cursor, parmsHaveBeenBackSpilled);
992
993
cursor = cg()->generateDebugCounter(cursor, "cg.prologues", 1, TR::DebugCounter::Expensive);
994
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#allocBytes", allocSize, TR::DebugCounter::Expensive);
995
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#localBytes", localSize, TR::DebugCounter::Expensive);
996
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#frameBytes", cg()->getFrameSizeInBytes(), TR::DebugCounter::Expensive);
997
cursor = cg()->generateDebugCounter(cursor, "cg.prologues:#peakBytes", peakSize, TR::DebugCounter::Expensive);
998
999
#if defined(DEBUG)
1000
if (comp()->getOption(TR_TraceCG))
1001
{
1002
diagnostic("\nFrame layout:\n");
1003
diagnostic(" +rsp +vfp end size what\n");
1004
debugFrameSlotInfo->sort()->print(cg()->getDebug());
1005
diagnostic("\n");
1006
}
1007
#endif
1008
}
1009
1010
bool J9::X86::PrivateLinkage::needsFrameDeallocation()
1011
{
1012
// frame needs a deallocation if FrameSize == 0
1013
//
1014
return !_properties.getAlwaysDedicateFramePointerRegister() && cg()->getFrameSizeInBytes() == 0;
1015
}
1016
1017
TR::Instruction *J9::X86::PrivateLinkage::deallocateFrameIfNeeded(TR::Instruction *cursor, int32_t size)
1018
{
1019
return cursor;
1020
}
1021
1022
1023
void J9::X86::PrivateLinkage::createEpilogue(TR::Instruction *cursor)
1024
{
1025
if (cg()->canEmitBreakOnDFSet())
1026
cursor = generateBreakOnDFSet(cg(), cursor);
1027
1028
TR::RealRegister* espReal = machine()->getRealRegister(TR::RealRegister::esp);
1029
1030
cursor = cg()->generateDebugCounter(cursor, "cg.epilogues", 1, TR::DebugCounter::Expensive);
1031
1032
// Restore preserved regs
1033
//
1034
cursor = restorePreservedRegisters(cursor);
1035
1036
// Deallocate the stack frame
1037
//
1038
if (_properties.getAlwaysDedicateFramePointerRegister())
1039
{
1040
// Restore stack pointer from frame pointer
1041
//
1042
cursor = generateRegRegInstruction(cursor, TR::InstOpCode::MOVRegReg(), espReal, machine()->getRealRegister(_properties.getFramePointerRegister()), cg());
1043
cursor = generateRegInstruction(cursor, TR::InstOpCode::POPReg, machine()->getRealRegister(_properties.getFramePointerRegister()), cg());
1044
}
1045
else
1046
{
1047
auto frameSize = cg()->getFrameSizeInBytes();
1048
if (frameSize != 0)
1049
{
1050
cursor = generateRegImmInstruction(cursor, (frameSize <= 127) ? TR::InstOpCode::ADDRegImms() : TR::InstOpCode::ADDRegImm4(), espReal, frameSize, cg());
1051
}
1052
}
1053
1054
if (cursor->getNext()->getOpCodeValue() == TR::InstOpCode::RETImm2)
1055
{
1056
toIA32ImmInstruction(cursor->getNext())->setSourceImmediate(comp()->getJittedMethodSymbol()->getNumParameterSlots() << getProperties().getParmSlotShift());
1057
}
1058
}
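// Illustrative case for the RETImm2 patching above (hypothetical numbers): a
// jitted IA32 method with three incoming parameter slots and a parm-slot shift
// of 2 has the return that follows this epilogue rewritten to ret 12, so the
// callee releases its 12 bytes of incoming argument area. If the next
// instruction is not a RETImm2, nothing is patched.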
1059
1060
TR::Register *
1061
J9::X86::PrivateLinkage::buildDirectDispatch(
1062
TR::Node *callNode,
1063
bool spillFPRegs)
1064
{
1065
TR::StackMemoryRegion stackMemoryRegion(*comp()->trMemory());
1066
1067
TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
1068
TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
1069
1070
TR::X86CallSite site(callNode, this);
1071
1072
// Add the int3 instruction if breakOnThrow is set on this user defined exception
1073
//
1074
TR::SimpleRegex *r = comp()->getOptions()->getBreakOnThrow();
1075
if (r && callNode && callNode->getOpCode().hasSymbolReference() &&
1076
comp()->getSymRefTab()->findOrCreateAThrowSymbolRef(comp()->getMethodSymbol())==callNode->getSymbolReference() &&
1077
callNode->getNumChildren()>=1 && callNode->getFirstChild()->getNumChildren()>=1 &&
1078
callNode->getFirstChild()->getFirstChild()->getOpCode().hasSymbolReference() &&
1079
callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->isStatic() &&
1080
callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getCPIndex() >= 0 &&
1081
callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->castToStaticSymbol()->isClassObject() &&
1082
!callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getSymbol()->castToStaticSymbol()->addressIsCPIndexOfStatic())
1083
{
1084
uint32_t len;
1085
TR_ResolvedMethod * method =
1086
callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getOwningMethod(comp());
1087
int32_t cpIndex = callNode->getFirstChild()->getFirstChild()->getSymbolReference()->getCPIndex();
1088
char * name = method->getClassNameFromConstantPool(cpIndex, len);
1089
if (name)
1090
{
1091
if (TR::SimpleRegex::matchIgnoringLocale(r, name))
1092
{
1093
generateInstruction(TR::InstOpCode::INT3, callNode, cg());
1094
}
1095
}
1096
}
1097
1098
// Build arguments and initially populate regdeps
1099
//
1100
buildCallArguments(site);
1101
1102
// Remember where internal control flow region should start,
1103
// and create labels
1104
//
1105
TR::Instruction *startBookmark = cg()->getAppendInstruction();
1106
TR::LabelSymbol *startLabel = generateLabelSymbol(cg());
1107
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg());
1108
startLabel->setStartInternalControlFlow();
1109
doneLabel->setEndInternalControlFlow();
1110
1111
buildDirectCall(callNode->getSymbolReference(), site);
1112
1113
// Construct postconditions
1114
//
1115
TR::Register *returnRegister = buildCallPostconditions(site);
1116
site.stopAddingConditions();
1117
1118
// Create the internal control flow region and VFP adjustment
1119
//
1120
generateLabelInstruction(startBookmark, TR::InstOpCode::label, startLabel, site.getPreConditionsUnderConstruction(), cg());
1121
if (getProperties().getCallerCleanup())
1122
{
1123
// TODO: Caller must clean up
1124
}
1125
else if (callNode->getSymbol()->castToMethodSymbol()->isHelper() && getProperties().getUsesRegsForHelperArgs())
1126
{
1127
// No cleanup needed for helpers if args are passed in registers
1128
}
1129
else
1130
{
1131
generateVFPCallCleanupInstruction(-site.getArgSize(), callNode, cg());
1132
}
1133
generateLabelInstruction(TR::InstOpCode::label, callNode, doneLabel, site.getPostConditionsUnderConstruction(), cg());
1134
1135
// Stop using the killed registers that are not going to persist
1136
//
1137
stopUsingKilledRegisters(site.getPostConditionsUnderConstruction(), returnRegister);
1138
1139
if (callNode->getType().isFloatingPoint())
1140
{
1141
static char *forceX87LinkageForSSE = feGetEnv("TR_ForceX87LinkageForSSE");
1142
if (callNode->getReferenceCount() == 1 && returnRegister->getKind() == TR_X87)
1143
{
1144
// If the method returns a floating-point value that is not used, insert a
1145
// dummy store to eventually pop the value from the floating-point stack.
1146
//
1147
generateFPSTiST0RegRegInstruction(TR::InstOpCode::FSTRegReg, callNode, returnRegister, returnRegister, cg());
1148
}
1149
else if (forceX87LinkageForSSE && returnRegister->getKind() == TR_FPR)
1150
{
1151
// If the caller expects the return value in an XMMR, insert a
1152
// transfer from the floating-point stack to the XMMR via memory.
1153
//
1154
coerceFPReturnValueToXMMR(callNode, site.getPostConditionsUnderConstruction(), site.getMethodSymbol(), returnRegister);
1155
}
1156
}
1157
1158
if (cg()->enableRegisterAssociations() && !callNode->getSymbol()->castToMethodSymbol()->preservesAllRegisters())
1159
associatePreservedRegisters(site.getPostConditionsUnderConstruction(), returnRegister);
1160
1161
return returnRegister;
1162
}
1163
1164
1165
TR::X86CallSite::X86CallSite(TR::Node *callNode, TR::Linkage *calleeLinkage)
1166
:_callNode(callNode)
1167
,_linkage(calleeLinkage)
1168
,_vftImplicitExceptionPoint(NULL)
1169
,_firstPICSlotInstruction(NULL)
1170
,_profiledTargets(NULL)
1171
,_interfaceClassOfMethod(NULL)
1172
,_argSize(-1)
1173
,_preservedRegisterMask(0)
1174
,_thunkAddress(NULL)
1175
,_useLastITableCache(false)
1176
{
1177
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
1178
if (getMethodSymbol()->isInterface())
1179
{
1180
// Find the class pointer to the interface class if it is already loaded.
1181
// This is needed by both static and dynamic PICs
1182
//
1183
TR::Method *interfaceMethod = getMethodSymbol()->getMethod();
1184
int32_t len = interfaceMethod->classNameLength();
1185
char * s = TR::Compiler->cls.classNameToSignature(interfaceMethod->classNameChars(), len, comp());
1186
_interfaceClassOfMethod = fej9->getClassFromSignature(s, len, getSymbolReference()->getOwningMethod(comp()));
1187
}
1188
1189
setupVirtualGuardInfo();
1190
computeProfiledTargets();
1191
1192
// Initialize the register dependencies with conservative estimates of the
1193
// number of conditions
1194
//
1195
uint32_t numPreconditions =
1196
calleeLinkage->getProperties().getNumIntegerArgumentRegisters()
1197
+ calleeLinkage->getProperties().getNumFloatArgumentRegisters()
1198
+ 3; // VM Thread + eax + possible vtableIndex/J9Method arg on IA32
1199
1200
uint32_t numPostconditions =
1201
calleeLinkage->getProperties().getNumberOfVolatileGPRegisters()
1202
+ calleeLinkage->getProperties().getNumberOfVolatileXMMRegisters()
1203
+ 3; // return reg + VM Thread + scratch
1204
1205
_preConditionsUnderConstruction = generateRegisterDependencyConditions(numPreconditions, 0, cg());
1206
_postConditionsUnderConstruction = generateRegisterDependencyConditions((COPY_PRECONDITIONS_TO_POSTCONDITIONS? numPreconditions : 0), numPostconditions + (COPY_PRECONDITIONS_TO_POSTCONDITIONS? numPreconditions : 0), cg());
1207
1208
1209
_preservedRegisterMask = getLinkage()->getProperties().getPreservedRegisterMapForGC();
1210
if (getMethodSymbol()->preservesAllRegisters())
1211
{
1212
_preservedRegisterMask |= TR::RealRegister::getAvailableRegistersMask(TR_GPR);
1213
if (callNode->getDataType() != TR::NoType)
1214
{
1215
// Cross our fingers and hope things that preserve all regs only return ints
1216
_preservedRegisterMask &= ~TR::RealRegister::gprMask(getLinkage()->getProperties().getIntegerReturnRegister());
1217
}
1218
}
1219
1220
}
1221
1222
void TR::X86CallSite::setupVirtualGuardInfo()
1223
{
1224
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
1225
_virtualGuardKind = TR_NoGuard;
1226
_devirtualizedMethod = NULL;
1227
_devirtualizedMethodSymRef = NULL;
1228
1229
if (getMethodSymbol()->isVirtual() && _callNode->getOpCode().isIndirect())
1230
{
1231
TR_ResolvedMethod *resolvedMethod = getResolvedMethod();
1232
if (resolvedMethod &&
1233
(!getMethodSymbol()->isVMInternalNative() || !comp()->getOption(TR_FullSpeedDebug)) &&
1234
!_callNode->isTheVirtualCallNodeForAGuardedInlinedCall())
1235
{
1236
if (!resolvedMethod->virtualMethodIsOverridden() &&
1237
!resolvedMethod->isAbstract())
1238
{
1239
_virtualGuardKind = TR_NonoverriddenGuard;
1240
_devirtualizedMethod = resolvedMethod;
1241
_devirtualizedMethodSymRef = getSymbolReference();
1242
}
1243
else
1244
{
1245
TR_OpaqueClassBlock *thisClass = resolvedMethod->containingClass();
1246
TR_DevirtualizedCallInfo *devirtualizedCallInfo = comp()->findDevirtualizedCall(_callNode);
1247
TR_OpaqueClassBlock *refinedThisClass = NULL;
1248
1249
if (devirtualizedCallInfo)
1250
{
1251
refinedThisClass = devirtualizedCallInfo->_thisType;
1252
1253
if (refinedThisClass)
1254
thisClass = refinedThisClass;
1255
}
1256
1257
TR::SymbolReference *methodSymRef = getSymbolReference();
1258
TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();
1259
/* Devirtualization is not currently supported for AOT compilations */
1260
if (thisClass && TR::Compiler->cls.isAbstractClass(comp(), thisClass) && !comp()->compileRelocatableCode())
1261
{
1262
TR_ResolvedMethod * method = chTable->findSingleAbstractImplementer(thisClass, methodSymRef->getOffset(), methodSymRef->getOwningMethod(comp()), comp());
1263
if (method &&
1264
(comp()->isRecursiveMethodTarget(method) ||
1265
!method->isInterpreted() ||
1266
method->isJITInternalNative()))
1267
{
1268
_virtualGuardKind = TR_AbstractGuard;
1269
_devirtualizedMethod = method;
1270
}
1271
}
1272
else if (refinedThisClass &&
1273
!chTable->isOverriddenInThisHierarchy(resolvedMethod, refinedThisClass, methodSymRef->getOffset(), comp()))
1274
{
1275
if (resolvedMethod->virtualMethodIsOverridden())
1276
{
1277
TR_ResolvedMethod *calleeMethod = methodSymRef->getOwningMethod(comp())->getResolvedVirtualMethod(comp(), refinedThisClass, methodSymRef->getOffset());
1278
if (calleeMethod &&
1279
(comp()->isRecursiveMethodTarget(calleeMethod) ||
1280
!calleeMethod->isInterpreted() ||
1281
calleeMethod->isJITInternalNative()))
1282
{
1283
_virtualGuardKind = TR_HierarchyGuard;
1284
_devirtualizedMethod = calleeMethod;
1285
}
1286
}
1287
}
1288
}
1289
1290
if (_devirtualizedMethod != NULL && _devirtualizedMethodSymRef == NULL)
1291
_devirtualizedMethodSymRef = comp()->getSymRefTab()->findOrCreateMethodSymbol(
1292
getSymbolReference()->getOwningMethodIndex(), -1, _devirtualizedMethod, TR::MethodSymbol::Virtual);
1293
}
1294
}
1295
1296
// Some self-consistency conditions
1297
TR_ASSERT((_virtualGuardKind == TR_NoGuard) == (_devirtualizedMethod == NULL), "Virtual guard requires _devirtualizedMethod");
1298
TR_ASSERT((_devirtualizedMethod == NULL) == (_devirtualizedMethodSymRef == NULL), "_devirtualizedMethod requires _devirtualizedMethodSymRef");
1299
}
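// Illustrative outcomes of the guard selection above (hypothetical call sites):
// an indirect virtual call whose resolved target is neither overridden nor
// abstract is devirtualized under a TR_NonoverriddenGuard; if the receiver class
// is abstract and the CHTable reports a single concrete implementer that is
// recursive, already compiled, or a JIT internal native, a TR_AbstractGuard is
// used; and if earlier devirtualization analysis refined the receiver type to a
// subclass in which the method is not overridden, a TR_HierarchyGuard names the
// target resolved in that subclass. Otherwise _virtualGuardKind stays TR_NoGuard
// and no guarded devirtualization is attempted here.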
1300
1301
void TR::X86CallSite::computeProfiledTargets()
1302
{
1303
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
1304
1305
if (cg()->profiledPointersRequireRelocation())
1306
// bail until we create appropriate relocations to validate profiled targets
1307
return;
1308
1309
// Use static PICs for guarded calls as well
1310
//
1311
1312
_profiledTargets = new(comp()->trStackMemory()) TR_ScratchList<TR::X86PICSlot>(comp()->trMemory());
1313
1314
TR::SymbolReference *methodSymRef = getSymbolReference();
1315
TR::Node *callNode = getCallNode();
1316
1317
// TODO: Note the different logic for virtual and interface calls. Is this necessary?
1318
//
1319
1320
if (getMethodSymbol()->isVirtual() && !callNode->getSymbolReference()->isUnresolved() &&
1321
(callNode->getSymbolReference() != comp()->getSymRefTab()->findObjectNewInstanceImplSymbol()) &&
1322
callNode->getOpCode().isIndirect())
1323
{
1324
if (!comp()->getOption(TR_DisableInterpreterProfiling) &&
1325
TR_ValueProfileInfoManager::get(comp()))
1326
{
1327
TR::Node *callNode = getCallNode();
1328
TR_AddressInfo *valueInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));
1329
1330
// PMR 05447,379,000 getTopValue may return array length profile data instead of a class pointer
1331
// (when the virtual call feeds an arraycopy method length parameter). We need to defend this case to
1332
// avoid attempting to use the length as a pointer, so use asAddressInfo() to gate assignment of topValue.
1333
uintptr_t topValue = (valueInfo) ? valueInfo->getTopValue() : 0;
1334
1335
// if the call to hashcode is a virtual call node, the top value was already inlined.
1336
if (callNode->isTheVirtualCallNodeForAGuardedInlinedCall())
1337
topValue = 0;
1338
1339
// Is the topValue valid?
1340
if (topValue)
1341
{
1342
if (valueInfo->getTopProbability() < getMinProfiledCallFrequency() ||
1343
comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9))
1344
{
1345
topValue = 0;
1346
}
1347
else
1348
{
1349
//printf("Checking is instanceof for top %p for %s\n", topValue, methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->signature(comp()->trMemory())); fflush(stdout);
1350
TR_OpaqueClassBlock *callSiteMethodClass = methodSymRef->getSymbol()->getResolvedMethodSymbol()->getResolvedMethod()->classOfMethod();
1351
if (!cg()->isProfiledClassAndCallSiteCompatible((TR_OpaqueClassBlock *)topValue, callSiteMethodClass))
1352
{
1353
topValue = 0;
1354
}
1355
}
1356
}
1357
1358
if (!topValue && !callNode->getSymbolReference()->isUnresolved() &&
1359
(callNode->getSymbol()->castToMethodSymbol()->getRecognizedMethod() == TR::java_lang_Object_clone))
1360
topValue = (uintptr_t) comp()->getObjectClassPointer();
1361
1362
if (topValue)
1363
{
1364
1365
TR_ResolvedMethod *profiledVirtualMethod = callNode->getSymbolReference()->getOwningMethod(comp())->getResolvedVirtualMethod(comp(),
1366
(TR_OpaqueClassBlock *)topValue, methodSymRef->getOffset());
1367
if (profiledVirtualMethod &&
1368
(!profiledVirtualMethod->isInterpreted() ||
1369
profiledVirtualMethod->isJITInternalNative()))
1370
{
1371
//if (!getMethodSymbol()->isInterface() && profiledVirtualMethod->isJITInternalNative())
1372
//printf("New opportunity in %s to callee %s\n", comp()->signature(), profiledVirtualMethod->signature(comp()->trMemory(), stackAlloc));
1373
//TR_ASSERT(profiledVirtualMethod->classOfMethod() == (TR_OpaqueClassBlock *)topValue, "assertion failure");
1374
1375
TR_OpaqueMethodBlock *methodToBeCompared = NULL;
1376
int32_t slot = -1;
1377
if (profiledVirtualMethod->isJITInternalNative())
1378
{
1379
int32_t offset = callNode->getSymbolReference()->getOffset();
1380
slot = fej9->virtualCallOffsetToVTableSlot(offset);
1381
methodToBeCompared = profiledVirtualMethod->getPersistentIdentifier();
1382
}
1383
1384
_profiledTargets->add(new(comp()->trStackMemory()) TR::X86PICSlot((uintptr_t)topValue, profiledVirtualMethod, true, methodToBeCompared, slot));
1385
}
1386
}
1387
}
1388
}
1389
else if (getMethodSymbol()->isInterface())
1390
{
1391
bool staticPICsExist = false;
1392
int32_t numStaticPICSlots = 0;
1393
1394
1395
TR_AddressInfo *addressInfo = static_cast<TR_AddressInfo*>(TR_ValueProfileInfoManager::getProfiledValueInfo(callNode, comp(), AddressInfo));
1396
#if defined(OSX)
1397
uint64_t topValue;
1398
#else
1399
uintptr_t topValue;
1400
#endif /* OSX */
1401
float missRatio = 0.0;
1402
if (addressInfo && addressInfo->getTopValue(topValue) > 0 && topValue && !comp()->getPersistentInfo()->isObsoleteClass((void*)topValue, fej9) &&
1403
addressInfo->getTopProbability() >= getMinProfiledCallFrequency())
1404
{
1405
uint32_t totalFrequency = addressInfo->getTotalFrequency();
1406
TR_ScratchList<TR_ExtraAddressInfo> valuesSortedByFrequency(comp()->trMemory());
1407
addressInfo->getSortedList(comp(), &valuesSortedByFrequency);
1408
1409
static const char *p = feGetEnv("TR_TracePIC");
1410
if (p)
1411
{
1412
traceMsg(comp(), "Value profile info for callNode %p in %s\n", callNode, comp()->signature());
1413
addressInfo->getProfiler()->dumpInfo(comp()->getOutFile());
1414
traceMsg(comp(), "\n");
1415
}
1416
1417
uintptr_t totalPICHitFrequency = 0;
1418
uintptr_t totalPICMissFrequency = 0;
1419
ListIterator<TR_ExtraAddressInfo> sortedValuesIt(&valuesSortedByFrequency);
1420
for (TR_ExtraAddressInfo *profiledInfo = sortedValuesIt.getFirst(); profiledInfo != NULL; profiledInfo = sortedValuesIt.getNext())
1421
{
1422
float frequency = ((float)profiledInfo->_frequency) / totalFrequency;
1423
if (comp()->getOption(TR_TraceCG))
1424
traceMsg(comp(), " Profiled target frequency %f", frequency);
1425
1426
TR_OpaqueClassBlock *thisType = (TR_OpaqueClassBlock *) profiledInfo->_value;
1427
TR_ResolvedMethod *profiledInterfaceMethod = NULL;
1428
TR::SymbolReference *methodSymRef = getSymbolReference();
1429
if (!comp()->getPersistentInfo()->isObsoleteClass((void *)thisType, fej9))
1430
{
1431
profiledInterfaceMethod = methodSymRef->getOwningMethod(comp())->getResolvedInterfaceMethod(comp(),
1432
thisType, methodSymRef->getCPIndex());
1433
}
1434
if (profiledInterfaceMethod &&
1435
(!profiledInterfaceMethod->isInterpreted() ||
1436
profiledInterfaceMethod->isJITInternalNative()))
1437
{
1438
if (frequency < getMinProfiledCallFrequency())
1439
{
1440
if (comp()->getOption(TR_TraceCG))
1441
traceMsg(comp(), " - Too infrequent");
1442
totalPICMissFrequency += profiledInfo->_frequency;
1443
}
1444
else if (numStaticPICSlots >= comp()->getOptions()->getMaxStaticPICSlots(comp()->getMethodHotness()))
1445
{
1446
if (comp()->getOption(TR_TraceCG))
1447
traceMsg(comp(), " - Already reached limit of %d static PIC slots", numStaticPICSlots);
1448
totalPICMissFrequency += profiledInfo->_frequency;
1449
}
1450
else
1451
{
1452
_profiledTargets->add(new(comp()->trStackMemory()) TR::X86PICSlot((uintptr_t)thisType, profiledInterfaceMethod));
1453
if (comp()->getOption(TR_TraceCG))
1454
traceMsg(comp(), " + Added static PIC slot");
1455
numStaticPICSlots++;
1456
totalPICHitFrequency += profiledInfo->_frequency;
1457
}
1458
if (comp()->getOption(TR_TraceCG))
1459
traceMsg(comp(), " for %s\n", profiledInterfaceMethod->signature(comp()->trMemory(), stackAlloc));
1460
}
1461
else
1462
{
1463
if (comp()->getOption(TR_TraceCG))
1464
traceMsg(comp(), " * Can't find suitable method from profile info\n");
1465
}
1466
1467
}
1468
missRatio = 1.0 * totalPICMissFrequency / totalFrequency;
1469
}
1470
1471
_useLastITableCache = !comp()->getOption(TR_DisableLastITableCache) ? true : false;
1472
// Disable lastITable logic if all the implementers can fit into the pic slots during non-startup state
1473
if (_useLastITableCache && comp()->target().is64Bit() && _interfaceClassOfMethod && comp()->getPersistentInfo()->getJitState() != STARTUP_STATE)
1474
{
1475
J9::X86::PrivateLinkage *privateLinkage = static_cast<J9::X86::PrivateLinkage *>(getLinkage());
1476
int32_t numPICSlots = numStaticPICSlots + privateLinkage->IPicParameters.defaultNumberOfSlots;
1477
TR_ResolvedMethod **implArray = new (comp()->trStackMemory()) TR_ResolvedMethod*[numPICSlots+1];
1478
TR_PersistentCHTable * chTable = comp()->getPersistentInfo()->getPersistentCHTable();
1479
int32_t cpIndex = getSymbolReference()->getCPIndex();
1480
int32_t numImplementers = chTable->findnInterfaceImplementers(_interfaceClassOfMethod, numPICSlots+1, implArray, cpIndex, getSymbolReference()->getOwningMethod(comp()), comp());
1481
if (numImplementers <= numPICSlots)
1482
{
1483
_useLastITableCache = false;
1484
if (comp()->getOption(TR_TraceCG))
1485
traceMsg(comp(),"Found %d implementers for call to %s, can be fit into %d pic slots, disabling lastITable cache\n", numImplementers, getMethodSymbol()->getMethod()->signature(comp()->trMemory()), numPICSlots);
1486
}
1487
}
1488
else if (_useLastITableCache && comp()->target().is32Bit()) // Use the original heuristic for ia32 due to defect 111651
1489
{
1490
_useLastITableCache = false; // Default on ia32 is not to use the last itable cache
1491
static char *lastITableCacheThresholdStr = feGetEnv("TR_lastITableCacheThreshold");
1492
1493
// With 4 static and 2 dynamic PIC slots, the cache starts to be used
1494
// for 7 equally-likely targets. We want to catch that case, so the
1495
// threshold must be comfortably below 3/7 = 28%.
1496
//
1497
float lastITableCacheThreshold = lastITableCacheThresholdStr? atof(lastITableCacheThresholdStr) : 0.2;
1498
if ( missRatio >= lastITableCacheThreshold
1499
&& performTransformation(comp(), "O^O PIC miss ratio is %f >= %f -- adding lastITable cache\n", missRatio, lastITableCacheThreshold))
1500
{
1501
_useLastITableCache = true;
1502
}
1503
}
1504
}
1505
1506
if (_profiledTargets->isEmpty())
1507
_profiledTargets = NULL;
1508
}
1509
1510
bool TR::X86CallSite::shouldUseInterpreterLinkage()
1511
{
1512
if (getMethodSymbol()->isVirtual() &&
1513
!getSymbolReference()->isUnresolved() &&
1514
getMethodSymbol()->isVMInternalNative() &&
1515
!getResolvedMethod()->virtualMethodIsOverridden() &&
1516
!getResolvedMethod()->isAbstract())
1517
return true;
1518
else
1519
return false;
1520
}
1521
1522
1523
TR::Register *TR::X86CallSite::evaluateVFT()
1524
{
1525
TR::Node *vftNode = getCallNode()->getFirstChild();
1526
if (vftNode->getRegister())
1527
return vftNode->getRegister();
1528
else
1529
{
1530
TR::Register *result = cg()->evaluate(vftNode);
1531
_vftImplicitExceptionPoint = cg()->getImplicitExceptionPoint();
1532
return result;
1533
}
1534
}
1535
1536
bool TR::X86CallSite::resolvedVirtualShouldUseVFTCall()
1537
{
1538
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
1539
TR_ASSERT(getMethodSymbol()->isVirtual() && !getSymbolReference()->isUnresolved(), "assertion failure");
1540
1541
// WARNING: VPIC doesn't work for resolved calls at the moment, so setting
1542
// TR_EnableVPICForResolvedVirtualCalls won't work. The most straightforward
1543
// way to get VPIC to support (most) resolved calls is to simply treat them
1544
// the same way as unresolved ones, but that isn't allowed when we are
1545
// promising isResolvedVirtualDispatchGuaranteed().
1546
return
1547
fej9->isResolvedVirtualDispatchGuaranteed(comp()) &&
1548
(!comp()->getOption(TR_EnableVPICForResolvedVirtualCalls) ||
1549
getProfiledTargets() ||
1550
getCallNode()->isTheVirtualCallNodeForAGuardedInlinedCall() ||
1551
( comp()->getSymRefTab()->findObjectNewInstanceImplSymbol() &&
1552
comp()->getSymRefTab()->findObjectNewInstanceImplSymbol()->getSymbol() == getResolvedMethodSymbol()));
1553
}
1554
1555
void TR::X86CallSite::stopAddingConditions()
1556
{
1557
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
1558
if (COPY_PRECONDITIONS_TO_POSTCONDITIONS)
1559
{
1560
TR::RegisterDependencyGroup *preconditions = getPreConditionsUnderConstruction()->getPreConditions();
1561
TR::RegisterDependencyGroup *postconditions = getPostConditionsUnderConstruction()->getPostConditions();
1562
for (uint8_t i = 0; i < getPreConditionsUnderConstruction()->getAddCursorForPre(); i++)
1563
{
1564
TR::RegisterDependency *pre = preconditions->getRegisterDependency(i);
1565
getPostConditionsUnderConstruction()->unionPreCondition(pre->getRegister(), pre->getRealRegister(), cg(), pre->getFlags());
1566
TR::RegisterDependency *post = postconditions->findDependency(pre->getRealRegister(), getPostConditionsUnderConstruction()->getAddCursorForPost());
1567
if (!post)
1568
getPostConditionsUnderConstruction()->addPostCondition(pre->getRegister(), pre->getRealRegister(), cg(), pre->getFlags());
1569
}
1570
}
1571
1572
_preConditionsUnderConstruction->stopAddingPreConditions();
1573
_preConditionsUnderConstruction->stopAddingPostConditions();
1574
_postConditionsUnderConstruction->stopAddingPreConditions();
1575
_postConditionsUnderConstruction->stopAddingPostConditions();
1576
}
1577
1578
static void evaluateCommonedNodes(TR::Node *node, TR::CodeGenerator *cg)
1579
{
1580
// There is a rule that if a node with a symref is evaluated, it must be
1581
// evaluated in the first treetop under which it appears. (The so-called
1582
// "prompt evaluation" rule). Since we don't know what future trees will
1583
// do, this effectively means that any symref-bearing node that is commoned
1584
// with another treetop must be evaluated now.
1585
// We approximate this by saying that any node with a refcount >= 2 must be
1586
// evaluated now. The "refcount >= 2" is a conservative approximation of
1587
// "commoned with another treetop" because the latter is not cheap to figure out.
1588
// "Any node" is an approximation of "any node with a symref"; we do that
1589
// because it allows us to use a simple linear-time tree walk without
1590
// resorting to visit counts.
1591
//
1592
TR::Compilation * comp= cg->comp();
1593
if (node->getRegister() == NULL)
1594
{
1595
if (node->getReferenceCount() >= 2)
1596
{
1597
if (comp->getOption(TR_TraceCG))
1598
traceMsg(comp, "Promptly evaluating commoned node %s\n", cg->getDebug()->getName(node));
1599
cg->evaluate(node);
1600
}
1601
else
1602
{
1603
for (int32_t i = 0; i < node->getNumChildren(); i++)
1604
evaluateCommonedNodes(node->getChild(i), cg);
1605
}
1606
}
1607
}
1608
1609
1610
static bool indirectDispatchWillBuildVirtualGuard(TR::Compilation *comp, TR::X86CallSite *site)
1611
{
1612
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp->fe());
1613
1614
// This method is used in vft mask instruction removal in buildIndirectDispatch
1615
// if method will generate virtual call guard and build direct call, then skip vft mask instruction.
1616
if (site->getVirtualGuardKind() != TR_NoGuard && fej9->canDevirtualizeDispatch() )
1617
{
1618
if (comp->performVirtualGuardNOPing())
1619
{
1620
return true;
1621
}
1622
else if (site->getVirtualGuardKind() == TR_NonoverriddenGuard
1623
&& !comp->getOption(TR_EnableHCR)
1624
&& !comp->getOption(TR_MimicInterpreterFrameShape))
1625
{
1626
return true;
1627
}
1628
}
1629
return false;
1630
}
1631
1632
TR::Register *J9::X86::PrivateLinkage::buildIndirectDispatch(TR::Node *callNode)
1633
{
1634
TR::StackMemoryRegion stackMemoryRegion(*comp()->trMemory());
1635
1636
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());
1637
1638
TR::X86CallSite site(callNode, this);
1639
1640
// Build arguments and initially populate regdeps
1641
//
1642
buildCallArguments(site);
1643
1644
// If receiver could be NULL, must evaluate it before the call
1645
// so any exception occurs before the call.
1646
// Might as well do it outside the internal control flow.
1647
//
1648
// Also evaluate the VFT if it survives the call.
1649
// The optimizer expects things to be evaluated in
1650
// the first tree in which they appear.
1651
//
1652
bool skipVFTmaskInstruction = false;
1653
if (callNode->getSymbol()->castToMethodSymbol()->firstArgumentIsReceiver())
1654
{
1655
TR::Node *rcvrChild = callNode->getChild(callNode->getFirstArgumentIndex());
1656
TR::Node *vftChild = callNode->getFirstChild();
1657
bool loadVFTForNullCheck = false;
1658
1659
if (cg()->getCurrentEvaluationTreeTop()->getNode()->getOpCodeValue() == TR::NULLCHK
1660
&& vftChild->getOpCode().isLoadIndirect()
1661
&& vftChild->getFirstChild() == cg()->getCurrentEvaluationTreeTop()->getNode()->getNullCheckReference()
1662
&& vftChild->getFirstChild()->isNonNull() == false)
1663
loadVFTForNullCheck = true;
1664
1665
bool willGenerateDirectCall = indirectDispatchWillBuildVirtualGuard(comp(), &site);
1666
static char *enableX86VFTLoadOpt = feGetEnv("TR_EnableX86VFTLoadOpt");
1667
1668
if (enableX86VFTLoadOpt &&
1669
loadVFTForNullCheck &&
1670
willGenerateDirectCall &&
1671
vftChild->getReferenceCount() == 1 &&
1672
vftChild->getRegister() == NULL)
1673
{
1674
/*cg()->generateDebugCounter(
1675
TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "skipmask",
1676
comp()->signature(),
1677
callNode->getByteCodeInfo().getCallerIndex(),
1678
callNode->getByteCodeInfo().getByteCodeIndex()));
1679
*/
1680
TR::MemoryReference *sourceMR = generateX86MemoryReference(vftChild, cg());
1681
TR::Register *reg = cg()->allocateRegister();
1682
// as vftChild->getOpCode().isLoadIndirect is true here, need set exception point
1683
TR::Instruction * instr = TR::TreeEvaluator::insertLoadMemory(vftChild, reg, sourceMR, TR_RematerializableAddress, cg());
1684
reg->setMemRef(sourceMR);
1685
cg()->setImplicitExceptionPoint(instr);
1686
site.setImplicitExceptionPoint(instr);
1687
cg()->stopUsingRegister(reg);
1688
skipVFTmaskInstruction = true;
1689
}
1690
else if (enableX86VFTLoadOpt &&
1691
loadVFTForNullCheck == false &&
1692
willGenerateDirectCall &&
1693
//vftChild->getReferenceCount() == 1 &&
1694
vftChild->getRegister() == NULL)
1695
{
1696
// skip evaluate vft mask load instruction
1697
// as it is not used in direct call
1698
//fprintf(stderr, "Skip load in %s\n", comp()->getMethodSymbol()->signature(comp()->trMemory()));
1699
skipVFTmaskInstruction = true;
1700
/*
1701
cg()->generateDebugCounter(
1702
TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "skipvft",
1703
comp()->signature(),
1704
callNode->getByteCodeInfo().getCallerIndex(),
1705
callNode->getByteCodeInfo().getByteCodeIndex()));
1706
*/
1707
}
1708
else if (rcvrChild->isNonNull() == false || callNode->getFirstChild()->getReferenceCount() > 1)
1709
{
1710
/*
1711
if (vftChild->getRegister() == NULL)
1712
{
1713
cg()->generateDebugCounter(
1714
TR::DebugCounter::debugCounterName(comp(), "cg.vftload/%s/(%s)/%d/%d", "loadvft",
1715
comp()->signature(),
1716
callNode->getByteCodeInfo().getCallerIndex(),
1717
callNode->getByteCodeInfo().getByteCodeIndex()));
1718
}*/
1719
site.evaluateVFT();
1720
}
1721
}
1722
1723
// Children of the VFT expression may also survive the call.
1724
// (Note that the following is not sufficient for the VFT node
1725
// itself, which should use site.evaluateVFT instead.)
1726
//
1727
if (skipVFTmaskInstruction == false)
1728
evaluateCommonedNodes(callNode->getFirstChild(), cg());
1729
1730
// Remember where internal control flow region should start,
1731
// and create labels
1732
//
1733
TR::Instruction *startBookmark = cg()->getAppendInstruction();
1734
TR::LabelSymbol *startLabel = generateLabelSymbol(cg());
1735
TR::LabelSymbol *doneLabel = generateLabelSymbol(cg());
1736
startLabel->setStartInternalControlFlow();
1737
doneLabel->setEndInternalControlFlow();
1738
1739
// Allocate thunk if necessary
1740
//
1741
void *virtualThunk = NULL;
1742
if (getProperties().getNeedsThunksForIndirectCalls())
1743
{
1744
TR::MethodSymbol *methodSymbol = callNode->getSymbol()->castToMethodSymbol();
1745
TR::Method *method = methodSymbol->getMethod();
1746
if (methodSymbol->isComputed())
1747
{
1748
switch (method->getMandatoryRecognizedMethod())
1749
{
1750
case TR::java_lang_invoke_ComputedCalls_dispatchVirtual:
1751
case TR::com_ibm_jit_JITHelpers_dispatchVirtual:
1752
{
1753
// Need a j2i thunk for the method that will ultimately be dispatched by this handle call
1754
char *j2iSignature = fej9->getJ2IThunkSignatureForDispatchVirtual(methodSymbol->getMethod()->signatureChars(), methodSymbol->getMethod()->signatureLength(), comp());
1755
int32_t signatureLen = strlen(j2iSignature);
1756
virtualThunk = fej9->getJ2IThunk(j2iSignature, signatureLen, comp());
1757
if (!virtualThunk)
1758
{
1759
virtualThunk = fej9->setJ2IThunk(j2iSignature, signatureLen,
1760
generateVirtualIndirectThunk(
1761
fej9->getEquivalentVirtualCallNodeForDispatchVirtual(callNode, comp())), comp());
1762
}
1763
}
1764
break;
1765
default:
1766
if (fej9->needsInvokeExactJ2IThunk(callNode, comp()))
1767
{
1768
TR_J2IThunk *thunk = generateInvokeExactJ2IThunk(callNode, methodSymbol->getMethod()->signatureChars());
1769
fej9->setInvokeExactJ2IThunk(thunk, comp());
1770
}
1771
break;
1772
}
1773
}
1774
else
1775
{
1776
virtualThunk = fej9->getJ2IThunk(methodSymbol->getMethod(), comp());
1777
if (!virtualThunk)
1778
virtualThunk = fej9->setJ2IThunk(methodSymbol->getMethod(), generateVirtualIndirectThunk(callNode), comp());
1779
}
1780
1781
site.setThunkAddress((uint8_t *)virtualThunk);
1782
}
1783
1784
TR::LabelSymbol *revirtualizeLabel = generateLabelSymbol(cg());
1785
if (site.getVirtualGuardKind() != TR_NoGuard && fej9->canDevirtualizeDispatch() && buildVirtualGuard(site, revirtualizeLabel) )
1786
{
1787
buildDirectCall(site.getDevirtualizedMethodSymRef(), site);
1788
buildRevirtualizedCall(site, revirtualizeLabel, doneLabel);
1789
}
1790
else
1791
{
1792
// Build static PIC if profiling targets available.
1793
//
1794
TR_ASSERT(skipVFTmaskInstruction == false, "VFT mask instruction is skipped in early evaluation");
1795
1796
TR::LabelSymbol *picMismatchLabel = NULL;
1797
TR_ScratchList<TR::X86PICSlot> *profiledTargets = site.getProfiledTargets();
1798
if (profiledTargets)
1799
{
1800
ListIterator<TR::X86PICSlot> i(profiledTargets);
1801
TR::X86PICSlot *picSlot = i.getFirst();
1802
while (picSlot)
1803
{
1804
picMismatchLabel = generateLabelSymbol(cg());
1805
1806
if (comp()->target().is32Bit())
1807
picSlot->setNeedsPicCallAlignment();
1808
1809
TR::Instruction *instr = buildPICSlot(*picSlot, picMismatchLabel, doneLabel, site);
1810
1811
if (fej9->isUnloadAssumptionRequired((TR_OpaqueClassBlock *)picSlot->getClassAddress(), comp()->getCurrentMethod()) ||
1812
cg()->profiledPointersRequireRelocation())
1813
{
1814
if (picSlot->getMethodAddress())
1815
comp()->getStaticMethodPICSites()->push_front(instr);
1816
else
1817
comp()->getStaticPICSites()->push_front(instr);
1818
}
1819
1820
picSlot = i.getNext();
1821
if (picSlot)
1822
generateLabelInstruction(TR::InstOpCode::label, site.getCallNode(), picMismatchLabel, cg());
1823
}
1824
1825
site.setFirstPICSlotInstruction(NULL);
1826
}
1827
1828
// Build the call
1829
//
1830
if (site.getMethodSymbol()->isVirtual() || site.getMethodSymbol()->isComputed())
1831
buildVirtualOrComputedCall(site, picMismatchLabel, doneLabel, (uint8_t *)virtualThunk);
1832
else
1833
buildInterfaceCall(site, picMismatchLabel, doneLabel, (uint8_t *)virtualThunk);
1834
}
1835
1836
// Construct postconditions
1837
//
1838
TR::Node *vftChild = callNode->getFirstChild();
1839
TR::Register *vftRegister = vftChild->getRegister();
1840
TR::Register *returnRegister;
1841
if (vftChild->getRegister() && (vftChild->getReferenceCount() > 1))
1842
{
1843
// VFT child survives the call, so we must include it in the postconditions.
1844
returnRegister = buildCallPostconditions(site);
1845
if (vftChild->getRegister() && vftChild->getRegister()->getRegisterPair())
1846
{
1847
site.addPostCondition(vftChild->getRegister()->getRegisterPair()->getHighOrder(), TR::RealRegister::NoReg);
1848
site.addPostCondition(vftChild->getRegister()->getRegisterPair()->getLowOrder(), TR::RealRegister::NoReg);
1849
}
1850
else
1851
site.addPostCondition(vftChild->getRegister(), TR::RealRegister::NoReg);
1852
cg()->recursivelyDecReferenceCount(vftChild);
1853
}
1854
else
1855
{
1856
// VFT child dies here; decrement it early so it doesn't interfere with dummy regs.
1857
cg()->recursivelyDecReferenceCount(vftChild);
1858
returnRegister = buildCallPostconditions(site);
1859
}
1860
1861
site.stopAddingConditions();
1862
1863
// Create the internal control flow region and VFP adjustment
1864
//
1865
generateLabelInstruction(startBookmark, TR::InstOpCode::label, startLabel, site.getPreConditionsUnderConstruction(), cg());
1866
if (!getProperties().getCallerCleanup())
1867
generateVFPCallCleanupInstruction(-site.getArgSize(), callNode, cg());
1868
generateLabelInstruction(TR::InstOpCode::label, callNode, doneLabel, site.getPostConditionsUnderConstruction(), cg());
1869
1870
// Stop using the killed registers that are not going to persist
1871
//
1872
stopUsingKilledRegisters(site.getPostConditionsUnderConstruction(), returnRegister);
1873
1874
if (callNode->getType().isFloatingPoint())
1875
{
1876
static char *forceX87LinkageForSSE = feGetEnv("TR_ForceX87LinkageForSSE");
1877
if (callNode->getReferenceCount() == 1 && returnRegister->getKind() == TR_X87)
1878
{
1879
// If the method returns a floating-point value that is not used, insert a
1880
// dummy store to eventually pop the value from the floating-point stack.
1881
//
1882
generateFPSTiST0RegRegInstruction(TR::InstOpCode::FSTRegReg, callNode, returnRegister, returnRegister, cg());
1883
}
1884
else if (forceX87LinkageForSSE && returnRegister->getKind() == TR_FPR)
1885
{
1886
// If the caller expects the return value in an XMMR, insert a
1887
// transfer from the floating-point stack to the XMMR via memory.
1888
//
1889
coerceFPReturnValueToXMMR(callNode, site.getPostConditionsUnderConstruction(), site.getMethodSymbol(), returnRegister);
1890
}
1891
}
1892
1893
if (cg()->enableRegisterAssociations())
1894
associatePreservedRegisters(site.getPostConditionsUnderConstruction(), returnRegister);
1895
1896
cg()->setImplicitExceptionPoint(site.getImplicitExceptionPoint());
1897
1898
return returnRegister;
1899
}
1900
1901
void J9::X86::PrivateLinkage::buildDirectCall(TR::SymbolReference *methodSymRef, TR::X86CallSite &site)
1902
{
1903
TR_J9VMBase *fej9 = (TR_J9VMBase *)(comp()->fe());
1904
TR::MethodSymbol *methodSymbol = methodSymRef->getSymbol()->castToMethodSymbol();
1905
TR::Instruction *callInstr = NULL;
1906
TR::Node *callNode = site.getCallNode();
1907
TR_AtomicRegion *callSiteAtomicRegions = TR::X86PatchableCodeAlignmentInstruction::CALLImm4AtomicRegions;
1908
1909
if (comp()->target().is64Bit() && methodSymRef->getReferenceNumber()>=TR_AMD64numRuntimeHelpers)
1910
fej9->reserveTrampolineIfNecessary(comp(), methodSymRef, false);
1911
1912
#if defined(J9VM_OPT_JITSERVER)
1913
// JITServer Workaround: Further transmute dispatchJ9Method symbols to appear as a runtime helper, this will cause OMR to
1914
// generate a TR_HelperAddress relocation instead of a TR_RelativeMethodAddress Relocation.
1915
if (!comp()->getOption(TR_DisableInliningOfNatives) &&
1916
methodSymbol->getMandatoryRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchJ9Method &&
1917
comp()->isOutOfProcessCompilation())
1918
{
1919
methodSymbol->setHelper();
1920
}
1921
#endif /* defined(J9VM_OPT_JITSERVER) */
1922
1923
if (cg()->supportVMInternalNatives() && methodSymbol->isVMInternalNative())
1924
{
1925
// Find the virtual register for edi
1926
// TODO: The register used should come from the linkage properties, rather than being hardcoded
1927
//
1928
TR::RealRegister::RegNum ramMethodRegisterIndex = TR::RealRegister::edi;
1929
TR::Register *ramMethodReg = cg()->allocateRegister();
1930
site.addPostCondition(ramMethodReg, TR::RealRegister::edi);
1931
1932
// Load the RAM method into rdi and call the helper
1933
if (comp()->target().is64Bit())
1934
{
1935
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, callNode, ramMethodReg, (uint64_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());
1936
}
1937
else
1938
{
1939
generateRegImmInstruction(TR::InstOpCode::MOV4RegImm4, callNode, ramMethodReg, (uint32_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());
1940
}
1941
1942
callInstr = generateHelperCallInstruction(callNode, TR_j2iTransition, NULL, cg());
1943
cg()->stopUsingRegister(ramMethodReg);
1944
}
1945
else if (comp()->target().is64Bit() && methodSymbol->isJITInternalNative())
1946
{
1947
// JIT callable natives on 64-bit may not be directly reachable. In lieu of trampolines and since this
1948
// is before binary encoding call through a register instead.
1949
//
1950
TR::RealRegister::RegNum nativeRegisterIndex = TR::RealRegister::edi;
1951
TR::Register *nativeMethodReg = cg()->allocateRegister();
1952
site.addPostCondition(nativeMethodReg, TR::RealRegister::edi);
1953
1954
generateRegImm64Instruction(TR::InstOpCode::MOV8RegImm64, callNode, nativeMethodReg, (uint64_t)(uintptr_t)methodSymbol->getMethodAddress(), cg());
1955
callInstr = generateRegInstruction(TR::InstOpCode::CALLReg, callNode, nativeMethodReg, cg());
1956
cg()->stopUsingRegister(nativeMethodReg);
1957
}
1958
else if (methodSymRef->isUnresolved() || methodSymbol->isInterpreted()
1959
|| (comp()->compileRelocatableCode() && !methodSymbol->isHelper()) )
1960
{
1961
TR::LabelSymbol *label = generateLabelSymbol(cg());
1962
1963
TR::Snippet *snippet = (TR::Snippet*)new (trHeapMemory()) TR::X86CallSnippet(cg(), callNode, label, false);
1964
cg()->addSnippet(snippet);
1965
snippet->gcMap().setGCRegisterMask(site.getPreservedRegisterMask());
1966
1967
callInstr = generateImmSymInstruction(TR::InstOpCode::CALLImm4, callNode, 0, new (trHeapMemory()) TR::SymbolReference(comp()->getSymRefTab(), label), cg());
1968
generateBoundaryAvoidanceInstruction(TR::X86BoundaryAvoidanceInstruction::unresolvedAtomicRegions, 8, 8, callInstr, cg());
1969
1970
// Nop is necessary due to confusion when resolving shared slots at a transition
1971
if (methodSymRef->isOSRInductionHelper())
1972
generatePaddingInstruction(1, callNode, cg());
1973
}
1974
else
1975
{
1976
callInstr = generateImmSymInstruction(TR::InstOpCode::CALLImm4, callNode, (uintptr_t)methodSymbol->getMethodAddress(), methodSymRef, cg());
1977
1978
if (comp()->target().isSMP() && !methodSymbol->isHelper())
1979
{
1980
// Make sure it's patchable in case it gets (re)compiled
1981
generatePatchableCodeAlignmentInstruction(callSiteAtomicRegions, callInstr, cg());
1982
}
1983
}
1984
1985
callInstr->setNeedsGCMap(site.getPreservedRegisterMask());
1986
1987
}
1988
1989
void
1990
J9::X86::PrivateLinkage::buildInterfaceCall(
1991
TR::X86CallSite &site,
1992
TR::LabelSymbol *entryLabel,
1993
TR::LabelSymbol *doneLabel,
1994
uint8_t *thunk)
1995
{
1996
TR::Register *vftRegister = site.evaluateVFT();
1997
1998
// Dynamic PICs populated by the PIC builder.
1999
// Might be able to simplify this in the presence of value profiling information.
2000
//
2001
buildIPIC(site, entryLabel, doneLabel, thunk);
2002
}
2003
2004
void J9::X86::PrivateLinkage::buildRevirtualizedCall(TR::X86CallSite &site, TR::LabelSymbol *revirtualizeLabel, TR::LabelSymbol *doneLabel)
2005
{
2006
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2007
TR::Register *vftRegister = site.getCallNode()->getFirstChild()->getRegister(); // may be NULL; we don't need to evaluate it here
2008
int32_t vftOffset = site.getSymbolReference()->getOffset();
2009
2010
TR::Snippet *snippet;
2011
if (comp()->target().is64Bit())
2012
{
2013
#ifdef TR_TARGET_64BIT
2014
snippet = new (trHeapMemory()) TR::AMD64GuardedDevirtualSnippet(
2015
cg(),
2016
site.getCallNode(),
2017
site.getDevirtualizedMethodSymRef(),
2018
doneLabel,
2019
revirtualizeLabel,
2020
vftOffset,
2021
cg()->getCurrentEvaluationBlock(),
2022
vftRegister,
2023
site.getArgSize()
2024
);
2025
#endif
2026
}
2027
else
2028
{
2029
snippet = new (trHeapMemory()) TR::X86GuardedDevirtualSnippet(
2030
cg(),
2031
site.getCallNode(),
2032
doneLabel,
2033
revirtualizeLabel,
2034
vftOffset,
2035
cg()->getCurrentEvaluationBlock(),
2036
vftRegister
2037
);
2038
}
2039
snippet->gcMap().setGCRegisterMask(site.getLinkage()->getProperties().getPreservedRegisterMapForGC());
2040
cg()->addSnippet(snippet);
2041
}
2042
2043
void J9::X86::PrivateLinkage::buildCallArguments(TR::X86CallSite &site)
2044
{
2045
site.setArgSize(buildArgs(site.getCallNode(), site.getPreConditionsUnderConstruction()));
2046
}
2047
2048
bool J9::X86::PrivateLinkage::buildVirtualGuard(TR::X86CallSite &site, TR::LabelSymbol *revirtualizeLabel)
2049
{
2050
TR_ASSERT(site.getVirtualGuardKind() != TR_NoGuard, "site must require a virtual guard");
2051
2052
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2053
2054
static TR_AtomicRegion vgnopAtomicRegions[] =
2055
{
2056
// Don't yet know whether we're patching using a self-loop or a 2-byte
2057
// jmp, but it doesn't matter because they are both 2 bytes.
2058
//
2059
{ 0x0, 5 },
2060
{ 0,0 }
2061
};
2062
2063
TR::Node *callNode = site.getCallNode();
2064
2065
// Modify following logic, also need update indirectDispatchWillBuildVirtualGuard
2066
// it is a none side effect version of this method that detect if virtual guard will be created.
2067
2068
if (comp()->performVirtualGuardNOPing())
2069
{
2070
TR_VirtualGuard *virtualGuard =
2071
TR_VirtualGuard::createGuardedDevirtualizationGuard(site.getVirtualGuardKind(), comp(), callNode);
2072
2073
TR::Instruction *patchable =
2074
generateVirtualGuardNOPInstruction(callNode, virtualGuard->addNOPSite(), NULL, revirtualizeLabel, cg());
2075
2076
if (comp()->target().isSMP())
2077
generatePatchableCodeAlignmentInstruction(vgnopAtomicRegions, patchable, cg());
2078
// HCR in J9::X86::PrivateLinkage::buildRevirtualizedCall
2079
if (comp()->getOption(TR_EnableHCR))
2080
{
2081
TR_VirtualGuard* HCRGuard = TR_VirtualGuard::createGuardedDevirtualizationGuard(TR_HCRGuard, comp(), callNode);
2082
TR::Instruction *HCRpatchable = generateVirtualGuardNOPInstruction(callNode, HCRGuard->addNOPSite(), NULL, revirtualizeLabel, cg());
2083
if (comp()->target().isSMP())
2084
generatePatchableCodeAlignmentInstruction(vgnopAtomicRegions, HCRpatchable, cg());
2085
}
2086
return true;
2087
}
2088
else if (site.getVirtualGuardKind() == TR_NonoverriddenGuard
2089
&& !comp()->getOption(TR_EnableHCR) // If patching is off, devirtualization is not safe in HCR mode
2090
&& !comp()->getOption(TR_MimicInterpreterFrameShape)) // Explicitly-guarded devirtualization is pretty pointless without inlining
2091
{
2092
// We can do an explicit guard
2093
//
2094
uint32_t overRiddenBit = fej9->offsetOfIsOverriddenBit();
2095
TR::InstOpCode::Mnemonic opCode;
2096
2097
if (overRiddenBit <= 0xff)
2098
opCode = TR::InstOpCode::TEST1MemImm1;
2099
else
2100
opCode = TR::InstOpCode::TEST4MemImm4;
2101
2102
generateMemImmInstruction(
2103
opCode,
2104
callNode,
2105
generateX86MemoryReference((intptr_t)site.getResolvedMethod()->addressContainingIsOverriddenBit(), cg()),
2106
overRiddenBit,
2107
cg()
2108
);
2109
2110
generateLabelInstruction(TR::InstOpCode::JNE4, callNode, revirtualizeLabel, cg());
2111
2112
return true;
2113
}
2114
else
2115
{
2116
// Can't do guarded devirtualization
2117
//
2118
return false;
2119
}
2120
}
2121
2122
TR::Instruction *J9::X86::PrivateLinkage::buildVFTCall(TR::X86CallSite &site, TR::InstOpCode dispatchOp, TR::Register *targetAddressReg, TR::MemoryReference *targetAddressMemref)
2123
{
2124
TR::Node *callNode = site.getCallNode();
2125
if (cg()->enableSinglePrecisionMethods() &&
2126
comp()->getJittedMethodSymbol()->usesSinglePrecisionMode())
2127
{
2128
auto cds = cg()->findOrCreate2ByteConstant(callNode, DOUBLE_PRECISION_ROUND_TO_NEAREST);
2129
generateMemInstruction(TR::InstOpCode::LDCWMem, callNode, generateX86MemoryReference(cds, cg()), cg());
2130
}
2131
2132
TR::Instruction *callInstr;
2133
if (dispatchOp.sourceIsMemRef())
2134
{
2135
TR_ASSERT(targetAddressMemref, "Call via memory requires memref");
2136
// Fix the displacement at 4 bytes so j2iVirtual can decode it if necessary
2137
if (targetAddressMemref)
2138
targetAddressMemref->setForceWideDisplacement();
2139
callInstr = generateCallMemInstruction(dispatchOp.getOpCodeValue(), callNode, targetAddressMemref, cg());
2140
}
2141
else
2142
{
2143
TR_ASSERT(targetAddressReg, "Call via register requires register");
2144
TR::Node *callNode = site.getCallNode();
2145
TR::ResolvedMethodSymbol *resolvedMethodSymbol = callNode->getSymbol()->getResolvedMethodSymbol();
2146
bool mayReachJ2IThunk = true;
2147
if (resolvedMethodSymbol &&
2148
(resolvedMethodSymbol->getRecognizedMethod() == TR::java_lang_invoke_ComputedCalls_dispatchDirect ||
2149
resolvedMethodSymbol->getRecognizedMethod() == TR::com_ibm_jit_JITHelpers_dispatchComputedStaticCall))
2150
mayReachJ2IThunk = false;
2151
if (mayReachJ2IThunk && dispatchOp.isCallOp())
2152
{
2153
// Bad news.
2154
//
2155
// icallVMprJavaSendPatchupVirtual requires that a virtual call site
2156
// either (1) uses a TR::InstOpCode::CALLMem with a fixed VFT offset, or (2) puts the
2157
// VFT index into r8 and uses a TR::InstOpCode::CALLImm4 with a fixed call target.
2158
// We have neither a fixed VFT offset nor a fixed call target!
2159
// Adding support for TR::InstOpCode::CALLReg is difficult because the instruction is
2160
// a different length, making it hard to back up and disassemble it.
2161
//
2162
// Therefore, we cannot have the return address pointing after a
2163
// TR::InstOpCode::CALLReg instruction. Instead, we use a TR::InstOpCode::CALLImm4 with a fixed
2164
// displacement to get to out-of-line instructions that do a TR::InstOpCode::JMPReg.
2165
2166
// Mainline call
2167
//
2168
TR::LabelSymbol *jmpLabel = TR::LabelSymbol::create(cg()->trHeapMemory(),cg());
2169
callInstr = generateLabelInstruction(TR::InstOpCode::CALLImm4, callNode, jmpLabel, cg());
2170
2171
// Jump outlined
2172
//
2173
{
2174
TR_OutlinedInstructionsGenerator og(jmpLabel, callNode, cg());
2175
generateRegInstruction(TR::InstOpCode::JMPReg, callNode, targetAddressReg, cg());
2176
og.endOutlinedInstructionSequence();
2177
}
2178
2179
// The targetAddressReg doesn't appear to be used in mainline code, so
2180
// register assignment may do weird things like spill it. We'd prefer it
2181
// to stay in a register, though we don't care which.
2182
//
2183
TR::RegisterDependencyConditions *dependencies = site.getPostConditionsUnderConstruction();
2184
if (targetAddressReg && targetAddressReg->getRegisterPair())
2185
{
2186
dependencies->unionPreCondition(targetAddressReg->getRegisterPair()->getHighOrder(), TR::RealRegister::NoReg, cg());
2187
dependencies->unionPreCondition(targetAddressReg->getRegisterPair()->getLowOrder(), TR::RealRegister::NoReg, cg());
2188
}
2189
else
2190
dependencies->unionPreCondition(targetAddressReg, TR::RealRegister::NoReg, cg());
2191
}
2192
else
2193
{
2194
callInstr = generateRegInstruction(dispatchOp.getOpCodeValue(), callNode, targetAddressReg, cg());
2195
}
2196
}
2197
2198
callInstr->setNeedsGCMap(site.getPreservedRegisterMask());
2199
2200
TR_ASSERT_FATAL(
2201
!site.getSymbolReference()->isUnresolved() || site.getMethodSymbol()->isInterface(),
2202
"buildVFTCall: unresolved virtual site");
2203
2204
if (cg()->enableSinglePrecisionMethods() &&
2205
comp()->getJittedMethodSymbol()->usesSinglePrecisionMode())
2206
{
2207
auto cds = cg()->findOrCreate2ByteConstant(callNode, SINGLE_PRECISION_ROUND_TO_NEAREST);
2208
generateMemInstruction(TR::InstOpCode::LDCWMem, callNode, generateX86MemoryReference(cds, cg()), cg());
2209
}
2210
2211
return callInstr;
2212
}
2213
2214
TR::Register *J9::X86::PrivateLinkage::buildCallPostconditions(TR::X86CallSite &site)
2215
{
2216
TR::RegisterDependencyConditions *dependencies = site.getPostConditionsUnderConstruction();
2217
TR_ASSERT(dependencies != NULL, "assertion failure");
2218
2219
const TR::X86LinkageProperties &properties = getProperties();
2220
const TR::RealRegister::RegNum noReg = TR::RealRegister::NoReg;
2221
TR::Node *callNode = site.getCallNode();
2222
TR::MethodSymbol *methodSymbol = callNode->getSymbolReference()->getSymbol()->castToMethodSymbol();
2223
bool calleePreservesRegisters = methodSymbol->preservesAllRegisters();
2224
2225
#ifdef J9VM_OPT_JAVA_CRYPTO_ACCELERATION
2226
// AES helpers actually use Java private linkage and do not preserve all
2227
// registers. This should really be handled by the linkage.
2228
//
2229
if (cg()->enableAESInHardwareTransformations() && methodSymbol && methodSymbol->isHelper())
2230
{
2231
TR::SymbolReference *methodSymRef = callNode->getSymbolReference();
2232
switch (methodSymRef->getReferenceNumber())
2233
{
2234
case TR_doAESInHardwareInner:
2235
case TR_expandAESKeyInHardwareInner:
2236
calleePreservesRegisters = false;
2237
break;
2238
2239
default:
2240
break;
2241
}
2242
}
2243
#endif
2244
2245
// We have to be careful to allocate the return register after the
2246
// dependency conditions for the other killed registers have been set up,
2247
// otherwise it will be marked as interfering with them.
2248
2249
// Figure out which is the return register
2250
//
2251
TR::RealRegister::RegNum returnRegIndex, highReturnRegIndex=noReg;
2252
TR_RegisterKinds returnKind;
2253
switch(callNode->getDataType())
2254
{
2255
default:
2256
TR_ASSERT(0, "Unrecognized call node data type: #%d", (int)callNode->getDataType());
2257
// fall through
2258
case TR::NoType:
2259
returnRegIndex = noReg;
2260
returnKind = TR_NoRegister;
2261
break;
2262
case TR::Int64:
2263
if (cg()->usesRegisterPairsForLongs())
2264
{
2265
returnRegIndex = getProperties().getLongLowReturnRegister();
2266
highReturnRegIndex = getProperties().getLongHighReturnRegister();
2267
returnKind = TR_GPR;
2268
break;
2269
}
2270
// else fall through
2271
case TR::Int8:
2272
case TR::Int16:
2273
case TR::Int32:
2274
case TR::Address:
2275
returnRegIndex = getProperties().getIntegerReturnRegister();
2276
returnKind = TR_GPR;
2277
break;
2278
case TR::Float:
2279
case TR::Double:
2280
returnRegIndex = getProperties().getFloatReturnRegister();
2281
returnKind = TR_FPR;
2282
break;
2283
}
2284
2285
// Find the registers that are already in the postconditions so we don't add them again.
2286
// (The typical example is the ramMethod.)
2287
//
2288
int32_t gprsAlreadyPresent = TR::RealRegister::noRegMask;
2289
TR::RegisterDependencyGroup *group = dependencies->getPostConditions();
2290
for (int i = 0; i < dependencies->getAddCursorForPost(); i++)
2291
{
2292
TR::RegisterDependency *dep = group->getRegisterDependency(i);
2293
TR_ASSERT(dep->getRealRegister() <= TR::RealRegister::LastAssignableGPR, "Currently, only GPRs can be added to call postcondition before buildCallPostconditions; found %s", cg()->getDebug()->getRealRegisterName(dep->getRealRegister()-1));
2294
gprsAlreadyPresent |= TR::RealRegister::gprMask((TR::RealRegister::RegNum)dep->getRealRegister());
2295
}
2296
2297
// Add postconditions indicating the state of arg regs (other than the return reg)
2298
//
2299
if (calleePreservesRegisters)
2300
{
2301
// For all argument-register preconditions, add an identical
2302
// postcondition, thus indicating that the arguments are preserved.
2303
// Note: this assumes the postcondition regdeps have preconditions too; see COPY_PRECONDITIONS_TO_POSTCONDITIONS.
2304
//
2305
TR::RegisterDependencyGroup *preConditions = dependencies->getPreConditions();
2306
for (int i = 0; i < dependencies->getAddCursorForPre(); i++)
2307
{
2308
TR::RegisterDependency *preCondition = preConditions->getRegisterDependency(i);
2309
TR::RealRegister::RegNum regIndex = preCondition->getRealRegister();
2310
2311
if (regIndex <= TR::RealRegister::LastAssignableGPR && (gprsAlreadyPresent & TR::RealRegister::gprMask(regIndex)))
2312
continue;
2313
2314
if (
2315
regIndex != returnRegIndex && regIndex != highReturnRegIndex
2316
&& (properties.isIntegerArgumentRegister(regIndex) || properties.isFloatArgumentRegister(regIndex))
2317
){
2318
dependencies->addPostCondition(preCondition->getRegister(), regIndex, cg());
2319
}
2320
}
2321
}
2322
else
2323
{
2324
// Kill all non-preserved int and float regs besides the return register,
2325
// by assigning them to unused virtual registers
2326
//
2327
TR::RealRegister::RegNum regIndex;
2328
2329
for (regIndex = TR::RealRegister::FirstGPR; regIndex <= TR::RealRegister::LastAssignableGPR; regIndex = (TR::RealRegister::RegNum)(regIndex + 1))
2330
{
2331
// Skip non-assignable registers
2332
//
2333
if (machine()->getRealRegister(regIndex)->getState() == TR::RealRegister::Locked)
2334
continue;
2335
2336
// Skip registers already present
2337
if (gprsAlreadyPresent & TR::RealRegister::gprMask(regIndex))
2338
continue;
2339
2340
if ((regIndex != returnRegIndex) && (regIndex != highReturnRegIndex) && !properties.isPreservedRegister(regIndex))
2341
{
2342
TR::Register *dummy = cg()->allocateRegister(TR_GPR);
2343
dummy->setPlaceholderReg();
2344
dependencies->addPostCondition(dummy, regIndex, cg());
2345
cg()->stopUsingRegister(dummy);
2346
}
2347
}
2348
2349
TR_LiveRegisters *lr = cg()->getLiveRegisters(TR_FPR);
2350
if(!lr || lr->getNumberOfLiveRegisters() > 0)
2351
{
2352
for (regIndex = TR::RealRegister::FirstXMMR; regIndex <= TR::RealRegister::LastXMMR; regIndex = (TR::RealRegister::RegNum)(regIndex + 1))
2353
{
2354
TR_ASSERT(regIndex != highReturnRegIndex, "highReturnRegIndex should not be an XMM register.");
2355
if ((regIndex != returnRegIndex) && !properties.isPreservedRegister(regIndex))
2356
{
2357
TR::Register *dummy = cg()->allocateRegister(TR_FPR);
2358
dummy->setPlaceholderReg();
2359
dependencies->addPostCondition(dummy, regIndex, cg());
2360
cg()->stopUsingRegister(dummy);
2361
}
2362
}
2363
}
2364
}
2365
2366
// Preserve the VM thread register
2367
//
2368
dependencies->addPostCondition(cg()->getMethodMetaDataRegister(), getProperties().getMethodMetaDataRegister(), cg());
2369
2370
// Now that everything is dead, we can allocate the return register without
2371
// interference
2372
//
2373
TR::Register *returnRegister;
2374
if (highReturnRegIndex)
2375
{
2376
TR::Register *lo = cg()->allocateRegister(returnKind);
2377
TR::Register *hi = cg()->allocateRegister(returnKind);
2378
returnRegister = cg()->allocateRegisterPair(lo, hi);
2379
dependencies->addPostCondition(lo, returnRegIndex, cg());
2380
dependencies->addPostCondition(hi, highReturnRegIndex, cg());
2381
}
2382
else if (returnRegIndex)
2383
{
2384
TR_ASSERT(returnKind != TR_NoRegister, "assertion failure");
2385
if (callNode->getDataType() == TR::Address)
2386
{
2387
returnRegister = cg()->allocateCollectedReferenceRegister();
2388
}
2389
else
2390
{
2391
returnRegister = cg()->allocateRegister(returnKind);
2392
if (callNode->getDataType() == TR::Float)
2393
returnRegister->setIsSinglePrecision();
2394
}
2395
dependencies->addPostCondition(returnRegister, returnRegIndex, cg());
2396
}
2397
else
2398
{
2399
returnRegister = NULL;
2400
}
2401
2402
return returnRegister;
2403
}
2404
2405
2406
void J9::X86::PrivateLinkage::buildVPIC(TR::X86CallSite &site, TR::LabelSymbol *entryLabel, TR::LabelSymbol *doneLabel)
2407
{
2408
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2409
TR_ASSERT(doneLabel, "a doneLabel is required for VPIC dispatches");
2410
2411
if (entryLabel)
2412
generateLabelInstruction(TR::InstOpCode::label, site.getCallNode(), entryLabel, cg());
2413
2414
int32_t numVPicSlots = VPicParameters.defaultNumberOfSlots;
2415
2416
TR::SymbolReference *callHelperSymRef =
2417
cg()->symRefTab()->findOrCreateRuntimeHelper(TR_X86populateVPicSlotCall, true, true, false);
2418
2419
if (numVPicSlots > 1)
2420
{
2421
TR::X86PICSlot emptyPicSlot = TR::X86PICSlot(VPicParameters.defaultSlotAddress, NULL);
2422
emptyPicSlot.setNeedsShortConditionalBranch();
2423
emptyPicSlot.setJumpOnNotEqual();
2424
emptyPicSlot.setNeedsPicSlotAlignment();
2425
emptyPicSlot.setHelperMethodSymbolRef(callHelperSymRef);
2426
emptyPicSlot.setGenerateNextSlotLabelInstruction();
2427
2428
// Generate all slots except the last
2429
// (short branch to next slot, jump to doneLabel)
2430
//
2431
while (--numVPicSlots)
2432
{
2433
TR::LabelSymbol *nextSlotLabel = generateLabelSymbol(cg());
2434
buildPICSlot(emptyPicSlot, nextSlotLabel, doneLabel, site);
2435
}
2436
}
2437
2438
// Generate the last slot
2439
// (long branch to lookup snippet, fall through to doneLabel)
2440
//
2441
TR::X86PICSlot lastPicSlot = TR::X86PICSlot(VPicParameters.defaultSlotAddress, NULL, false);
2442
lastPicSlot.setJumpOnNotEqual();
2443
lastPicSlot.setNeedsPicSlotAlignment();
2444
lastPicSlot.setNeedsLongConditionalBranch();
2445
2446
if (comp()->target().is32Bit())
2447
{
2448
lastPicSlot.setNeedsPicCallAlignment();
2449
}
2450
2451
lastPicSlot.setHelperMethodSymbolRef(callHelperSymRef);
2452
2453
TR::LabelSymbol *snippetLabel = generateLabelSymbol(cg());
2454
2455
TR::Instruction *slotPatchInstruction = buildPICSlot(lastPicSlot, snippetLabel, NULL, site);
2456
2457
TR::Instruction *startOfPicInstruction = site.getFirstPICSlotInstruction();
2458
2459
TR::X86PicDataSnippet *snippet = new (trHeapMemory()) TR::X86PicDataSnippet(
2460
VPicParameters.defaultNumberOfSlots,
2461
startOfPicInstruction,
2462
snippetLabel,
2463
doneLabel,
2464
site.getSymbolReference(),
2465
slotPatchInstruction,
2466
site.getThunkAddress(),
2467
false,
2468
cg());
2469
2470
snippet->gcMap().setGCRegisterMask(site.getPreservedRegisterMask());
2471
cg()->addSnippet(snippet);
2472
2473
cg()->incPicSlotCountBy(VPicParameters.defaultNumberOfSlots);
2474
cg()->reserveNTrampolines(VPicParameters.defaultNumberOfSlots);
2475
}
2476
2477
void J9::X86::PrivateLinkage::buildInterfaceDispatchUsingLastITable (TR::X86CallSite &site, int32_t numIPicSlots, TR::X86PICSlot &lastPicSlot, TR::Instruction *&slotPatchInstruction, TR::LabelSymbol *doneLabel, TR::LabelSymbol *lookupDispatchSnippetLabel, TR_OpaqueClassBlock *declaringClass, uintptr_t itableIndex )
2478
{
2479
static char *breakBeforeInterfaceDispatchUsingLastITable = feGetEnv("TR_breakBeforeInterfaceDispatchUsingLastITable");
2480
2481
TR_J9VMBase *fej9 = (TR_J9VMBase *)(fe());
2482
2483
TR::Node *callNode = site.getCallNode();
2484
2485
TR::LabelSymbol *lastITableTestLabel = generateLabelSymbol(cg());
2486
TR::LabelSymbol *lastITableDispatchLabel = generateLabelSymbol(cg());
2487
2488
if (numIPicSlots >= 1)
2489
{
2490
// The last PIC slot looks much like the others
2491
//
2492
lastPicSlot.setNeedsShortConditionalBranch();
2493
lastPicSlot.setNeedsJumpToDone();
2494
slotPatchInstruction = buildPICSlot(lastPicSlot, lastITableTestLabel, doneLabel, site);
2495
}
2496
else
2497
{
2498
// The sequence below requires control to flow straight to lastITableTestLabel
2499
// TODO: This is lame. Without IPIC slots, generating this sequence
2500
// upside-down is sub-optimal.
2501
//
2502
generateLabelInstruction(TR::InstOpCode::JMP4, callNode, lastITableTestLabel, cg());
2503
}
2504
2505
TR::Register *vftReg = site.evaluateVFT();
2506
TR::Register *scratchReg = cg()->allocateRegister();
2507
TR::Register *vtableIndexReg = cg()->allocateRegister();
2508
TR::RegisterDependencyConditions* vtableIndexRegDeps = generateRegisterDependencyConditions(1, 0, cg());
2509
vtableIndexRegDeps->addPreCondition(vtableIndexReg, getProperties().getVTableIndexArgumentRegister(), cg());
2510
// Now things get weird.
2511
//
2512
// We're going to generate the lastITable sequence upside-down.
2513
// We'll generate the dispatch sequence first, and THEN we'll generate
2514
// the test that guards that dispatch.
2515
//
2516
// Why?
2517
//
2518
// 1) You can't call a j2i thunk with your return address pointing at a
2519
// TR::InstOpCode::CALLMem unless that TR::InstOpCode::CALLMem has a displacement which equals the jit
2520
// vtable offset. We don't know the vtable offset statically, so we
2521
// must pass it in r8 and leave the return address pointing at a CALLImm.
2522
//
2523
// 2) PICBuilder needs to work with or without this lastITable dispatch.
2524
// To avoid extreme complexity in PICBuilder, that means the return
2525
// address should point at a sequence that looks enough like a PIC
2526
// slot that PICBuilder can act the same for both.
2527
//
2528
// 3) Given 1&2 above, the natural thing to do would be to put the
2529
// dispatch sequence out of line. However, we expect this to be
2530
// performance-critical, so we want it nearby. It just so happens
2531
// that the previous PIC slot ends with an unconditional jump, so we
2532
// can just stuff the dispatch sequence right between the last PIC
2533
// slot and the lastITable test.
2534
//
2535
// The final layout looks like this:
2536
//
2537
// jne lastITableTest ; PREVIOUS PIC SLOT
2538
// call xxx ; PREVIOUS PIC SLOT
2539
// jmp done ; PREVIOUS PIC SLOT
2540
// lastITableDispatch:
2541
// mov r8, sizeof(J9Class)
2542
// sub r8, [rdi + ITableSlotOffset] ; r8 = jit vtable offset
2543
// jmp [vft + r8] ; vtable dispatch
2544
// lastITableTest:
2545
// mov rdi, [vft + lastITableOffset] ; cached ITable
2546
// cmp [rdi + interfaceClassOffset], interfaceClass ; check if it's our interface class
2547
// jne lookupDispatchSnippet ; if not, jump to the slow path
2548
// call lastITableDispatch ; if so, call the dispatch sequence with return address pointing here
2549
// done:
2550
// ...
2551
2552
// The dispatch sequence
2553
//
2554
2555
TR::Instruction *lastITableDispatchStart = generateLabelInstruction( TR::InstOpCode::label, callNode, lastITableDispatchLabel, cg());
2556
generateRegImmInstruction( TR::InstOpCode::MOV4RegImm4, callNode, vtableIndexReg, fej9->getITableEntryJitVTableOffset(), cg());
2557
generateRegMemInstruction( TR::InstOpCode::SUBRegMem(), callNode, vtableIndexReg, generateX86MemoryReference(scratchReg, fej9->convertITableIndexToOffset(itableIndex), cg()), cg());
2558
buildVFTCall(site, TR::InstOpCode::JMPMem, NULL, generateX86MemoryReference(vftReg, vtableIndexReg, 0, cg()));
2559
2560
// Without PIC slots, lastITableDispatchStart takes the place of various "first instruction" pointers
2561
//
2562
if (!site.getFirstPICSlotInstruction())
2563
site.setFirstPICSlotInstruction(lastITableDispatchStart);
2564
if (!slotPatchInstruction)
2565
slotPatchInstruction = lastITableDispatchStart;
2566
2567
// The test sequence
2568
//
2569
generateLabelInstruction(TR::InstOpCode::label, callNode, lastITableTestLabel, cg());
2570
if (breakBeforeInterfaceDispatchUsingLastITable)
2571
generateInstruction(TR::InstOpCode::INT3, callNode, cg());
2572
generateRegMemInstruction(TR::InstOpCode::LRegMem(), callNode, scratchReg, generateX86MemoryReference(vftReg, (int32_t)fej9->getOffsetOfLastITableFromClassField(), cg()), cg());
2573
bool use32BitInterfacePointers = comp()->target().is32Bit();
2574
if (comp()->useCompressedPointers() /* actually compressed object headers */)
2575
{
2576
// The field is 8 bytes, but only 4 matter
2577
use32BitInterfacePointers = true;
2578
}
2579
if (use32BitInterfacePointers)
2580
{
2581
// The field is 8 bytes, but only 4 matter
2582
generateMemImmInstruction(TR::InstOpCode::CMP4MemImm4,
2583
callNode,
2584
generateX86MemoryReference(scratchReg, fej9->getOffsetOfInterfaceClassFromITableField(), cg()),
2585
(int32_t)(intptr_t)declaringClass,
2586
cg());
2587
}
2588
else
2589
{
2590
TR_ASSERT(comp()->target().is64Bit(), "Only 64-bit path should reach here.");
2591
TR::Register *interfaceClassReg = vtableIndexReg;
2592
auto cds = cg()->findOrCreate8ByteConstant(site.getCallNode(), (intptr_t)declaringClass);
2593
TR::MemoryReference *interfaceClassAddr = generateX86MemoryReference(cds, cg());
2594
generateRegMemInstruction(TR::InstOpCode::LRegMem(), callNode, interfaceClassReg, interfaceClassAddr, cg());
2595
generateMemRegInstruction(TR::InstOpCode::CMPMemReg(),
2596
callNode,
2597
generateX86MemoryReference(scratchReg, fej9->getOffsetOfInterfaceClassFromITableField(), cg()),
2598
interfaceClassReg, cg());
2599
}
2600
2601
generateLongLabelInstruction(TR::InstOpCode::JNE4, callNode, lookupDispatchSnippetLabel, cg()); // PICBuilder needs this to have a 4-byte offset
2602
if (comp()->target().is32Bit())
2603
generatePaddingInstruction(3, callNode, cg());
2604
generateLabelInstruction(TR::InstOpCode::CALLImm4, callNode, lastITableDispatchLabel, vtableIndexRegDeps, cg());
2605
2606
cg()->stopUsingRegister(vtableIndexReg);
2607
TR::RealRegister::RegNum otherScratchRegister = getProperties().getJ9MethodArgumentRegister(); // scratch reg other than the vtable index reg
2608
site.addPostCondition(scratchReg, otherScratchRegister);
2609
site.addPostCondition(vftReg, TR::RealRegister::NoReg);
2610
}
2611
2612